Extract (JS-embedded) table from HTML in R (or Python)
New here? Learn about Bountify and follow @bountify to get notified of new bounties! x

The task is straightforward: the main table on https://marketchameleon.com/Screeners/OptionTrades (xpath = "//table[@id='opt_trades_screener_tbl']") cannot be scraped using the usual techniques (curl, wget, xml2, BeautifulSoup), but Mozilla add-ons such as Download Table as CSV can find it easily. I am looking for a programmatic way, preferably in R or Python, to scrape all tabs of the main table. Speed is not really an issue, although it would be good if it took under ~1 min.

Is Node.js with Puppeteer an option, or strictly R/Python?
B44ken 2 months ago
R/Python definitely preferred, but anything else straightforward is welcome :)
optionsguy 2 months ago
2 months ago

Crowdsource coding tasks.

1 Solution


One way to solve it would be to start headless Chrome from R or Python and then inject the scraping JS code via WebSocket and the DevTools Protocol.
Here is proof-of-concept scraping code (go to marketchameleon.com and paste it into the browser's console):

// Decrypted row batches, one entry per captured XHR response. Read by makeDA().
var da = [];

// Native XMLHttpRequest.prototype.open, kept so the wrapper below can delegate to it.
var oldXHROpen = window.XMLHttpRequest.prototype.open;

/**
 * Replacement for XMLHttpRequest.prototype.open that spies on every response.
 *
 * A "load" listener is attached to each request. When the response is JSON and
 * carries a truthy `cipher` property, the cipher is passed to `dcr_json`
 * (not defined in this snippet; presumably a global supplied by the page -
 * confirm), the result is appended to `da`, and makeDA() refreshes the
 * download link. All arguments are forwarded untouched to the native open().
 *
 * @returns {*} Whatever the native open() returns.
 */
window.XMLHttpRequest.prototype.open = function (method, url, async, user, password) {
  this.addEventListener("load", function () {
    var payload;
    try {
      // With responseType "json" the body is already parsed; only strings
      // need JSON.parse.
      payload = typeof this.response === "string" ? JSON.parse(this.response) : this.response;
    } catch (err) {
      // The hook sees every XHR on the page; bodies that are not JSON
      // (HTML fragments, empty responses, ...) are simply not ours to handle.
      return;
    }
    // `payload` can legitimately be null (JSON "null" or an empty body).
    if (payload && payload.cipher) {
      da.push(dcr_json(payload.cipher));
      makeDA();
    }
  });
  return oldXHROpen.apply(this, arguments);
};

/**
 * Kick off the capture by changing the table's page-length <select>, which
 * makes the page issue the XHR that the hook above intercepts.
 *
 * Picks the option at index 2 (presumably the largest page size offered -
 * confirm against the live page) and fires a "change" event on the element.
 *
 * @throws {Error} If the page-length <select> is not present in the document.
 */
function strt() {
  var lengthSelect = document.getElementsByName("opt_trades_screener_tbl_length")[0];
  if (!lengthSelect) {
    throw new Error(
      'Element named "opt_trades_screener_tbl_length" not found; ' +
        "run this snippet on the OptionTrades screener page."
    );
  }
  lengthSelect.selectedIndex = 2;
  // Same flags as the former initEvent("change", false, true) call, using the
  // non-deprecated Event constructor.
  lengthSelect.dispatchEvent(new Event("change", { bubbles: false, cancelable: true }));
}
/**
 * Serialise everything captured so far in the global `da` array and expose it
 * as a downloadable text file.
 *
 * Each row is stripped of four properties (EventHtml,
 * OptionExpirationMontageLink, Premium, SymbolLink) and written as one JSON
 * object per line. The rows in `da` are modified in place by the deletes.
 *
 * The function runs once per captured response, so the banner and its link are
 * created on the first call only and reused afterwards; later calls just point
 * the existing link at the refreshed data instead of stacking another banner
 * on the page.
 */
function makeDA() {
  var text = "";
  da.forEach(function (batch) {
    batch.forEach(function (row) {
      delete row.EventHtml;
      delete row.OptionExpirationMontageLink;
      delete row.Premium;
      delete row.SymbolLink;
      text += JSON.stringify(row) + "\n";
    });
  });

  var linkId = "da_json_download_link";
  var link = document.getElementById(linkId);
  if (!link) {
    var banner = document.createElement("div");
    banner.style.width = "100%";
    banner.style.height = "5em";
    banner.style.lineHeight = "5em";
    banner.style.fontSize = "200%";
    banner.style.position = "fixed";
    // Explicit offsets so the banner's placement does not depend on its
    // in-flow position at the end of <body>.
    banner.style.top = "0";
    banner.style.left = "0";
    banner.style.zIndex = "100000";
    banner.style.background = "#fff";
    banner.style.textAlign = "center";

    link = document.createElement("a");
    link.id = linkId;
    link.textContent = "Download JSON_data.txt File";
    link.setAttribute("download", "JSON_data.txt");

    banner.appendChild(link);
    document.body.appendChild(banner);
  }
  // Always refresh the payload so the single link reflects all batches so far.
  link.setAttribute("href", "data:text/plain;charset=utf-8," + encodeURIComponent(text));
}

// Start the capture as soon as the snippet is pasted.
strt();