Parse multiple pages with PhantomJS
I have written code that parses a page for URLs. Next, I need to parse the href of every URL found in <div class="holder"><a href="these url-s"></a></div>
and write them to an output file, separated by commas.
So far I have written the code below. It is able to find the URLs that need to be parsed and collects them, comma-separated, in a file called output2.txt.
/*
 * PhantomJS script: opens `url`, waits until the page's network activity has
 * been idle for `resourceWait` ms (or until `maxRenderWait` ms have passed
 * since the page loaded), then writes the href of every <a> found inside
 * `.urldiv` elements to `outputPath` as a single comma-separated line.
 *
 * Written in ES5 on purpose: PhantomJS does not support `let`/`const`/arrows.
 */
var fs = require('fs');
var page = require('webpage').create();

var resourceWait = 300;      // ms of network silence before rendering
var maxRenderWait = 10000;   // ms after load before rendering is forced
var url = 'url parse href-s from';
var outputPath = 'output2.txt';
var jqueryUrl = "http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js";

var count = 0;               // number of resource requests still in flight
var forcedRenderTimeout;
var renderTimeout;
var hasRendered = false;     // guards doRender against running twice

page.viewportSize = { width: 1280, height: 1024 };

/**
 * Injects jQuery into the page, collects the link targets and writes them to
 * `outputPath`, then terminates PhantomJS.
 *
 * Runs at most once: both the idle timer and the forced timer can call it,
 * and loading jQuery below produces new resource events that would otherwise
 * re-arm the idle timer and trigger a second render.
 */
function doRender() {
    if (hasRendered) {
        return;
    }
    hasRendered = true;
    clearTimeout(renderTimeout);
    clearTimeout(forcedRenderTimeout);

    page.includeJs(jqueryUrl, function () {
        // NOTE(review): the selector must match the class of the container
        // element on the target page - confirm `.urldiv` is the right one.
        var hrefs = page.evaluate(function () {
            return $('.urldiv').find('a')
                .map(function () { return this.href; })
                .get()
                .join(',');
        });

        // evaluate() presumably yields null when the in-page code throws
        // (e.g. jQuery did not load); write an empty file instead of
        // passing a non-string to fs.write.
        fs.write(outputPath, hrefs || '', 'w');
        phantom.exit();
    });
}

// A new request means the page is still busy: cancel any pending idle render.
page.onResourceRequested = function (req) {
    count += 1;
    clearTimeout(renderTimeout);
};

// When the last outstanding request finishes, start the idle countdown.
page.onResourceReceived = function (res) {
    if (!res.stage || res.stage === 'end') {
        count -= 1;
        if (count === 0) {
            renderTimeout = setTimeout(doRender, resourceWait);
        }
    }
};

page.open(url, function (status) {
    if (status !== "success") {
        console.log('Unable to load ' + url);
        phantom.exit(1);
        return;
    }

    // Safety net: render even if the page never goes fully idle.
    forcedRenderTimeout = setTimeout(function () {
        console.log(count);
        doRender();
    }, maxRenderWait);
});
Thanks in advance,
Martti
Comments
Post a Comment