功能其實很簡單,通過 phantomjs.exe 采集 url 加載的資源,通過子進程的方式,啟動 nodejs 加載所有的資源,對于 css 的資源,匹配 css 內容,下載里面的 url 資源
當然功能還是很簡單的,在響應式設計和異步加載的情況下,還是有很多資源沒有能夠下載,需要根據實際情況處理下
首先當(dāng)然是下載 nodejs 和 phantomjs
下面是 phantomjs.exe 執行的 down.js
/**
 * down.js - PhantomJS entry script.
 *
 * Opens the URL given on the command line, records the URL of every
 * http(s) resource the page receives while loading, then spawns
 * `node --harmony downHtml.js <comma-joined URLs>` to download them.
 * The child's stdout/stderr are echoed to the PhantomJS console.
 *
 * Usage: phantomjs down.js <some URL>
 *
 * NOTE: PhantomJS runs an ES5 engine, so this script sticks to `var` and
 * function expressions.
 */
var page = require('webpage').create();
var system = require('system');
var spawn = require('child_process').spawn;

if (system.args.length === 1) {
  console.log('Usage: down.js <some URL>');
  phantom.exit(1);
} else {
  var urls = [];
  page.address = system.args[1];

  page.onResourceReceived = function (res) {
    // Record each resource once, on its 'start' stage. Only http(s) URLs
    // are kept: the list is handed to node as ONE comma-separated argument,
    // and data: URIs (which contain commas) would corrupt it.
    // NOTE(review): an http(s) URL that itself contains a comma would still
    // be split by the receiver; the argument format would need to change
    // on both sides to fix that.
    if (res.stage === 'start' && /^https?:\/\//i.test(res.url)) {
      urls.push(res.url);
    }
  };

  page.open(page.address, function (status) {
    if (status !== 'success') {
      console.log('FAIL to load the address');
      phantom.exit(1);
      return;
    }

    console.log('down resource ' + urls.length + ' urls.');

    var child = spawn('node', ['--harmony', 'downHtml.js', urls.join(',')]);

    child.stdout.on('data', function (data) {
      console.log(data);
    });
    child.stderr.on('data', function (data) {
      console.log(data);
    });
    child.on('exit', function (code) {
      // Propagate the downloader's exit status instead of always exiting 0.
      phantom.exit(code || 0);
    });
  });
}
// Below is the corresponding downHtml.js, which is run by node.
"use strict";

/**
 * downHtml.js - resource downloader, run by node.
 *
 * Usage: node downHtml.js <url1,url2,...>
 *
 * Every http(s) URL in the comma-separated argument is saved under
 * <cwd>/<hostname>/<pathname>. A pathname that is empty or ends in '/' is
 * stored as index.html inside that directory. When a resource looks like a
 * stylesheet (URL contains '.css' or Content-Type contains 'text/css'), the
 * stylesheet is fetched again as text and every url(...) reference found in
 * it is downloaded as well.
 */
var fs = require('fs');
var http = require('http');
var https = require('https');
var path = require('path');
var r_url = require('url');

var CWD = process.cwd();

// Directories already known to exist; saves repeated mkdir calls.
var dirCache = Object.create(null);

// URLs whose download has already been started (value 1).
var isDownMap = Object.create(null);

// Matches url(...) tokens in CSS. Group 1 is the optional quote character,
// group 2 is the referenced URL.
var cssUrlPattern = /url\(\s*(['"]?)(.*?)\1\s*\)/gi;

/**
 * @param {string} candidate
 * @returns {boolean} true when candidate starts with http:// or https://
 */
function isHttpUrl(candidate) {
  return /^https?:\/\//i.test(candidate);
}

/**
 * Picks the core module able to request the given URL.
 * @param {string} resourceUrl
 * @returns {object} the `https` module for https: URLs, otherwise `http`
 */
function clientFor(resourceUrl) {
  return /^https:/i.test(resourceUrl) ? https : http;
}

/**
 * Creates a directory and any missing parents.
 *
 * @param {string} pathStr - directory to create
 * @param {function(Error=)} callback - called with no argument on success
 *   (including "already exists"), or with the error on failure
 */
function makedir(pathStr, callback) {
  if (dirCache[pathStr] === 1) {
    callback();
    return;
  }
  fs.mkdir(pathStr, function (err) {
    if (!err || err.code === 'EEXIST') {
      dirCache[pathStr] = 1;
      callback();
      return;
    }
    var parent = path.dirname(pathStr);
    if (err.code !== 'ENOENT' || parent === pathStr) {
      callback(err);
      return;
    }
    // A parent directory is missing: create it first, then retry this one.
    makedir(parent, function (parentErr) {
      if (parentErr) {
        callback(parentErr);
        return;
      }
      makedir(pathStr, callback);
    });
  });
}

/**
 * Maps a resource URL to the local file it is saved to.
 *
 * @param {string} resourceUrl
 * @returns {?string} absolute file path, or null when the URL is not
 *   http(s) or the resulting path would fall outside <cwd>/<hostname>
 */
function localPathFor(resourceUrl) {
  if (!isHttpUrl(resourceUrl)) {
    return null;
  }
  var uo = r_url.parse(resourceUrl);
  if (!uo.hostname) {
    return null;
  }
  var pathname = uo.pathname || '/';
  if (pathname.charAt(pathname.length - 1) === '/') {
    pathname += 'index.html';
  }
  var hostDir = path.resolve(CWD, uo.hostname);
  var target = path.resolve(hostDir, '.' + pathname);
  // The path is derived from remote content: refuse anything that would
  // land outside the per-host directory (e.g. via '..' segments).
  if (path.dirname(hostDir) !== path.resolve(CWD) ||
      target.indexOf(hostDir + path.sep) !== 0) {
    return null;
  }
  return target;
}

/**
 * Collects the resources referenced through url(...) in a stylesheet.
 *
 * @param {string} cssText - stylesheet source
 * @param {string} baseUrl - URL of the stylesheet, used to resolve
 *   relative references
 * @returns {string[]} absolute http(s) URLs, de-duplicated, in order of
 *   first appearance; non-http(s) references such as data: URIs are dropped
 */
function extractCssUrls(cssText, baseUrl) {
  var found = [];
  var m;
  cssUrlPattern.lastIndex = 0; // the /g regex is shared and stateful
  while ((m = cssUrlPattern.exec(cssText)) !== null) {
    if (m[2]) {
      var absolute = r_url.resolve(baseUrl, m[2]);
      if (isHttpUrl(absolute) && found.indexOf(absolute) === -1) {
        found.push(absolute);
      }
    }
  }
  return found;
}

/**
 * Downloads one resource to its local path. Failures are logged and do not
 * abort the other downloads.
 *
 * @param {string} resourceUrl
 * @param {function(object)=} onResponse - optional hook that receives the
 *   response object before its body is piped to disk
 */
function downloadToFile(resourceUrl, onResponse) {
  var filepath = localPathFor(resourceUrl);
  if (!filepath) {
    console.error('skip (no usable local path): ' + resourceUrl);
    return;
  }
  makedir(path.dirname(filepath), function (dirErr) {
    if (dirErr) {
      console.error('FAIL to create directory for ' + filepath + ': ' + dirErr.message);
      return;
    }
    var req = clientFor(resourceUrl).get(resourceUrl, function (res) {
      if (onResponse) {
        onResponse(res);
      }
      var out = fs.createWriteStream(filepath);
      out.on('error', function (writeErr) {
        console.error('FAIL to write ' + filepath + ': ' + writeErr.message);
      });
      res.pipe(out);
    });
    req.on('error', function (reqErr) {
      console.error('FAIL to download ' + resourceUrl + ': ' + reqErr.message);
    });
  });
}

/**
 * Fetches a stylesheet as text and downloads every resource it references
 * via url(...), skipping URLs whose download was already started.
 *
 * @param {string} URL - address of the stylesheet
 */
var downImgFromCss = function (URL) {
  var req = clientFor(URL).get(URL, function (res) {
    var body = '';
    res.setEncoding('utf8');
    res.on('data', function (chunk) {
      body += chunk;
    });
    res.on('end', function () {
      extractCssUrls(body, URL).forEach(function (imgUrl) {
        if (isDownMap[imgUrl]) {
          return;
        }
        isDownMap[imgUrl] = 1;
        downloadToFile(imgUrl);
      });
    });
  });
  req.on('error', function (reqErr) {
    console.error('FAIL to download ' + URL + ': ' + reqErr.message);
  });
};

/**
 * Starts the download of every http(s) URL in the list (each one once).
 * @param {string[]} urls
 */
function main(urls) {
  urls.forEach(function (resourceUrl) {
    if (!isHttpUrl(resourceUrl) || isDownMap[resourceUrl]) {
      return;
    }
    isDownMap[resourceUrl] = 1;
    downloadToFile(resourceUrl, function (res) {
      var contentType = res.headers['content-type'] || '';
      if (resourceUrl.indexOf('.css') !== -1 || contentType.indexOf('text/css') !== -1) {
        console.log('down images form css file:' + resourceUrl + '.');
        downImgFromCss(resourceUrl);
      }
    });
  });
}

if (process.argv.length < 3) {
  console.error('Usage: node downHtml.js <url1,url2,...>');
  process.exitCode = 1;
} else {
  var URLS = process.argv[2].split(',');
  main(URLS);
}
// down.js and downHtml.js (usage notes follow)
放在同一個文件夾下,通過下列 cmd 運行
D:/phantomjs-2.0.0-windows/bin/phantomjs.exe down.js http://www.youku.com/
以上所述就是本文的全部內容了,希望大家能夠喜歡。
新聞熱點
疑難解答
圖片精選
網友關注