国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 編程 > JavaScript > 正文

nodejs通過phantomjs實現下載網頁

2019-11-20 12:34:01
字體:
來源:轉載
供稿:網友

功能其實很簡單,通過 phantomjs.exe 采集 url 加載的資源,通過子進程的方式,啟動 nodejs 加載所有的資源,對于 css 的資源,匹配 css 內容,下載里面的 url 資源

當然功能還是很簡單的,在響應式設計和異步加載的情況下,還是有很多資源沒有能夠下載,需要根據實際情況處理下

 首先當(dāng)然是下載 nodejs 和 phantomjs

下面是 phantomjs.exe 執行的 down.js

// down.js — run by phantomjs.exe.
// Opens the URL given on the command line, records the URL of every resource
// the page starts receiving, then hands the list to downHtml.js (run with
// node as a child process) as a single comma-joined argument.
//
// NOTE(review): kept in var/function-expression style on the assumption that
// the PhantomJS 2.0 script engine does not support newer syntax — confirm
// before modernising.
var page = require('webpage').create(),
  system = require('system');
var spawn = require("child_process").spawn;

if (system.args.length === 1) {
  console.log('Usage: down.js <some URL>');
  phantom.exit(1);
} else {
  var urls = [];
  page.address = system.args[1];

  page.onResourceReceived = function (res) {
    // Each resource is reported in several stages; record it once, on 'start'.
    // Only http(s) URLs are kept: inline data: URIs contain commas and would
    // corrupt the comma-joined argument passed to downHtml.js below.
    if (res.stage === 'start' && /^https?:\/\//i.test(res.url)) {
      urls.push(res.url);
    }
  };

  page.open(page.address, function (status) {
    if (status !== 'success') {
      console.log('FAIL to load the address');
      phantom.exit(1);
    } else {
      console.log('down resource ' + urls.length + ' urls.');
      var child = spawn("node", ["--harmony", "downHtml.js", urls.join(',')]);

      // Relay the downloader's output to this script's console.
      child.stdout.on("data", function (data) {
        console.log(data);
      });
      child.stderr.on("data", function (data) {
        console.log(data);
      });

      // Quit PhantomJS only after the downloader has finished.
      child.on("exit", function (code) {
        phantom.exit();
      });
    }
  });
}

下面是對應的 node 運行的 downHtml.js

"use strict";
/**
 * downHtml.js — downloads every URL in a comma-separated list (process.argv[2])
 * into <cwd>/<hostname>/<pathname>. For stylesheets, the CSS text is also
 * scanned for url(...) references and those resources are downloaded too.
 *
 * Usage: node downHtml.js <url>[,<url>...]
 */
var fs = require('fs');
var http = require('http');
var https = require('https');
var path = require('path');
var r_url = require('url');

var CWD = process.cwd();

// Cache of directories already known to exist, to avoid repeated checks.
var dirCache = {};

/**
 * Ensures that directory `pathStr` exists, creating missing parents first.
 *
 * @param {string} pathStr - directory to create.
 * @param {function(Error=)} callback - called with no argument on success,
 *   or with the error when the directory could not be created.
 */
function makedir(pathStr, callback) {
  if (dirCache[pathStr] === 1) {
    callback();
    return;
  }
  fs.stat(pathStr, function (statErr) {
    if (!statErr) {
      dirCache[pathStr] = 1;
      callback();
      return;
    }
    var parent = path.dirname(pathStr);
    if (parent === pathStr) {
      // Reached the file-system root and it cannot be stat'ed: give up
      // instead of recursing forever.
      callback(statErr);
      return;
    }
    makedir(parent, function (parentErr) {
      if (parentErr) {
        callback(parentErr);
        return;
      }
      fs.mkdir(pathStr, function (mkdirErr) {
        // EEXIST: a concurrent makedir call created it first — not a failure.
        if (mkdirErr && mkdirErr.code !== 'EEXIST') {
          callback(mkdirErr);
          return;
        }
        dirCache[pathStr] = 1;
        callback();
      });
    });
  });
}

// Matches a whole `: url(...)` / `, url(...)` reference inside CSS text.
var reg = /[:,]\s*url\((['"]?).*?\1\)/g;
// Pulls the (optionally quoted) target out of one such reference: m[2].
var reg2 = /\((['"]?)(.*?)\1\)/;

// URLs referenced from CSS that have already been queued for download.
var isDownMap = {};

/** True when `target` is an absolute http:// or https:// URL. */
function isHttpUrl(target) {
  return /^https?:\/\//i.test(target);
}

/**
 * Maps a parsed URL to its local file path under CWD.
 * A pathname that is empty or ends in '/' is saved as index.html.
 * Returns null when the path would land outside <CWD>/<hostname>.
 */
function localPath(uo) {
  var pathname = uo.pathname || '/';
  if (pathname.charAt(pathname.length - 1) === '/') {
    pathname += 'index.html';
  }
  var hostDir = path.join(CWD, String(uo.hostname));
  var filepath = path.join(hostDir, pathname);
  // The URLs come from a remote page: refuse '..' tricks that escape hostDir.
  if (filepath.indexOf(hostDir + path.sep) !== 0) {
    return null;
  }
  return filepath;
}

/** Issues a GET with the module matching the URL's protocol; logs request errors. */
function httpGet(target, onResponse) {
  var client = /^https:/i.test(target) ? https : http;
  return client.get(target, onResponse).on('error', function (err) {
    console.log('request failed: ' + target + ' (' + err.message + ')');
  });
}

/**
 * Downloads `target` to its local path, creating directories as needed.
 * `inspect`, when given, is called with the response before it is saved.
 */
function download(target, inspect) {
  var filepath = localPath(r_url.parse(target));
  if (!filepath) {
    console.log('skip (path outside download folder): ' + target);
    return;
  }
  makedir(path.dirname(filepath), function (dirErr) {
    if (dirErr) {
      console.log('cannot create folder for ' + filepath + ' (' + dirErr.message + ')');
      return;
    }
    httpGet(target, function (res) {
      if (inspect) {
        inspect(res);
      }
      res.pipe(fs.createWriteStream(filepath)).on('error', function (writeErr) {
        console.log('write failed: ' + filepath + ' (' + writeErr.message + ')');
      });
    });
  });
}

/**
 * Fetches the stylesheet at `URL` and downloads every resource it references
 * through url(...), resolving relative references against `URL`.
 */
var downImgFromCss = function (URL) {
  httpGet(URL, function (res) {
    var body = "";
    res.setEncoding('utf8');
    res.on('data', function (chunk) {
      body += chunk;
    });
    res.on('end', function () {
      // match() yields null when the stylesheet has no url(...) at all.
      var refs = body.match(reg) || [];
      refs.forEach(function (ref) {
        var m = ref.match(reg2);
        if (!m || !m[2]) {
          return;
        }
        var imgUrl = r_url.resolve(URL, m[2]);
        // Skip repeats and non-http targets such as inline data: URIs.
        if (isDownMap[imgUrl] || !isHttpUrl(imgUrl)) {
          return;
        }
        isDownMap[imgUrl] = 1;
        download(imgUrl);
      });
    });
  });
};

// Anything that is not an http(s) URL (including an absent argument) is dropped.
var URLS = (process.argv[2] || '').split(',').filter(isHttpUrl);

if (URLS.length === 0) {
  console.log('Usage: node downHtml.js <url>[,<url>...]');
}

// Download the resources.
URLS.forEach(function (URL) {
  download(URL, function (res) {
    var contentType = res.headers["content-type"];
    if (URL.indexOf('.css') !== -1 || (contentType && contentType.indexOf('text/css') !== -1)) {
      console.log('down images form css file:' + URL + '.');
      downImgFromCss(URL);
    }
  });
});

down.js 和 downHtml.js 放在同一個文件夾下,通過下列 cmd 運行

D:/phantomjs-2.0.0-windows/bin/phantomjs.exe down.js http://www.youku.com/

以上所述就是本文的全部內容了,希望大家能夠喜歡。

發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 通许县| 长沙市| 威远县| 海晏县| 三门峡市| 赞皇县| 崇州市| 侯马市| 泰宁县| 工布江达县| 栾川县| 扶余县| 高平市| 门头沟区| 新巴尔虎左旗| 弋阳县| 盘山县| 汉阴县| 临潭县| 微山县| 图片| 平武县| 荆门市| 成武县| 金平| 松阳县| 保靖县| 景泰县| 迭部县| 武强县| 江西省| 福鼎市| 靖安县| 胶州市| 介休市| 康定县| 泰来县| 镇雄县| 东光县| 崇义县| 安福县|