国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 學院 > 開發設計 > 正文

C#實現網頁爬蟲

2019-11-14 13:30:18
字體:
供稿:網友

HTTP請求工具類(功能:1、獲取網頁html;2、下載網絡圖片;):

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace Utils
{
    /// <summary>
    /// HTTP request helper. Provides two operations:
    /// fetching the HTML of a page and downloading a remote file to disk.
    /// </summary>
    public class HttpRequestUtil
    {
        /// <summary>
        /// User-Agent header sent with every request issued by this class.
        /// </summary>
        private const string UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)";

        /// <summary>
        /// Name of the sub-directory (below the application start-up path)
        /// that receives downloaded files.
        /// </summary>
        private const string DownloadDirectoryName = "download";

        /// <summary>
        /// Fetches a page and returns its body decoded as UTF-8 text.
        /// </summary>
        /// <param name="url">Absolute HTTP/HTTPS address of the page.</param>
        /// <returns>The complete response body as a string.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="url"/> does not use an HTTP scheme.
        /// </exception>
        public static string GetPageHtml(string url)
        {
            HttpWebRequest request = CreateRequest(url);

            // Nothing is sent until GetResponse() is called; the request uses
            // the default method (GET) because Method is never assigned.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Downloads the resource at <paramref name="url"/> into the
        /// "download" directory below <see cref="Application.StartupPath"/>.
        /// The local file name is the text after the last '/' of the URL.
        /// If a file with that name already exists the call returns without
        /// issuing any request.
        /// </summary>
        /// <param name="url">Absolute HTTP/HTTPS address of the file.</param>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="url"/> ends with '/' (no file name can be derived)
        /// or does not use an HTTP scheme.
        /// </exception>
        public static void HttpDownloadFile(string url)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }

            string fileName = url.Substring(url.LastIndexOf('/') + 1);
            if (fileName.Length == 0)
            {
                throw new ArgumentException("The URL does not end with a file name.", "url");
            }

            string directory = Path.Combine(Application.StartupPath, DownloadDirectoryName);

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(directory);

            string filePathName = Path.Combine(directory, fileName);
            if (File.Exists(filePathName))
            {
                return;
            }

            HttpWebRequest request = CreateRequest(url);
            request.Proxy = null;

            // The using statements guarantee that the response and both streams
            // are released even when the transfer fails half-way.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (FileStream fileStream = new FileStream(filePathName, FileMode.Create))
            {
                responseStream.CopyTo(fileStream);
            }
        }

        /// <summary>
        /// Creates an HTTP request for <paramref name="url"/> with the shared
        /// User-Agent header applied.
        /// </summary>
        private static HttpWebRequest CreateRequest(string url)
        {
            HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
            if (request == null)
            {
                // WebRequest.Create returned a non-HTTP request type.
                throw new ArgumentException("Only HTTP and HTTPS URLs are supported.", "url");
            }

            request.UserAgent = UserAgent;
            return request;
        }
    }
}
View Code

多線程爬取網頁代碼:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Utils;

namespace 爬蟲
{
    /// <summary>
    /// Main window of the crawler. It starts worker threads that walk the
    /// listing pages of tt.mop.com, open every linked post and download the
    /// images found there, while reporting progress on the form's labels.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Number of worker threads started per run.</summary>
        private const int WorkerCount = 10;

        /// <summary>Number of listing pages each worker processes.</summary>
        private const int PagesPerWorker = 10;

        // Workers started by the most recent run; used by stop/pause/resume.
        List<Thread> threadList = new List<Thread>();
        Thread thread = null;

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Starts a crawl: WorkerCount threads, each handling PagesPerWorker
        /// consecutive listing pages (worker n handles pages
        /// (n-1)*PagesPerWorker+1 .. n*PagesPerWorker).
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            DateTime dtStart = DateTime.Now;
            button3.Enabled = true;
            button2.Enabled = true;
            button1.Enabled = false;

            // Shared progress counters. They are captured by every worker, so
            // all writes go through Interlocked to avoid lost updates.
            int page = 0;
            int count = 0;
            int personCount = 0;
            lblPage.Text = "已完成頁數:0";

            // Group 2 of regA is the href value of a post link; group 1 of
            // regImg is the src value of an image. The patterns are built once
            // and shared by all workers.
            Regex regA = new Regex("<a[\\s]+class=\"J-userPic([^<>]*?)[\\s]+href=\"([^\"]*?)\"");
            Regex regImg = new Regex("<p class=\"tc mb10\"><img[\\s]+src=\"([^\"]*?)\"");

            // Forget the workers of any previous run so that stop/pause/resume
            // only act on the threads started below.
            threadList.Clear();

            for (int i = 1; i <= WorkerCount; i++)
            {
                thread = new Thread(new ParameterizedThreadStart(delegate(object obj)
                {
                    int workerNumber = Convert.ToInt32(obj);

                    for (int j = 1; j <= PagesPerWorker; j++)
                    {
                        try
                        {
                            // Local to this worker: a shared variable here would
                            // be overwritten by the other threads.
                            int index = (workerNumber - 1) * PagesPerWorker + j;
                            string pageHtml = HttpRequestUtil.GetPageHtml("http://tt.mop.com/c44/0/1_" + index.ToString() + ".html");

                            foreach (Match match in regA.Matches(pageHtml))
                            {
                                string url = match.Groups[2].Value;

                                // Only site-relative links are followed.
                                if (!url.StartsWith("/", StringComparison.Ordinal))
                                {
                                    continue;
                                }

                                string imgPageHtml = HttpRequestUtil.GetPageHtml("http://tt.mop.com" + url);
                                Interlocked.Increment(ref personCount);
                                lblPerson.Invoke(new Action(delegate()
                                {
                                    // CompareExchange(ref x, 0, 0) is an atomic read of x.
                                    lblPerson.Text = "已完成條數:" + Interlocked.CompareExchange(ref personCount, 0, 0).ToString();
                                }));

                                foreach (Match matchImgPage in regImg.Matches(imgPageHtml))
                                {
                                    string imgUrl = matchImgPage.Groups[1].Value;
                                    if (!imgUrl.StartsWith("http://i1", StringComparison.Ordinal))
                                    {
                                        continue;
                                    }

                                    try
                                    {
                                        HttpRequestUtil.HttpDownloadFile(imgUrl);
                                        Interlocked.Increment(ref count);
                                        lblNum.Invoke(new Action(delegate()
                                        {
                                            int downloaded = Interlocked.CompareExchange(ref count, 0, 0);
                                            lblNum.Text = "已下載圖片數" + downloaded.ToString();
                                            double time = DateTime.Now.Subtract(dtStart).TotalSeconds;
                                            if (time > 0)
                                            {
                                                lblSpeed.Text = "速度:" + (downloaded / time).ToString("0.0") + "張/秒";
                                            }
                                        }));
                                    }
                                    catch (Exception ex)
                                    {
                                        // Best effort: one failed image must not
                                        // stop the rest of the page.
                                        System.Diagnostics.Debug.WriteLine(ex);
                                    }

                                    Thread.Sleep(1);
                                }
                            }
                        }
                        catch (Exception ex)
                        {
                            // Best effort: a failed page still counts as
                            // finished so that the run can complete.
                            System.Diagnostics.Debug.WriteLine(ex);
                        }

                        // The value returned by Increment is unique per call, so
                        // exactly one worker observes the final total.
                        int finishedPages = Interlocked.Increment(ref page);
                        lblPage.Invoke(new Action(delegate()
                        {
                            lblPage.Text = "已完成頁數:" + Interlocked.CompareExchange(ref page, 0, 0).ToString();
                        }));

                        if (finishedPages == WorkerCount * PagesPerWorker)
                        {
                            button1.Invoke(new Action(delegate() { button1.Enabled = true; }));
                            MessageBox.Show("完成!");
                        }
                    }
                }));
                thread.Start(i);
                threadList.Add(thread);
            }
        }

        /// <summary>
        /// Stops the crawl: resumes any suspended worker (a suspended thread
        /// cannot be aborted directly) and aborts every worker, then resets
        /// the buttons to the idle state.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            // NOTE(review): Thread.Suspend/Resume/Abort are obsolete APIs; a
            // cooperative stop/pause signal would be the long-term replacement.
            foreach (Thread worker in threadList)
            {
                if (worker.ThreadState == ThreadState.Suspended)
                {
                    worker.Resume();
                }
                worker.Abort();
            }

            button1.Enabled = true;
            button2.Enabled = false;
            button3.Enabled = false;
            button4.Enabled = false;
        }

        /// <summary>
        /// Aborts every worker when the form is closing.
        /// </summary>
        private void Form1_FormClosing(object sender, FormClosingEventArgs e)
        {
            foreach (Thread worker in threadList)
            {
                worker.Abort();
            }
        }

        /// <summary>
        /// Pauses the crawl by suspending every worker whose state is Running,
        /// then enables the resume button.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            foreach (Thread worker in threadList)
            {
                if (worker.ThreadState == ThreadState.Running)
                {
                    worker.Suspend();
                }
            }

            button3.Enabled = false;
            button4.Enabled = true;
        }

        /// <summary>
        /// Resumes every suspended worker, then enables the pause button.
        /// </summary>
        private void button4_Click(object sender, EventArgs e)
        {
            foreach (Thread worker in threadList)
            {
                if (worker.ThreadState == ThreadState.Suspended)
                {
                    worker.Resume();
                }
            }

            button3.Enabled = true;
            button4.Enabled = false;
        }
    }
}
View Code

截圖:

 


發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 栖霞市| 广州市| 永登县| 黑河市| 白玉县| 屏山县| 晴隆县| 石嘴山市| 观塘区| 鄂温| 花莲市| 民勤县| 大悟县| 南安市| 米脂县| 安徽省| 珠海市| 三门峡市| 格尔木市| 廊坊市| 瑞安市| 青田县| 漯河市| 红安县| 徐汇区| 曲沃县| 临西县| 彰化县| 西青区| 紫金县| 饶阳县| 崇州市| 厦门市| 合水县| 治多县| 宝鸡市| 拜城县| 华蓥市| 无为县| 左云县| 额敏县|