国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 編程 > C++ > 正文

linux c++模擬簡易網絡爬蟲實例

2020-01-26 14:06:19
字體:
來源:轉載
供稿:網友

廢話不多說,直接上代碼。下面的程序先從 URL 中解析出主機名和頁面路徑,然後通過套接字連接主機的 80 端口併發送 HTTP GET 請求,跳過響應頭之後把頁面正文作為字符串返回:

/*
 * File:   main.cpp
 * Author: yangchao
 *
 * Minimal HTTP page fetcher for Linux: splits a URL into host and path,
 * sends a plain-text HTTP GET to port 80 of that host and returns the
 * response body as a string.
 */
#include <iostream>
#include <string>
#include <netdb.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>

using namespace std;

namespace {

// TCP port the request is sent to. No TLS is performed, so "https://" URLs
// are still fetched over plain HTTP on this port.
const unsigned short kHttpPort = 80;

// Size of the chunk buffer used while reading the response body.
const size_t kBufferSize = 512;

// Prints `message` to stderr and terminates the process with status 1.
[[noreturn]] void fail(const char *message)
{
    cerr << message << '\n';
    exit(1);
}

} // namespace

/**
 * Splits `url` into a host name and a page path.
 *
 * A leading "http://" or "https://" is removed. Everything from the first
 * '/' onwards becomes `pagePath`; the part before it becomes `hostUrl`.
 * When the URL has no path component, `pagePath` is "/".
 *
 * @param url       URL to split, with or without a scheme prefix.
 * @param hostUrl   Receives the host part of the URL.
 * @param pagePath  Receives the path part, always starting with '/'.
 */
void parseHostAndPagePath(const string &url, string &hostUrl, string &pagePath)
{
    static const string kSchemes[] = {"http://", "https://"};

    hostUrl = url;
    pagePath = "/";

    // Strip the scheme only when it is a prefix, so that a URL embedded in
    // the path or query string is left intact.
    for (const string &scheme : kSchemes) {
        if (hostUrl.compare(0, scheme.size(), scheme) == 0) {
            hostUrl.erase(0, scheme.size());
            break;
        }
    }

    const string::size_type slash = hostUrl.find('/');
    if (slash != string::npos) {
        pagePath = hostUrl.substr(slash);
        hostUrl.erase(slash);
    }
}

/**
 * Fetches a page over plain HTTP and returns the response body.
 *
 * Resolves the host, connects to port 80, sends a GET request, skips the
 * response header block (everything up to the first empty line) and
 * collects the remaining bytes. The body is returned exactly as received;
 * no transfer-encoding or content-encoding is decoded.
 *
 * On a resolve, socket, connect or send failure a message is written to
 * stderr and the process exits with status 1.
 *
 * @param url  URL of the page, with or without a scheme prefix.
 * @return     The bytes that follow the response headers.
 */
string getPageContent(const string &url)
{
    string hostUrl, pagePath;
    parseHostAndPagePath(url, hostUrl, pagePath);

    const hostent *host = gethostbyname(hostUrl.c_str());
    if (host == nullptr || host->h_addrtype != AF_INET ||
        host->h_addr_list[0] == nullptr) {
        fail("gethostbyname error");
    }

    sockaddr_in pin{};
    pin.sin_family = AF_INET;
    pin.sin_port = htons(kHttpPort);
    memcpy(&pin.sin_addr, host->h_addr_list[0], sizeof(pin.sin_addr));

    const int isock = socket(AF_INET, SOCK_STREAM, 0);
    if (isock == -1) {
        fail("open socket error");
    }

    // "Connection: close" asks the server to close the socket after the
    // response, so the read loop below ends on end-of-stream instead of
    // having to wait for the receive timeout.
    string requestHeader;
    requestHeader = "GET " + pagePath + " HTTP/1.1\r\n";
    requestHeader += "Host: " + hostUrl + "\r\n";
    requestHeader += "Accept: */*\r\n";
    requestHeader += "User-Agent: Mozilla/4.0(compatible)\r\n";
    requestHeader += "Connection: close\r\n";
    requestHeader += "\r\n";

    if (connect(isock, reinterpret_cast<const sockaddr *>(&pin), sizeof(pin)) == -1) {
        close(isock);
        fail("connect error");
    }

    // send() may accept fewer bytes than requested; keep going until the
    // whole request has been written.
    size_t sent = 0;
    while (sent < requestHeader.size()) {
        const ssize_t written = send(isock, requestHeader.data() + sent,
                                     requestHeader.size() - sent, 0);
        if (written == -1) {
            close(isock);
            fail("send error");
        }
        sent += static_cast<size_t>(written);
    }

    // Safety net: give up on a read after one second of silence.
    timeval timeout = {1, 0};
    setsockopt(isock, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout));

    // Skip the response headers: read byte by byte, ignoring '\r', until a
    // line that contains no characters (two '\n' in a row) is seen.
    char c;
    bool lineHasContent = true;
    while (recv(isock, &c, 1, 0) > 0) {
        if (c == '\r') {
            continue;
        }
        if (c == '\n') {
            if (!lineHasContent) {
                break;
            }
            lineHasContent = false;
        } else {
            lineHasContent = true;
        }
    }

    // Read the body. Appending with an explicit length keeps NUL bytes
    // instead of truncating the chunk at the first one.
    char buffer[kBufferSize];
    string pageContent;
    ssize_t len;
    while ((len = recv(isock, buffer, sizeof(buffer), 0)) > 0) {
        pageContent.append(buffer, static_cast<size_t>(len));
    }

    close(isock);
    return pageContent;
}

int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;
    cout << getPageContent("http://www.hao123.com") << endl;
    return 0;
}

以上這篇linux c++模擬簡易網絡爬蟲實例就是小編分享給大家的全部內容了,希望能給大家一個參考,也希望大家多多支持武林網。

發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 临清市| 太仆寺旗| 荔浦县| 凉山| 镇江市| 常熟市| 正阳县| 淮阳县| 高安市| 镇雄县| 绍兴县| 蓝山县| 察隅县| 巩留县| 鹤庆县| 宣汉县| 墨竹工卡县| 尉犁县| 临夏县| 和林格尔县| 海盐县| 额敏县| 柘荣县| 延边| 扶沟县| 昭觉县| 绵阳市| 通化县| 白城市| 西华县| 惠安县| 南康市| 县级市| 稻城县| 肥东县| 河东区| 大英县| 西林县| 准格尔旗| 广安市| 井陉县|