
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.Collections.Specialized;
using System.Web;

namespace Common.Helpers
{
    /// <summary>
    /// <see cref="WebClient"/> subclass that adds cookie support, a configurable
    /// user agent and timeout, control over automatic redirects, access to the
    /// last response object, and form-POST helpers that honour the caller's encoding.
    /// </summary>
    public class HttpWebClient : WebClient
    {
        #region Public properties
        /// <summary>
        /// User-Agent value sent with each request. The constructors set it to
        /// <see cref="UserAgentValues.FireFox"/>. When null or empty, this class
        /// does not set the User-Agent header itself.
        /// </summary>
        public string UserAgent { get; set; }

        /// <summary>
        /// Cookie container assigned to every <see cref="HttpWebRequest"/> created by this client.
        /// </summary>
        public CookieContainer CookieContainer { get; set; }

        /// <summary>
        /// Value copied to <c>ServicePoint.Expect100Continue</c> of each HTTP request.
        /// The constructors set it to <c>false</c>.
        /// </summary>
        public bool Expect100Continue { get; set; }

        private WebResponse m_LastWebResponse = null;
        /// <summary>
        /// The response object returned by the most recent call to
        /// <see cref="GetWebResponse(WebRequest)"/>; null before the first request.
        /// </summary>
        public WebResponse LastWebResponse
        {
            get { return this.m_LastWebResponse; }
        }

        private int m_Timeout = 120000;
        /// <summary>
        /// Request timeout in milliseconds. Defaults to 120000 (120 seconds).
        /// </summary>
        public int Timeout
        {
            get { return m_Timeout; }
            set { m_Timeout = value; }
        }

        private HttpWebClientSetting m_HttpWebClientSetting = null;
        /// <summary>
        /// Additional settings for this client. Never returns null: a default
        /// instance is created on first read, and assigning null stores a new
        /// default instance instead.
        /// </summary>
        public HttpWebClientSetting HttpWebClientSetting
        {
            get
            {
                if (m_HttpWebClientSetting == null)
                {
                    m_HttpWebClientSetting = new HttpWebClientSetting();
                }
                return m_HttpWebClientSetting;
            }
            set
            {
                m_HttpWebClientSetting = value ?? new HttpWebClientSetting();
            }
        }

        /// <summary>
        /// Optional callback invoked with each <see cref="HttpWebRequest"/> after this
        /// class has applied its own settings. Null by default.
        /// </summary>
        public Action<HttpWebRequest> PrepareProcessWebRequest { get; set; }
        #endregion

        #region Constructors
        /// <summary>
        /// Creates a client with a new, empty <see cref="CookieContainer"/>.
        /// </summary>
        public HttpWebClient()
            : this(new CookieContainer())
        {
        }

        /// <summary>
        /// Creates a client that uses the supplied cookie container.
        /// </summary>
        /// <param name="cookieContainer">Cookie container to attach to HTTP requests.</param>
        public HttpWebClient(CookieContainer cookieContainer)
        {
            this.CookieContainer = cookieContainer;
            this.UserAgent = UserAgentValues.FireFox;
            this.Expect100Continue = false;
        }
        #endregion

        #region Overrides adding CookieContainer support
        /// <summary>
        /// Creates the request via the base class, then applies the user agent, timeout,
        /// cookie container, Expect100Continue flag, redirect setting and the optional
        /// <see cref="PrepareProcessWebRequest"/> callback.
        /// </summary>
        /// <param name="address">The URI to request.</param>
        /// <returns>The configured request.</returns>
        protected override WebRequest GetWebRequest(Uri address)
        {
            if (!string.IsNullOrEmpty(this.UserAgent))
            {
                // Assign rather than Add so that a value already present in Headers is
                // replaced instead of being joined into a comma-separated list.
                this.Headers[HttpRequestHeader.UserAgent] = this.UserAgent;
            }

            WebRequest request = base.GetWebRequest(address);
            request.Timeout = this.Timeout;

            HttpWebRequest httpRequest = request as HttpWebRequest;
            if (httpRequest != null)
            {
                httpRequest.CookieContainer = this.CookieContainer;
                httpRequest.ServicePoint.Expect100Continue = this.Expect100Continue;

                // The HttpWebClientSetting getter never returns null.
                httpRequest.AllowAutoRedirect = this.HttpWebClientSetting.AllowAutoRedirect;

                // Give the caller a last chance to adjust the request.
                if (this.PrepareProcessWebRequest != null)
                {
                    this.PrepareProcessWebRequest(httpRequest);
                }
            }

            return request;
        }
        #endregion

        #region Overrides exposing the response object
        /// <summary>
        /// Obtains the response via the base class and records it in
        /// <see cref="LastWebResponse"/>.
        /// </summary>
        /// <param name="request">The request to get a response for.</param>
        /// <returns>The response returned by the base class.</returns>
        protected override WebResponse GetWebResponse(WebRequest request)
        {
            WebResponse response = base.GetWebResponse(request);
            this.m_LastWebResponse = response;
            return response;
        }
        #endregion

        #region (public) PostData with NameValueCollection
        /// <summary>
        /// POSTs form data to a URL and returns the response body as a string.
        /// The form body is URL-encoded here with the requested encoding and sent
        /// through UploadData rather than UploadValues; the original author's note
        /// states that UploadValues always URL-encodes as UTF-8.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit; null is treated as no fields.</param>
        /// <param name="encoding">Encoding used to URL-encode the request; UTF-8 when null.</param>
        /// <param name="responseEncoding">Encoding used to decode the response; the request encoding when null.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, NameValueCollection data, Encoding encoding, Encoding responseEncoding)
        {
            // Resolve the encoding once so the same non-null value is used everywhere.
            Encoding requestEncoding = encoding ?? Encoding.UTF8;

            this.Encoding = requestEncoding;
            // Assign rather than Add so a pre-existing Content-Type is replaced, not appended to.
            this.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";

            // The URL-encoded body contains only ASCII characters.
            byte[] requestBytes = Encoding.ASCII.GetBytes(BuildFormBody(data, requestEncoding));
            byte[] responseBytes = this.UploadData(url, "POST", requestBytes);

            return (responseEncoding ?? requestEncoding).GetString(responseBytes);
        }

        /// <summary>
        /// POSTs form data to a URL and returns the response body, using one encoding
        /// for both the request and the response.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit.</param>
        /// <param name="encoding">Encoding used for the request and the response.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, NameValueCollection data, Encoding encoding)
        {
            return PostData(url, data, encoding, null);
        }

        /// <summary>
        /// POSTs form data to a URL and returns the response body, using
        /// <c>this.Encoding</c> for both the request and the response.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, NameValueCollection data)
        {
            return PostData(url, data, this.Encoding);
        }

        /// <summary>
        /// Builds an application/x-www-form-urlencoded body ("k1=v1&amp;k2=v2") from the
        /// given fields, URL-encoding names and values with the given encoding.
        /// </summary>
        private static string BuildFormBody(NameValueCollection data, Encoding encoding)
        {
            StringBuilder body = new StringBuilder();
            if (data == null)
            {
                return body.ToString();
            }

            string delimiter = string.Empty;
            foreach (string name in data.AllKeys)
            {
                body.Append(delimiter);
                body.Append(HttpUtility.UrlEncode(name, encoding));
                body.Append("=");
                body.Append(HttpUtility.UrlEncode(data[name], encoding));
                delimiter = "&";
            }
            return body.ToString();
        }
        #endregion

        #region (public) PostData with Dictionary
        /// <summary>
        /// POSTs form data to a URL and returns the response body as a string.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit; null is treated as no fields.</param>
        /// <param name="encoding">Encoding used to URL-encode the request; UTF-8 when null.</param>
        /// <param name="responseEncoding">Encoding used to decode the response; the request encoding when null.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, Dictionary<string, string> data, Encoding encoding, Encoding responseEncoding)
        {
            NameValueCollection postData = new NameValueCollection();
            if (data != null)
            {
                foreach (var item in data)
                {
                    postData.Add(item.Key, item.Value);
                }
            }
            return PostData(url, postData, encoding, responseEncoding);
        }

        /// <summary>
        /// POSTs form data to a URL and returns the response body, using one encoding
        /// for both the request and the response.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit.</param>
        /// <param name="encoding">Encoding used for the request and the response.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, Dictionary<string, string> data, Encoding encoding)
        {
            return PostData(url, data, encoding, null);
        }

        /// <summary>
        /// POSTs form data to a URL and returns the response body, using
        /// <c>this.Encoding</c> for both the request and the response.
        /// </summary>
        /// <param name="url">The URL to post to.</param>
        /// <param name="data">The form fields to submit.</param>
        /// <returns>The decoded response body.</returns>
        public string PostData(string url, Dictionary<string, string> data)
        {
            return PostData(url, data, this.Encoding);
        }
        #endregion

        #region Helper types
        /// <summary>
        /// Predefined browser User-Agent strings.
        /// </summary>
        public class UserAgentValues
        {
            public static readonly string FireFox = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0";
            public static readonly string Chrome = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36";
            public static readonly string IE8 = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2;)";
        }
        #endregion
    }

    /// <summary>
    /// Settings object for <see cref="HttpWebClient"/>.
    /// </summary>
    public class HttpWebClientSetting
    {
        private bool m_AllowAutoRedirect = true;
        /// <summary>
        /// Whether the client follows redirect responses automatically. When true the
        /// response obtained is that of the redirect target; when false it is the
        /// redirect response itself. Defaults to true.
        /// </summary>
        public bool AllowAutoRedirect
        {
            get { return m_AllowAutoRedirect; }
            set { m_AllowAutoRedirect = value; }
        }
    }
}
在做頁面抓取的過程中,發現自帶的WebClient不夠靈活,因此做了一個實現。
關于在PostData方法中不使用UploadValues()方法的原因:
1.查看微軟的源代碼實現時發現,無論請求時設置的Encoding是否為GB2312,WebClient的UploadValues()在上傳內容時內部始終使用UTF-8編碼進行UrlEncode,因此當服務端按GB2312等非UTF-8編碼解析時,提交數據中的中文就會亂碼;故PostData改為自行按指定編碼做UrlEncode后再用UploadData()提交,以規避此問題。
關于HttpWebClientSetting中的AllowAutoRedirect屬性:
在WebClient發起請求時,若響應為重定向,WebClient默認會自動跟隨重定向,因此該類提供此設置項以控制訪問時是否自動重定向。(自動重定向后的第二次請求會將請求中的Referer頭置空;若將AllowAutoRedirect設置為false,再手動從響應頭中取出Location地址,設置好Referer后再次訪問,則可真實模擬瀏覽器訪問,從而避開一些網站的防抓取設置)
關于HttpWebClient中的LastWebResponse屬性:
當存在多次重定向時,該屬性記錄的是最后一次返回的響應對象,通過該對象的ResponseUri屬性可以取到最后返回響應的頁面真實地址,從而為下一次請求設置Referer頭作準備。
大約就是如此,后期如有Bug會繼續更新。
新聞熱點
疑難解答