国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 編程 > Java > 正文

java正則表達式解析html示例分享

2019-11-26 15:40:01
字體:
來源:轉載
供稿:網友

複製代碼 代碼如下:

package work;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;

public class chuanboyi {

 public static void main(String[] args){
  // TODO Auto-generated method stub
  StringBuffer html = new StringBuffer();
  HttpClient httpclient = new HttpClient();
  //創(chuàng)建GET方法實(shí)例
  GetMethod getMethod = new GetMethod("http://www.survivalescaperooms.com");
  //使用系統(tǒng)提供的默認(rèn)恢復(fù)策略
  getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());
  try{
   //執(zhí)行GET方法
   int statusCode = httpclient.executeMethod(getMethod);
   if(statusCode != HttpStatus.SC_OK){
    System.out.println("Method is wrong " + getMethod.getStatusLine());
   }
   InputStream responseBody = getMethod.getResponseBodyAsStream();
   BufferedReader reader = new BufferedReader(new InputStreamReader(responseBody,"utf-8"));
   String line = reader.readLine();
   while(line != null){
    html.append(line).append("/n");
    line = reader.readLine();
   }
   reader.close();
   //正則表達(dá)式
   String regex = "<form name=/"compareForm/"[//s//S]+>[//s//S]+</form>.*<script.*>";
   String regexa ="(?<=<li>)[//s//S]+?(?=</li>)";
   Pattern pattern = Pattern.compile(regex);
         Matcher m = pattern.matcher(html);
         StringBuffer str = new StringBuffer();
         int i = 0;
         while(m.find()){
          str.append(m.group());
         }
         pattern = Pattern.compile(regexa);
         m = pattern.matcher(str);
         while(m.find()){
          attrs(m.group());
          i++;
         }
         System.out.println("共有"+i+"條數(shù)據(jù)!");
  }catch (HttpException e) {
   // TODO: handle exception
   System.out.println("Please check your provided http address!");
   e.printStackTrace();
  }catch (IOException e) {
   // TODO: handle exception
   System.out.println("the line is wrong!");
   e.printStackTrace();
  }finally{
   getMethod.releaseConnection();//釋放鏈接
  }
 }
 public static void attrs(String str){

  //獲取url的正則表達(dá)式
  String regexURL = "[a-z]+-[0-9]+//.html";
  //獲取Name的正則表達(dá)式
  String regexName = "(?<=title=/")[[//w-//s][^x00-xff]]+(?=/")";
  //獲取圖片的正則表達(dá)式
  String regexPicture = "images.*//.jpg";

  Pattern patternURL = Pattern.compile(regexURL);
  Pattern patternName = Pattern.compile(regexName);
  Pattern patternPicture = Pattern.compile(regexPicture);
  Matcher mURL = patternURL.matcher(str);
  Matcher mName = patternName.matcher(str);
  Matcher mPicture = patternPicture.matcher(str);
  if(mName.find()){
   System.out.println("名字:"+mName.group());
  }
  if(mURL.find()){
   System.out.println("鏈接:"+mURL.group());
  }
  if(mPicture.find()){
   System.out.println("圖片:"+mPicture.group());
  }
 } 
}

發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 张家港市| 东城区| 大同市| 绍兴市| 石家庄市| 博乐市| 五原县| 尤溪县| 象山县| 银川市| 阳新县| 普兰店市| 张家口市| 武宁县| 霍林郭勒市| 叶城县| 姜堰市| 安丘市| 克拉玛依市| 锦州市| 义马市| 建始县| 泸州市| 翁源县| 渭源县| 北安市| 泸定县| 清流县| 荃湾区| 榆树市| 渭南市| 寻乌县| 扶绥县| 浑源县| 电白县| 天气| 新竹县| 清原| 伊宁市| 仲巴县| 洪湖市|