Lucene第一個例子

2019-11-08 02:00:21

字體：大中小

來源：轉載

供稿：網友

引入jar包

public class FirstApp {	/**	 * 創建索引庫	 * 將Aritcle對象放入索引庫中的原始記錄表中，從而形成詞匯表	 */	@Test	public void createIndexDB() throws Exception{		//創建Article對象		Article article = new Article(3,"培訓","傳智是一家IT培訓機構");		//創建Document對象		Document document = new Document();		//將Article對象中的三個屬性值分別綁定到Document對象中		/*		 *參數一：document對象中的屬性名叫xid，article對象中的屬性名叫id，項目中提倡相同		 *參數二：document對象中的屬性xid的值，與article對象中相同		 *參數三：是否將xid屬性值存入由原始記錄表中轉存入詞匯表		 *       Store.YES表示該屬性值會存入詞匯表		 *       Store.NO表示該屬性值不會存入詞匯表		 *       項目中提倡非id值都存入詞匯表		 *參數四：是否將xid屬性值進行分詞算法		 *       Index.ANALYZED表示該屬性值會進行詞匯拆分		 *       Index.NOT_ANALYZED表示該屬性值不會進行詞匯拆分		 *       項目中提倡非id值都進行詞匯拆分		 *       目前將分詞理解為分匯拆分，目前認為一個漢字一個分詞拆分 		 */		document.add(new Field("xid",article.getId().toString(),Store.YES,Index.ANALYZED));		document.add(new Field("xtitle",article.getTitle(),Store.YES,Index.ANALYZED));		document.add(new Field("xcontent",article.getContent(),Store.YES,Index.ANALYZED));		//創建IndexWriter字符流對象		/*		 * 參數一：lucene索引庫最終應對于硬盤中的目錄，例如：c:/IndexDBDBDB		 * 參數二：采用什么策略將文本拆分，一個策略就是一個具體的實現類		 * 參數三：最多將文本拆分出多少詞匯，LIMITED表示1W個，即只取前1W個詞匯，如果不足1W個詞匯個，以實際為準		 */		Directory directory =  FSDirectory.open(new File("c:/itcast/IndexDBDBDB"));		Version version = Version.LUCENE_30;		Analyzer analyzer = new StandardAnalyzer(version);		MaxFieldLength maxFieldLength = MaxFieldLength.LIMITED;		IndexWriter indexWriter = new IndexWriter(directory,analyzer,maxFieldLength);		//將document對象寫入lucene索引庫		indexWriter.addDocument(document);		//關閉IndexWriter字符流對象		indexWriter.close();	}					/**	 * 根據關鍵字從索引庫中搜索符合條件的內容	 */	@Test	public void findIndexDB() throws Exception{		//準備工作		String keyWords = "培訓";		List<Article> articleList = new ArrayList<Article>();		Directory directory =  FSDirectory.open(new File("c:/itcast/IndexDBDBDB"));		Version version = Version.LUCENE_30;		Analyzer analyzer = new StandardAnalyzer(version);		MaxFieldLength maxFieldLength = MaxFieldLength.LIMITED;								//創建IndexSearcher字符流對象		IndexSearcher indexSearcher = new IndexSearcher(directory);		//創建查詢解析器對象		/*		 * 參數一：使用分詞器的版本，提倡使用該jar包中的最高版本		 * 參數二：爭對document對象中的哪個屬性進行搜索		 */		QueryParser queryParser = new QueryParser(version,"xcontent",analyzer);		//創建對象對象封裝查詢關鍵字		Query query = queryParser.parse(keywords);		//根據關鍵字，去索引庫中的詞匯表搜索		/*		 * 參數一：表示封裝關鍵字查詢對象，其它QueryParser表示查詢解析器		 * 參數二：MAX_RECORD表示如果根據關鍵字搜索出來的內容較多，只取前MAX_RECORD個內容		 *        不足MAX_RECORD個數的話，以實際為準		 */		int MAX_RECORD = 100;		TopDocs topDocs = indexSearcher.search(query,MAX_RECORD);		//迭代詞匯表中符合條件的編號 		for(int i=0;i<topDocs.scoreDocs.length;i++){			//取出封裝編號和分數的ScoreDoc對象			ScoreDoc scoreDoc = topDocs.scoreDocs[i];			//取出每一個編號，例如:0,1,2			int no = scoreDoc.doc;			//根據編號去索引庫中的原始記錄表中查詢對應的document對象			Document document = indexSearcher.doc(no);			//獲取document對象中的三個屬性值			String xid = document.get("xid");			String xtitle = document.get("xtitle");			String xcontent = document.get("xcontent");			//封裝到artilce對象中			Article article = new Article(Integer.parseInt(xid),xtitle,xcontent);			//將article對象加入到list集合中			articleList.add(article);		}		//迭代結果集		for(Article a:articleList){			System.out.PRintln(a);		}	}	}

上一篇：Struts2標簽嵌套以及遍歷雙層list

下一篇：Arduino簡單實例之二_光敏傳感器