国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 編程 > Java > 正文

java 字符串詞頻統(tǒng)計實例代碼

2019-11-26 16:12:55
字體:
供稿:網(wǎng)友

復(fù)制代碼 代碼如下:

package com.gpdi.action;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class WordsStatistics {

    class Obj {
        int count ;
        Obj(int count){
            this.count = count;
        }
    }

    public List<WordCount> statistics(String word) {
        List<WordCount> rs = new ArrayList<WordCount>();
        Map <String,Obj> map = new HashMap<String,Obj>();

        if(word == null ) {
            return null;
        }
        word = word.toLowerCase();
        word = word.replaceAll("'s", "");
        word = word.replaceAll(",", "");
        word = word.replaceAll("-", "");
        word = word.replaceAll("http://.", "");
        word = word.replaceAll("'", "");
        word = word.replaceAll(":", "");
        word = word.replaceAll("!", "");
        word = word.replaceAll("/n", "");

        String [] wordArray = word.split(" ");
        for(String simpleWord : wordArray) {
            simpleWord = simpleWord.trim(); 
            if (simpleWord != null && !simpleWord.equalsIgnoreCase("")) {
                Obj cnt = map.get(simpleWord);
                if ( cnt!= null ) {
                    cnt.count++;
                }else {
                    map.put(simpleWord, new Obj(1));
                }
            }
        }

        for(String key : map.keySet()) {
            WordCount wd = new WordCount(key,map.get(key).count);
            rs.add(wd);
        }

        Collections.sort(rs, new java.util.Comparator<WordCount>(){
            @Override
            public int compare(WordCount o1, WordCount o2) {
                int result = 0 ;
                if (o1.getCount() > o2.getCount() ) {
                    result = -1;
                }else if (o1.getCount() < o2.getCount()) {
                    result = 1;
                }else {
                    int strRs = o1.getWord().compareToIgnoreCase(o2.getWord());
                    if ( strRs > 0 ) {
                        result = 1;
                    }else {
                        result = -1 ;
                    }
                }
                return result;
            }

        });
        return rs;
    }

     
    public static void main(String args[]) {
        String word = "Pinterest is might be aa ab aa ab marketer's dream  - ths site is largely used to curate products " ;
        WordsStatistics s = new WordsStatistics();
        List<WordCount> rs = s.statistics(word);
        for(WordCount word1 : rs) {
            System.out.println(word1.getWord()+"*"+word1.getCount());
        }
    }

}

發(fā)表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發(fā)表
主站蜘蛛池模板: 汉寿县| 甘孜| 雷山县| 垣曲县| 莱西市| 定结县| 中卫市| 湖口县| 松原市| 瑞昌市| 天水市| 英德市| 馆陶县| 岢岚县| 普陀区| 永吉县| 溆浦县| 灵川县| 安陆市| 汉川市| 怀来县| 休宁县| 丹阳市| 永泰县| 陇南市| 延边| 开远市| 舟山市| 礼泉县| 青冈县| 申扎县| 尤溪县| 高安市| 卢龙县| 阆中市| 仁布县| 黑河市| 巴彦县| 永宁县| 郧西县| 调兵山市|