這裡的代碼屬於《Mahout in Action》中的內容,只是我不了解其中的註解,所以就特意想學習一下相關的內容。
// ===== 1. A first recommender =====
package mia.recommender.ch02;

// The imports show that Mahout's packages are split into the main interfaces
// and their implementations (the "impl" packages).
import org.apache.mahout.cf.taste.impl.model.file.*;
import org.apache.mahout.cf.taste.impl.neighborhood.*;
import org.apache.mahout.cf.taste.impl.recommender.*;
import org.apache.mahout.cf.taste.impl.similarity.*;
import org.apache.mahout.cf.taste.model.*;
import org.apache.mahout.cf.taste.neighborhood.*;
import org.apache.mahout.cf.taste.recommender.*;
import org.apache.mahout.cf.taste.similarity.*;
import java.io.*;
import java.util.*;

/**
 * Minimal user-based recommender: loads preferences from {@code intro.csv}
 * and prints one recommendation for user 1.
 */
class RecommenderIntro {

    public static void main(String[] args) throws Exception {
        // Build Mahout's data representation from a CSV file; a DataModel
        // holds the <user, item, rating> knowledge.
        DataModel model = new FileDataModel(new File("intro.csv"));

        // The first step of user-based CF is finding similar users, so we
        // define the user similarity metric and the neighbourhood parameters.
        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
        UserNeighborhood neighborhood = new NearestNUserNeighborhood(2, similarity, model);

        // Once the neighbourhood is defined, a plain user-based recommender
        // can be assembled.
        Recommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);

        // Use it: recommend 1 item to user 1.
        List<RecommendedItem> recommendations = recommender.recommend(1, 1);

        // Print the result; the original author reports
        // RecommendedItem[item:104, value:4.257081].
        for (RecommendedItem recommendation : recommendations) {
            System.out.println(recommendation);
        }
    }
}

// ===== 2. Evaluation =====
package mia.recommender.ch02;

import org.apache.mahout.cf.taste.impl.model.file.*;
import org.apache.mahout.cf.taste.impl.neighborhood.*;
import org.apache.mahout.cf.taste.impl.recommender.*;
import org.apache.mahout.cf.taste.impl.similarity.*;
import org.apache.mahout.cf.taste.model.*;
import org.apache.mahout.cf.taste.neighborhood.*;
import org.apache.mahout.cf.taste.recommender.*;
import org.apache.mahout.cf.taste.similarity.*;
import java.io.*;
import java.util.*;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates a user-based recommender with the average absolute difference
 * between predicted and actual preference values.
 *
 * @author wentingtu <wentingtu09 at Gmail dot com>
 */
public class RecommenderEvalu {

    public static void main(String[] args) throws IOException, TasteException {
        // Use a fixed test seed so that the split into training and test
        // data is the same on every run.
        RandomUtils.useTestSeed();
        DataModel model = new FileDataModel(new File("intro.csv"));

        // Build the evaluator; the metric is sum(|predicted - actual|) / count.
        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();

        // A RecommenderBuilder describes how the recommender under test is
        // constructed; buildRecommender is overridden with that recipe.
        RecommenderBuilder builder = new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel model) throws TasteException {
                UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
                UserNeighborhood neighborhood =
                        new NearestNUserNeighborhood(2, similarity, model);
                return new GenericUserBasedRecommender(model, neighborhood, similarity);
            }
        };

        // Run the evaluator. Arguments: the builder above, no DataModelBuilder,
        // the data model, the training/all ratio and the evaluated-data/all ratio.
        double score = evaluator.evaluate(builder, null, model, 0.7, 1.0);

        // The original author reports a result of 1.0, i.e. an average
        // absolute difference of 1.0.
        System.out.println(score);
    }
}

package mia.recommender.ch02;

import org.apache.mahout.cf.taste.impl.model.file.*;
import org.apache.mahout.cf.taste.impl.neighborhood.*;
import org.apache.mahout.cf.taste.impl.recommender.*;
import org.apache.mahout.cf.taste.impl.similarity.*;
import org.apache.mahout.cf.taste.model.*;
import org.apache.mahout.cf.taste.neighborhood.*;
import org.apache.mahout.cf.taste.recommender.*;
import org.apache.mahout.cf.taste.similarity.*;
import java.io.*;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.IRStatistics;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator;
import org.apache.mahout.cf.taste.impl.eval.GenericRecommenderIRStatsEvaluator;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates a user-based recommender by precision and recall at 2.
 *
 * @author Administrator
 */
public class RecommenderEvaluPrecisionRecall {

    public static void main(String[] args) throws IOException, TasteException {
        RandomUtils.useTestSeed();
        DataModel model = new FileDataModel(new File("intro.csv"));

        // Build the IR-statistics evaluator.
        RecommenderIRStatsEvaluator evaluator = new GenericRecommenderIRStatsEvaluator();

        RecommenderBuilder recommenderBuilder = new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel model) throws TasteException {
                UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
                UserNeighborhood neighborhood =
                        new NearestNUserNeighborhood(2, similarity, model);
                return new GenericUserBasedRecommender(model, neighborhood, similarity);
            }
        };

        // Run the evaluator with its parameters.
        // 2 means "precision and recall at 2": take the top-2 recommendations
        // and compute precision and recall on them.
        // Precision/recall need a definition of a "hit", i.e. of what counts
        // as a good recommendation. CHOOSE_THRESHOLD defines "good" through
        // the threshold mu + sigma: the user's average preference value plus
        // one standard deviation. An item whose real value is above that
        // threshold is "good".
        IRStatistics stats = evaluator.evaluate(
                recommenderBuilder,
                null,
                model,
                null,
                2,
                GenericRecommenderIRStatsEvaluator.CHOOSE_THRESHOLD,
                1.0);

        // The original author reports the output 0.75 and 1.0.
        System.out.println(stats.getPrecision());
        System.out.println(stats.getRecall());
    }
}

// ===== 3. Set preference =====
package mia.recommender.ch03;

import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;

/**
 * Shows how preferences are stored in and read from a PreferenceArray.
 *
 * @author Administrator
 */
public class SetPrefinPreferenceArray {

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        PreferenceArray user1Prefs = new GenericUserPreferenceArray(2);
        user1Prefs.setUserID(0, 1L);
        user1Prefs.setItemID(0, 101L);
        user1Prefs.setValue(0, 2.0f);
        user1Prefs.setItemID(1, 102L);
        user1Prefs.setValue(1, 3.0f);
        // Read the second preference back as a Preference object.
        Preference pref = user1Prefs.get(1);
    }
}

// ===== 4. User-based CF =====
package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates user-based collaborative filtering with a configurable
 * similarity metric and neighbourhood definition.
 *
 * @author Administrator
 */
public class UserBasedCF {

    /**
     * Builds a user-based recommender from the experiment settings below and
     * prints its average absolute difference score.
     *
     * @param model the preference data to evaluate on
     * @throws TasteException if building or evaluating the recommender fails
     */
    public static void recommenderModelEvaluation(DataModel model) throws TasteException {
        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
        RandomUtils.useTestSeed();
        RecommenderBuilder builder = new RecommenderBuilder() {
            // ============= experiment settings ===============
            // 1. Neighbourhood: K nearest neighbours (k) or threshold (threshold).
            // 2. Similarity: Euclidean, Pearson, Log-likelihood or Tanimoto.
            char similarityPattern = 'E'; // 'E' or 'P' or 'L' or 'T'
            char neighborhoodPattern = 'K'; // 'K' or 'T'
            int k = 2;
            double threshold = 0.5;

            @Override
            public Recommender buildRecommender(DataModel dm) throws TasteException {
                UserSimilarity similarity;
                // Each case must end in break: without it every choice falls
                // through to the last case and the setting is ignored.
                switch (similarityPattern) {
                    case 'E':
                        similarity = new EuclideanDistanceSimilarity(dm);
                        break;
                    case 'P':
                        similarity = new PearsonCorrelationSimilarity(dm);
                        break;
                    case 'L':
                        similarity = new LogLikelihoodSimilarity(dm);
                        break;
                    case 'T':
                        similarity = new TanimotoCoefficientSimilarity(dm);
                        break;
                    default:
                        throw new IllegalStateException(
                                "Unknown similarity pattern: " + similarityPattern);
                }

                UserNeighborhood neighborhood;
                switch (neighborhoodPattern) {
                    case 'K':
                        neighborhood = new NearestNUserNeighborhood(k, similarity, dm);
                        break;
                    case 'T':
                        neighborhood = new ThresholdUserNeighborhood(threshold, similarity, dm);
                        break;
                    default:
                        throw new IllegalStateException(
                                "Unknown neighborhood pattern: " + neighborhoodPattern);
                }
                return new GenericUserBasedRecommender(dm, neighborhood, similarity);
            }
        };
        double score = evaluator.evaluate(builder, null, model, 0.7, 1.0);
        System.out.println(score);
    }

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) throws IOException, TasteException {
        DataModel model = new FileDataModel(new File("data/dating/ratings.dat"));
        recommenderModelEvaluation(model);
    }
}

// ===== 5. Item-based CF =====
package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates item-based collaborative filtering with a configurable
 * similarity metric.
 *
 * @author Administrator
 */
public class ItemBasedCF {

    /**
     * Builds an item-based recommender from the experiment setting below and
     * prints its average absolute difference score.
     *
     * @param model the preference data to evaluate on
     * @throws TasteException if building or evaluating the recommender fails
     */
    public static void recommenderModelEvaluation(DataModel model) throws TasteException {
        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
        RandomUtils.useTestSeed();
        RecommenderBuilder builder = new RecommenderBuilder() {
            // ============= experiment settings ===============
            // Similarity: Euclidean, Pearson, Log-likelihood or Tanimoto.
            char similarityPattern = 'E'; // 'E' or 'P' or 'L' or 'T'

            @Override
            public Recommender buildRecommender(DataModel dm) throws TasteException {
                ItemSimilarity similarity;
                // Each case must end in break: without it every choice falls
                // through to the last case and the setting is ignored.
                switch (similarityPattern) {
                    case 'E':
                        similarity = new EuclideanDistanceSimilarity(dm);
                        break;
                    case 'P':
                        similarity = new PearsonCorrelationSimilarity(dm);
                        break;
                    case 'L':
                        similarity = new LogLikelihoodSimilarity(dm);
                        break;
                    case 'T':
                        similarity = new TanimotoCoefficientSimilarity(dm);
                        break;
                    default:
                        throw new IllegalStateException(
                                "Unknown similarity pattern: " + similarityPattern);
                }
                return new GenericItemBasedRecommender(dm, similarity);
            }
        };
        double score = evaluator.evaluate(builder, null, model, 0.7, 1.0);
        System.out.println(score);
    }

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) throws IOException, TasteException {
        DataModel model = new FileDataModel(new File("data/dating/ratings.dat"));
        recommenderModelEvaluation(model);
    }
}

// ===== 6. Slope one CF =====
package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.common.Weighting;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.MemoryDiffStorage;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates a slope-one recommender backed by an in-memory diff storage.
 *
 * @author Administrator
 */
public class SlopeOneCF {

    /**
     * Builds a weighted slope-one recommender and prints its average
     * absolute difference score.
     *
     * @param model the preference data to evaluate on
     * @throws TasteException if building or evaluating the recommender fails
     */
    public static void recommenderModelEvaluation(DataModel model) throws TasteException {
        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
        RandomUtils.useTestSeed();
        RecommenderBuilder builder = new RecommenderBuilder() {
            // Third argument handed to MemoryDiffStorage.
            long diffStorageNb = 100000;

            @Override
            public Recommender buildRecommender(DataModel dm) throws TasteException {
                DiffStorage diffStorage =
                        new MemoryDiffStorage(dm, Weighting.WEIGHTED, diffStorageNb);
                return new SlopeOneRecommender(
                        dm, Weighting.WEIGHTED, Weighting.WEIGHTED, diffStorage);
            }
        };
        // The original listing stopped after creating the builder, so nothing
        // was ever evaluated; run and report like the other CF listings.
        double score = evaluator.evaluate(builder, null, model, 0.7, 1.0);
        System.out.println(score);
    }

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) throws IOException, TasteException {
        DataModel model = new FileDataModel(new File("data/dating/ratings.dat"));
        recommenderModelEvaluation(model);
    }
}

// ===== 7. An example =====
package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
import org.apache.mahout.cf.taste.impl.model.PlusAnonymousUserDataModel;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;

/**
 * LibimsetiRecommender variant that can recommend for an anonymous user
 * described only by a temporary set of preferences.
 *
 * @author Administrator
 */
public class LibimsetiWithAnonymousRecommender extends LibimsetiRecommender {

    private final PlusAnonymousUserDataModel plusAnonymousModel;

    public LibimsetiWithAnonymousRecommender() throws TasteException, IOException {
        this((DataModel) new FileDataModel(new File("data/dating/ratings.dat")));
    }

    public LibimsetiWithAnonymousRecommender(DataModel model)
            throws TasteException, IOException {
        // Call the LibimsetiRecommender constructor with the wrapped model.
        super(new PlusAnonymousUserDataModel(model));
        // Keep a typed reference to the PlusAnonymousUserDataModel.
        plusAnonymousModel = (PlusAnonymousUserDataModel) getDataModel();
    }

    /**
     * Recommends items for an anonymous user.
     *
     * @param anonymousUserPrefs the anonymous user's ratings
     * @param topN number of recommendations requested
     * @return the recommendations for the anonymous user
     * @throws TasteException if the underlying recommender fails
     */
    public synchronized List<RecommendedItem> recommend(
            PreferenceArray anonymousUserPrefs, int topN) throws TasteException {
        // Add the anonymous user to the data through setTempPrefs; the user
        // is addressed by PlusAnonymousUserDataModel.TEMP_USER_ID.
        plusAnonymousModel.setTempPrefs(anonymousUserPrefs);
        try {
            // Delegate to the inherited recommend method, with TEMP_USER_ID
            // standing in for the user ID.
            return recommend(PlusAnonymousUserDataModel.TEMP_USER_ID, topN, null);
        } finally {
            // Detach the anonymous user from TEMP_USER_ID even when the
            // recommendation throws, so stale prefs never leak into later calls.
            plusAnonymousModel.clearTempPrefs();
        }
    }

    /**
     * Creates fake preference data for the current anonymous user.
     *
     * @return a three-entry preference array owned by TEMP_USER_ID
     */
    public PreferenceArray creatAnAnonymousPrefs() {
        PreferenceArray anonymousPrefs = new GenericUserPreferenceArray(3);
        anonymousPrefs.setUserID(0, PlusAnonymousUserDataModel.TEMP_USER_ID);
        // NOTE(review): all three entries use item ID 123L; distinct item IDs
        // were probably intended - confirm against the data set before changing.
        anonymousPrefs.setItemID(0, 123L);
        anonymousPrefs.setValue(0, 1.0f);
        anonymousPrefs.setItemID(1, 123L);
        anonymousPrefs.setValue(1, 3.0f);
        anonymousPrefs.setItemID(2, 123L);
        anonymousPrefs.setValue(2, 2.0f);
        return anonymousPrefs;
    }

    public static void main(String[] args) throws Exception {
        LibimsetiWithAnonymousRecommender recommender = new LibimsetiWithAnonymousRecommender();
        List<RecommendedItem> recommendations =
                recommender.recommend(recommender.creatAnAnonymousPrefs(), 10);
        System.out.println(recommendations);
    }
}

package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

/**
 * Recommender for the dating data set: a plain user-based recommender whose
 * results are rescored with gender information read from {@code gender.dat}.
 *
 * @author Administrator
 */
public class LibimsetiRecommender implements Recommender {

    private final Recommender libimsetiRecommender;
    private final DataModel model;
    private final FastIDSet men;
    private final FastIDSet women;

    // In general a custom recommender takes a DataModel plus extra knowledge
    // as input; this convenience constructor supplies the default data file.
    public LibimsetiRecommender() throws TasteException, IOException {
        this((DataModel) new FileDataModel(new File("data/dating/ratings.dat")));
    }

    // Sets up the basic pure-CF recommender (libimsetiRecommender) and loads
    // the men/women ID sets used for rescoring.
    public LibimsetiRecommender(DataModel model) throws TasteException, IOException {
        UserSimilarity similarity = new EuclideanDistanceSimilarity(model);
        UserNeighborhood neighborhood = new NearestNUserNeighborhood(2, similarity, model);
        libimsetiRecommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
        this.model = model;
        FastIDSet[] menWomen = GenderRescorer.generateMenWomen(new File(("gender.dat")));
        men = menWomen[0];
        women = menWomen[1];
    }

    // Recommends through libimsetiRecommender with a GenderRescorer built
    // from the gender information.
    @Override
    public List<RecommendedItem> recommend(long userID, int topN) throws TasteException {
        IDRescorer rescorer = new GenderRescorer(men, women, userID, model);
        return libimsetiRecommender.recommend(userID, topN, rescorer);
    }

    // Recommends through libimsetiRecommender with a caller-supplied IDRescorer.
    @Override
    public List<RecommendedItem> recommend(long userID, int topN, IDRescorer idr)
            throws TasteException {
        return libimsetiRecommender.recommend(userID, topN, idr);
    }

    // Note: the estimate produced by libimsetiRecommender is passed through
    // GenderRescorer.rescore before it is returned.
    @Override
    public float estimatePreference(long userID, long itemID) throws TasteException {
        IDRescorer rescorer = new GenderRescorer(men, women, userID, model);
        return (float) rescorer.rescore(
                itemID, libimsetiRecommender.estimatePreference(userID, itemID));
    }

    // Delegates directly to libimsetiRecommender.setPreference.
    @Override
    public void setPreference(long userID, long itemID, float value) throws TasteException {
        libimsetiRecommender.setPreference(userID, itemID, value);
    }

    // Delegates directly to libimsetiRecommender.removePreference.
    @Override
    public void removePreference(long userID, long itemID) throws TasteException {
        libimsetiRecommender.removePreference(userID, itemID);
    }

    // Delegates directly to libimsetiRecommender.getDataModel.
    @Override
    public DataModel getDataModel() {
        return libimsetiRecommender.getDataModel();
    }

    // Delegates directly to libimsetiRecommender.refresh.
    @Override
    public void refresh(Collection<Refreshable> alreadyRefreshed) {
        libimsetiRecommender.refresh(alreadyRefreshed);
    }
}

package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.common.iterator.FileLineIterable;

/**
 * Rescorer that filters out profiles of the gender a user rates less often.
 *
 * @author Administrator
 */
public class GenderRescorer implements IDRescorer {

    private final FastIDSet men; // all male profile IDs for the current data model
    private final FastIDSet women; // all female profile IDs for the current data model
    private final FastIDSet usersRateMoreMen; // users already found to rate more men
    private final FastIDSet usersRateLessMen; // users already found not to rate more men
    private final boolean likeMen; // whether the user (given by userID) rates more men

    public GenderRescorer(FastIDSet men, FastIDSet women, long userID, DataModel model)
            throws TasteException {
        this.men = men;
        this.women = women;
        this.usersRateMoreMen = new FastIDSet();
        this.usersRateLessMen = new FastIDSet();
        this.likeMen = ratesMoreMen(userID, model);
    }

    /**
     * Builds the men and women ID sets from a gender file whose lines have
     * the form {@code profileID,gender}; gender 'U' is skipped, 'M' goes to
     * the men set and anything else to the women set.
     *
     * @param genderFile the file to read
     * @return a two-element array: index 0 is the men set, index 1 the women set
     * @throws IOException if the file cannot be read
     */
    public static FastIDSet[] generateMenWomen(File genderFile) throws IOException {
        FastIDSet men = new FastIDSet(50000);
        FastIDSet women = new FastIDSet(50000);
        for (String line : new FileLineIterable(genderFile)) {
            int comma = line.indexOf(',');
            // Skip blank or malformed lines instead of failing with a
            // StringIndexOutOfBoundsException on charAt/substring.
            if (comma < 0 || comma + 1 >= line.length()) {
                continue;
            }
            char gender = line.charAt(comma + 1);
            if (gender == 'U') {
                continue;
            }
            long profileID = Long.parseLong(line.substring(0, comma));
            if (gender == 'M') {
                men.add(profileID);
            } else {
                women.add(profileID);
            }
        }
        men.rehash();
        women.rehash();
        return new FastIDSet[]{men, women};
    }

    // Decides whether the user prefers men by counting the genders of the
    // profiles he or she has rated.
    private boolean ratesMoreMen(long userID, DataModel model) throws TasteException {
        if (usersRateMoreMen.contains(userID)) {
            return true;
        }
        if (usersRateLessMen.contains(userID)) {
            return false;
        }
        PreferenceArray prefs = model.getPreferencesFromUser(userID);
        int menCount = 0;
        int womenCount = 0;
        for (int i = 0; i < prefs.length(); i++) {
            long profileID = prefs.get(i).getItemID();
            if (men.contains(profileID)) {
                menCount++;
            } else if (women.contains(profileID)) {
                womenCount++;
            }
        }
        boolean ratesMoreMen = menCount > womenCount;
        if (ratesMoreMen) {
            usersRateMoreMen.add(userID);
        } else {
            usersRateLessMen.add(userID);
        }
        return ratesMoreMen;
    }

    // Returns NaN for profiles that should be filtered and the original
    // score otherwise.
    @Override
    public double rescore(long profileID, double originalScore) {
        return isFiltered(profileID) ? Double.NaN : originalScore;
    }

    // A profile is filtered when the user prefers men and the profile is a
    // woman, and vice versa.
    @Override
    public boolean isFiltered(long profileID) {
        return likeMen ? women.contains(profileID) : men.contains(profileID);
    }
}

package mia.recommender.ch05;

import java.util.Collection;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;

/**
 * Item similarity based purely on gender: 1.0 for two profiles of the same
 * gender, -1.0 for different genders and 0.0 when either gender is unknown.
 *
 * @author Administrator
 */
public class GenderItemSimilarity implements ItemSimilarity {

    private final FastIDSet men;
    private final FastIDSet women;

    public GenderItemSimilarity(FastIDSet men, FastIDSet women) {
        this.men = men;
        this.women = women;
    }

    @Override
    public double itemSimilarity(long profileID1, long profileID2) throws TasteException {
        Boolean profile1IsMan = isMan(profileID1);
        if (profile1IsMan == null) {
            return 0.0;
        }
        Boolean profile2IsMan = isMan(profileID2);
        if (profile2IsMan == null) {
            return 0.0;
        }
        // Compare boxed values with equals rather than by reference.
        return profile1IsMan.equals(profile2IsMan) ? 1.0 : -1.0;
    }

    // Returns TRUE for a known man, FALSE for a known woman, null when the
    // profile is in neither set.
    private Boolean isMan(long profileID) {
        if (men.contains(profileID)) {
            return Boolean.TRUE;
        }
        if (women.contains(profileID)) {
            return Boolean.FALSE;
        }
        return null;
    }

    @Override
    public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
        double[] result = new double[itemID2s.length];
        for (int i = 0; i < itemID2s.length; i++) {
            result[i] = itemSimilarity(itemID1, itemID2s[i]);
        }
        return result;
    }

    @Override
    public long[] allSimilarItemIDs(long l) throws TasteException {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    @Override
    public void refresh(Collection<Refreshable> clctn) {
        // Nothing to refresh: the gender sets are fixed at construction.
        // Throwing here would break any caller that propagates a refresh.
    }
}
新聞熱點
疑難解答
圖片精選
網友關注