国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 學院 > 開發設計 > 正文

基于用戶的協同過濾算法

2019-11-14 17:28:06
字體:
來源:轉載
供稿:網友

基于用戶的協同過濾算法-參考《推薦系統實踐》一書,作者:項亮

  1 import random  2 import math  3 class UserBasedCF:  4     def __init__(self,datafile = None):  5         self.datafile = datafile  6         self.readData()  7         self.splitData(3,47)  8     def readData(self,datafile = None):  9         """ 10         read the data from the data file which is a data set 11         """ 12         self.datafile = datafile or self.datafile 13         self.data = [] 14         for line in open(self.datafile): 15             userid,itemid,record,_ = line.split() 16             self.data.append((userid,itemid,int(record))) 17     def splitData(self,k,seed,data=None,M = 8): 18         """ 19         split the data set 20         testdata is a test data set 21         traindata is a train set  22         test data set / train data set is 1:M-1 23         """ 24         self.testdata = {} 25         self.traindata = {} 26         data = data or self.data 27         random.seed(seed) 28         for user,item, record in self.data: 29             if random.randint(0,M) == k: 30                 self.testdata.setdefault(user,{}) 31                 self.testdata[user][item] = record  32             else: 33                 self.traindata.setdefault(user,{}) 34                 self.traindata[user][item] = record 35     def userSimilarity(self,train = None): 36         """ 37         One method of getting user similarity matrix 38         """ 39         train = train or self.traindata 40         self.userSim = dict() 41         for u in train.keys(): 42             for v in train.keys(): 43                 if u == v: 44                     continue 45                 self.userSim.setdefault(u,{}) 46                 self.userSim[u][v] = len(set(train[u].keys()) & set(train[v].keys())) 47                 self.userSim[u][v] /=math.sqrt(len(train[u]) * len(train[v]) *1.0) 48     def userSimilarityBest(self,train = None): 49         """ 50         the other method of getting user similarity which is better than above 51         you can get the 
method on page 46 52         In this experiment,we use this method 53         """ 54         train = train or self.traindata 55         self.userSimBest = dict() 56         item_users = dict() 57         for u,item in train.items(): 58             for i in item.keys(): 59                 item_users.setdefault(i,set()) 60                 item_users[i].add(u) 61         user_item_count = dict() 62         count = dict() 63         for item,users in item_users.items(): 64             for u in users: 65                 user_item_count.setdefault(u,0) 66                 user_item_count[u] += 1 67                 for v in users: 68                     if u == v:continue 69                     count.setdefault(u,{}) 70                     count[u].setdefault(v,0) 71                     count[u][v] += 1 72         for u ,related_users in count.items(): 73             self.userSimBest.setdefault(u,dict()) 74             for v, cuv in related_users.items(): 75                 self.userSimBest[u][v] = cuv / math.sqrt(user_item_count[u] * user_item_count[v] * 1.0) 76   77     def recommend(self,user,train = None,k = 8,nitem = 40): 78         train = train or self.traindata 79         rank = dict() 80         interacted_items = train.get(user,{}) 81         for v ,wuv in sorted(self.userSimBest[user].items(),key = lambda x : x[1],reverse = True)[0:k]: 82             for i , rvi in train[v].items(): 83                 if i in interacted_items: 84                     continue 85                 rank.setdefault(i,0) 86                 rank[i] += wuv 87         return dict(sorted(rank.items(),key = lambda x :x[1],reverse = True)[0:nitem]) 88     def recallAndPRecision(self,train = None,test = None,k = 8,nitem = 10): 89         """ 90         Get the recall and precision, the method you want to know is listed  91         in the page 43 92         """ 93         train  = train or self.traindata 94         test = test or self.testdata 95         hit = 0 96         recall = 0 97        
 precision = 0 98         for user in train.keys(): 99             tu = test.get(user,{})100             rank = self.recommend(user, train = train,k = k,nitem = nitem) 101             for item,_ in rank.items():102                 if item in tu:103                     hit += 1104             recall += len(tu)105             precision += nitem106         return (hit / (recall * 1.0),hit / (precision * 1.0))107     def coverage(self,train = None,test = None,k = 8,nitem = 10):108         train = train or self.traindata109         test = test or self.testdata110         recommend_items = set()111         all_items  = set()112         for user in train.keys():113             for item in train[user].keys():114                 all_items.add(item)115             rank = self.recommend(user, train, k = k, nitem = nitem)116             for item,_ in rank.items():117                 recommend_items.add(item)118         return len(recommend_items) / (len(all_items) * 1.0)119     def popularity(self,train = None,test = None,k = 8,nitem = 10):120         """121         Get the popularity122         the algorithm on page 44123         """124         train = train or self.traindata125         test = test or self.testdata126         item_popularity = dict()127         for user ,items in train.items():128             for item in items.keys():129                 item_popularity.setdefault(item,0)130                 item_popularity[item] += 1131         ret = 0132         n = 0133         for user in train.keys():134             rank = self.recommend(user, train, k = k, nitem = nitem)135             for item ,_ in rank.items():136                 ret += math.log(1+item_popularity[item])137                 n += 1138         return ret / (n * 1.0)139      140 def testRecommend():141     ubcf = UserBasedCF('u.data')142     ubcf.readData()143     ubcf.splitData(4,100)144     ubcf.userSimilarity()145     user = "345"146     rank = ubcf.recommend(user,k = 3)147     for i,rvi in 
rank.items():148          149         items = ubcf.testdata.get(user,{})150         record = items.get(i,0)151         print "%5s: %.4f--%.4f" %(i,rvi,record)152 def testUserBasedCF():153     cf  =  UserBasedCF('u.data')154     cf.userSimilarityBest()155     print "%3s%20s%20s%20s%20s" % ('K',"recall",'precision','coverage','popularity')156     for k in [5,10,20,40,80,160]:157         recall,precision = cf.recallAndPrecision( k = k)158         coverage = cf.coverage(k = k)159         popularity = cf.popularity(k = k)160         print "%3d%19.3f%%%19.3f%%%19.3f%%%20.3f" % (k,recall * 100,precision * 100,coverage * 100,popularity)161          162 if __name__ == "__main__":163     testUserBasedCF()


發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 喀喇沁旗| 沛县| 布尔津县| 泗水县| 固始县| 蚌埠市| 濮阳县| 祁东县| 牟定县| 正阳县| 额济纳旗| 苗栗县| 许昌县| 华亭县| 宁国市| 开平市| 锦屏县| 天祝| 榕江县| 平塘县| 思茅市| 旅游| 包头市| 调兵山市| 巫山县| 许昌县| 临海市| 河北省| 额尔古纳市| 乌兰察布市| 裕民县| 扎囊县| 丰台区| 云霄县| 黔东| 阳曲县| 洞口县| 长春市| 商南县| 小金县| 安仁县|