

Machine learning with Python: a k-nearest-neighbor classifier and 3D plotting

The script below reads the tab-separated file datingTestSet2.txt, min-max normalizes the three features, classifies a new person entered at the keyboard with a hand-written k-nearest-neighbor function, and draws the three classes (plus the new point) in a 3D scatter plot.

# -*- coding: utf-8 -*-
from numpy import *
import operator
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D


# Read the data file into a feature matrix and a label vector
def file2matrix(filename):
    fr = open(filename)                       # open the file
    arrayOLines = fr.readlines()              # read the file into a list of strings, one string per line
    fr.close()
    numberOfLines = len(arrayOLines)          # number of lines in the file
    returnMat = zeros((numberOfLines, 3))     # numpy matrix with numberOfLines rows and 3 columns
    classLabelVector = []                     # list of class labels
    index = 0
    for line in arrayOLines:
        line = line.strip()                   # strip whitespace from both ends of the line
        listFromLine = line.split('\t')       # split the line on tab characters
        returnMat[index, :] = listFromLine[0:3]           # the first three columns are the features
        classLabelVector.append(int(listFromLine[-1]))    # the last column is the class label
        index += 1
    return returnMat, classLabelVector        # return the feature matrix and the label vector


# Normalize each feature column to the range [0, 1]
def autoNorm(dataset):
    minVals = dataset.min(0)                  # column-wise minimum
    maxVals = dataset.max(0)                  # column-wise maximum
    ranges = maxVals - minVals
    m = dataset.shape[0]                      # m is the number of rows, i.e. the number of samples
    normDataSet = dataset - tile(minVals, (m, 1))   # tile() repeats minVals into an m-by-3 matrix, the same shape as dataset
    normDataSet = normDataSet / tile(ranges, (m, 1))
    return normDataSet, ranges, minVals       # return the normalized matrix, the per-column ranges and minima


# k-nearest-neighbor classification
def classify(inX, dataSet, labels, k):
    dataSetSize = dataSet.shape[0]            # number of training samples
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet   # difference between the test point and every training sample
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)       # sum over each row
    distances = sqDistances ** 0.5            # Euclidean distance to every training sample
    sortedDistIndicies = distances.argsort()  # indices of the training samples sorted by distance, closest first
    classCount = {}                           # vote counter: label -> number of votes
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]                   # label of the i-th closest sample
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1   # get() returns the current count, or 0 if unseen
    # sort the (label, count) pairs by count, in descending order
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]             # return the most frequent label among the k nearest neighbors


# Hold out the first 10% of the data and report the error rate
def datingClassTest():
    hoRatio = 0.10
    datingDataMat, datingLabels = file2matrix(r'F:/ML_use/datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify(normMat[i, :], normMat[numTestVecs:m, :], datingLabels[numTestVecs:m], 3)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))


# Classify a new person from keyboard input and show the data in a 3D scatter plot
def classifyPerson():
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent flier miles earned per year?"))
    iceCream = float(input("liters of ice cream consumed per year?"))
    datingDataMat, datingLabels = file2matrix(r'F:/ML_use/datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    inArr = array([ffMiles, percentTats, iceCream])
    classifierResult = classify((inArr - minVals) / ranges, normMat, datingLabels, 3)
    print("you will probably like this person:", resultList[classifierResult - 1])

    # split the samples by class so each class gets its own color and marker
    dataArr = array(datingDataMat)
    n = shape(dataArr)[0]
    xcord1 = []; ycord1 = []; zcord1 = []
    xcord2 = []; ycord2 = []; zcord2 = []
    xcord3 = []; ycord3 = []; zcord3 = []
    for i in range(n):
        if int(datingLabels[i]) == 1:
            xcord1.append(dataArr[i, 0]); ycord1.append(dataArr[i, 1]); zcord1.append(dataArr[i, 2])
        elif int(datingLabels[i]) == 2:
            xcord2.append(dataArr[i, 0]); ycord2.append(dataArr[i, 1]); zcord2.append(dataArr[i, 2])
        elif int(datingLabels[i]) == 3:
            xcord3.append(dataArr[i, 0]); ycord3.append(dataArr[i, 1]); zcord3.append(dataArr[i, 2])

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_title('KNN')
    type1 = ax.scatter(xcord1, ycord1, zcord1, s=30, c='red', marker='s')
    type2 = ax.scatter(xcord2, ycord2, zcord2, s=30, c='green', marker='o')
    type3 = ax.scatter(xcord3, ycord3, zcord3, s=30, c='b', marker='+')
    ax.scatter(inArr[0], inArr[1], inArr[2], s=100, c='k', marker='8')   # the newly entered person
    plt.figtext(0.02, 0.92, 'class1: Did Not Like', color='red')
    plt.figtext(0.02, 0.90, 'class2: Liked in Small Doses', color='green')
    plt.figtext(0.02, 0.88, 'class3: Liked in Large Doses', color='b')
    # axis labels follow the column order of the data file: miles, game time, ice cream
    ax.set_xlabel('frequent flier miles earned per year')
    ax.set_ylabel('percentage of time spent playing video games')
    ax.set_zlabel('liters of ice cream consumed per year')
    plt.show()


classifyPerson()
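
To try the classifier in isolation, without the data file or keyboard input, classify() can be called on a few made-up points. The snippet below is only an illustrative sketch: the six sample points, their labels, and the query point are invented here and are not part of the original data. It assumes the definitions above are already loaded, so run it in place of the final classifyPerson() call.

# Tiny self-contained check of classify() on hypothetical data:
# three samples near (0,0,0) labeled 1 and three near (1,1,1) labeled 2.
group = array([[0.0, 0.1, 0.0], [0.1, 0.0, 0.1], [0.0, 0.0, 0.2],
               [1.0, 0.9, 1.0], [0.9, 1.0, 0.9], [1.0, 1.0, 0.8]])
labels = [1, 1, 1, 2, 2, 2]
# the query lies close to the second cluster, so with k=3 the vote should return class 2
print(classify(array([0.9, 0.9, 1.0]), group, labels, 3))   # expected output: 2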