KNN
KNN是一种有监督机器学习算法,算法假定相似的事物彼此接近,其中,用‘距离’来表征事物的相似性。
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import math
import numpy as np
def KNN(data, quary, k, distance_fn, choice_fn):
    """Predict a value for ``quary`` from its ``k`` nearest rows in ``data``.

    Every row of ``data`` is treated as ``[feature_1, ..., feature_n, label]``:
    all elements except the last are handed to ``distance_fn`` together with
    ``quary``, and the last element is the row's label.

    Args:
        data: Sequence of rows; each row must support slicing and indexing.
        quary: The query point (features only), compared against ``row[:-1]``.
        k: Number of nearest rows to keep.
        distance_fn: Callable ``(features, quary) -> distance``.
        choice_fn: Callable that reduces the list of the selected rows'
            labels to the final prediction.

    Returns:
        A tuple ``(neighbors, prediction)``. ``neighbors`` is the list of the
        ``k`` smallest ``(distance, row_index)`` pairs in ascending order and
        ``prediction`` is ``choice_fn`` applied to those rows' labels.
    """
    distances_and_indices = [
        (distance_fn(example[:-1], quary), index)
        for index, example in enumerate(data)
    ]
    # Tuples compare by distance first and row index second, so equal
    # distances are resolved in favour of the earlier row.
    k_nearest_distance_and_index = sorted(distances_and_indices)[:k]
    # The label is the LAST column, mirroring the ``example[:-1]`` feature
    # slice above; a fixed index of 1 is only right for single-feature rows.
    k_nearest_labels = [
        data[index][-1] for _, index in k_nearest_distance_and_index
    ]
    return k_nearest_distance_and_index, choice_fn(k_nearest_labels)
def mean(labels):
    """Return the arithmetic mean of ``labels``.

    Used as the ``choice_fn`` that averages the labels of the selected
    neighbours into one prediction.
    """
    total = sum(labels)
    count = len(labels)
    return total / count
def euclidean_distance(example, quary):
    """Return the Euclidean distance between ``example`` and ``quary``.

    The two sequences are compared position by position over the length of
    ``example``; ``quary`` is indexed at each of those positions.

    NOTE: ``math.pow`` and ``math.sqrt`` work on scalar values. If the
    elements were arrays/tensors instead of single numbers, the squared
    differences would have to be summed with NumPy (``np.sum`` over the
    appropriate axis) and the root taken with ``np.sqrt``.
    """
    squared_total = 0
    for position, coordinate in enumerate(example):
        difference = coordinate - quary[position]
        squared_total += math.pow(difference, 2)
    return math.sqrt(squared_total)
def main():
    """Run a small KNN regression demo and print the prediction.

    Builds ten two-column rows, queries them with the single value 60 using
    k = 2, Euclidean distance and the mean as the reducer, then prints the
    resulting prediction.
    """
    samples = [
        [65.75, 112.99],
        [71.52, 136.49],
        [69.40, 153.03],
        [68.22, 142.34],
        [67.79, 144.30],
        [68.70, 123.30],
        [69.80, 141.49],
        [70.01, 136.46],
        [67.90, 112.37],
        [66.49, 127.45],
    ]
    query = [60]
    # Only the prediction is printed; the neighbour list is not needed here.
    _, prediction = KNN(
        samples,
        query,
        k=2,
        distance_fn=euclidean_distance,
        choice_fn=mean,
    )
    print(prediction)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
如果 k=1,预测完全取决于最近的一个样本,容易受噪声影响,预测误差往往很大;而增大 k 的值后,由多个近邻共同投票(或取平均),最终预测会变得更稳定。
