首页 > 编程 > C++ > 正文

C++单刷《机器学习实战》——kNN算法完整代码

2019-11-06 07:13:44
字体:
来源:转载
供稿:网友
// k-nearest-neighbours (kNN) classifier, following the dating example from
// "Machine Learning in Action": each sample has three numeric features
// (fly, game, icecream) and a text label.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>

// Toy 2-D training set and its labels; only used by the commented-out demo
// at the top of main().
double group[4][2] = { { 1.0, 1.1 }, { 1.0, 1.0 }, { 0, 0 }, { 0, 0.1 } };
std::string labels[4] = { "A", "A", "B", "B" };

// One record of the dating data set: three numeric features plus its label.
struct man
{
    double fly = 0.0;
    double game = 0.0;
    double icecream = 0.0;
    std::string eval;
};

namespace
{

// Number of numeric features stored per sample in the flattened matrices.
constexpr int kFeatureCount = 3;

// Divides `value` by `range`; a zero range (feature constant over the whole
// data set) leaves the value unscaled instead of producing inf/NaN.
double scaleByRange(double value, double range)
{
    return range != 0.0 ? value / range : value;
}

} // namespace

// Partially sorts `data` so that its first min(k, n) elements are the
// smallest values in ascending order.
//   data: array to reorder in place
//   n:    number of elements in `data`
//   k:    how many leading elements must end up sorted
// The order of the remaining elements is unspecified.
void sort(double* data, int n, int k)
{
    if (data == nullptr || n <= 0 || k <= 0)
        return;

    const int sorted_count = std::min(k, n);
    std::partial_sort(data, data + sorted_count, data + n);
}

// Computes the permutation that sorts `data` in ascending order.
//   data:          values to rank (not modified)
//   sorted_index2: output, n entries; sorted_index2[r] is the index in `data`
//                  of the r-th smallest value
//   n:             number of elements
// Equal values keep their original relative order.
void sortIndex(const double* data, int* sorted_index2, int n)
{
    if (data == nullptr || sorted_index2 == nullptr || n <= 0)
        return;

    std::iota(sorted_index2, sorted_index2 + n, 0);
    std::stable_sort(sorted_index2, sorted_index2 + n,
                     [data](int lhs, int rhs) { return data[lhs] < data[rhs]; });
}

// Reads whitespace-separated records "fly game icecream label", one per line.
// Lines that do not contain all four fields (blank or malformed lines) are
// skipped. Returns an empty vector when the file cannot be opened.
std::vector<man> readFile(const char* file_name)
{
    std::vector<man> data_list;
    if (file_name == nullptr)
        return data_list;

    std::ifstream file(file_name);
    if (!file.is_open())
        return data_list;

    std::string line;
    while (std::getline(file, line))
    {
        std::istringstream record(line);
        man sample;
        if (record >> sample.fly >> sample.game >> sample.icecream >> sample.eval)
            data_list.push_back(sample);
    }
    return data_list;
}

// Flattens the records into a row-major matrix and scales every feature by
// its range (max - min).
//   data_list:  input records
//   dataSet:    output, data_list.size() * 3 doubles
//   labels:     output, data_list.size() labels
//   length_fly, length_game, length_icecream:
//               output, max - min of each feature (0 for an empty input)
// The minimum is not subtracted: classify() only uses differences between
// samples, so a constant offset per feature would cancel out anyway.
void data2matrix(const std::vector<man>& data_list, double* dataSet, std::string labels[],
                 double& length_fly, double& length_game, double& length_icecream)
{
    length_fly = 0.0;
    length_game = 0.0;
    length_icecream = 0.0;
    if (data_list.empty())
        return;

    double min_fly = data_list.front().fly;
    double max_fly = min_fly;
    double min_game = data_list.front().game;
    double max_game = min_game;
    double min_icecream = data_list.front().icecream;
    double max_icecream = min_icecream;
    for (const man& sample : data_list)
    {
        min_fly = std::min(min_fly, sample.fly);
        max_fly = std::max(max_fly, sample.fly);
        min_game = std::min(min_game, sample.game);
        max_game = std::max(max_game, sample.game);
        min_icecream = std::min(min_icecream, sample.icecream);
        max_icecream = std::max(max_icecream, sample.icecream);
    }

    length_fly = max_fly - min_fly;
    length_game = max_game - min_game;
    length_icecream = max_icecream - min_icecream;

    std::size_t row = 0;
    for (const man& sample : data_list)
    {
        double* features = dataSet + row * kFeatureCount;
        features[0] = scaleByRange(sample.fly, length_fly);
        features[1] = scaleByRange(sample.game, length_game);
        features[2] = scaleByRange(sample.icecream, length_icecream);
        labels[row] = sample.eval;
        ++row;
    }
}

// Scales a single record with the ranges produced by data2matrix() and
// writes its three features to data[0..2].
void data2matrix2(const man& person, double* data, double length_fly, double length_game,
                  double length_icecream)
{
    data[0] = scaleByRange(person.fly, length_fly);
    data[1] = scaleByRange(person.game, length_game);
    data[2] = scaleByRange(person.icecream, length_icecream);
}

// kNN classification.
//   inX:         features of the sample to classify (`size` values)
//   dataSet:     row-major training matrix, dataSetSize rows of `size` values
//   labels:      label of each training row
//   k:           number of nearest neighbours that vote (clamped to dataSetSize)
//   size:        number of features per sample
//   dataSetSize: number of training rows
// Returns the label with the most votes among the k nearest rows (Euclidean
// distance). On a tie the lexicographically smallest label wins. Returns an
// empty string when there is nothing to vote on (k <= 0 or dataSetSize <= 0).
std::string classify(const double* inX, const double* dataSet, const std::string labels[],
                     int k, int size, int dataSetSize)
{
    if (k <= 0 || dataSetSize <= 0)
        return std::string();

    const std::size_t feature_count = static_cast<std::size_t>(std::max(size, 0));

    // Euclidean distance from inX to every training row.
    std::vector<double> distances(static_cast<std::size_t>(dataSetSize));
    for (std::size_t row = 0; row < distances.size(); ++row)
    {
        const double* sample = dataSet + row * feature_count;
        double squared = 0.0;
        for (std::size_t col = 0; col < feature_count; ++col)
        {
            const double delta = inX[col] - sample[col];
            squared += delta * delta;
        }
        distances[row] = std::sqrt(squared);
    }

    // Training-row indices ordered from nearest to farthest.
    std::vector<int> nearest(distances.size());
    sortIndex(distances.data(), nearest.data(), dataSetSize);

    // Count the labels of the k nearest rows.
    const int voters = std::min(k, dataSetSize);
    std::map<std::string, int> votes;
    for (int i = 0; i < voters; ++i)
        ++votes[labels[nearest[i]]];

    // Strict comparison keeps the first (smallest) label on a tie.
    auto winner = votes.begin();
    for (auto it = votes.begin(); it != votes.end(); ++it)
    {
        if (it->second > winner->second)
            winner = it;
    }
    return winner->first;
}

int main()
{
    /* Interactive demo on the toy 2-D data set:
    std::string result;
    std::string line;
    double point[2];
    std::cout << "please input the coodinate of the pixel" << std::endl;
    while (std::getline(std::cin, line))
    {
        std::istringstream record(line);
        record >> point[0];
        record >> point[1];
        result = classify(point, &group[0][0], labels, 3, 2, 4);
        std::cout << "the result is: " << result << std::endl;
        std::cout << "please input the coodinate of the pixel" << std::endl;
    }
    */

    const std::vector<man> data_list = readFile("datingTestSet.txt");
    if (data_list.empty())
    {
        std::cerr << "No usable samples could be read from datingTestSet.txt" << std::endl;
        return 1;
    }

    const int dataSetSize = static_cast<int>(data_list.size());
    std::vector<double> dataSet(data_list.size() * kFeatureCount);
    std::vector<std::string> sample_labels(data_list.size());
    double length_fly = 0;
    double length_game = 0;
    double length_icecream = 0;
    data2matrix(data_list, dataSet.data(), sample_labels.data(), length_fly, length_game,
                length_icecream);

    // Hold-out test: the first `ratio` of the samples are classified against
    // the remaining ones.
    const double ratio = 0.1;
    const int num_test = static_cast<int>(dataSetSize * ratio);
    int error_count = 0;
    for (int i = 0; i < num_test; i++)
    {
        const std::string result =
            classify(dataSet.data() + i * kFeatureCount, dataSet.data() + num_test * kFeatureCount,
                     sample_labels.data() + num_test, 3, kFeatureCount, dataSetSize - num_test);
        std::cout << i << "times  " << "The classifier came back with: " << result
                  << ",the real answer is " << sample_labels[i] << '\n';
        if (result != sample_labels[i])
        {
            ++error_count;
        }
    }
    // With fewer than 10 samples there is no test set; report 0 instead of 0/0.
    const double err_rate =
        num_test > 0 ? static_cast<double>(error_count) / static_cast<double>(num_test) : 0.0;
    std::cout << "The total error rate is: " << err_rate << std::endl;

    // Interactive classification against the whole data set.
    const char* const prompt = "Please input the time of fly,game and the consume of icrcreame";
    std::cout << prompt << std::endl;
    std::string line;
    while (std::getline(std::cin, line))
    {
        std::istringstream record(line);
        man person;
        if (record >> person.fly >> person.game >> person.icecream)
        {
            double features[kFeatureCount];
            data2matrix2(person, features, length_fly, length_game, length_icecream);
            person.eval = classify(features, dataSet.data(), sample_labels.data(), 3,
                                   kFeatureCount, dataSetSize);
            std::cout << person.eval << std::endl;
        }
        else
        {
            std::cerr << "Expected three numbers: fly game icecream" << std::endl;
        }
        // Ask for the next sample only after the current one has been answered.
        std::cout << prompt << std::endl;
    }
    return 0;
}
发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表

图片精选