1、机器学习课内实验报告(1) ID3算法实现决策树2015 - 2016学年 第 2 学期专业:智能科学与技术班级:智能1301班学号:06133029姓名:张争辉一、 实验目的:理解ID3算法的基本原理,并且编程实现。二、 实验要求:使用C/C++/MATLAB实现ID3算法。输入:若干行,每行 5 个字符串,表示Outlook Temperature Humidity Wind Play ball如上表。输出:决策树。实验结果如下:输入: Sunny Hot High Weak No Sunny Hot High Strong No Overcast Hot High Weak Yes Rain
2、 Mild High Weak Yes Rain Cool Normal Weak Yes Rain Cool Normal Strong No Overcast Cool Normal Strong Yes Sunny Mild High Weak No Sunny Cool Normal Weak Yes Rain Mild Normal Weak Yes Sunny Mild Normal Strong Yes Overcast Mild High Strong Yes Overcast Hot Normal Weak Yes Rain Mild High Strong No输出:Out
3、look Rain Wind Strong No Weak Yes Overcast Yes Sunny Humidity Normal Yes High No 三、 具体实现:实现算法如下:#include #include #include #include using namespace std;#define ROW 14#define COL 5#define log2 0.69314718055typedef struct TNode char data15; char weight15; TNode * firstchild,*nextsibling;*tree;typedef
4、struct LNode char OutLook15; char Temperature15; char Humidity15; char Wind15; char PlayTennis5; LNode *next;*link;typedef struct AttrNode char attributes15;/属性 int attr_Num;/属性的个数 AttrNode *next;*Attributes;char * ExamplesROWCOL = /OverCast,Cool,High,Strong,No, /Rain,Hot,Normal,Strong,Yes, Sunny,Ho
5、t,High,Weak,No, Sunny,Hot,High,Strong,No, OverCast,Hot,High,Weak,Yes, Rain,Mild,High,Weak,Yes, Rain,Cool,Normal,Weak,Yes, Rain,Cool,Normal,Strong,No, OverCast,Cool,Normal,Strong,Yes, Sunny,Mild,High,Weak,No, Sunny,Cool,Normal,Weak,Yes, Rain,Mild,Normal,Weak,Yes, Sunny,Mild,Normal,Strong,Yes, OverCas
6、t,Mild,Normal,Strong,Yes, OverCast,Hot,Normal,Weak,Yes, Rain,Mild,High,Strong,No ;char * Attributes_kind4 = OutLook,Temperature,Humidity,Wind;int Attr_kind4 = 3,3,2,2;char * OutLook_kind3 = Sunny,OverCast,Rain;char * Temperature_kind3 = Hot,Mild,Cool;char * Humidity_kind2 = High,Normal;char * Wind_k
7、ind2 = Weak,Strong;/*int i_Exampple145 = 0,0,0,0,1, 0,0,0,1,1, 1,0,0,1,0, 2,1,0,0,0, 2,2,1,0,0, 2,2,1,1,1, 1,2,1,1,0, 0,1,0,0,1, 0,2,1,0,0, 2,1,1,0,0, 0,1,1,1,0, 1,1,1,1,0, 1,1,1,0,0, 2,1,0,0,1 ;*/void treelists(tree T);void InitAttr(Attributes &attr_link,char * Attributes_kind,int Attr_kind);void I
8、nitLink(link &L,char * ExamplesCOL);void ID3(tree &T,link L,link Target_Attr,Attributes attr);void PN_Num(link L,int &positve,int &negative);double Gain(int positive,int negative,char * atrribute,link L,Attributes attr_L);void main() link LL,p; Attributes attr_L,q; tree T; T = new TNode; T-firstchil
9、d = T-nextsibling = NULL; strcpy(T-weight,); strcpy(T-data,); attr_L = new AttrNode; attr_L-next = NULL; LL = new LNode; LL-next = NULL; /成功建立两个链表 InitLink(LL,Examples); InitAttr(attr_L,Attributes_kind,Attr_kind); ID3(T,LL,NULL,attr_L); cout决策树以广义表形式输出如下:endl; treelists(T);/以广义表的形式输出树/coutGain(9,5,O
10、utLook,LL,attr_L)endl; coutendl;/以广义表的形式输出树void treelists(tree T) tree p; if(!T) return; coutweight; coutdata; p = T-firstchild; if (p) coutnextsibling; if (p)cout,; cout); void InitAttr(Attributes &attr_link,char * Attributes_kind,int Attr_kind) Attributes p; for (int i =0;i next = NULL; strcpy(p-a
11、ttributes,Attributes_kindi); p-attr_Num = Attr_kindi; p-next = attr_link-next; attr_link-next = p; void InitLink(link &LL,char * ExamplesCOL) link p; for (int i = 0;i next = NULL; strcpy(p-OutLook,Examplesi0); strcpy(p-Temperature,Examplesi1); strcpy(p-Humidity,Examplesi2); strcpy(p-Wind,Examplesi3)
12、; strcpy(p-PlayTennis,Examplesi4); p-next = LL-next; LL-next = p; void PN_Num(link L,int &positve,int &negative) positve = 0; negative = 0; link p; p = L-next; while (p) if (strcmp(p-PlayTennis,No) = 0) negative+; else if(strcmp(p-PlayTennis,Yes) = 0) positve+; p = p-next; /计算信息增益/link L: 样本集合S/attr
13、_L:属性集合double Gain(int positive,int negative,char * atrribute,link L,Attributes attr_L) int atrr_kinds;/每个属性中的值的个数 Attributes p = attr_L-next; link q = L-next; int attr_th = 0;/第几个属性 while (p) if (strcmp(p-attributes,atrribute) = 0) atrr_kinds = p-attr_Num; break; p = p-next; attr_th+; double entrop
14、y,gain=0; double p1 = 1.0*positive/(positive + negative); double p2 = 1.0*negative/(positive + negative); entropy = -p1*log(p1)/log2 - p2*log(p2)/log2;/集合熵 gain = entropy; /获取每个属性值在训练样本中出现的个数 /获取每个属性值所对应的正例和反例的个数 /声明一个3*atrr_kinds的数组 int * kinds= new int * 3; for (int j =0;j 3;j+) kindsj = new intat
15、rr_kinds;/保存每个属性值在训练样本中出现的个数 /初始化 for (int j = 0;j 3;j+) for (int i =0;i atrr_kinds;i+) kindsji = 0; while (q) if (strcmp(OutLook,atrribute) = 0) for (int i = 0;i OutLook,OutLook_kindi) = 0) kinds0i+; if(strcmp(q-PlayTennis,Yes) = 0) kinds1i+; else kinds2i+; else if (strcmp(Temperature,atrribute) =
16、0) for (int i = 0;i Temperature,Temperature_kindi) = 0) kinds0i+; if(strcmp(q-PlayTennis,Yes) = 0) kinds1i+; else kinds2i+; else if (strcmp(Humidity,atrribute) = 0) for (int i = 0;i Humidity,Humidity_kindi) = 0) kinds0i+; if(strcmp(q-PlayTennis,Yes) = 0) kinds1i+;/ else kinds2i+; else if (strcmp(Win
17、d,atrribute) = 0) for (int i = 0;i Wind,Wind_kindi) = 0) kinds0i+; if(strcmp(q-PlayTennis,Yes) = 0) kinds1i+; else kinds2i+; q = q-next; /计算信息增益 double * gain_kind = new doubleatrr_kinds; int positive_kind = 0,negative_kind = 0; for (int j = 0;j next; Link-next = NULL; while (p) q = p; p = p-next; f
18、ree(q); void ID3(tree &T,link L,link Target_Attr,Attributes attr) Attributes p,max,attr_child,p1; link q,link_child,q1; tree r,tree_p; int positive =0,negative =0; PN_Num(L,positive,negative); /初始化两个子集合 attr_child = new AttrNode; attr_child-next = NULL; link_child = new LNode; link_child-next = NULL
19、; if (positive = 0)/全是反例 strcpy(T-data,No); return; else if( negative = 0)/全是正例 strcpy(T-data,Yes); return; p = attr-next; /属性链表 double gain,g = 0; /*/ /* 建立属性子集合与训练样本子集合有两个方案: 一:在原来链表的基础上进行删除; 二:另外申请空间进行存储子集合; 采用第二种方法虽然浪费了空间,但也省了很多事情,避免了变量之间的应用混乱 */ /*/ if(p) while (p) gain = Gain(positive,negative,p-attributes,L,attr); coutattributes gain g) g = gain; max = p;/寻找信息增益最大的属性 p = p-next; strcpy(T-data,max-attributes);/增加决策树的节点
copyright@ 2008-2023 冰点文库 网站版权所有
经营许可证编号:鄂ICP备19020893号-2