0805030115 梅超亮数据挖掘 Apriori算法.docx

资源描述

0805030115 梅超亮数据挖掘 Apriori算法.docx

《0805030115 梅超亮数据挖掘 Apriori算法.docx》由会员分享，可在线阅读，更多相关《0805030115 梅超亮数据挖掘 Apriori算法.docx（15页珍藏版）》请在冰点文库上搜索。

0805030115 梅超亮数据挖掘 Apriori算法.docx

0805030115梅超亮数据挖掘Apriori算法

武汉工程大学

计算机科学与工程学院

《数据挖掘与知识发现》实验报告

专业班级

08智能1班

实验地点

计算机大楼419

学生学号

0805030115

指导教师

吕品

学生姓名

梅超亮

实验时间

2011-12-19

实验项目

实验2经典数据挖掘算法的实现

实验类别

操作性（）验证性（）设计性（）综合性（√）其它（）

实验目的及要求

1.进一步熟悉高级语言编程;

2．掌握使用Apriori算法从事物数据库中挖掘频繁项集的方法；掌握使用ID3算法对任意元组进行分类；掌握使用k-means算法给每一类帖上标签;

3.任选其中一个算法实现.

成绩评定表

类别

评分标准

分值

得分

合计

上机表现

积极出勤、遵守纪律

主动完成实验设计任务

30分

程序代码

比较规范、基本正确

功能达到实验要求

30分

实验报告

及时递交、填写规范

内容完整、体现收获

40分

说明：

评阅教师：

日期：

2011年月日

实验内容

1.算法思想

（1）首先遍历一次数据库，得到所有单元素的

最后的总结果:

2．源程序使用的数据结构

结构体Shop用来记录每个商店所拥有的商品名、该商店名

structShop

{

charTID[5];//商店Id

charGoods[5];//该商店所有的商品名

}AllElectronics[9];

结构体GoodNode记录链表的节点。

每个节点有以下内容：

商品名、该节点的支持度、下一个节点的地址。

structGoodNode

{

charGoods[10];

intsup;

structGoodNode*next;

};

结构体slistGoodV表示的是候选项目集的链表

structslistGoodV

{

structGoodNode*head;

structGoodNode*tail;

intlen;

}TALL;

3．源程序

#include

structShop

{

charTID[5];

charGoods[5];

}AllElectronics[9];

structGoodNode

{

charGoods[10];

intsup;

structGoodNode*next;

};

structslistGoodV

{

structGoodNode*head;

structGoodNode*tail;

intlen;

}TALL;

向候选项目集的链表中的尾部插入节点newnode

structGoodNode*AddTail（structGoodNode*newnode）

{

if（TALL.len==0）

{

TALL.head=newnode;

TALL.tail=newnode;

TALL.head->next=NULL;

TALL.tail->next=NULL;

}

else

{

TALL.tail->next=newnode;

TALL.tail->next->next=NULL;

TALL.tail=newnode;

}

TALL.len++;

returnTALL.head;

}

产生第一个链表

voidProduceListA（）

{

chartmp[10];

structGoodNode*tem;

structGoodNode*flag,*flag2,*tail;

flag=TALL.head;

tail=TALL.tail;

while（flag!

=tail）

{

flag2=flag->next;

while（flag2!

=tail）

{

memset（tmp,0,10）;

strcpy（tmp,flag->Goods）;

strcat（tmp,flag2->Goods）;

tem=（structGoodNode*）malloc（sizeof（structGoodNode））;

strcpy（tem->Goods,tmp）;

tem->sup=0;

AddTail（tem）;

flag2=flag2->next;

}

memset（tmp,0,10）;

strcpy（tmp,flag->Goods）;

strcat（tmp,flag2->Goods）;

tem=（structGoodNode*）malloc（sizeof（structGoodNode））;

strcpy（tem->Goods,tmp）;

tem->sup=0;

AddTail（tem）;

flag=flag->next;

}

flag=TALL.head;

while（flag->sup!

=0）

{

TALL.head=TALL.head->next;

free（flag）;

flag=TALL.head;

}

在前一个链表基础上产生下一个链表

voidProduceListB（）

{

chartmp[10];

structGoodNode*tem;

structGoodNode*flag,*flag2,*tail;

flag=TALL.head;

tail=TALL.tail;

while（flag!

=tail）

{

flag2=flag->next;

while（flag2!

=tail）

{

if（flag->Goods[0]!

=flag2->Goods[0]）

break;

memset（tmp,0,10）;

strcpy（tmp,flag->Goods）;

tmp[2]=flag2->Goods[1];

tmp[3]='\0';

tem=（structGoodNode*）malloc（sizeof（structGoodNode））;

strcpy（tem->Goods,tmp）;

tem->sup=0;

AddTail（tem）;

flag2=flag2->next;

}

if（flag->Goods[0]==flag2->Goods[0]）

{

memset（tmp,0,10）;

strcpy（tmp,flag->Goods）;

tmp[2]=flag2->Goods[1];

tmp[3]='\0';

tem=（structGoodNode*）malloc（sizeof（structGoodNode））;

strcpy（tem->Goods,tmp）;

tem->sup=0;

AddTail（tem）;

}

flag=flag->next;

}

flag=TALL.head;

while（flag->sup!

=0）

{

TALL.head=TALL.head->next;

free（flag）;

flag=TALL.head;

}

判断链表中的节点的商品名是否在某个商店中

intIfInItem（structShop*shop,structGoodNode*node）

{

inti=0;

intk=0;

intflag=0;

while（node->Goods[i]!

='\0'）

{

k=0;

flag=0;

while（shop->Goods[k]!

='\0'）

{

if（node->Goods[i]==shop->Goods[k]）

{

flag=1;

break;

}

k++;

}

if（flag==0）

{

return0;

}

i++;

}

return1;

}

计算链表中每个节点的支持度

voidAddSup（）

{

structGoodNode*tmp;

tmp=TALL.head;

inti=0;

intj;

while（tmp!

=NULL）

{

for（j=0;j<9;j++）

{

if（IfInItem（&AllElectronics[j],tmp）==1）

{

tmp->sup++;

}

tmp=tmp->next;

}

删除链表中的节点

voidDeleteNode（intk）//k为支持度

{

structGoodNode*tmp,*tmp2;

tmp=TALL.head;

while（tmp->next!

=NULL）

{

if（tmp->next->sup

{

tmp2=tmp->next;

tmp->next=tmp2->next;

free（tmp2）;

}

else

tmp=tmp->next;

TALL.len--;

}

if（TALL.head->sup

{

tmp=TALL.head;

TALL.head=TALL.head->next;

free（tmp）;

TALL.len--;

}

boolJudgeSame（char*temp）

{

structGoodNode*tmp;

tmp=TALL.head;

while（tmp!

=NULL）

{

while（strcmp（tmp->Goods,temp）==0）

{

tmp->sup++;

returntrue;

}

tmp=tmp->next;

}

returnfalse;

}

打印链表

voidProduceList（）

{

inti;

intj;

intk=0;

chartmp[10];

structGoodNode*tem;

for（i=0;i<9;i++）

{

for（j=0;j<4;j++）

{

memset（tmp,0,10）;

k=0;

if（AllElectronics[i].Goods[j]=='\0'）

break;

tmp[k++]=AllElectronics[i].Goods[j];

tmp[k++]='\0';

if（JudgeSame（tmp）==true）

continue;

tem=（structGoodNode*）malloc（sizeof（structGoodNode））;

strcpy（tem->Goods,tmp）;

tem->sup=0;

AddTail（tem）;

}

voidprintTall（）

{

structGoodNode*tmp;

tmp=TALL.head;

while（tmp!

=NULL）

{

printf（"%s\t%d\n",tmp->Goods,tmp->sup）;

tmp=tmp->next;

}

初始化商店的商品信息

voidInitShop（）

{

strcpy（AllElectronics[0].TID,"T100"）;

strcpy（AllElectronics[0].Goods,"ABE"）;

strcpy（AllElectronics[1].TID,"T200"）;

strcpy（AllElectronics[1].Goods,"BD"）;

strcpy（AllElectronics[2].TID,"T300"）;

strcpy（AllElectronics[2].Goods,"BC"）;

strcpy（AllElectronics[3].TID,"T400"）;

strcpy（AllElectronics[3].Goods,"ABD"）;

strcpy（AllElectronics[4].TID,"T500"）;

strcpy（AllElectronics[4].Goods,"AC"）;

strcpy（AllElectronics[5].TID,"T600"）;

strcpy（AllElectronics[5].Goods,"BC"）;

strcpy（AllElectronics[6].TID,"T700"）;

strcpy（AllElectronics[6].Goods,"AC"）;

strcpy（AllElectronics[7].TID,"T800"）;

strcpy（AllElectronics[7].Goods,"ABCE"）;

strcpy（AllElectronics[8].TID,"T900"）;

strcpy（AllElectronics[8].Goods,"ABC"）;

}

intmain（）

{

TALL.head=NULL;

TALL.tail=NULL;

TALL.len=0;

InitShop（）;

inti,j=0;

printf（"商店信息\n"）;

printf（"商店名\t商品\n"）;

for（i=0;i<9;i++）

{

printf（"%s\t",AllElectronics[i].TID）;

printf（"%s\t",AllElectronics[i].Goods）;

printf（"\n"）;

}

printf（"第一次产生频繁项集\n"）;

printf（"项集\t支持度\n"）;

ProduceList（）;

printTall（）;

printf（"剔除支持度小于\n"）;

DeleteNode

（2）;

printTall（）;

printf（"第二次产生频繁项集\n"）;

printf（"项集\t支持度\n"）;

ProduceListA（）;

AddSup（）;

printTall（）;

printf（"第三次产生频繁项集\n"）;

printf（"项集\t支持度\n"）;

ProduceListB（）;

AddSup（）;

printTall（）;

return0;

}

实验总结

在这次实验中的难点是：

关联规则挖掘的第二阶段——产生关联规则（AssociationRules）。

从高频项目组产生关联规则，是利用前一步骤的高频k-项目组来产生规则，在最小信赖度（MinimumConfidence）的条件门槛下，经由高频k-项目组{A,B}所产生的规则AB，其信赖度可经由公式求得，若数据结构设置的不好，后面的计算量将不是局部的改变，而是伴随后面所有的候选项目集。

所以我的想法是纵方向设置一个项目集链表，横方向是字符串型的数组。

后面的项目集有前一个的候选项目集产生，所以链表会不断变化，最后就得出了大项目集。

这次实验花费了很长时间，也学到了很多知识，很感谢吕品老师的悉心指导和同学们的热情帮助，让我按时完成了这次实验。

展开阅读全文

0805030115 梅超亮 数据挖掘 Apriori算法.docx

0805030115 梅超亮数据挖掘 Apriori算法.docx