Apriori算法Word下载.docx
《Apriori算法Word下载.docx》由会员分享,可在线阅读,更多相关《Apriori算法Word下载.docx(14页珍藏版)》请在冰点文库上搜索。
![Apriori算法Word下载.docx](https://file1.bingdoc.com/fileroot1/2023-5/1/d4a76f0f-772e-4b35-82a2-1bc50b19f929/d4a76f0f-772e-4b35-82a2-1bc50b19f9291.gif)
#include <algorithm>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;
/** A frequent item consisting of a single word. */
typedef struct Item
{
    std::string sItem;  // the word itself
    int iSupport;       // support count recorded for the word
} ITEM;

/** A list of strings: the input lines, or the words of one itemset. */
typedef std::vector<std::string> VEC_STR;

/** A list of word lists (candidate itemsets without support counts). */
typedef std::vector<VEC_STR> VEC_VEC_STR;

/** A higher-level frequent item: several words plus a support count. */
typedef struct MultiItem
{
    VEC_STR vsItem;  // the words that make up the itemset
    int iSupport;    // support count recorded for the itemset
} MULTIITEM;

/** Collection of single-word frequent items. */
typedef std::vector<ITEM> VEC_ITEM;

/** Collection of higher-level frequent items. */
typedef std::vector<MULTIITEM> VEC_MULTIITEM;

/** Maps a word to the number of times it occurs. */
typedef std::map<std::string, int> MAP_STR_INT;
voidreadFile(ifstream&
conststring&
VEC_STR&
);
voidcountWord(VEC_STR*,MAP_STR_INT&
constcharseparator='
\\'
voidgenerateLevel1Set(MAP_STR_INT*,VEC_ITEM&
voidgenerateLevel2(VEC_ITEM*,VEC_MULTIITEM&
voidcycGenerator(VEC_MULTIITEM*,VEC_STR&
ofstream&
voidgenerateHighLevelSet(VEC_MULTIITEM*,VEC_MULTIITEM&
voidgenerateInitialHigh(VEC_MULTIITEM*,VEC_VEC_STR&
voidpruning(VEC_VEC_STR*,VEC_MULTIITEM*,VEC_MULTIITEM&
boolfind(VEC_MULTIITEM*,VEC_STR*);
voidcountSupport(VEC_STR*,VEC_MULTIITEM&
voidgenerateFrequentSet(VEC_MULTIITEM*,VEC_MULTIITEM&
voidprintFrequentSet(VEC_ITEM*,ostream&
os=cout);
voidprintFrequentSet(VEC_MULTIITEM*,ostream&
constintMINSUPPORT=2;
//最小支持度
intmain()
//从源文件读取数据
ifstreaminfile;
VEC_STRvs_word;
readFile(infile,"
input.txt"
vs_word);
infile.close();
//计算所有词语的出现频率
MAP_STR_INTword_count;
countWord(&
vs_word,word_count);
//生成单个词语的频繁项集合
VEC_ITEMlevel1Set;
generateLevel1Set(&
word_count,level1Set);
//生成具有两个词语的频繁项集合
VEC_MULTIITEMlevel2,level2Set;
generateLevel2(&
level1Set,level2);
countSupport(&
vs_word,level2);
generateFrequentSet(&
level2,level2Set);
//生成具有三个词语的频繁项集合
VEC_MULTIITEMlevel3Set;
generateHighLevelSet(&
level2Set,level3Set,vs_word);
//输出单个词的频繁项到文件
ofstreamoutfile;
outfile.open("
out.txt"
if(!
outfile)
cout<
<
"
不能打开文件!
endl;
printFrequentSet(&
level1Set,outfile);
//循环产生高层的频繁项集合并输出到文件
cycGenerator(&
level2Set,vs_word,outfile);
cout<
OK!
return0;
}
/**
 * Read a text file line by line.
 *
 * Each line is appended to vs_word as one string. The parameter types are
 * spelled out in full; they are the same types as the file's VEC_STR alias.
 *
 * @param infile    stream used for reading; its state is cleared and it is
 *                  opened on filename (the caller closes it)
 * @param filename  path of the file to read
 * @param vs_word   receives one element per line, appended in file order
 *
 * When the file cannot be opened a message is written to std::cout and
 * vs_word is left unchanged.
 */
void readFile(std::ifstream& infile, const std::string& filename,
              std::vector<std::string>& vs_word)
{
    infile.clear();
    infile.open(filename.c_str());
    if (!infile)
    {
        std::cout << "Unable to open this file!" << std::endl;
        return;
    }

    std::string line;
    while (std::getline(infile, line))
        vs_word.push_back(line);
}
/**
 * Count the occurrences of every word.
 *
 * Each string in vs_word is split at separator and every resulting word has
 * its entry in word_count incremented. The parameter types are spelled out
 * in full; they are the same types as the file's VEC_STR / MAP_STR_INT
 * aliases.
 *
 * @param vs_word     the strings to split; a null pointer is treated as
 *                    "no input"
 * @param word_count  word -> count map, updated in place
 * @param separator   character that delimits the words inside a string
 *
 * Empty words (empty line, leading/trailing or doubled separator) are not
 * counted: an empty string is found inside every other string, so it would
 * otherwise turn into an item that matches every line.
 */
void countWord(std::vector<std::string>* vs_word,
               std::map<std::string, int>& word_count,
               const char separator)
{
    if (!vs_word)
        return;

    for (std::size_t i = 0; i < vs_word->size(); ++i)
    {
        const std::string& sentence = (*vs_word)[i];

        // Walk the line once, cutting one word per separator.
        std::string::size_type start = 0;
        for (;;)
        {
            const std::string::size_type stop = sentence.find(separator, start);
            const bool last = (stop == std::string::npos);
            const std::string word =
                sentence.substr(start, last ? std::string::npos : stop - start);
            if (!word.empty())
                ++word_count[word];
            if (last)
                break;
            start = stop + 1;
        }
    }
}
/**找出频繁1项集的集合
voidgenerateLevel1Set(MAP_STR_INT*pWord_Count,VEC_ITEM&
level1Set)
ITEMitem;
MAP_STR_INT:
:
const_iteratormap_it=pWord_Count->
begin();
while(map_it!
=pWord_Count->
end())
if(map_it->
second>
=MINSUPPORT)
item.sItem=map_it->
first;
item.iSupport=map_it->
second;
level1Set.push_back(item);
++map_it;
/**由频繁1项集生成初始2项集
voidgenerateLevel2(VEC_ITEM*pLevel1Set,VEC_MULTIITEM&
initialLevel2)
VEC_STRvsTemp;
MULTIITEMmultiTemp;
unsignedintlevel1SetSize=pLevel1Set->
level1SetSize-1;
vsTemp.push_back((*pLevel1Set)[i].sItem);
for(unsignedintj=i+1;
j<
level1SetSize;
++j)
vsTemp.push_back((*pLevel1Set)[j].sItem);
multiTemp.vsItem=vsTemp;
multiTemp.iSupport=0;
initialLevel2.push_back(multiTemp);
vsTemp.pop_back();
vsTemp.clear();
/**循环产生频繁项集合并输出
voidcycGenerator(VEC_MULTIITEM*pLowLevelSet,VEC_STR&
vs_word,ofstream&
os)
VEC_MULTIITEMhighLevelSet,setTemp;
printFrequentSet(pLowLevelSet,os);
while(pLowLevelSet->
size()!
=0)
setTemp.clear();
generateHighLevelSet(pLowLevelSet,setTemp,vs_word);
highLevelSet=setTemp;
printFrequentSet(&
highLevelSet,os);
pLowLevelSet=&
highLevelSet;
/**由低层的频繁项集生成高层的频繁项集合
voidgenerateHighLevelSet(VEC_MULTIITEM*pLowLevelSet,VEC_MULTIITEM&
highLevelSet,VEC_STR&
VEC_VEC_STRvvsTemp;
VEC_MULTIITEMvmiTemp;
generateInitialHigh(pLowLevelSet,vvsTemp);
pruning(&
vvsTemp,pLowLevelSet,vmiTemp);
vs_word,vmiTemp);
vmiTemp,highLevelSet);
/**从低层的频繁项集生成初始的高层项集合
voidgenerateInitialHigh(VEC_MULTIITEM*pLowLevelSet,VEC_VEC_STR&
highLevelSet)
unsignedintlevel1SetSize=pLowLevelSet->
unsignedintk=0;
for(;
k<
(*pLowLevelSet)[i].vsItem.size()-1;
++k)
{
if((*pLowLevelSet)[i].vsItem[k]==(*pLowLevelSet)[j].vsItem[k])
vsTemp.push_back((*pLowLevelSet)[i].vsItem[k]);
else
break;
}
if(k==(*pLowLevelSet)[i].vsItem.size()-1)
vsTemp.push_back((*pLowLevelSet)[i].vsItem[k]);
vsTemp.push_back((*pLowLevelSet)[j].vsItem[k]);
highLevelSet.push_back(vsTemp);
vsTemp.clear();
/**剪枝步
voidpruning(VEC_VEC_STR*pInitialSet,VEC_MULTIITEM*pLowLevelSet,VEC_MULTIITEM&
prunedSet)
MULTIITEMmiTemp;
pInitialSet->
unsignedintj=0;
unsignedintsizeI=(*pInitialSet)[i].size();
for(;
sizeI;
for(unsignedintk=0;
if(k!
=j)
vsTemp.push_back((*pInitialSet)[i][k]);
if(!
find(pLowLevelSet,&
vsTemp))
break;
if(j==sizeI)
miTemp.vsItem=(*pInitialSet)[i];
miTemp.iSupport=0;
prunedSet.push_back(miTemp);
/**在低层的频繁项集中查询高层的初始频繁项的所有子集的函数
boolfind(VEC_MULTIITEM*pLowSet,VEC_STR*pSubSet)
pLowSet->
unsignedintsizeI=(*pLowSet)[i].vsItem.size();
if((*pLowSet)[i].vsItem[j]!
=(*pSubSet)[j])
returntrue;
returnfalse;
/**计算生成的初始频繁项集中各项的支持度
voidcountSupport(VEC_STR*pVs_Word,VEC_MULTIITEM&
initialSet)
intflag;
pVs_Word->
for(unsignedintj=0;
initialSet.size();
flag=1;
initialSet[j].vsItem.size();
if((*pVs_Word)[i].find(initialSet[j].vsItem[k],0)==-1)
{
flag=0;
}
if(flag==1)
++initialSet[j].iSupport;
/**从初始项集合中提取出频繁项集合
voidgenerateFrequentSet(VEC_MULTIITEM*pInitialSet,VEC_MULTIITEM&
frequentSet)
if((*pInitialSet)[i].iSupport>
frequentSet.push_back((*pInitialSet)[i]);
/**打印一项频繁集合
voidprintFrequentSet(VEC_ITEM*pLevel1Set,ostream&
//os<
频繁项"
;
支持度\n"
pLevel1Set->
os<
(*pLevel1Set)[i].sItem<
\t"
(*pLevel1Set)[i].iSupport<
os<
/**打印高层频繁项集合
voidprintFrequentSet(VEC_MULTIITEM*pFrequentSet,ostream&
pFrequentSet->
unsignedintj=0;
(*pFrequentSet)[i].vsItem.size()-1;
os<
(*pFrequentSet)[i].vsItem[j]<
&
(*pFrequentSet)[i].iSupport<
运行结果截图