1、using namespace std;typedef struct Item/只有一个词的频繁项 string sItem; int iSupport;ITEM;typedef vector VEC_VEC_STR;typedef struct MultiItem/高层的频繁项 VEC_STR vsItem;MULTIITEM;ITEM VEC_ITEM;/只有一个词的频繁项集合MULTIITEM VEC_MULTIITEM;/高层的频繁项集合typedef map MAP_STR_INT;/存储词语及其出现频率void readFile(ifstream &, const string &
2、, VEC_STR &);void countWord(VEC_STR *, MAP_STR_INT &, const char separator=void generateLevel1Set(MAP_STR_INT *, VEC_ITEM &void generateLevel2(VEC_ITEM *, VEC_MULTIITEM &void cycGenerator(VEC_MULTIITEM *, VEC_STR &, ofstream &void generateHighLevelSet(VEC_MULTIITEM *, VEC_MULTIITEM &void generateIni
3、tialHigh(VEC_MULTIITEM *, VEC_VEC_STR &void pruning(VEC_VEC_STR *, VEC_MULTIITEM *, VEC_MULTIITEM &bool find(VEC_MULTIITEM *, VEC_STR *);void countSupport(VEC_STR *, VEC_MULTIITEM &void generateFrequentSet(VEC_MULTIITEM *, VEC_MULTIITEM &void printFrequentSet(VEC_ITEM *, ostream &os=cout);void print
4、FrequentSet(VEC_MULTIITEM *, ostream &const int MINSUPPORT = 2;/最小支持度int main() /从源文件读取数据 ifstream infile; VEC_STR vs_word; readFile(infile,input.txt,vs_word); infile.close(); /计算所有词语的出现频率 MAP_STR_INT word_count; countWord(&vs_word, word_count); /生成单个词语的频繁项集合 VEC_ITEM level1Set; generateLevel1Set(&w
5、ord_count, level1Set); /生成具有两个词语的频繁项集合 VEC_MULTIITEM level2, level2Set; generateLevel2(&level1Set, level2); countSupport(&vs_word, level2); generateFrequentSet(&level2, level2Set); /生成具有三个词语的频繁项集合 VEC_MULTIITEM level3Set; generateHighLevelSet(&level2Set, level3Set, vs_word); /输出单个词的频繁项到文件 ofstream o
6、utfile; outfile.open(out.txt if(!outfile) cout不能打开文件!endl; printFrequentSet(&level1Set, outfile); /循环产生高层的频繁项集合并输出到文件 cycGenerator(&level2Set, vs_word, outfile); coutOK! return 0;/*从源文件读取词语*每一行作为一个字符串存入向量中*/infile, const string &filename, VEC_STR &vs_word) infile.clear(); infile.open(filename.c_str(
7、);infile)Unable to open this file! string word; while(getline(infile, word) vs_word.push_back(word);/*计算每个词语的支持度*从字符串中提取出所有词语,与其支持度一道存入map中void countWord(VEC_STR *vs_word, MAP_STR_INT &word_count, const char separator) string sentence,word; for(unsigned int i=0; isize(); +i) sentence = (*vs_word)i;
8、while(sentence.find(separator)!=-1) word = sentence.substr(0,sentence.find(separator); +word_countword; sentence = sentence.substr(sentence.find(separator)+1, sentence.size()-1); +word_countsentence; /*找出频繁1项集的集合void generateLevel1Set(MAP_STR_INT *pWord_Count, VEC_ITEM &level1Set) ITEM item; MAP_STR
9、_INT:const_iterator map_it = pWord_Count-begin(); while(map_it != pWord_Count-end() if(map_it-second = MINSUPPORT) item.sItem = map_it-first; item.iSupport = map_it-second; level1Set.push_back(item); +map_it;/*由频繁1项集生成初始2项集void generateLevel2(VEC_ITEM *pLevel1Set, VEC_MULTIITEM &initialLevel2) VEC_S
10、TR vsTemp; MULTIITEM multiTemp; unsigned int level1SetSize = pLevel1Set-level1SetSize-1; vsTemp.push_back(*pLevel1Set)i.sItem); for(unsigned int j=i+1; jsize() != 0) setTemp.clear(); generateHighLevelSet(pLowLevelSet, setTemp, vs_word); highLevelSet = setTemp; printFrequentSet(&highLevelSet, os); pL
11、owLevelSet = &highLevelSet;/*由低层的频繁项集生成高层的频繁项集合void generateHighLevelSet(VEC_MULTIITEM *pLowLevelSet, VEC_MULTIITEM &highLevelSet, VEC_STR & VEC_VEC_STR vvsTemp; VEC_MULTIITEM vmiTemp; generateInitialHigh(pLowLevelSet, vvsTemp); pruning(&vvsTemp, pLowLevelSet, vmiTemp);vs_word, vmiTemp);vmiTemp, hig
12、hLevelSet);/*从低层的频繁项集生成初始的高层项集合void generateInitialHigh(VEC_MULTIITEM *pLowLevelSet, VEC_VEC_STR &highLevelSet) unsigned int level1SetSize = pLowLevelSet- unsigned int k = 0; for(; k unsigned int j = 0; unsigned int sizeI = (*pInitialSet)i.size(); for(;sizeI; for(unsigned int k=0; if(k!=j) vsTemp.pu
13、sh_back(*pInitialSet)ik); if(!find(pLowLevelSet, &vsTemp) break; if(j=sizeI) miTemp.vsItem = (*pInitialSet)i; miTemp.iSupport = 0; prunedSet.push_back(miTemp);/*在低层的频繁项集中查询高层的初始频繁项的所有子集的函数bool find(VEC_MULTIITEM *pLowSet, VEC_STR *pSubSet)pLowSet- unsigned int sizeI = (*pLowSet)i.vsItem.size(); if(*
14、pLowSet)i.vsItemj != (*pSubSet)j) return true; return false;/*计算生成的初始频繁项集中各项的支持度void countSupport(VEC_STR *pVs_Word, VEC_MULTIITEM &initialSet) int flag;pVs_Word- for(unsigned int j=0;initialSet.size(); flag =1;initialSetj.vsItem.size(); if(*pVs_Word)i.find(initialSetj.vsItemk, 0) = -1) flag = 0; if
15、(flag =1) +initialSetj.iSupport;/*从初始项集合中提取出频繁项集合void generateFrequentSet(VEC_MULTIITEM *pInitialSet, VEC_MULTIITEM &frequentSet) if(*pInitialSet)i.iSupport frequentSet.push_back(*pInitialSet)i);/*打印一项频繁集合void printFrequentSet(VEC_ITEM *pLevel1Set, ostream & /os os(*pLevel1Set)i.sItemt(*pLevel1Set)i.iSupport os unsigned int j=0;(*pFrequentSet)i.vsItem.size()-1; os(*pFrequentSet)i.vsItemj&(*pFrequentSet)i.iSupport运行结果截图
copyright@ 2008-2023 冰点文库 网站版权所有
经营许可证编号:鄂ICP备19020893号-2