if(ch[j]==ch[len]){
tag=0;
break;
}
}
if(tag){//*leaf_num=0不用
leaves[++*leaf_num].c=ch[len];
leaves[*leaf_num].weight=1;
for(k=len+1;ch[k]!
='\0';k++)//在之后的字符串中累计权值
if(ch[len]==ch[k])
leaves[*leaf_num].weight++;//权值累加
}
}
*ch_len=len;//字符串长度
}
3.创建HuffmanTree,并存储
创建:
voidCreateHuffmanTree(Huffmanht,LeafNodew,intn){
inti,j;
ints1,s2;
//初始化哈夫曼树
for(i=1;i<=n;i++){
ht[i].weight=w[i].weight;
ht[i].parent=0;
ht[i].LChild=0;
ht[i].RChild=0;
}
for(i=n+1;i<=2*n-1;i++){
ht[i].weight=0;
ht[i].parent=0;
ht[i].LChild=0;
ht[i].RChild=0;
}
for(i=n+1;i<=2*n-1;i++){
for(j=1;j<=i-1;j++)
if(!
ht[j].parent)
break;
s1=j;//找到第一个双亲为零的结点
for(;j<=i-1;j++)
if(!
ht[j].parent)
s1=ht[s1].weight>ht[j].weight?
j:
s1;
ht[s1].parent=i;
ht[i].LChild=s1;
for(j=1;j<=i-1;j++)
if(!
ht[j].parent)
break;
s2=j;//找到第二个双亲为零的结点
for(;j<=i-1;j++)
if(!
ht[j].parent)
s2=ht[s2].weight>ht[j].weight?
j:
s2;
ht[s2].parent=i;
ht[i].RChild=s2;
ht[i].weight=ht[s1].weight+ht[s2].weight;//权值累加
}
}
存储:
voidsave_HufTree(Huffmanht,LeafNodele,intln){
inti;
FILE*HufTree=Fopen("HufTree.dat","a");
CreateHuffmanTree(ht,le,ln);
printf("\n哈夫曼树\n");
printf("编号权值parentLChildRChild\n");
//fputs("编号|权值|parent|LChild|RChild\n",HufTree);
for(i=1;i<=2*ln-1;i++){/*打印Huffman树的信息*/
printf("%d\t%d\t%d\t%d\t%d\n",
i,(ht)[i].weight,(ht)[i].parent,(ht)[i].LChild,(ht)[i].RChild);
fprintf(HufTree,"%d|%d|%d|%d|%d\n",
i,(ht)[i].weight,(ht)[i].parent,(ht)[i].LChild,(ht)[i].RChild);
}
Fclose(HufTree);
printf("哈弗曼树已保存至HufTree.dat\n");
}
4.读取哈夫曼树至新的结构体数组
voidread_HufTree(Huffmanht){//记得从1开始存储
inti=1,j;
FILE*HufTree=Fopen("HufTree.dat","r");
while((fscanf(HufTree,"%d|%d|%d|%d|%d\n",
&j,&((ht)[i].weight),&((ht)[i].parent),&((ht)[i].LChild),&((ht)[i].RChild)))==5){
//printf("%d\t%d\t%d\t%d\n",(ht)[i].weight,(ht)[i].parent,(ht)[i].LChild,(ht)[i].RChild);
i++;
}
Fclose(HufTree);
printf("已从HufTree.dat读取哈弗曼树\n");
}
5.根据哈夫曼树生成每个叶子结点的编码
voidCrtHuffmanNodeCode(Huffmanht,charch[],HuffmanCodecode_of_leaf,LeafNodeweight,intm,intn){
inti,c,p,start;
char*cd;
cd=(char*)malloc(n*sizeof(char));
cd[n-1]='\0';//末尾置0
for(i=1;i<=n;i++){
start=n-1;//cd串每次从末尾开始
c=i;
p=ht[i].parent;//p在n+1至2n-1
while(p){//沿父亲方向遍历,直到为0
start--;//依次向前置值
if(ht[p].LChild==c)//与左子相同,置0
cd[start]='0';
else//否则置1
cd[start]='1';
c=p;
p=ht[p].parent;
}
weight[i].num=n-start;//二进制码的长度(包含末尾0)
code_of_leaf[i]=(char*)malloc((n-start)*sizeof(char));
strcpy(code_of_leaf[i],&cd[start]);//将二进制字符串拷贝到指针数组code_of_leaf中
}
free(cd);//释放cd内存
}
6.保存每个叶子结点的信息
voidsave_HufCode(Huffmanht,char*ch,HuffmanCodecode_of_leaf,LeafNodeleaves,intch_len,intleaf_num){
inti;
FILE*HufCode=Fopen("HufCode.txt","a");
CrtHuffmanNodeCode(ht,ch,code_of_leaf,leaves,ch_len,leaf_num);//叶子结点的编码
printf("\n每个叶子节点的前缀码\n");//打印叶子结点的编码
for(i=1;i<=leaf_num;i++){
printf("%c:
%s\n",leaves[i].c,code_of_leaf[i]);
fprintf(HufCode,"%c:
%s\n",leaves[i].c,code_of_leaf[i]);
}
Fclose(HufCode);
printf("每个叶子节点的前缀码已保存至HufCode.txt\n");
}
7.读取每个叶子节点的信息到新的字符指针数组
voidread_HufCode(HuffmanCodecode_of_leaf){
inti=1;
charc,tem[10];
FILE*HufCode=Fopen("HufCode.txt","r");
while((fscanf(HufCode,"%c:
%s\n",&c,tem))==2){
intlen=strlen(tem);
code_of_leaf[i]=(char*)malloc(len*sizeof(char));
strcpy(code_of_leaf[i],tem);
//printf("%c:
%s\n",c,code_of_leaf[i]);
i++;
}
Fclose(HufCode);
printf("已从HufCode.txt读取每个叶子节点信息\n");
}
8.生成所有字符的编码
voidCrtHuffmanCode(charch[],HuffmanCodecode_of_leaf,HuffmanCodecode_of_ch,LeafNodeleaves,intleaf_num,intch_len){
inti,k;
for(i=0;ifor(k=1;k<=leaf_num;k++)/*从weight[k].c中查找与ch[i]相等的下标K*/
if(ch[i]==leaves[k].c)
break;
code_of_ch[i]=(char*)malloc((leaves[k].num)*sizeof(char));
strcpy(code_of_ch[i],code_of_leaf[k]);//拷贝二进制编码
}
}
9.保存字符串的编码信息(压缩)
FILE*Fopen(charconst*filename,charconst*mode){
FILE*idlist;
idlist=fopen(filename,mode);
if(idlist==NULL){
perror(filename);
exit(EXIT_FAILURE);
}else{
returnidlist;
}
}
10.解码并保存到str2.txt
voidTrsHuffmanTree(Huffmanht,LeafNodew,HuffmanCodehc,intn,intm){
inti=0,j,p;
FILE*str2=Fopen("str2.txt","a");
printf("\n经解压得原文件中的字符串:
");
while(ip=2*n-1;//从父亲节点向下遍历直到叶子节点
for(j=0;hc[i][j]!
='\0';j++){
if(hc[i][j]=='0')
p=ht[p].LChild;
else
p=ht[p].RChild;
}
printf("%c",w[p].c);/*打印原信息*/
fputc(w[p].c,str2);
i++;
}
Fclose(str2);
printf("\n已将解压得到的字符串保存至str2.txt\n");
}
11.释放huffman编码内存
voidFreeHuffmanCode(HuffmanCodeh1,HuffmanCodeh2,HuffmanCodehc,intleaf_num,intch_len){
inti;
for(i=1;i<=leaf_num;i++){//释放叶子结点的编码
free(h1[i]);
free(h2[i]);
}
for(i=0;ifree(hc[i]);
}
四、运行测试与分析及运行界面
1.文件str1.txt内容:
2.运行程序,读取文件
3.统计叶子节点的权值
4.根据权值生成哈夫曼树,保存至HufTree.dat,并用新的结构体数组读取哈夫曼树
5.HufTree.dat内容
6.根据哈夫曼树生成叶子节点的前缀码,保存至HufCode.txt,之后用新的结构体数组读取HufCode.txt
7.HufCode.txt内容
8.根据前缀码生成哈夫曼编码,保存至CodeFile.dat
9.CodeFile.dat内容
10.根据CodeFile.dat解压,获得原字符串,并保存至str2.txt
11.str2.txt内容
五、实验收获与思考
通过使用哈夫曼树实现文件压缩,使我充分理解哈夫曼树的构造方法以及实际应用,巩固了课堂知识,同时认识到自己的不足。
在编程中发现问题,通过查询求助解决问题,使我不断地我加深数据结构的学习。
六、附录(原程序)
#include
#include
#include
#defineN100
#defineM2*N-1
typedefchar*HuffmanCode[2*M];//haffman编码
typedefstruct{
unsignedintweight;//权值
unsignedintparent,LChild,RChild;
}HTNode,Huffman[M+1];//huffman树
typedefstructNode{
intweight;//叶子结点的权值
charc;//叶子结点
intnum;//叶子结点的二进制码的长度
}LeafNode[N];
//产生叶子结点的字符和权值
voidCreateLeaf(charch[],int*ch_len,LeafNodeleaves,int*leaf_num){
intlen,j,k;
inttag;
*leaf_num=0;//叶子节点个数
//统计字符出现个数,放入CW
for(len=0;ch[len]!
='\0';len++){//遍历字符串ch[]
tag=1;
for(j=0;jif(ch[j]==ch[len]){
tag=0;
break;
}
}
if(tag){//*leaf_num=0不用
leaves[++*leaf_num].c=ch[len];
leaves[*leaf_num].weight=1;
for(k=len+1;ch[k]!
='\0';k++)//在之后的字符串中累计权值
if(ch[len]==ch[k])
leaves[*leaf_num].weight++;//权值累加
}
}
*ch_len=len;//字符串长度
}
//创建HuffmanTree
voidCreateHuffmanTree(Huffmanht,LeafNodew,intn){
inti,j;
ints1,s2;
//初始化哈夫曼树
for(i=1;i<=n;i++){
ht[i].weight=w[i].weight;
ht[i].parent=0;
ht[i].LChild=0;
ht[i].RChild=0;
}
for(i=n+1;i<=2*n-1;i++){
ht[i].weight=0;
ht[i].parent=0;
ht[i].LChild=0;
ht[i].RChild=0;
}
for(i=n+1;i<=2*n-1;i++){
for(j=1;j<=i-1;j++)
if(!
ht[j].parent)
break;
s1=j;//找到第一个双亲为零的结点
for(;j<=i-1;j++)
if(!
ht[j].parent)
s1=ht[s1].weight>ht[j].weight?
j:
s1;
ht[s1].parent=i;
ht[i].LChild=s1;
for(j=1;j<=i-1;j++)
if(!
ht[j].parent)
break;
s2=j;//找到第二个双亲为零的结点
for(;j<=i-1;j++)
if(!
ht[j].parent)
s2=ht[s2].weight>ht[j].weight?
j:
s2;
ht[s2].parent=i;
ht[i].RChild=s2;
ht[i].weight=ht[s1].weight+ht[s2].weight;//权值累加
}
}
//生成每个叶子结点的编码
voidCrtHuffmanNodeCode(Huffmanht,charch[],HuffmanCodecode_of_leaf,LeafNodeweight,intm,intn){
inti,c,p,start;
char*cd;
cd=(char*)malloc(n*sizeof(char));
cd[n-1]='\0';//末尾置0
for(i=1;i<=n;i++){
start=n-1;//cd串每次从末尾开始
c=i;
p=ht[i].parent;//p在n+1至2n-1
while(p){//沿父亲方向遍历,直到为0
start--;//依次向前置值
if(ht[p].LChild==c)//与左子相同,置0
cd[start]='0';
else//否则置1
cd[start]='1';
c=p;
p=ht[p].parent;
}
weight[i].num=n-start;//二进制码的长度(包含末尾0)
code_of_leaf[i]=(char*)malloc((n-start)*sizeof(char));
strcpy(code_of_leaf[i],&cd[start]);//将二进制字符串拷贝到指针数组code_of_leaf中
}
free(cd);//释放cd内存
}
//生成所有字符的编码
voidCrtHuffmanCode(charch[],HuffmanCodecode_of_leaf,HuffmanCodecode_of_ch,LeafNodeleaves,intleaf_num,intch_len){
inti,k;
for(i=0;ifor(k=1;k<=leaf_num;k++)/*从weight[k].c中查找与ch[i]相等的下标K*/
if(ch[i]==leaves[k].c)
break;
code_of_ch[i]=(char*)malloc((leaves[k].num)*sizeof(char));
strcpy(code_of_ch[i],code_of_leaf[k]);//拷贝二进制编码
}
}
FILE*Fopen