LZSS压缩算法C语言实现Word格式.docx
《LZSS压缩算法C语言实现Word格式.docx》由会员分享,可在线阅读,更多相关《LZSS压缩算法C语言实现Word格式.docx(17页珍藏版)》请在冰点文库上搜索。
#defineNILN
/*upperlimitforg_match_len.Changedfrom18to16forbinary
compatabilitywithMicrosoftCOMPRESS.EXEandEXPAND.EXE
#defineF18*/
#defineF16
/*encodestringintopositionandlength
ifmatch_lengthisgreaterthanthis:
*/
#defineTHRESHOLD2
/*theseassumelittle-endianCPUlikeIntelx86
--needbyte-swapfunctionforbigendianCPU*/
#defineREAD_LE32(X)*(uint32_t*)(X)
#defineWRITE_LE32(X,Y)*(uint32_t*)(X)=(Y)
/*thisassumessizeof(long)==4*/
typedefunsignedlonguint32_t;
/*text(input)sizecounter,code(output)sizecounter,
andcounterforreportingprogressevery1Kbytes*/
staticunsignedlongg_text_size,g_code_size,g_print_count;
/*ringbufferofsizeN,withextraF-1bytes
tofacilitatestringcomparison*/
staticunsignedcharg_ring_buffer[N+F-1];
/*positionandlengthoflongestmatch;
setbyinsert_node()*/
staticunsignedg_match_pos,g_match_len;
/*left&
rightchildren&
parent--theseconstitutebinarysearchtree*/
staticunsignedg_left_child[N+1],g_right_child[N+257],g_parent[N+1];
/*input&
outputfiles*/
staticFILE*g_infile,*g_outfile;
/*****************************************************************************
initializetrees
*****************************************************************************/
staticvoidinit_tree(void)
{
unsignedi;
/*Fori=0toN-1,g_right_child[i]andg_left_child[i]willbetherightand
leftchildrenofnodei.Thesenodesneednotbeinitialized.
Also,g_parent[i]istheparentofnodei.Theseareinitializedto
NIL(=N),whichstandsfor'
notused.'
Fori=0to255,g_right_child[N+i+1]istherootofthetree
forstringsthatbeginwithcharacteri.Theseareinitialized
toNIL.Notethereare256trees.*/
for(i=N+1;
i<
=N+256;
i++)
g_right_child[i]=NIL;
for(i=0;
N;
g_parent[i]=NIL;
}
InsertsstringoflengthF,g_ring_buffer[r..r+F-1],intooneofthe
trees(g_ring_buffer[r]'
thtree)andreturnsthelongest-matchposition
andlengthviatheglobalvariablesg_match_posandg_match_len.
Ifg_match_len=F,thenremovestheoldnodeinfavorofthenew
one,becausetheoldonewillbedeletedsooner.
Noterplaysdoublerole,astreenodeandpositioninbuffer.
staticvoidinsert_node(intr)
unsignedchar*key;
unsignedi,p;
intcmp;
cmp=1;
key=&
g_ring_buffer[r];
p=N+1+key[0];
g_right_child[r]=g_left_child[r]=NIL;
g_match_len=0;
while
(1)
{
if(cmp>
=0)
{
if(g_right_child[p]!
=NIL)
p=g_right_child[p];
else
{
g_right_child[p]=r;
g_parent[r]=p;
return;
}
}
else
if(g_left_child[p]!
p=g_left_child[p];
g_left_child[p]=r;
for(i=1;
F;
cmp=key[i]-g_ring_buffer[p+i];
if(cmp!
break;
if(i>
g_match_len)
g_match_pos=p;
g_match_len=i;
if(g_match_len>
=F)
}
g_parent[r]=g_parent[p];
g_left_child[r]=g_left_child[p];
g_right_child[r]=g_right_child[p];
g_parent[g_left_child[p]]=r;
g_parent[g_right_child[p]]=r;
if(g_right_child[g_parent[p]]==p)
g_right_child[g_parent[p]]=r;
else
g_left_child[g_parent[p]]=r;
g_parent[p]=NIL;
/*removep*/
deletesnodepfromtree
staticvoiddelete_node(unsignedp)
unsignedq;
if(g_parent[p]==NIL)
return;
/*notintree*/
if(g_right_child[p]==NIL)
q=g_left_child[p];
elseif(g_left_child[p]==NIL)
q=g_right_child[p];
if(g_right_child[q]!
doq=g_right_child[q];
while(g_right_child[q]!
=NIL);
g_right_child[g_parent[q]]=g_left_child[q];
g_parent[g_left_child[q]]=g_parent[q];
g_left_child[q]=g_left_child[p];
g_parent[g_left_child[p]]=q;
g_right_child[q]=g_right_child[p];
g_parent[g_right_child[p]]=q;
g_parent[q]=g_parent[p];
g_right_child[g_parent[p]]=q;
g_left_child[g_parent[p]]=q;
staticvoidcompress(void)
unsignedi,len,r,s,last_match_length,code_buf_ptr;
unsignedcharcode_buf[17],mask;
intc;
init_tree();
/*initializetrees*/
/*code_buf[1..16]saveseightunitsofcode,andcode_buf[0]worksas
eightflags,"
1"
representingthattheunitisanunencodedletter(1byte),
"
0"
aposition-and-lengthpair(2bytes).Thus,eightunitsrequireatmost
16bytesofcode.*/
code_buf[0]=0;
code_buf_ptr=mask=1;
s=0;
r=N-F;
/*Clearthebufferwithanycharacterthatwillappearoften.*/
memset(g_ring_buffer+s,'
'
r-s);
/*ReadFbytesintothelastFbytesofthebuffer*/
for(len=0;
len<
len++)
c=getc(g_infile);
if(c==EOF)
break;
g_ring_buffer[r+len]=c;
g_text_size=len;
if(g_text_size==0)/*textofsizezero*/
/*InserttheFstrings,eachofwhichbeginswithoneormore'
space'
characters.Notetheorderinwhichthesestringsareinserted.
Thisway,degeneratetreeswillbelesslikelytooccur.*/
for(i=1;
=F;
insert_node(r-i);
/*Finally,insertthewholestringjustread.Theglobalvariables
g_match_lenandg_match_posareset.*/
insert_node(r);
do
/*g_match_lenmaybespuriouslylongneartheendoftext.*/
if(g_match_len>
len)
g_match_len=len;
/*Notlongenoughmatch.Sendonebyte.*/
if(g_match_len<
=THRESHOLD)
g_match_len=1;
code_buf[0]|=mask;
/*'
sendonebyte'
flag*/
code_buf[code_buf_ptr]=g_ring_buffer[r];
/*Senduncoded.*/
code_buf_ptr++;
/*Sendpositionandlengthpair.Noteg_match_len>
THRESHOLD.*/
code_buf[code_buf_ptr]=(unsignedchar)g_match_pos;
code_buf[code_buf_ptr]=(unsignedchar)
(((g_match_pos>
>
4)&
0xF0)|
(g_match_len-(THRESHOLD+1)));
/*Shiftmaskleftonebit.*/
mask<
<
=1;
if(mask==0)
/*Sendatmost8unitsofcodetogether*/
for(i=0;
code_buf_ptr;
putc(code_buf[i],g_outfile);
g_code_size+=code_buf_ptr;
code_buf[0]=0;
code_buf_ptr=mask=1;
last_match_length=g_match_len;
for(i=0;
last_match_length;
c=getc(g_infile);
if(c==EOF)
/*Deleteoldstringsandreadnewbytes*/
delete_node(s);
g_ring_buffer[s]=c;
/*Ifthepositionisneartheendofbuffer,extendthebuffer
tomakestringcomparisoneasier.*/
if(s<
F-1)
g_ring_buffer[s+N]=c;
/*Sincethisisaringbuffer,incrementthepositionmoduloN.*/
s=(s+1)&
(N-1);
r=(r+1)&
/*Registerthestringing_ring_buffer[r..r+F-1]*/
insert_node(r);
/*Reportsprogresseachtimetheg_text_sizeexceedsmultiplesof1024.*/
g_text_size+=i;
if(g_text_size>
g_print_count)
printf("
%12ld\r"
g_text_size);
g_print_count+=1024;
while(i++<
last_match_length)/*Aftertheendoftext,*/
/*noneedtoread,but*/
len--;
if(len)
insert_node(r);
/*buffermaynotbeempty.*/
}while(len>
0);
/*untillengthofstringtobeprocessediszero*/
/*Sendremainingcode.*/
if(code_buf_ptr>
1)
putc(code_buf[i],g_outfile);
g_code_size+=code_buf_ptr;
/*Encodingisdone.*/
printf("
In:
%ldbytes\n"
Out:
g_code_size);
/*xxx-floating-pointmath:
Out/In:
%.3f\n"
(double)g_code_size/g_text_size);
Justthereverseofcompress()
staticvoiddecompress(void)
unsignedr,flags;
intc,i,j,k;
memset(g_ring_buffer,'
N-F);
for(flags=0;
;
flags>
=1)
/*Getabyte.Foreachbitofthisbyte:
1=copyonebyteliterally,frominputtooutput
0=gettwomorebytesdescribinglength