LZSS压缩算法C语言实现.docx
《LZSS压缩算法C语言实现.docx》由会员分享,可在线阅读,更多相关《LZSS压缩算法C语言实现.docx(17页珍藏版)》请在冰点文库上搜索。
LZSS压缩算法C语言实现
/*----------------------------------------------------------------------------
LZSS.C--ADataCompressionProgram
4/6/1989HaruhikoOkumura
Use,distribute,andmodifythisprogramfreely.
Pleasesendmeyourimprovedversions.
PC-VANSCIENCE
NIFTY-ServePAF01022
CompuServe74050,1022
SomechangesmadeJune,2003byChrisGiese
,
-ChangedFfrom16to18,forcompatabilitywithMicrosoftCOMPRESS/EXPAND
EXPAND.EXEisontheinstalldisksforMS-DOSversion6andWindows3.1
COMPRESS.EXEisintheWin3.1SDK,andcomeswithBorlandC++3.1
-Changedcompress/expandcharsoncommandlinefrome/dtoc/u
-Wherepossibleandcorrect,changedintstounsigned
-Madeallfunctionsstatic
-Changedformatting,indenting,globalvariablenames,andfunctionnames
-Triedtosimplify/clarifycodeinsomeareas
----------------------------------------------------------------------------*/
#include
#include
#include
#include
/*sizeofringbuffer*/
#defineN4096
/*indexforrootofbinarysearchtrees*/
#defineNILN
/*upperlimitforg_match_len.Changedfrom18to16forbinary
compatabilitywithMicrosoftCOMPRESS.EXEandEXPAND.EXE
#defineF18*/
#defineF16
/*encodestringintopositionandlength
ifmatch_lengthisgreaterthanthis:
*/
#defineTHRESHOLD2
/*theseassumelittle-endianCPUlikeIntelx86
--needbyte-swapfunctionforbigendianCPU*/
#defineREAD_LE32(X)*(uint32_t*)(X)
#defineWRITE_LE32(X,Y)*(uint32_t*)(X)=(Y)
/*thisassumessizeof(long)==4*/
typedefunsignedlonguint32_t;
/*text(input)sizecounter,code(output)sizecounter,
andcounterforreportingprogressevery1Kbytes*/
staticunsignedlongg_text_size,g_code_size,g_print_count;
/*ringbufferofsizeN,withextraF-1bytes
tofacilitatestringcomparison*/
staticunsignedcharg_ring_buffer[N+F-1];
/*positionandlengthoflongestmatch;setbyinsert_node()*/
staticunsignedg_match_pos,g_match_len;
/*left&rightchildren&parent--theseconstitutebinarysearchtree*/
staticunsignedg_left_child[N+1],g_right_child[N+257],g_parent[N+1];
/*input&outputfiles*/
staticFILE*g_infile,*g_outfile;
/*****************************************************************************
initializetrees
*****************************************************************************/
staticvoidinit_tree(void)
{
unsignedi;
/*Fori=0toN-1,g_right_child[i]andg_left_child[i]willbetherightand
leftchildrenofnodei.Thesenodesneednotbeinitialized.
Also,g_parent[i]istheparentofnodei.Theseareinitializedto
NIL(=N),whichstandsfor'notused.'
Fori=0to255,g_right_child[N+i+1]istherootofthetree
forstringsthatbeginwithcharacteri.Theseareinitialized
toNIL.Notethereare256trees.*/
for(i=N+1;i<=N+256;i++)
g_right_child[i]=NIL;
for(i=0;ig_parent[i]=NIL;
}
/*****************************************************************************
InsertsstringoflengthF,g_ring_buffer[r..r+F-1],intooneofthe
trees(g_ring_buffer[r]'thtree)andreturnsthelongest-matchposition
andlengthviatheglobalvariablesg_match_posandg_match_len.
Ifg_match_len=F,thenremovestheoldnodeinfavorofthenew
one,becausetheoldonewillbedeletedsooner.
Noterplaysdoublerole,astreenodeandpositioninbuffer.
*****************************************************************************/
staticvoidinsert_node(intr)
{
unsignedchar*key;
unsignedi,p;
intcmp;
cmp=1;
key=&g_ring_buffer[r];
p=N+1+key[0];
g_right_child[r]=g_left_child[r]=NIL;
g_match_len=0;
while
(1)
{
if(cmp>=0)
{
if(g_right_child[p]!
=NIL)
p=g_right_child[p];
else
{
g_right_child[p]=r;
g_parent[r]=p;
return;
}
}
else
{
if(g_left_child[p]!
=NIL)
p=g_left_child[p];
else
{
g_left_child[p]=r;
g_parent[r]=p;
return;
}
}
for(i=1;i{
cmp=key[i]-g_ring_buffer[p+i];
if(cmp!
=0)
break;
}
if(i>g_match_len)
{
g_match_pos=p;
g_match_len=i;
if(g_match_len>=F)
break;
}
}
g_parent[r]=g_parent[p];
g_left_child[r]=g_left_child[p];
g_right_child[r]=g_right_child[p];
g_parent[g_left_child[p]]=r;
g_parent[g_right_child[p]]=r;
if(g_right_child[g_parent[p]]==p)
g_right_child[g_parent[p]]=r;
else
g_left_child[g_parent[p]]=r;
g_parent[p]=NIL;/*removep*/
}
/*****************************************************************************
deletesnodepfromtree
*****************************************************************************/
staticvoiddelete_node(unsignedp)
{
unsignedq;
if(g_parent[p]==NIL)
return;/*notintree*/
if(g_right_child[p]==NIL)
q=g_left_child[p];
elseif(g_left_child[p]==NIL)
q=g_right_child[p];
else
{
q=g_left_child[p];
if(g_right_child[q]!
=NIL)
{
doq=g_right_child[q];
while(g_right_child[q]!
=NIL);
g_right_child[g_parent[q]]=g_left_child[q];
g_parent[g_left_child[q]]=g_parent[q];
g_left_child[q]=g_left_child[p];
g_parent[g_left_child[p]]=q;
}
g_right_child[q]=g_right_child[p];
g_parent[g_right_child[p]]=q;
}
g_parent[q]=g_parent[p];
if(g_right_child[g_parent[p]]==p)
g_right_child[g_parent[p]]=q;
else
g_left_child[g_parent[p]]=q;
g_parent[p]=NIL;
}
/*****************************************************************************
*****************************************************************************/
staticvoidcompress(void)
{
unsignedi,len,r,s,last_match_length,code_buf_ptr;
unsignedcharcode_buf[17],mask;
intc;
init_tree();/*initializetrees*/
/*code_buf[1..16]saveseightunitsofcode,andcode_buf[0]worksas
eightflags,"1"representingthattheunitisanunencodedletter(1byte),
"0"aposition-and-lengthpair(2bytes).Thus,eightunitsrequireatmost
16bytesofcode.*/
code_buf[0]=0;
code_buf_ptr=mask=1;
s=0;
r=N-F;
/*Clearthebufferwithanycharacterthatwillappearoften.*/
memset(g_ring_buffer+s,'',r-s);
/*ReadFbytesintothelastFbytesofthebuffer*/
for(len=0;len{
c=getc(g_infile);
if(c==EOF)
break;
g_ring_buffer[r+len]=c;
}
g_text_size=len;
if(g_text_size==0)/*textofsizezero*/
return;
/*InserttheFstrings,eachofwhichbeginswithoneormore'space'
characters.Notetheorderinwhichthesestringsareinserted.
Thisway,degeneratetreeswillbelesslikelytooccur.*/
for(i=1;i<=F;i++)
insert_node(r-i);
/*Finally,insertthewholestringjustread.Theglobalvariables
g_match_lenandg_match_posareset.*/
insert_node(r);
do
{
/*g_match_lenmaybespuriouslylongneartheendoftext.*/
if(g_match_len>len)
g_match_len=len;
/*Notlongenoughmatch.Sendonebyte.*/
if(g_match_len<=THRESHOLD)
{
g_match_len=1;
code_buf[0]|=mask;/*'sendonebyte'flag*/
code_buf[code_buf_ptr]=g_ring_buffer[r];/*Senduncoded.*/
code_buf_ptr++;
/*Sendpositionandlengthpair.Noteg_match_len>THRESHOLD.*/
}
else
{
code_buf[code_buf_ptr]=(unsignedchar)g_match_pos;
code_buf_ptr++;
code_buf[code_buf_ptr]=(unsignedchar)
(((g_match_pos>>4)&0xF0)|
(g_match_len-(THRESHOLD+1)));
code_buf_ptr++;
}
/*Shiftmaskleftonebit.*/
mask<<=1;
if(mask==0)
{
/*Sendatmost8unitsofcodetogether*/
for(i=0;iputc(code_buf[i],g_outfile);
g_code_size+=code_buf_ptr;
code_buf[0]=0;
code_buf_ptr=mask=1;
}
last_match_length=g_match_len;
for(i=0;i{
c=getc(g_infile);
if(c==EOF)
break;
/*Deleteoldstringsandreadnewbytes*/
delete_node(s);
g_ring_buffer[s]=c;
/*Ifthepositionisneartheendofbuffer,extendthebuffer
tomakestringcomparisoneasier.*/
if(sg_ring_buffer[s+N]=c;
/*Sincethisisaringbuffer,incrementthepositionmoduloN.*/
s=(s+1)&(N-1);
r=(r+1)&(N-1);
/*Registerthestringing_ring_buffer[r..r+F-1]*/
insert_node(r);
}
/*Reportsprogresseachtimetheg_text_sizeexceedsmultiplesof1024.*/
g_text_size+=i;
if(g_text_size>g_print_count)
{
printf("%12ld\r",g_text_size);
g_print_count+=1024;
}
while(i++{
delete_node(s);/*noneedtoread,but*/
s=(s+1)&(N-1);
r=(r+1)&(N-1);
len--;
if(len)
insert_node(r);/*buffermaynotbeempty.*/
}
}while(len>0);/*untillengthofstringtobeprocessediszero*/
/*Sendremainingcode.*/
if(code_buf_ptr>1)
{
for(i=0;iputc(code_buf[i],g_outfile);
g_code_size+=code_buf_ptr;
}
/*Encodingisdone.*/
printf("In:
%ldbytes\n",g_text_size);
printf("Out:
%ldbytes\n",g_code_size);
/*xxx-floating-pointmath:
*/
printf("Out/In:
%.3f\n",(double)g_code_size/g_text_size);
}
/*****************************************************************************
Justthereverseofcompress()
*****************************************************************************/
staticvoiddecompress(void)
{
unsignedr,flags;
intc,i,j,k;
memset(g_ring_buffer,'',N-F);
r=N-F;
for(flags=0;;flags>>=1)
{
/*Getabyte.Foreachbitofthisbyte:
1=copyonebyteliterally,frominputtooutput
0=gettwomorebytesdescribinglength