sum=sum+(Integer)numlist.get(i);
}
System.out.println("--------------------显示结果-------------------");
//返回指定个数的汉字频率统计结果
ArrayListfreqlist=frequency(chlist,numlist,sum,100);
//计算熵值
floatsh=entropy(freqlist);
//计算指定个汉字的字频总和
floatfre1=freqSum(freqlist,1);
floatfre2=freqSum(freqlist,20);
floatfre3=freqSum(freqlist,100);
floatfre4=freqSum(freqlist,600);
floatfre5=freqSum(freqlist,2000);
floatfre6=freqSum(freqlist,3000);
floatfre7=freqSum(freqlist,6000);
ArrayListfreal=newArrayList();
freal.add(fre1);
freal.add(fre2);
freal.add(fre3);
freal.add(fre4);
freal.add(fre5);
freal.add(fre6);
freal.add(fre7);
ArrayListnal=newArrayList();
nal.add
(1);
nal.add(20);
nal.add(100);
nal.add(600);
nal.add(2000);
nal.add(3000);
nal.add(6000);
System.out.println("--------------------程序结束-------------------");
//生成Excel的类
try{
//打开文件
WritableWorkbookbook=Workbook.createWorkbook(newFile("统计结果.xls"));
//生成工作表,参数0表示这是第一页
WritableSheetsheet=book.createSheet(sum+"字",0);
/*
*生成一个保存数字的单元格 必须使用Number的完整包路径,否则有语法歧义
*/
//表头
Labellabel1=newLabel(0,0,"字符");
sheet.addCell(label1);
Labellabel2=newLabel(1,0,"频率");
sheet.addCell(label2);
for(inti=0;i<100;i++)
{
//中文字符
Labellabel=newLabel(0,i+1,chlist.get(i).toString());
sheet.addCell(label);
//出现的频率
jxl.write.Numbernumber=newjxl.write.Number(1,i+1,(Float)freqlist.get(i));
sheet.addCell(number);
}
//写入熵值
Labellsh=newLabel(0,101,"熵值");
sheet.addCell(lsh);
jxl.write.Numbernsh=newjxl.write.Number(1,101,sh);
sheet.addCell(nsh);
//写入字频总和
for(inti=0;i{
if((Float)freal.get(i)!
=0f)
{
Labellfreq=newLabel(0,102+i,"前"+nal.get(i).toString()+"个汉字字频总和");
sheet.addCell(lfreq);
jxl.write.Numbernfreq=newjxl.write.Number(1,102+i,(Float)freal.get(i));
sheet.addCell(nfreq);
}
}
//写入数据
book.write();
//并关闭文件
book.close();
}catch(Exceptione){
System.out.println(e);
}
}
publicstaticArrayListreadFromTable(Stringfilename){
ArrayListchlist=newArrayList();
Filefile=newFile(filename);
Readerreader=null;
try{
//一次读一个字符
reader=newInputStreamReader(newFileInputStream(file));
inttempint;
while((tempint=reader.read())!
=-1){
//判断读到的字符是否是中文
if((tempint>='\u4e00'&&tempint<='\u9fa5')
||(tempint>='\uf900'&&tempint<='\ufa2d')){
chartempchar=(char)tempint;
//System.out.println(tempchar);
//System.out.println("list.size:
"+chlist.size());
//判断该字符是否出现过
inti=0;
for(i=0;i//一旦重复,跳出循环
charc='';
Objectob=chlist.get(i);
if(obinstanceofCharacter){
c=(Character)ob;
}
//System.out.println("c:
"+c);
if(tempchar==c){
//System.out.println("重复!
");
break;
}
}
//字符从未出现过
if(i==chlist.size()){
//System.out.println("新字符!
");
chlist.add(tempchar);
}
}
}
reader.close();
}catch(Exceptione){
e.printStackTrace();
}
returnchlist;
}
/**
*该函数用于从文件中读取中文字符,并返回它出现的次数
*
*@paramfilename
*@return
*/
publicstaticArrayListreadFromFile(Stringfilename,ArrayListchtable){
Filefile=newFile(filename);
Readerreader=null;
ArrayListnumlist=newArrayList();
//初始化字符出现的次数集合
for(inti=0;inumlist.add(0);
}
try{
//一次读一个字符
reader=newInputStreamReader(newFileInputStream(file));
inttempint;
intsum=0;
while((tempint=reader.read())!
=-1){
//判断读到的字符是否是中文
if((tempint>='\u4e00'&&tempint<='\u9fa5')
||(tempint>='\uf900'&&tempint<='\ufa2d')){
chartempchar=(char)tempint;
//System.out.println(tempchar);
//System.out.println("list.size:
"+chlist.size());
//判断该字符是否在字表里
inti=0;
for(i=0;i//在字表里,统计重复次数并跳出循环
charc='';
Objectob=chtable.get(i);
if(obinstanceofCharacter){
c=(Character)ob;
}
//System.out.println("c:
"+c);
if(tempchar==c){
intnum=(Integer)numlist.get(i)+1;
numlist.set(i,num);
break;
}
}
}
}
reader.close();
}catch(Exceptione){
e.printStackTrace();
}
returnnumlist;
}
/**
*该函数用来对汉字出现的次数进行从大到小的排序,返回排序结果
*
*@paramchlist
*@paramnumlist
*/
publicstaticArrayListsort(ArrayListchtable,ArrayListnumlist){
ArrayListchlist=chtable;
for(inti=0;ifor(intj=i+1;jintlisti=(Integer)numlist.get(i);
intlistj=(Integer)numlist.get(j);
if(listinumlist.set(i,listj);
numlist.set(j,listi);
charchi=(Character)chlist.get(i);
charchj=(Character)chlist.get(j);
chlist.set(i,chj);
chlist.set(j,chi);
}
}
}
returnchlist;
}
/**
*该函数用来计算各个汉字出现的频率,并且显示出指定个数的结果
*
*@paramchlist
*@paramnumlist
*@paramsum
*@paramcount
*/
publicstaticArrayListfrequency(ArrayListchlist,ArrayListnumlist,intsum,
intcount){
ArrayListfreqlist=newArrayList();
//计算频率
for(intj=0;jfloatfreq=(Integer)numlist.get(j)/(float)sum;
freqlist.add(freq);
}
//按指定格式输出(保留6位有效数字)
for(intj=0;jSystem.out.println("字符:
"+chlist.get(j));
System.out.println("出现次数:
"+numlist.get(j));
System.out.println("频率:
"+freqlist.get(j));
System.out.println("------------");
}
System.out.println("中文字符总数:
"+sum);
returnfreqlist;
}
/**
*该函数用来计算熵值
*
*@paramfreqlist
*/
publicstaticfloatentropy(ArrayListfreqlist){
floatsum=0f;
for(inti=0;ifloatfreq=(Float)freqlist.get(i);
if(freq!
=0)//规定0long0=0;
sum+=freq*(Math.log((double)freq)/Math.log((double)2));
}
DecimalFormatdf=newDecimalFormat("0.000000");//格式化小数,不足的补0
Stringresult=df.format(-sum);//返回的是String类型的
floatsh=Float.parseFloat(result);
System.out.println("熵值为:
"+result);
returnsh;
}
/**
*计算指定个数的汉字字频总和
*
*@paramfreqlist
*@paramcount
*/
publicstaticfloatfreqSum(ArrayListfreqlist,intcount){
floatfreqsum=0f;
if(count>freqlist.size())
return0f;
for(inti=0;ifreqsum+=(Float)freqlist.get(i);
}
System.out.println("前"+count+"个汉字字频总和为:
"+freqsum);
returnfreqsum;
}
}