汉字字频统计Word文件下载.docx
《汉字字频统计Word文件下载.docx》由会员分享,可在线阅读,更多相关《汉字字频统计Word文件下载.docx(11页珍藏版)》请在冰点文库上搜索。
![汉字字频统计Word文件下载.docx](https://file1.bingdoc.com/fileroot1/2023-5/8/17a098b4-dcca-452c-a276-a1b4bad09807/17a098b4-dcca-452c-a276-a1b4bad098071.gif)
for(inti=0;
i<
numlist.size();
i++){
sum=sum+(Integer)numlist.get(i);
}
--------------------显示结果-------------------"
//返回指定个数的汉字频率统计结果
ArrayListfreqlist=frequency(chlist,numlist,sum,100);
//计算熵值
floatsh=entropy(freqlist);
//计算指定个汉字的字频总和
floatfre1=freqSum(freqlist,1);
floatfre2=freqSum(freqlist,20);
floatfre3=freqSum(freqlist,100);
floatfre4=freqSum(freqlist,600);
floatfre5=freqSum(freqlist,2000);
floatfre6=freqSum(freqlist,3000);
floatfre7=freqSum(freqlist,6000);
ArrayListfreal=newArrayList();
freal.add(fre1);
freal.add(fre2);
freal.add(fre3);
freal.add(fre4);
freal.add(fre5);
freal.add(fre6);
freal.add(fre7);
ArrayListnal=newArrayList();
nal.add
(1);
nal.add(20);
nal.add(100);
nal.add(600);
nal.add(2000);
nal.add(3000);
nal.add(6000);
--------------------程序结束-------------------"
//生成Excel的类
try{
//打开文件
WritableWorkbookbook=Workbook.createWorkbook(newFile("
统计结果.xls"
));
//生成工作表,参数0表示这是第一页
WritableSheetsheet=book.createSheet(sum+"
字"
0);
/*
*生成一个保存数字的单元格 必须使用Number的完整包路径,否则有语法歧义
*/
//表头
Labellabel1=newLabel(0,0,"
字符"
sheet.addCell(label1);
Labellabel2=newLabel(1,0,"
频率"
sheet.addCell(label2);
for(inti=0;
i<
100;
i++)
{
//中文字符
Labellabel=newLabel(0,i+1,chlist.get(i).toString());
sheet.addCell(label);
//出现的频率
jxl.write.Numbernumber=newjxl.write.Number(1,i+1,(Float)freqlist.get(i));
sheet.addCell(number);
}
//写入熵值
Labellsh=newLabel(0,101,"
熵值"
sheet.addCell(lsh);
jxl.write.Numbernsh=newjxl.write.Number(1,101,sh);
sheet.addCell(nsh);
//写入字频总和
freal.size();
if((Float)freal.get(i)!
=0f)
{
Labellfreq=newLabel(0,102+i,"
前"
+nal.get(i).toString()+"
个汉字字频总和"
sheet.addCell(lfreq);
jxl.write.Numbernfreq=newjxl.write.Number(1,102+i,(Float)freal.get(i));
sheet.addCell(nfreq);
}
//写入数据
book.write();
//并关闭文件
book.close();
}catch(Exceptione){
System.out.println(e);
}
publicstaticArrayListreadFromTable(Stringfilename){
ArrayListchlist=newArrayList();
Filefile=newFile(filename);
Readerreader=null;
//一次读一个字符
reader=newInputStreamReader(newFileInputStream(file));
inttempint;
while((tempint=reader.read())!
=-1){
//判断读到的字符是否是中文
if((tempint>
='
\u4e00'
&
&
tempint<
\u9fa5'
)
||(tempint>
\uf900'
\ufa2d'
)){
chartempchar=(char)tempint;
//System.out.println(tempchar);
//System.out.println("
list.size:
+chlist.size());
//判断该字符是否出现过
inti=0;
for(i=0;
chlist.size();
//一旦重复,跳出循环
charc='
'
;
Objectob=chlist.get(i);
if(obinstanceofCharacter){
c=(Character)ob;
}
//System.out.println("
c:
+c);
if(tempchar==c){
//System.out.println("
重复!
break;
}
//字符从未出现过
if(i==chlist.size()){
新字符!
chlist.add(tempchar);
reader.close();
e.printStackTrace();
returnchlist;
/**
*该函数用于从文件中读取中文字符,并返回它出现的次数
*
*@paramfilename
*@return
*/
publicstaticArrayListreadFromFile(Stringfilename,ArrayListchtable){
ArrayListnumlist=newArrayList();
//初始化字符出现的次数集合
chtable.size();
numlist.add(0);
intsum=0;
//判断该字符是否在字表里
//在字表里,统计重复次数并跳出循环
Objectob=chtable.get(i);
intnum=(Integer)numlist.get(i)+1;
numlist.set(i,num);
returnnumlist;
*该函数用来对汉字出现的次数进行从大到小的排序,返回排序结果
*@paramchlist
*@paramnumlist
publicstaticArrayListsort(ArrayListchtable,ArrayListnumlist){
ArrayListchlist=chtable;
for(intj=i+1;
j<
j++){
intlisti=(Integer)numlist.get(i);
intlistj=(Integer)numlist.get(j);
if(listi<
listj){
numlist.set(i,listj);
numlist.set(j,listi);
charchi=(Character)chlist.get(i);
charchj=(Character)chlist.get(j);
chlist.set(i,chj);
chlist.set(j,chi);
*该函数用来计算各个汉字出现的频率,并且显示出指定个数的结果
*@paramsum
*@paramcount
publicstaticArrayListfrequency(ArrayListchlist,ArrayListnumlist,intsum,
intcount){
ArrayListfreqlist=newArrayList();
//计算频率
for(intj=0;
floatfreq=(Integer)numlist.get(j)/(float)sum;
freqlist.add(freq);
//按指定格式输出(保留6位有效数字)
freqlist.size()&
count;
System.out.println("
字符:
+chlist.get(j));
出现次数:
+numlist.get(j));
频率:
+freqlist.get(j));
------------"
中文字符总数:
+sum);
returnfreqlist;
*该函数用来计算熵值
*@paramfreqlist
publicstaticfloatentropy(ArrayListfreqlist){
floatsum=0f;
freqlist.size();
floatfreq=(Float)freqlist.get(i);
if(freq!
=0)//规定0long0=0;
sum+=freq*(Math.log((double)freq)/Math.log((double)2));
DecimalFormatdf=newDecimalFormat("
0.000000"
//格式化小数,不足的补0
Stringresult=df.format(-sum);
//返回的是String类型的
floatsh=Float.parseFloat(result);
熵值为:
+result);
returnsh;
*计算指定个数的汉字字频总和
publicstaticfloatfreqSum(ArrayListfreqlist,intcount){
floatfreqsum=0f;
if(count>
freqlist.size())
return0f;
freqsum+=(Float)freqlist.get(i);
+count+"
个汉字字频总和为:
+freqsum);
returnfreqsum;
}