C45算法建立决策树JAVA练习Word格式.docx
《C45算法建立决策树JAVA练习Word格式.docx》由会员分享,可在线阅读,更多相关《C45算法建立决策树JAVA练习Word格式.docx(18页珍藏版)》请在冰点文库上搜索。
>
attributeList){
System.out.println("
当前的DATA为"
);
for(inti=0;
i<
data.size();
i++){ArrayList<
temp=data.get(i);
for(intj=0;
j<
temp.size();
j++){System.out.print(temp.get(j)+"
"
}
System.out.println();
当前的ATTR为"
attributeList.size();
i++){System.out.print(attributeList.get(i)+"
System.out.println("
TreeNodenode=newTreeNode();
Stringresult=InfoGain.IsPure(InfoGain.getTarget(data));
if(result!
=null){node.setNodeName("
leafNode"
node.setTargetFunValue(result);
returnnode;
if(attributeList.size()==0){node.setTargetFunValue(result);
returnnode;
}else{
InfoGaingain=newInfoGain(data,attributeList);
doublemaxGain=0.0;
intattrIndex=-1;
i++){doubletempGain=gain.getGainRatio(i);
if(maxGain<
tempGain){maxGain=tempGain;
attrIndex=i;
选择出的最大增益率属性为:
attributeList.get(attrIndex));
node.setAttributeValue(attributeList.get(attrIndex));
List<
resultData=null;
Map<
String,Long>
attrvalueMap=gain.getAttributeValue(attrIndex);
for(Map.Entry<
String,Long>
entry:
attrvalueMap.entrySet()){
resultData=gain.getData4Value(entry.getKey(),
attrIndex);
if(resultData.size()==0){
leafNode.setNodeName(attributeList.get(attrIndex));
leafNode.setTargetFunValue(result);
leafNode.setAttributeValue(entry.getKey());
for(intj=0;
j<
resultData.size();
j++){
resultData.get(j).remove(attrIndex);
resultAttr=new
(attributeList);
resultAttr.remove(attrIndex);
leafNode=createDT(resultData,resultAttr);
node.getChildTreeNode().add(leafNode);
node.getPathName().add(entry.getKey());
/**
 * One node of the decision tree.
 *
 * <p>A node carries the attribute chosen for the split, the child nodes, the
 * branch labels that lead to those children, and the target value and node
 * name set by the tree builder.
 *
 * <p>NOTE(review): this class was restored from a listing whose whitespace and
 * generic type arguments had been stripped. {@code pathName} is typed as
 * {@code List<String>} because the builder adds {@code Map.Entry<String, Long>}
 * keys to it; confirm against the original source.
 */
class TreeNode {
    /** Value stored through {@link #setAttributeValue(String)}. */
    private String attributeValue;
    /** Child nodes, in the order they were added. */
    private List<TreeNode> childTreeNode;
    /** Branch labels; the tree builder adds one per child it attaches. */
    private List<String> pathName;
    /** Target (class) value stored through {@link #setTargetFunValue(String)}. */
    private String targetFunValue;
    /** Display name of this node. */
    private String nodeName;

    /**
     * Creates a node with the given name and empty child and path lists.
     *
     * @param nodeName name of the node, may be {@code null}
     */
    public TreeNode(String nodeName) {
        this.nodeName = nodeName;
        this.childTreeNode = new ArrayList<TreeNode>();
        this.pathName = new ArrayList<String>();
    }

    /**
     * Creates an unnamed node with empty child and path lists.
     *
     * <p>The lists are initialised here as well, because the tree builder
     * creates nodes with this constructor and then appends to
     * {@link #getChildTreeNode()} and {@link #getPathName()} directly.
     */
    public TreeNode() {
        this(null);
    }

    /** @return the attribute value of this node, or {@code null} if unset */
    public String getAttributeValue() {
        return attributeValue;
    }

    /** @param attributeValue the attribute value to store */
    public void setAttributeValue(String attributeValue) {
        this.attributeValue = attributeValue;
    }

    /** @return the live (mutable) list of child nodes */
    public List<TreeNode> getChildTreeNode() {
        return childTreeNode;
    }

    /** @param childTreeNode the list that replaces the current child list */
    public void setChildTreeNode(List<TreeNode> childTreeNode) {
        this.childTreeNode = childTreeNode;
    }

    /** @return the target value of this node, or {@code null} if unset */
    public String getTargetFunValue() {
        return targetFunValue;
    }

    /** @param targetFunValue the target value to store */
    public void setTargetFunValue(String targetFunValue) {
        this.targetFunValue = targetFunValue;
    }

    /** @return the name of this node, or {@code null} if unset */
    public String getNodeName() {
        return nodeName;
    }

    /** @param nodeName the name to store */
    public void setNodeName(String nodeName) {
        this.nodeName = nodeName;
    }

    /** @return the live (mutable) list of branch labels */
    public List<String> getPathName() {
        return pathName;
    }

    /** @param pathName the list that replaces the current branch labels */
    public void setPathName(List<String> pathName) {
        this.pathName = pathName;
    }
}
增益率计算类(取对数时以 e 为底,而非以 2 为底;增益率是信息增益与分裂信息之比,两者使用同一底数,因此底数的选择不影响最终选出的属性)
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
//C4.5实现
publicclassInfoGain{
data;
attribute;
publicInfoGain(List<
data,List<
attribute){
this.data=newArrayList<
i++){
t=newArrayList<
j++){t.add(temp.get(j));
this.data.add(t);
this.attribute=newArrayList<
for(intk=0;
k<
attribute.size();
k++){this.attribute.add(attribute.get(k));
/*this.data=data;
this.attribute=attribute;
*/
//获得熵
publicdoublegetEntropy(){
Map<
targetValueMap=getTargetValue();
Set<
targetkey=targetValueMap.keySet();
doubleentropy=0.0;
for(Stringkey:
targetkey){
doublep=MathUtils.div((double)targetValueMap.get(key),
(double)data.size());
entropy+=(-1)*p*Math.log(p);
returnentropy;
//获得InfoApublicdoublegetInfoAttribute(intattributeIndex){
attributeValueMap=getAttributeValue(attributeIndex);
doubleinfoA=0.0;
attributeValueMap.entrySet()){
intsize=data.size();
doubleattributeP=MathUtils.div((double)entry.getValue()(double)size);
targetValueMap=getAttributeValueTargetValue(entry.getKey(),attributeIndex);
longtotalCount=0L;
entryValue:
targetValueMap.entrySet()){totalCount+=entryValue.getValue();
}doublevalueSum=0.0;
for(Map.Entry<
entryTargetValue:
targetValueMap.entrySet()){
doublep=MathUtils.div((double)entryTargetValue.getValue(),(double)totalCount);
valueSum+=Math.log(p)*p;
infoA+=(-1)*attributeP*valueSum;
returninfoA;
//得到属性值在决策空间的比例publicMap<
getAttributeValueTargetValue(StringattributeName,intattributeIndex){
targetValueMap=newHashMap<
Iterator<
iterator=data.iterator();
while(iterator.hasNext()){
tempList=iterator.next();
if(attributeName.equalsIgnoreCase(tempList.get(attributeIndex))){
intsize=tempList.size();
Stringkey=tempList.get(size-1);
Longvalue=targetValueMap.get(key);
targetValueMap.put(key,value!
=
returntargetValueMap;
null?
++value:
1L);
//得到属性在决策空间上的数量
publicMap<
getAttributeValue(
intattributeIndex){
attributeValueMap=HashMap<
for(ArrayList<
note:
data){Stringkey=note.get(attributeIndex);
Longvalue=attributeValueMap.get(key);
attributeValueMap.put(key,value!
returnattributeValueMap;
new
getData4Value(StringattrValue,
int
attrIndex){
resultData=newArrayList<
for(;
iterator.hasNext();
){
templist=iterator.next();
if(templist.get(attrIndex).equalsIgnoreCase(attrValue)){ArrayList<
temp=(ArrayList<
)templist.clone();
resultData.add(temp);
returnresultData;
//获得增益率
publicdoublegetGainRatio(intattributeIndex){
returnMathUtils.div(getGain(attributeIndex),getSplitInfo(attributeIndex));
//获得增益量
publicdoublegetGain(intattributeIndex){
returngetEntropy()-getInfoAttribute(attributeIndex);
//得到惩罚因子
publicdoublegetSplitInfo(intattributeIndex){
doublesplitA=0.0;
doubleattributeP=MathUtils.div((double)entry.getValue()
(double)size);
splitA+=attributeP*Math.log(attributeP)*(-1);
returnsplitA;
//得到目标函数在当前集合范围内的离散的值
getTargetValue(){
Stringkey=tempList.get(tempList.size()-1);
Longvalue=targetValueMap.get(key);
targetValueMap.put(key,value!
=null?
1L);
}
//获得TARGET值
publicstaticList<
getTarget(List<
list=newArrayList<
temp:
data){intindex=temp.size()-1;
Stringvalue=temp.get(index);
list.add(value);
returnlist;
//判断当前纯度是否100%
publicstaticStringIsPure(List<
list){
Set<
set=newHashSet<
for(Stringname:
list){set.add(name);
}if(set.size()>
1)returnnull;
iterator=set.iterator();
returniterator.next();
测试类:将上文的数据集和属性名分别读入两个 List 中。
importC45Test.DecisionTree.TreeNode;
publicclassMainC45{
privatestaticfinalList<
dataList=ArrayList<
attributeList=new
publicstaticvoidmain(Stringargs[]){
DecisionTreedt=newDecisionTree();
TreeNodenode=dt.createDT(configData(),configAttribute());
大数运算工具类
importjava.math.BigDecimal;
publicabstractclassMathUtils{
//默认余数长度
privatestaticfinalintDIV_SCALE=10;
//受限于DOUBLE长度
publicstaticdoubleadd(doublevalue1,doublevalue2){
BigDecimalbig1=newBigDecimal(String.valueOf(value1));
BigDecimalbig2=newBigDecimal(String.valueOf(value2));
returnbig1.add(big2).doubleValue();
//大数加法
publicstaticdoubleadd(Stringvalue1,Stringvalue2){
BigDecimalbig1=newBigDecimal(value1);
BigDecimalbig2=newBigDecimal(value2);
publicstaticdoublediv(doublevalue1,doublevalue2){
BigD