Weka Java, Weka Java Tutorial


Help: Java source code for Weka's ID3 algorithm

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * Id3.java
 * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.trees;

import weka.classifiers.Classifier;
import weka.classifiers.Sourcable;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.NoSupportForMissingValuesException;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

import java.util.Enumeration;

/**
 <!-- globalinfo-start -->
 * Class for constructing an unpruned decision tree based on the ID3
 * algorithm. Can only deal with nominal attributes. No missing values
 * allowed. Empty leaves may result in unclassified instances. For more
 * information see: <br/>
 * <br/>
 * R. Quinlan (1986). Induction of decision trees. Machine Learning. 1(1):81-106.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;article{Quinlan1986,
 *    author = {R. Quinlan},
 *    journal = {Machine Learning},
 *    number = {1},
 *    pages = {81-106},
 *    title = {Induction of decision trees},
 *    volume = {1},
 *    year = {1986}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision: 6404 $
 */
public class Id3
  extends Classifier
  implements TechnicalInformationHandler, Sourcable {

  /** for serialization */
  static final long serialVersionUID = -2693678647096322561L;

  /** The node's successors. */
  private Id3[] m_Successors;

  /** Attribute used for splitting. */
  private Attribute m_Attribute;

  /** Class value if node is leaf. */
  private double m_ClassValue;

  /** Class distribution if node is leaf. */
  private double[] m_Distribution;

  /** Class attribute of dataset. */
  private Attribute m_ClassAttribute;

  /**
   * Returns a string describing the classifier.
   * @return a description suitable for the GUI.
   */
  public String globalInfo() {
    return "Class for constructing an unpruned decision tree based on the ID3 "
      + "algorithm. Can only deal with nominal attributes. No missing values "
      + "allowed. Empty leaves may result in unclassified instances. For more "
      + "information see: \n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.ARTICLE);
    result.setValue(Field.AUTHOR, "R. Quinlan");
    result.setValue(Field.YEAR, "1986");
    result.setValue(Field.TITLE, "Induction of decision trees");
    result.setValue(Field.JOURNAL, "Machine Learning");
    result.setValue(Field.VOLUME, "1");
    result.setValue(Field.NUMBER, "1");
    result.setValue(Field.PAGES, "81-106");

    return result;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    // instances
    result.setMinimumNumberInstances(0);

    return result;
  }

  /**
   * Builds Id3 decision tree classifier.
   *
   * @param data the training data
   * @exception Exception if classifier can't be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    makeTree(data);
  }

  /**
   * Method for building an Id3 tree.
   *
   * @param data the training data
   * @exception Exception if decision tree can't be built successfully
   */
  private void makeTree(Instances data) throws Exception {
    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
      m_Attribute = null;
      m_ClassValue = Instance.missingValue();
      m_Distribution = new double[data.numClasses()];
      return;
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
      Attribute att = (Attribute) attEnum.nextElement();
      infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
      m_Attribute = null;
      m_Distribution = new double[data.numClasses()];
      Enumeration instEnum = data.enumerateInstances();
      while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        m_Distribution[(int) inst.classValue()]++;
      }
      Utils.normalize(m_Distribution);
      m_ClassValue = Utils.maxIndex(m_Distribution);
      m_ClassAttribute = data.classAttribute();
    } else {
      Instances[] splitData = splitData(data, m_Attribute);
      m_Successors = new Id3[m_Attribute.numValues()];
      for (int j = 0; j < m_Attribute.numValues(); j++) {
        m_Successors[j] = new Id3();
        m_Successors[j].makeTree(splitData[j]);
      }
    }
  }

  /**
   * Classifies a given test instance using the decision tree.
   *
   * @param instance the instance to be classified
   * @return the classification
   * @throws NoSupportForMissingValuesException if instance has missing values
   */
  public double classifyInstance(Instance instance)
    throws NoSupportForMissingValuesException {

    if (instance.hasMissingValue()) {
      throw new NoSupportForMissingValuesException("Id3: no missing values, "
                                                   + "please.");
    }
    if (m_Attribute == null) {
      return m_ClassValue;
    } else {
      return m_Successors[(int) instance.value(m_Attribute)].
        classifyInstance(instance);
    }
  }

  /**
   * Computes class distribution for instance using decision tree.
   *
   * @param instance the instance for which distribution is to be computed
   * @return the class distribution for the given instance
   * @throws NoSupportForMissingValuesException if instance has missing values
   */
  public double[] distributionForInstance(Instance instance)
    throws NoSupportForMissingValuesException {

    if (instance.hasMissingValue()) {
      throw new NoSupportForMissingValuesException("Id3: no missing values, "
                                                   + "please.");
    }
    if (m_Attribute == null) {
      return m_Distribution;
    } else {
      return m_Successors[(int) instance.value(m_Attribute)].
        distributionForInstance(instance);
    }
  }

  /**
   * Prints the decision tree using the private toString method from below.
   *
   * @return a textual description of the classifier
   */
  public String toString() {
    if ((m_Distribution == null) && (m_Successors == null)) {
      return "Id3: No model built yet.";
    }
    return "Id3\n\n" + toString(0);
  }

  /**
   * Computes information gain for an attribute.
   *
   * @param data the data for which info gain is to be computed
   * @param att the attribute
   * @return the information gain for the given attribute and data
   * @throws Exception if computation fails
   */
  private double computeInfoGain(Instances data, Attribute att)
    throws Exception {

    double infoGain = computeEntropy(data);
    Instances[] splitData = splitData(data, att);
    for (int j = 0; j < att.numValues(); j++) {
      if (splitData[j].numInstances() > 0) {
        infoGain -= ((double) splitData[j].numInstances() /
                     (double) data.numInstances()) *
          computeEntropy(splitData[j]);
      }
    }
    return infoGain;
  }
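  // Reader's note (not part of the original Weka source): computeInfoGain
  // implements the standard ID3 splitting criterion
  //   Gain(D, A) = H(D) - sum_v (|D_v| / |D|) * H(D_v),
  // where D_v is the subset of D taking value v for attribute A, and H is
  // the class entropy computed by computeEntropy below.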

  /**
   * Computes the entropy of a dataset.
   *
   * @param data the data for which entropy is to be computed
   * @return the entropy of the data's class distribution
   * @throws Exception if computation fails
   */
  private double computeEntropy(Instances data) throws Exception {
    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
      Instance inst = (Instance) instEnum.nextElement();
      classCounts[(int) inst.classValue()]++;
    }
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
      if (classCounts[j] > 0) {
        entropy -= classCounts[j] * Utils.log2(classCounts[j]);
      }
    }
    entropy /= (double) data.numInstances();
    return entropy + Utils.log2(data.numInstances());
  }
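  // Reader's note (not part of the original Weka source): the method above
  // sums c_j * log2(c_j) over raw class counts c_j rather than over
  // probabilities. With p_j = c_j / N and sum_j c_j = N, the identity
  //   -sum_j p_j * log2(p_j) = log2(N) - (1/N) * sum_j c_j * log2(c_j)
  // shows that dividing by numInstances() and adding log2(numInstances())
  // at the end yields exactly the Shannon entropy of the class distribution.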

  /**
   * Splits a dataset according to the values of a nominal attribute.
   *
   * @param data the data which is to be split
   * @param att the attribute to be used for splitting
   * @return the sets of instances produced by the split
   */
  private Instances[] splitData(Instances data, Attribute att) {
    Instances[] splitData = new Instances[att.numValues()];
    for (int j = 0; j < att.numValues(); j++) {
      splitData[j] = new Instances(data, data.numInstances());
    }
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
      Instance inst = (Instance) instEnum.nextElement();
      splitData[(int) inst.value(att)].add(inst);
    }
    for (int i = 0; i < splitData.length; i++) {
      splitData[i].compactify();
    }
    return splitData;
  }

  /**
   * Outputs a tree at a certain level.
   *
   * @param level the level at which the tree is to be printed
   * @return the tree as string at the given level
   */
  private String toString(int level) {
    StringBuffer text = new StringBuffer();

    if (m_Attribute == null) {
      if (Instance.isMissingValue(m_ClassValue)) {
        text.append(": null");
      } else {
        text.append(": " + m_ClassAttribute.value((int) m_ClassValue));
      }
    } else {
      for (int j = 0; j < m_Attribute.numValues(); j++) {
        text.append("\n");
        for (int i = 0; i < level; i++) {
          text.append("|  ");
        }
        text.append(m_Attribute.name() + " = " + m_Attribute.value(j));
        text.append(m_Successors[j].toString(level + 1));
      }
    }
    return text.toString();
  }

  /**
   * Adds this tree recursively to the buffer.
   *
   * @param id the unique id for the method
   * @param buffer the buffer to add the source code to
   * @return the last ID being used
   * @throws Exception if something goes wrong
   */
  protected int toSource(int id, StringBuffer buffer) throws Exception {
    int result;
    int i;
    int newID;
    StringBuffer[] subBuffers;

    buffer.append("\n");
    buffer.append("  protected static double node" + id + "(Object[] i) {\n");

    // leaf?
    if (m_Attribute == null) {
      result = id;
      if (Double.isNaN(m_ClassValue)) {
        buffer.append("    return Double.NaN;");
      } else {
        buffer.append("    return " + m_ClassValue + ";");
      }
      if (m_ClassAttribute != null) {
        buffer.append(" // " + m_ClassAttribute.value((int) m_ClassValue));
      }
      buffer.append("\n");
      buffer.append("  }\n");
    } else {
      buffer.append("    checkMissing(i, " + m_Attribute.index() + ");\n\n");
      buffer.append("    // " + m_Attribute.name() + "\n");

      // subtree calls
      subBuffers = new StringBuffer[m_Attribute.numValues()];
      newID = id;
      for (i = 0; i < m_Attribute.numValues(); i++) {
        newID++;
        buffer.append("    ");
        if (i > 0) {
          buffer.append("else ");
        }
        buffer.append("if (((String) i[" + m_Attribute.index()
            + "]).equals(\"" + m_Attribute.value(i) + "\"))\n");
        buffer.append("      return node" + newID + "(i);\n");
        subBuffers[i] = new StringBuffer();
        newID = m_Successors[i].toSource(newID, subBuffers[i]);
      }
      buffer.append("    else\n");
      buffer.append("      throw new IllegalArgumentException(\"Value '\" + i["
          + m_Attribute.index() + "] + \"' is not allowed!\");\n");
      buffer.append("  }\n");

      // output subtree code
      for (i = 0; i < m_Attribute.numValues(); i++) {
        buffer.append(subBuffers[i].toString());
      }
      subBuffers = null;

      result = newID;
    }

    return result;
  }

  /**
   * Returns a string that describes the classifier as source. The
   * classifier will be contained in a class with the given name (there may
   * be auxiliary classes),
   * and will contain a method with the signature:
   * <pre><code>
   * public static double classify(Object[] i);
   * </code></pre>
   * where the array <code>i</code> contains elements that are either
   * Double, String, with missing values represented as null. The generated
   * code is public domain and comes with no warranty. <br/>
   * Note: works only if class attribute is the last attribute in the dataset.
   *
   * @param className the name that should be given to the source class.
   * @return the object source described by a string
   * @throws Exception if the source can't be computed
   */
  public String toSource(String className) throws Exception {
    StringBuffer result;
    int id;

    result = new StringBuffer();
    result.append("class " + className + " {\n");
    result.append("  private static void checkMissing(Object[] i, int index) {\n");
    result.append("    if (i[index] == null)\n");
    result.append("      throw new IllegalArgumentException(\"Null values "
        + "are not allowed!\");\n");
    result.append("  }\n\n");
    result.append("  public static double classify(Object[] i) {\n");
    id = 0;
    result.append("    return node" + id + "(i);\n");
    result.append("  }\n");
    toSource(id, result);
    result.append("}\n");

    return result.toString();
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 6404 $");
  }

  /**
   * Main method.
   *
   * @param args the options for the classifier
   */
  public static void main(String[] args) {
    runClassifier(new Id3(), args);
  }
}
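The question only asked for the source, but for completeness here is a minimal driver sketch (not part of the Weka source) showing how the classifier might be trained and used. The file path data/weather.nominal.arff is an assumption; any purely nominal ARFF dataset without missing values should work:

import weka.classifiers.trees.Id3;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class Id3Demo {

  public static void main(String[] args) throws Exception {
    // Load a nominal-only dataset; the path is an assumption
    // (weather.nominal.arff ships with the Weka distribution).
    Instances data = DataSource.read("data/weather.nominal.arff");
    data.setClassIndex(data.numAttributes() - 1); // class = last attribute

    Id3 tree = new Id3();
    tree.buildClassifier(data); // grow the unpruned ID3 tree
    System.out.println(tree);   // pretty-print via toString()

    // Classify the first training instance.
    double label = tree.classifyInstance(data.instance(0));
    System.out.println("Predicted: "
        + data.classAttribute().value((int) label));
  }
}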

Is it better to learn data mining with Python, or to learn Weka development with Java?

If you are familiar with Python, use Python.

If you are familiar with Java, learn Weka. Weka has been under continuous development for more than 20 years; it is extremely mature and refined. Much of it is automated, and its extensions can even select algorithms for you, to the point where someone with no background at all can still use it.

Weka punctures the authority and mystique of people who fancy themselves experts. Many people love reinventing the wheel: they would rather fight with a dagger, and even with a loaded AK-47 lying right beside that dagger, they choose not to see it. Of course, if you want to build complex applications, familiarity with Java is a must. Also, Python can call Weka (through Jython, I believe).

Beida Jade Bird Java training: what are the eight best open-source data mining tools?

Data mining (also rendered in Chinese as 資料探勘 or 數據採礦) is a step in knowledge discovery in databases (KDD): the process of mining and analyzing large amounts of data and extracting information from it.

Applications include market segmentation, such as identifying the characteristics of customers who buy a particular product from a particular brand, and fraud detection, such as identifying transaction patterns likely to indicate online fraud.

In this article, Guiyang Computer Training has compiled the best open-source tools for data mining.

1. Weka: WEKA is an open data mining workbench that brings together a large number of machine learning algorithms for data mining tasks, including data preprocessing, classification, regression, clustering, association rules, and visualization through its new interactive interface (a short API sketch follows after this list).

2. RapidMiner: RapidMiner is a world-leading data mining solution with technology that is, to a very large degree, state of the art. It covers a wide range of data mining tasks and simplifies the design and evaluation of data mining processes.

3. Orange: Orange is a component-based data mining and machine learning software suite. It is friendly yet powerful, with a fast and versatile visual programming front end for exploratory data analysis and visualization, and it is bound to Python for scripting. It contains a complete set of components for data preprocessing and provides features for data filtering, transformation, modeling, model evaluation, and exploration. It is written in C++ and Python, and its graphics library is built on the cross-platform Qt framework.

4. KNIME: KNIME (Konstanz Information Miner) is a user-friendly, intelligent, and feature-rich open-source platform for data integration, data processing, data analysis, and data exploration.

5. jHepWork: jHepWork is a full-featured, object-oriented framework for scientific data analysis. Jython macros are used to display one- and two-dimensional histograms of data. The package includes many tools for interacting with two- and three-dimensional scientific plots.

6. Apache Mahout: Apache Mahout is a relatively new open-source project of the Apache Software Foundation (ASF). Its primary goal is to create scalable machine learning algorithms that developers may use free of charge under the Apache license. The project is in its second year and currently has one public release. Mahout contains many implementations, including clustering, classification, collaborative filtering (CF), and evolutionary programming. In addition, by using the Apache Hadoop library, Mahout can scale effectively into the cloud.

7. ELKI: ELKI (Environment for Developing KDD-Applications Supported by Index-Structures) is used mainly for clustering and outlier detection. It is a data mining platform similar to Weka, written in Java with a GUI.
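As promised under the Weka entry above, here is a minimal sketch of the kind of Java API workflow Weka automates. It is illustrative only: the dataset path data/iris.arff and the choice of the J48 decision tree are assumptions, not something from the original article.

import java.util.Random;

import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CrossValidateDemo {

  public static void main(String[] args) throws Exception {
    // Any ARFF file works; the path here is an assumption.
    Instances data = DataSource.read("data/iris.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // 10-fold cross-validation of a J48 decision tree.
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(new J48(), data, 10, new Random(1));
    System.out.println(eval.toSummaryString());
  }
}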

