Using Weka From Java
Using Weka From Java
The power of Weka data manipulation can also be exploited directly from Java code. This enables the development of data mining applications (for decision support systems) without writing any machine learning code yourself. Weka is completely written in Java and it comes with its documentation (javadoc style). Basic classes are: Attribute, Instance, Instances and Classifier.
27
27
Attribute class
This is the class for handling attributes. It is contained in the weka.core package. Four types of Attribute are supported: - Numeric - Nominal (a fixed set of values) - String - Date. A Numeric attribute definition example, followed by a Nominal attribute definition example:
// Numeric attribute: the constructor only takes the attribute name.
Attribute temperatura = new Attribute("Temperatura");

// Nominal attribute: collect the admissible labels first, then pass them to
// the constructor together with the attribute name.
FastVector tempoValues = new FastVector(3);
for (String label : new String[] {"sole", "coperto", "pioggia"}) {
    tempoValues.addElement(label);
}
Attribute tempo = new Attribute("Tempo", tempoValues);
Gianluca Moro - DEIS, University of Bologna
28
28
Instance class
This is the class for handling a single instance. It is contained in the weka.core package. Creating an instance example:
// Create empty instance with three attribute values Instance inst = new Instance(3); // Set instance's values for the attributes "temperatura" and "tempo" inst.setValue(temperatura, 25); inst.setValue(tempo, "coperto"); // Set instance's dataset to be the dataset "weather" inst.setDataset(weather);
29
29
Instances class
This is the class for handling a set of instances. It is contained in the weka.core package. It can be created directly from an ARFF file:
FileReader reader = new FileReader(myDataset.arff); Instances set = new Instances(reader);
30
30
Classifier class
Just an abstract class implemented by specific algorithms. It is contained in the weka.classifiers package. It is the final model of the system. To build a model from a set of instances:
//For example using a J48-tree algorithm Classifier myClassifier = new J48(); myClassifier.buildClassifier(set);
31
31
32
Remember to cast
32
33
33
34
34
3. Classify a message
public String classifyMessage(String message) throws Exception { ! ! // Check whether a classifier has been built. ! ! //if (m_Data.numInstances() == 0) { ! ! //! throw new Exception("No classifier available."); ! ! //} ! ! // Make separate little test set so that message ! ! // does not get added to string attribute in m_Data. ! ! Instances testset = m_Data.stringFreeStructure(); ! ! // Make message into test instance. ! ! Instance instance = makeInstance(message, testset); ! ! // Get index of predicted class value. ! ! double predicted = m_Classifier.classifyInstance(instance); ! ! // Output class value: value(..) Returns an instance's attribute value in internal format ! ! /* Class for handling an instance. All values (numeric, nominal, or string) are internally stored as floating-point numbers. If an attribute is nominal (or a string), the stored value is the index of the corresponding nominal (or string) value in the attribute's definition. We have chosen this approach in favor of a more elegant object-oriented approach because it is much faster. */ String msg ="Weather classified as: " ! ! + m_Data.classAttribute().value((int) predicted); return msg;
! ! !
! ! }
35
35
/** * Programma per classificare una giornata: a seconda delle caratteristiche meteorologichche * il sistema deve scegliere se giocare oppure no * * @author gm * */ import java.io.*; import weka.core.*; import weka.classifiers.Classifier; import weka.classifiers.trees.J48; public class Weather implements Serializable { ! // Training set ! private Instances m_Data = null; ! // Classificatore scelto ! private Classifier m_Classifier = new J48(); ! /** ! * Costruttore: crea un nuovo dataset di training inizialmente vuoto ! * ! * @throws Exception ! */ ! public Weather() throws Exception { ! ! String nameOfDataset = "WeatherSet"; ! ! ! ! /* Attribute's vector: ! ! * @attribute Tempo {sole, coperto, pioggia} ! ! * @attribute Temperatura real ! ! * @attribute Umidit real ! ! * @attribute Vento {true, false} ! ! * @attribute Gioca {yes, no} ! ! */ ! ! FastVector attributes = new FastVector(5); ! ! ! ! //Tempo ! ! FastVector tempoValues = new FastVector(3); ! ! tempoValues.addElement("sole"); ! ! tempoValues.addElement("coperto"); ! ! tempoValues.addElement("pioggia"); ! ! ! ! //Vento ! ! FastVector ventoValues = new FastVector(2); ! ! ventoValues.addElement("true"); ! ! ventoValues.addElement("false");
36
36
! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !
! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! }
// Create a new dataset m_Data = new Instances(nameOfDataset, attributes, 100); // Set the last attribute as class m_Data.setClassIndex(m_Data.numAttributes() - 1);
! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !
/** * Metodo che configura il classificatore a partire dal nome del file che lo contiene * * @param modelName Nome del file che contiene il modello */ public void setClassifier(String modelName){ ! try { ! ! Classifier model; ! ! //Load the model ! ! ! ObjectInputStream modelInObjectFile = new ObjectInputStream( ! ! ! ! ! new FileInputStream(modelName)); ! ! ! model = (Classifier) modelInObjectFile.readObject(); ! ! ! modelInObjectFile.close(); ! ! ! ! ! this.m_Classifier = model; ! ! System.out.println("Model "+modelName+" lodaded.");! ! } catch (Exception e) { ! ! // TODO Auto-generated catch block ! ! e.printStackTrace(); ! } ! }
37
37
! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !
/** * Metodo che converte una stringa in una istanza (record) * * @param data La stringa nel formato Tempo,Temperatura,Umidita',Vento, [Gioca] * @param dataSet Il dataset a cui verr aggiunta * @return L'istanza creata */ private Instance makeInstance(String data, Instances dataSet) { ! Instance instance = new Instance(5); ! String[] values = data.split(","); ! Attribute tempo = dataSet.attribute("Tempo"); ! Attribute temperatura = dataSet.attribute("Temperatura"); ! Attribute umidita = dataSet.attribute("Umidita'"); ! Attribute vento = dataSet.attribute("Vento"); ! Attribute gioca = dataSet.attribute("Gioca"); ! instance.setValue(tempo, values[0]); ! instance.setValue(temperatura, Integer.parseInt(values[1])); ! instance.setValue(umidita,Integer.parseInt(values[2])); ! instance.setValue(vento, values[3]); ! if(values.length > 4){ ! ! instance.setValue(gioca, values[4]); ! } ! // Give instance access to attribute information from the dataset. ! instance.setDataset(dataSet); ! return instance; } /** * Classifica un messaggio passato in ingresso * * @return Una rappresentazione testuale della classe del messaggio */ public String classifyMessage(String message) throws Exception { ! // Check whether classifier has been built. ! if (m_Data.numInstances() == 0) { ! ! throw new Exception("No classifier available."); ! } ! // Make separate little test set so that message ! // does not get added to string attribute in m_Data. ! Instances testset = m_Data.stringFreeStructure(); ! // Make message into test instance. ! Instance instance = makeInstance(message, testset); ! // Get index of predicted class value. ! double predicted = m_Classifier.classifyInstance(instance); ! // Output class value. ! String msg ="Weather classified as: " ! ! ! + m_Data.classAttribute().value((int) predicted); ! return msg; }
38
38
! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! }
/** * Main method. * * @option -m Model file's name * @option -classify Classifies an istance with model provided * @option -create Create a model from an arff data file, and output it in a model file */ public static void main(String[] options) { ! try { ! ! Classifier tree; ! ! if(options.length != 2){ ! ! ! String modelFile = Utils.getOption("m", options); ! ! ! String dataFile = Utils.getOption("create", options); ! ! ! if(dataFile.length() != 0){ ! ! ! ! // Load data from file ! ! ! ! Instances datas = new Instances(new BufferedReader(new FileReader(dataFile))); ! ! ! ! datas.setClassIndex(datas.numAttributes() - 1); ! ! ! ! //Build classifier ! ! ! ! tree = new J48(); ! ! ! ! tree.buildClassifier(datas); ! ! ! ! try { ! ! ! ! ! // serialize model ! ! ! ! ! ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(modelFile)); ! ! ! ! ! oos.writeObject(tree); ! ! ! ! ! oos.flush();oos.close(); ! ! ! ! ! System.out.println("Model saved on: "+modelFile); ! ! ! ! } catch (Exception e) { ! ! ! ! ! // TODO Auto-generated catch block ! ! ! ! ! e.printStackTrace(); ! ! ! ! } ! ! ! }else{ ! ! ! ! Weather w = new Weather(); ! ! ! ! w.setClassifier(modelFile); ! ! ! ! String unclassified = Utils.getOption("classify", options); ! ! ! ! if(unclassified.length() != 0){ ! ! ! ! ! //Classify the provided instance! ! ! ! ! ! ! ! ! ! ! ! System.out.println(w.classifyMessage(unclassified)); ! ! ! ! }! ! ! ! ! }! ! ! }else{ ! ! ! System.out.println("Parametri non corretti"); ! ! }//else! ! ! } catch (Exception e) { ! ! // TODO Auto-generated catch block ! ! e.printStackTrace(); ! } }
39
39
40
40
41
41