Here is my code:
import weka.clusterers.ClusterEvaluation;
import weka.clusterers.HierarchicalClusterer;
import weka.clusterers.EM;
import weka.core.converters.CSVLoader;
import weka.core.converters.ConverterUtils.DataSource;
import weka.core.neighboursearch.PerformanceStats;
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Enumeration;
import weka.core.*;
public class WEKASample1 {
public static void main(String[] args) {
Instances data = null;
CSVLoader csvLoader = new CSVLoader();
try {
csvLoader.setSource(new File("D:\\WEKA\\numbers.csv"));
data = csvLoader.getDataSet();
HierarchicalClusterer h = new HierarchicalClusterer();
DistanceFunction d = new DistanceFunction() {
@Override
public void setOptions(String[] arg0) throws Exception {
}
@Override
public Enumeration listOptions() {
return null;
}
@Override
public String[] getOptions() {
return null;
}
@Override
public void update(Instance arg0) {
}
@Override
public void setInvertSelection(boolean arg0) {
}
@Override
public void setInstances(Instances arg0) {
}
@Override
public void setAttributeIndices(String arg0) {
}
@Override
public void postProcessDistances(double[] arg0) {
}
@Override
public boolean getInvertSelection() {
return false;
}
@Override
public Instances getInstances() {
return null;
}
@Override
public String getAttributeIndices() {
return null;
}
@Override
public double distance(Instance arg0, Instance arg1, double arg2,
PerformanceStats arg3) {
return 0;
}
@Override
public double distance(Instance arg0, Instance arg1, double arg2) {
return 0;
}
@Override
public double distance(Instance arg0, Instance arg1, PerformanceStats arg2)
throws Exception {
return 0;
}
@Override
public double distance(Instance arg0, Instance arg1) {
double s1 = arg0.value(0);
double s2 = arg1.value(0);
return Double.POSITIVE_INFINITY;
}
};
h.setDistanceFunction(d);
SelectedTag s = new SelectedTag(1, HierarchicalClusterer.TAGS_LINK_TYPE);
h.setLinkType(s);
h.buildClusterer(data);
// double[] arr;
// for(int i=0; i<data.size(); i++) {
//
// arr = h.distributionForInstance(data.get(i));
// for(int j=0; j< arr.length; j++)
// System.out.print(arr[j]+",");
// System.out.println();
//
// }
System.out.println(h.numberOfClusters());
} catch (Exception e) {
e.printStackTrace();
}
}
}
Now, the output for the number of clusters generated is always 2 even if I modify the distancefucntion method also. How do I know which instance if of which cluster? When I uncomment the code above that is written to get the distribution for the instances, I get an ArrayOutOfBound exception.
But in general, can anyone explain how is the clustering done hierarchically by WEKA here?
Here is my data set, that is of length 10 and dimension 2:
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10