java-clustering
This package provides Java implementations of various clustering algorithms.
Features
- Hierarchical Clustering
- KMeans Clustering
- DBSCAN
- Single Linkage Clustering
- EM Clustering
Install
Add the following dependency to your POM file:
<dependency>
<groupId>com.github.chen0040</groupId>
<artifactId>java-clustering</artifactId>
<version>1.0.3</version>
</dependency>
Spatial Segmentation using Hierarchical Clustering
The following sample code shows how to use hierarchical clustering to separate two clusters:
DataQuery.DataFrameQueryBuilder schema = DataQuery.blank()
.newInput("c1")
.newInput("c2")
.newOutput("designed")
.end();
Sampler.DataSampleBuilder negativeSampler = new Sampler()
.forColumn("c1").generate((name, index) -> randn() * 0.3 + (index % 2 == 0 ? 2 : 4))
.forColumn("c2").generate((name, index) -> randn() * 0.3 + (index % 2 == 0 ? 2 : 4))
.forColumn("designed").generate((name, index) -> 0.0)
.end();
Sampler.DataSampleBuilder positiveSampler = new Sampler()
.forColumn("c1").generate((name, index) -> rand(-4, -2))
.forColumn("c2").generate((name, index) -> rand(-2, -4))
.forColumn("designed").generate((name, index) -> 1.0)
.end();
DataFrame data = schema.build();
data = negativeSampler.sample(data, 50);
data = positiveSampler.sample(data, 50);
System.out.println(data.head(10));
HierarchicalClustering algorithm = new HierarchicalClustering();
algorithm.setLinkage(linkageCriterion);
algorithm.setClusterCount(2);
DataFrame learnedData = algorithm.fitAndTransform(data);
for(int i = 0; i < learnedData.rowCount(); ++i){
DataRow tuple = learnedData.row(i);
String clusterId = tuple.getCategoricalTargetCell("cluster");
System.out.println("learned: " + clusterId +"\tknown: "+tuple.target());
}
Spatial Segmentation using EM Clustering
The following sample code shows how to use EM clustering to separate two clusters:
DataQuery.DataFrameQueryBuilder schema = DataQuery.blank()
.newInput("c1")
.newInput("c2")
.newOutput("designed")
.end();
Sampler.DataSampleBuilder negativeSampler = new Sampler()
.forColumn("c1").generate((name, index) -> randn() * 0.3 + (index % 2 == 0 ? 2 : 4))
.forColumn("c2").generate((name, index) -> randn() * 0.3 + (index % 2 == 0 ? 2 : 4))
.forColumn("designed").generate((name, index) -> 0.0)
.end();
Sampler.DataSampleBuilder positiveSampler = new Sampler()
.forColumn("c1").generate((name, index) -> rand(-4, -2))
.forColumn("c2").generate((name, index) -> rand(-2, -4))
.forColumn("designed").generate((name, index) -> 1.0)
.end();
DataFrame data = schema.build();
data = negativeSampler.sample(data, 50);
data = positiveSampler.sample(data, 50);
System.out.println(data.head(10));
EMClustering algorithm = new EMClustering();
algorithm.setSigma0(1.5);
algorithm.setClusterCount(2);
DataFrame learnedData = algorithm.fitAndTransform(data);
for(int i = 0; i < learnedData.rowCount(); ++i){
DataRow tuple = learnedData.row(i);
String clusterId = tuple.getCategoricalTargetCell("cluster");
System.out.println("learned: " + clusterId +"\tknown: "+tuple.target());
}
Spatial Segmentation using Single Linkage Clustering
The following sample code shows how to use single linkage clustering to separate two clusters:
DataQuery.DataFrameQueryBuilder schema = DataQuery.blank()
.newInput("c1")
.newInput("c2")
.newOutput("designed")
.end();
Sampler.DataSampleBuilder negativeSampler = new Sampler()
.forColumn("c1").generate((name, index) -> randn() * 0.3 + (index % 2 == 0 ? 2 : 4))
.forColumn("c2").generate((name, index) -> randn() * 0.3 + (index % 2 == 0 ? 2 : 4))
.forColumn("designed").generate((name, index) -> 0.0)
.end();
Sampler.DataSampleBuilder positiveSampler = new Sampler()
.forColumn("c1").generate((name, index) -> rand(-4, -2))
.forColumn("c2").generate((name, index) -> rand(-2, -4))
.forColumn("designed").generate((name, index) -> 1.0)
.end();
DataFrame data = schema.build();
data = negativeSampler.sample(data, 50);
data = positiveSampler.sample(data, 50);
System.out.println(data.head(10));
SingleLinkageClustering algorithm = new SingleLinkageClustering();
algorithm.setClusterCount(2);
DataFrame learnedData = algorithm.fitAndTransform(data);
for(int i = 0; i < learnedData.rowCount(); ++i){
DataRow tuple = learnedData.row(i);
String clusterId = tuple.getCategoricalTargetCell("cluster");
System.out.println("learned: " + clusterId +"\tknown: "+tuple.target());
}
Spatial Segmentation using DBSCAN
The following sample code shows how to use DBSCAN to perform clustering:
DataQuery.DataFrameQueryBuilder schema = DataQuery.blank()
.newInput("c1")
.newInput("c2")
.newOutput("designed")
.end();
Sampler.DataSampleBuilder negativeSampler = new Sampler()
.forColumn("c1").generate((name, index) -> randn() * 0.3 + (index % 2 == 0 ? 2 : 4))
.forColumn("c2").generate((name, index) -> randn() * 0.3 + (index % 2 == 0 ? 2 : 4))
.forColumn("designed").generate((name, index) -> 0.0)
.end();
Sampler.DataSampleBuilder positiveSampler = new Sampler()
.forColumn("c1").generate((name, index) -> rand(-4, -2))
.forColumn("c2").generate((name, index) -> rand(-2, -4))
.forColumn("designed").generate((name, index) -> 1.0)
.end();
DataFrame data = schema.build();
data = negativeSampler.sample(data, 200);
data = positiveSampler.sample(data, 200);
System.out.println(data.head(10));
DBSCAN algorithm = new DBSCAN();
algorithm.setEpsilon(0.5);
DataFrame learnedData = algorithm.fitAndTransform(data);
for(int i = 0; i < learnedData.rowCount(); ++i){
DataRow tuple = learnedData.row(i);
String clusterId = tuple.getCategoricalTargetCell("cluster");
System.out.println("learned: " + clusterId +"\tknown: "+tuple.target());
}
Image Segmentation (Clustering) using KMeans
The following sample code shows how to use KMeans to perform image segmentation:
BufferedImage img= ImageIO.read(FileUtils.getResource("1.jpg"));
DataFrame dataFrame = ImageDataFrameFactory.dataFrame(img);
KMeans cluster = new KMeans();
DataFrame learnedData = cluster.fitAndTransform(dataFrame);
for(int i=0; i <learnedData.rowCount(); ++i) {
ImageDataRow row = (ImageDataRow)learnedData.row(i);
int x = row.getPixelX();
int y = row.getPixelY();
String clusterId = row.getCategoricalTargetCell("cluster");
System.out.println("cluster id for pixel (" + x + "," + y + ") is " + clusterId);
}
The segmented image can be generated using the trained KMeans model from above, as illustrated by the following sample code:
List<Integer> classColors = new ArrayList<Integer>();
for(int i=0; i < 5; ++i){
for(int j=0; j < 5; ++j){
classColors.add(ImageDataFrameFactory.get_rgb(255, rand.nextInt(255), rand.nextInt(255), rand.nextInt(255)));
}
}
BufferedImage segmented_image = new BufferedImage(img.getWidth(), img.getHeight(), img.getType());
for(int x=0; x < img.getWidth(); x++)
{
for(int y=0; y < img.getHeight(); y++)
{
int rgb = img.getRGB(x, y);
DataRow tuple = ImageDataFrameFactory.getPixelTuple(x, y, rgb);
int clusterIndex = cluster.transform(tuple);
rgb = classColors.get(clusterIndex % classColors.size());
segmented_image.setRGB(x, y, rgb);
}
}
Thanks for sharing, nice post! The post really provides useful information!
ReplyDeleteHương Lâm chuyên cung cấp máy photocopy, chúng tôi cung cấp máy photocopy ricoh, toshiba, canon, sharp, đặc biệt chúng tôi có cung cấp máy photocopy màu uy tín, giá rẻ nhất.