K均值算法（K-means）点聚合

MrMao14

2022-02-28

编程小知识

点聚合, 算法

一、算法原理

K-Means算法是一种聚类分析（cluster analysis）的算法，通过事先不知道类别的情况下，将相似的对象统一归类到簇中，其归类过程使用多次迭代的欧氏距离（Euclidean Distance）进行计算

欧式距离

算法过程如下，通过不断查找k值与点之间最短的欧式距离并归类进簇，通过多次迭代的方式进行归类，找出最优的簇中心点

二、算法Java实现

1.工具类KMeansUtil.java

import java.util.ArrayList;
import java.util.List;

/**
 * @author : MrMao14
 * @version : 1.0
 * @program : web
 * @description : 将点集合根据k值算法迭代一定次数
 * @create : 2022/2/28 9:00
 **/
public class KMeansUtil {
    //设定一个比较大的值
    public static final double DISTANCE = 10000.00;
    // 聚类中心数
    public int k;
    // 迭代最大次数
    public int maxIter;
    // 点集
    public List<PointBean> points;
    // 中心点
    public List<PointBean> centers;

    public KMeansUtil(int k, int maxIter, List<PointBean> points) {
        this.k = k;
        this.maxIter = maxIter;
        this.points = points;
        //初始化中心点
        initCenters();
    }

    /*
     * 初始化聚类中心
     * 这里的选取策略是，从点集中按序列抽取K个作为初始聚类中心
     */
    public void initCenters() {
        centers = new ArrayList<>(k);

        for (int i = 0; i < k; i++) {
            PointBean tmPoint = points.get(i * 2);
            PointBean center = new PointBean(tmPoint.getX(), tmPoint.getY());
            center.setClusterID(i + 1);
            centers.add(center);
        }
    }


    /*
     * 停止条件是满足迭代次数
     */
    public void runKmeans() {
        // 已迭代次数
        int count = 1;

        while (count++ <= maxIter) {
            // 遍历每个点，确定其所属簇
            for (PointBean point : points) {
                assignPointToCluster(point);
            }

            //调整中心点
            adjustCenters();
        }
    }

    /*
     * 调整聚类中心，按照求平衡点的方法获得新的簇心
     */
    public void adjustCenters() {
        double[] sumX = new double[k];
        double[] sumY = new double[k];
        int[] count = new int[k];

        // 保存每个簇的横纵坐标之和
        for (int i = 0; i < k; i++) {
            sumX[i] = 0.0;
            sumY[i] = 0.0;
            count[i] = 0;
        }

        // 计算每个簇的横纵坐标总和、记录每个簇的个数
        for (PointBean point : points) {
            int clusterID = point.getClusterID();

            sumX[clusterID - 1] += point.getX();
            sumY[clusterID - 1] += point.getY();
            count[clusterID - 1]++;
        }

        // 更新簇心坐标
        for (int i = 1; i <= k; i++) {
            PointBean tmpPoint = centers.get(i-1);

            tmpPoint.setX(sumX[i] / count[i]);
            tmpPoint.setY(sumY[i] / count[i]);
            tmpPoint.setClusterID(i);
            centers.set(i, tmpPoint);
        }
    }

    /*划分点到某个簇中，欧式距离标准
     * 对传入的每个点，找到与其最近的簇中心点，将此点加入到簇
     */
    public void assignPointToCluster(PointBean point) {
        double minDistance = DISTANCE;

        int clusterID = -1;

        for (PointBean center : centers) {
            double dis = EurDistance(point, center);
            if (dis < minDistance) {
                minDistance = dis;
                clusterID = center.getClusterID();
            }
        }
        point.setClusterID(clusterID);
    }

    //欧式距离，计算两点距离
    public double EurDistance(PointBean point, PointBean center) {
        double detX = point.getX() - center.getX();
        double detY = point.getY() - center.getY();
        return Math.sqrt(detX * detX + detY * detY);
    }
}

2.实体类PointBean.java

import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * @author : MrMao14
 * @version : 1.0
 * @program : web
 * @description :KMeans点聚合算法Bean
 * @create : 2022/2/28 10:11
 **/

@Data
@NoArgsConstructor
public class PointBean {
    // 点的坐标
    private Double x;
    private Double y;

    // 所在类ID
    private int clusterID = -1;

    public PointBean(Double x, Double y) {
        this.x = x;
        this.y = y;
    }

    public PointBean(Double x, Double y, int clusterID) {
        this.x = x;
        this.y = y;
        this.clusterID = clusterID;
    }
}

3.使用实例

//初始化计算类 设定聚合点数，迭代次数，坐标点集合
KMeansUtil kMeansUtil = new KMeansUtil(mapEnterprise.getPointCount(), 5, beans);
kMeansUtil.runKmeans();
//获取坐标点集合
List<PointBean> points = kMeansUtil.points;
//获取中心点集合
List<PointBean> centers = kMeansUtil.centers;