■ K-평균 군집화(K-Means Clustering) 알고리즘을 사용하는 방법을 보여준다.
▶ kmean.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# K-means clustering demo on random 2-D points.
#
# Written against the TensorFlow 1.x graph/session API (tf.Session,
# tf.random_shuffle, tf.global_variables_initializer).
import matplotlib.pyplot as pp
import numpy as np
import pandas as pd
import tensorflow as tf


def DisplayPartition(xValueList, yValueList, assignmentValueNDArray):
    """Show a scatter plot of the points, colored by cluster index.

    Args:
        xValueList: x coordinates of the points.
        yValueList: y coordinates of the points, same length as xValueList.
        assignmentValueNDArray: one integer cluster index per point. Each
            index selects a color from a fixed four-entry palette, so indices
            must be in the range 0..3.

    Blocks in pp.show() until the plot window is closed.
    """
    colorList = ["red", "blue", "green", "yellow"]
    labelList = [colorList[assignment] for assignment in assignmentValueNDArray]

    dataFrame = pd.DataFrame(dict(x=xValueList, y=yValueList, color=labelList))

    _, axesSubPlot = pp.subplots()
    axesSubPlot.scatter(dataFrame["x"], dataFrame["y"], c=dataFrame["color"])

    pp.show()


vectorCount = 2000
clusterCount = 4
stepCount = 1000

xValueList = []
yValueList = []

# Generate random data: each point is drawn from one of two Gaussian blobs,
# chosen with equal probability.
for _ in range(vectorCount):
    if np.random.random() > 0.5:
        xValueList.append(np.random.normal(0.4, 0.7))
        yValueList.append(np.random.normal(0.2, 0.8))
    else:
        xValueList.append(np.random.normal(0.6, 0.4))
        yValueList.append(np.random.normal(0.8, 0.5))

vectorList = list(zip(xValueList, yValueList))
vectorTensor = tf.constant(vectorList)

# Pick clusterCount distinct input points as the initial centroids by
# shuffling the point indices and taking the first clusterCount of them.
vectorListCount = tf.shape(vectorTensor)[0]
randomIndexTensor = tf.random_shuffle(tf.range(0, vectorListCount))

begin = [0]
size = [clusterCount]

centroidIndexTensor = tf.slice(randomIndexTensor, begin, size)
centroidVariable = tf.Variable(tf.gather(vectorTensor, centroidIndexTensor))

# Broadcast points (1, N, 2) against centroids (K, 1, 2) to get every
# point-to-centroid difference in one (K, N, 2) tensor.
expandedVectorTensor = tf.expand_dims(vectorTensor, 0)
expandedCentroidVector = tf.expand_dims(centroidVariable, 1)

subtractedVectorTensor = tf.subtract(expandedVectorTensor, expandedCentroidVector)

# Squared Euclidean distance; the square root is omitted because it does not
# change which centroid is nearest.
euclideanDistanceTensor = tf.reduce_sum(tf.square(subtractedVectorTensor), 2)

# Index of the nearest centroid for each point.
assignmentTensor = tf.cast(tf.argmin(euclideanDistanceTensor, 0), tf.int32)

partitionList = tf.dynamic_partition(vectorTensor, assignmentTensor, clusterCount)

# New centroid of each cluster = mean of the points assigned to it. The means
# of ALL clusters are stacked into one (K, 2) tensor and written back into
# centroidVariable, so that every step actually moves the centroids.
# NOTE: a cluster that ends up with no points yields a NaN mean.
updatedCentroidTensor = tf.assign(
    centroidVariable,
    tf.concat(
        [
            tf.expand_dims(tf.reduce_mean(partition, 0), 0)
            for partition in partitionList
        ],
        0,
    ),
)

initializeOperation = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(initializeOperation)

    for step in range(stepCount):
        # The assign op returns the freshly written centroids, which avoids
        # fetching centroidVariable in the same run() call (that read is not
        # ordered relative to the write).
        centroidValueNDArray, assignmentValueNDArray = sess.run(
            [updatedCentroidTensor, assignmentTensor]
        )

DisplayPartition(xValueList, yValueList, assignmentValueNDArray)

pp.plot(xValueList, yValueList, "o", label="Input Data")
pp.legend()
pp.show()