k-means clustering 군집화

[패키지 다운로드]

pip install pandas

#k-means 알고리즘

# 1. 초기에 랜덤하게 k개 점 선택

# 2. 모든 점을 가장 가까운 선택점에 할당해 k개 군집 구성

# 3. 각 군집에서 다시 평균을 구해 k개의 중심점을 계산

# 4. 중심점이 변하지 않을 때까지 2~3단계 반복

import matplotlib.pyplot as plt

import numpy as np

import tensorflow as tf

import pandas as pd

#pip3.6 install pandas

# Experiment configuration.
num_vectors = 1000  # number of 2-D sample points to generate
num_clusters = 4    # k: number of clusters / centroids
num_steps = 100     # iteration budget (not used by any code in this section)

# Synthetic input data: each sample is drawn from one of two normal
# distributions, chosen by a fair coin flip.
x_values = []
y_values = []
for _ in range(num_vectors):
    if np.random.random() > 0.5:
        x_values.append(np.random.normal(0.4, 0.7))
        y_values.append(np.random.normal(0.2, 0.8))
    else:
        x_values.append(np.random.normal(0.6, 0.4))
        # NOTE(review): y uses the same distribution as the other branch, so
        # the two components differ only along x -- confirm this is intended.
        y_values.append(np.random.normal(0.2, 0.8))

# zip() returned a list in Python 2.x but returns a lazy zip object in 3.x,
# so it is materialised with list() to get a list of (x, y) tuples.
vector_values = list(zip(x_values, y_values))

# Wrap the generated (x, y) samples in a constant tensor, one row per sample.
vectors = tf.constant(vector_values)

# Preview the raw input samples before clustering.
plt.plot(x_values, y_values, 'o', label='Input Data')
plt.legend()
plt.show()

# Number of samples, read from the tensor built above rather than converting
# the Python list into a tensor a second time.
n_sample = tf.shape(vectors)[0]

# Shuffled sample indices; the leading num_clusters entries select the
# initial centroids.
random_indics = tf.random_shuffle(tf.range(0, n_sample))

# Slice specification: start at position 0 and take num_clusters indices.
begin = [0]
size = [num_clusters]

# Initial centroids: the samples found at the randomly chosen indices.
centroid_indices = tf.slice(random_indics, begin, size)
centroids = tf.Variable(tf.gather(vectors, centroid_indices))

# Cost function.
# expand_dims inserts a size-1 axis into each tensor (axis 0 for the samples,
# axis 1 for the centroids) so the subtraction below broadcasts every
# centroid against every sample.
expanded_vectors = tf.expand_dims(vectors, 0)
expanded_centroids = tf.expand_dims(centroids, 1)

# Per-coordinate difference between each sample and each centroid.
# (The spelling of this name is kept as-is; code outside this section may
# reference it.)
vectors_subtration = tf.subtract(expanded_vectors, expanded_centroids)

# Sum of the squared differences over axis 2 (the coordinate axis). No square
# root is taken, so this is the squared Euclidean distance; the argmin below
# is unaffected by that.
euclidean_distance = tf.reduce_sum(tf.square(vectors_subtration), 2)

# For every sample, the index of the centroid with the smallest distance.
# tf.cast is used in place of the deprecated tf.to_int32.
assignments = tf.cast(tf.argmin(euclidean_distance, 0), tf.int32)

# Group the samples by their assigned centroid index: one tensor per cluster.
partitions = tf.dynamic_partition(vectors, assignments, num_clusters)

# Centroid update: the mean of each partition becomes that cluster's new
# centroid, and the per-cluster means are stacked back into one tensor.
# NOTE(review): a cluster that receives no samples gives an empty partition,
# whose mean is presumably NaN -- confirm and handle if it can occur.
update_centroids = tf.concat(
    [tf.expand_dims(tf.reduce_mean(partition, 0), 0) for partition in partitions],
    0,
)

def display_partition(x_values, y_values, assignment_values):
    """Show a scatter plot of the samples, colored by cluster assignment.

    Args:
        x_values: x coordinates of the samples.
        y_values: y coordinates of the samples.
        assignment_values: cluster index for each sample. Each index selects
            an entry from a fixed four-color palette, so an index of 4 or
            more raises IndexError.

    Returns:
        None. The figure is displayed with plt.show().
    """
    palette = ["red", "blue", "green", "yellow"]
    # One color name per sample, looked up by its cluster index.
    labels = [palette[cluster] for cluster in assignment_values]

    df = pd.DataFrame(dict(x=x_values, y=y_values, color=labels))
    df.plot.scatter(x='x', y='y', c=df['color'])
    plt.show()

저작자표시 비영리 변경금지

'Data > TensorFlow' 카테고리의 다른 글

TensorFlow Mac 설치, jupyter Notebook 설치 (2)	2017.12.10
Tensorflow Object Detection API _ CentOS7 설치 (0)	2017.08.18
MNIST 데이터로 KNN 분류기, 성능 측정 (1)	2017.04.17
MNIST 데이터 집합 읽어오기, 이미지로 나타내기 (1)	2017.04.14
선형 회귀 알고리즘 구현, 비용함수, 경사하강법 (0)	2017.04.14

부동산 On the ball

k-means clustering 군집화

'Data > TensorFlow' 카테고리의 다른 글

티스토리툴바

k-means clustering 군집화

'Data > TensorFlow' 카테고리의 다른 글

관련글

티스토리툴바