I'm running the k-means clustering algorithm on data that forms a 3-D array. It looks like this: for each year from 1910 to 2000 there is an array of 51 x 200 values, where the 51 rows are the US states. Below is an example for the year 1916, showing only the first three rows (i.e. 3 x 200):
Year 1916
AK 0.1216 0.0811 0.0541 0.0405 0.0541 0.0000 0.0473 0.0000 0.0541 0.0000 0.0338 0.0338 0.0473 0.0338 0.0000 0.0405 0.0000 0.0338 0.0338 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0338 0.0000 0.0405 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0405 0.0000 0.0000 0.0000 0.0405 0.0338 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0338 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0338 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0338 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
AL 0.0837 0.0109 0.0113 0.0160 0.0198 0.0158 0.0067 0.0139 0.0121 0.0082 0.0102 0.0083 0.0094 0.0046 0.0031 0.0096 0.0074 0.0164 0.0129 0.0041 0.0042 0.0120 0.0232 0.0100 0.0115 0.0071 0.0024 0.0062 0.0087 0.0095 0.0088 0.0043 0.0078 0.0024 0.0051 0.0375 0.0030 0.0080 0.0101 0.0024 0.0110 0.0064 0.0104 0.0028 0.0019 0.0066 0.0013 0.0052 0.0095 0.0044 0.0048 0.0119 0.0018 0.0081 0.0071 0.0124 0.0044 0.0086 0.0033 0.0022 0.0070 0.0018 0.0025 0.0070 0.0034 0.0075 0.0004 0.0103 0.0008 0.0063 0.0112 0.0006 0.0204 0.0012 0.0022 0.0004 0.0032 0.0025 0.0015 0.0012 0.0073 0.0030 0.0016 0.0045 0.0044 0.0004 0.0028 0.0139 0.0017 0.0026 0.0038 0.0049 0.0167 0.0013 0.0004 0.0009 0.0028 0.0005 0.0050 0.0020 0.0000 0.0033 0.0086 0.0018 0.0012 0.0008 0.0060 0.0035 0.0059 0.0059 0.0008 0.0029 0.0007 0.0052 0.0079 0.0079 0.0028 0.0003 0.0027 0.0066 0.0036 0.0044 0.0032 0.0036 0.0024 0.0000 0.0076 0.0004 0.0068 0.0020 0.0002 0.0004 0.0034 0.0007 0.0011 0.0029 0.0061 0.0003 0.0068 0.0008 0.0066 0.0006 0.0029 0.0004 0.0009 0.0006 0.0036 0.0004 0.0015 0.0007 0.0005 0.0004 0.0019 0.0031 0.0019 0.0060 0.0059 0.0011 0.0000 0.0005 0.0006 0.0016 0.0018 0.0054 0.0014 0.0000 0.0008 0.0010 0.0024 0.0013 0.0040 0.0005 0.0005 0.0004 0.0000 0.0000 0.0037 0.0005 0.0000 0.0015 0.0060 0.0000 0.0000 0.0002 0.0023 0.0000 0.0034 0.0000 0.0000 0.0011 0.0047 0.0002 0.0040 0.0000 0.0018 0.0052 0.0052 0.0004 0.0025 0.0000
AR 0.0703 0.0193 0.0205 0.0120 0.0232 0.0227 0.0089 0.0110 0.0120 0.0100 0.0082 0.0117 0.0103 0.0061 0.0032 0.0093 0.0113 0.0103 0.0115 0.0040 0.0042 0.0155 0.0245 0.0111 0.0096 0.0065 0.0012 0.0090 0.0155 0.0110 0.0196 0.0059 0.0150 0.0047 0.0042 0.0132 0.0037 0.0087 0.0074 0.0024 0.0079 0.0072 0.0130 0.0026 0.0014 0.0045 0.0018 0.0081 0.0075 0.0045 0.0036 0.0069 0.0009 0.0068 0.0066 0.0095 0.0062 0.0076 0.0042 0.0030 0.0059 0.0022 0.0030 0.0052 0.0068 0.0079 0.0005 0.0094 0.0030 0.0062 0.0076 0.0011 0.0133 0.0017 0.0037 0.0015 0.0048 0.0031 0.0020 0.0010 0.0047 0.0054 0.0024 0.0042 0.0061 0.0018 0.0021 0.0097 0.0032 0.0084 0.0124 0.0047 0.0075 0.0039 0.0000 0.0042 0.0040 0.0008 0.0068 0.0018 0.0000 0.0035 0.0037 0.0030 0.0011 0.0005 0.0047 0.0072 0.0013 0.0048 0.0023 0.0087 0.0008 0.0038 0.0044 0.0045 0.0025 0.0007 0.0033 0.0028 0.0039 0.0024 0.0029 0.0033 0.0030 0.0000 0.0046 0.0011 0.0043 0.0028 0.0006 0.0019 0.0024 0.0009 0.0004 0.0045 0.0035 0.0024 0.0022 0.0007 0.0036 0.0012 0.0025 0.0000 0.0025 0.0005 0.0038 0.0000 0.0019 0.0012 0.0008 0.0017 0.0023 0.0036 0.0023 0.0036 0.0040 0.0005 0.0005 0.0014 0.0005 0.0005 0.0008 0.0044 0.0018 0.0000 0.0005 0.0016 0.0024 0.0030 0.0024 0.0006 0.0000 0.0008 0.0000 0.0000 0.0048 0.0022 0.0000 0.0039 0.0058 0.0000 0.0000 0.0000 0.0037 0.0000 0.0022 0.0012 0.0000 0.0012 0.0025 0.0000 0.0061 0.0000 0.0029 0.0023 0.0037 0.0000 0.0032 0.0000
I ran my algorithm on each year individually, i.e. on an array of 50 x 200.
My questions are:
Q1: Doing the years individually is a pain. Is there any way I can run the algorithm once for all the years and graph the result in 3D? I want the third dimension to be the year, and to have one 3D graph of the clusters for all the data.
Q2: Can I read all the data, including the years, in one pass and then get the results individually for each year? Currently I have to split my data text file by year, which is a pain.
My code:
from __future__ import division
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
import numpy
from numpy.random import *
import numpy as np
import random
from functools import partial
from sklearn.metrics import pairwise_distances_argmin_min
from scipy.spatial.distance import pdist,squareform
import pandas as pd
from functools import partial
from matplotlib import pyplot
def read_from_file(filename):
    """Read a whitespace-delimited text file into a list of float rows.

    The first token on every line (the state code such as ``AK`` in the
    sample data) is discarded; every remaining token is converted with
    ``float``. Blank lines are skipped so that a trailing newline or a
    spacer line does not add an empty row to the result.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one entry per non-blank line, each entry being the
        list of floats parsed from that line.

    Raises:
        ValueError: If a token after the first one is not a valid float.
    """
    data = []
    with open(filename) as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                # Blank line: nothing to parse, and an empty row would make
                # the resulting matrix ragged.
                continue
            data.append([float(x) for x in tokens[1:]])
    return data
def main():
    """Cluster the rows of ``Data_Fcopy.txt`` with k-means and print the result.

    Fits a 9-cluster ``KMeans`` model (random initialisation) on the matrix
    returned by ``read_from_file``, prints the fitted estimator and the
    0-based label of every row, then prints a one-column table holding the
    1-based index of the nearest cluster centre for every row.
    """
    # Local import: only needed for the print-options call below.
    import sys

    data = read_from_file("Data_Fcopy.txt")

    # Fit exactly once so that the centres and the labels read below come
    # from the same model.
    km = KMeans(n_clusters=9, init='random').fit(data)
    print(km)
    centers = km.cluster_centers_
    labels = km.labels_
    print(labels)

    # Make numpy print arrays in full instead of summarising them;
    # sys.maxsize is the value numpy documents for this purpose.
    numpy.set_printoptions(threshold=sys.maxsize)

    # One vectorised call over the whole matrix: for every row, the index of
    # the closest centre. The distances themselves are not needed.
    closest, _ = pairwise_distances_argmin_min(data, centers)

    # Shift to 1-based cluster numbers for display. The column name is the
    # single string 'xcenter'.
    S = pd.DataFrame(closest + 1, columns=['xcenter'])
    print(S.to_string())
if __name__ == "__main__":
    # Run the clustering only when this file is executed as a script,
    # not when it is imported as a module.
    main()