1

I am trying to do a SOM (self-organising map) analysis with the kohonen package. I followed this tutorial: https://www.shanelynn.ie/self-organising-maps-for-customer-segmentation-using-r/ . This is my code:

 require(kohonen)
    data = matrix( 
        c(6, 6, 80, 280, 404, 0, 158, 158197, 158197233,
          6, 13, 80, 280, 404, 0, 158, 158197, 158197233,
          6, 13, 80, 283, 404, 0, 158, 158197, 158197233,
          6, 35, 80, 321, 301, 0, 158, 158197, 158197233,
          6, 35, 80, 3131, 200, 0, 158, 158197, 158197233,
          6, 35, 80, 20073, 200, 0, 158, 158197, 158197233,
          6, 35, 80, 183, 200, 0, 158, 158197, 158197233,
          6, 35, 80, 25, 302, 0, 158, 158197, 158197233,
          6, 35, 80, 13744, 200, 0, 158, 158197, 158197233,
          6, 35, 80, 280, 404, 0, 158, 158197, 158197233,
          9, 33, 80, 2859, 200, 0, 158, 158197, 15819736,
          9, 33, 80, 463, 200, 0, 158, 158197, 15819736,
          9, 33, 80, 2065, 200, 0, 158, 158197, 15819736,
          9, 33, 80, 1298, 200, 0, 158, 158197, 15819736,
          9, 33, 80, 86659, 200, 0, 158, 158197, 15819736,
          9, 33, 80, 942, 200, 0, 158, 158197, 15819736,
          9, 33, 80, 2027, 200, 0, 158, 158197, 15819736,
          9, 33, 80, 2979, 200, 0, 158, 158197, 15819736,
          9, 33, 80, 127390, 200, 0, 158, 158197, 15819736,
          9, 33, 80, 1150, 200, 0, 158, 158197, 15819736,
          9, 33, 80, 1150, 200, 0, 158, 158197, 15819736,
          9, 33, 80, 942, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 287, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 90, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 2, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 3314, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 483, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 2, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 543, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 439, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 689, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 797, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 732, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 5403, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 496, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 743, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 3049, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 3064, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 3042, 200, 0, 158, 158197, 15819736,
          9, 40, 80, 3077, 200, 0, 158, 158197, 15819736,
          12, 8, 80,305 ,301 , 0, 142, 1424, 14245,
          12, 8, 80, 10531,200 , 0, 142, 1424, 14245,
          12, 8, 80, 445,400 , 0, 142, 1424, 14245,
          12, 8, 80, 445,400 , 0, 142, 1424, 14245,
          12, 8, 80, 445,400 , 0, 142, 1424, 14245,
          12, 8, 80, 324,400 , 0, 142, 1424, 14245,
          12, 8, 80, 445,400 , 0, 142, 1424, 14245,
          0, 0, 80,0 ,0 , 0, 42, 424, 4245,
          12, 25, 80, 171,200 , 0, 42, 424, 4245,
          12, 25, 80, 2970,200 , 0,42, 424, 4245,
          12, 25, 80, 171,200 , 0, 42, 424, 4245,
          12, 25, 80, 2970,200 , 0, 42, 424, 4245,
          12, 25, 80, 171,200 , 0, 42, 424, 4245,
          12, 25, 80, 2970,200 , 0, 42, 424, 4245,
          15, 32, 80, 2860,200 , 0, 88, 88212, 8821237,
          15, 32, 80, 2859,200 , 0, 88, 88212, 8821237,
          15, 32, 80, 86659,200 , 0, 88, 88212, 8821237,
          15, 32, 80, 22495,200 , 0, 88, 88212, 8821237,
          15, 32, 80, 949,200 , 0, 88, 88212, 8821237,
          15, 32, 80, 1298,200 , 0, 88, 88212, 8821237,
          15, 32, 80, 2027,200 , 0, 88, 88212, 8821237,
          15, 32, 80, 2979,200 , 0, 88, 88212, 8821237,
          15, 32, 80, 103646,200 , 0, 88, 88212, 8821237,
          15, 32, 80, 406,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 1169,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 212,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 2250,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 8496,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 0,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 946,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 716263,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 63,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 63,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 1573,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 63,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 974,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 63,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 926,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 1150,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 800,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 225,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 79,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 408,200 , 0, 88, 88212, 8821237,
          9, 34, 80, 535,200 , 0, 88, 88212, 8821237,
          3, 21, 80, 208,404 , 0, 207, 20746, 2074613,
          3, 49, 80, 1298,200 , 0, 207, 20746, 2074613,
          5, 17, 80, 302,230 , 0, 207, 20746, 2074613,
          8, 28, 80, 2857,200 , 0, 5, 5188, 518810,
          13, 08, 80, 2860,200 , 0, 5, 5188, 518810,
          14, 08, 80, 2860,200 , 0, 5, 5188, 518810,
          15, 58, 80, 208,404 , 0, 66, 66249, 6624966,
          15, 58, 80, 463,200 , 0, 66, 66249, 6624966,
          15, 58, 80, 2065,200 , 0, 66, 66249, 6624966,
          15, 58, 80, 2065,200 , 0, 66, 66249, 6624966,
          13, 05, 80, 608,200 , 0, 88, 88212, 8821240,
          13, 08, 80, 608,200 , 0, 88, 88212, 8821240,
          13, 11, 80, 608,200 , 0, 88, 88212, 8821240,
          13, 14, 80, 608,200 , 0, 88, 88212, 8821240,
          13, 17, 80, 608,200 , 0, 88, 88212, 8821240,
          13, 20, 80, 608,200 , 0, 88, 88212, 8821240 ), 

         nrow=100,             
         ncol=9,              
         byrow = TRUE)        

    # Train on columns 1, 2, 4, 5, 7, 8 and 9 of `data`; columns 3 and 6 are
    # left out of the training set.
    data_train <- data[, c(1, 2, 4, 5, 7, 8, 9)]
    #data_train <- data[, c(2,4,5,8)]
    # scale() centres every column and divides it by its standard deviation.
    data_train_matrix <- as.matrix(scale(data_train))
    som_grid <- somgrid(xdim = 3, ydim = 4, topo = "hexagonal")

    # Fix the RNG seed before training: without it the node numbering and the
    # codes differ from run to run, so results cannot be reproduced by others.
    set.seed(42)

    som_model <- som(data_train_matrix,
                     grid = som_grid,
                     rlen = 500,
                     alpha = c(0.05, 0.01),
                     keep.data = TRUE)

    # training process
    plot(som_model, type = "changes")
    # number of observations mapped to each node
    plot(som_model, type = "count", main = "Node Counts")
    # distance
    #plot(som_model, type="dist.neighbours", main = "SOM neighbour distances")
    # codes and weight vectors
    #plot(som_model, type="codes")
    # heat map of the 4th training variable (column 5 of the original `data`)
    plot(som_model, type = "property", property = getCodes(som_model)[, 4],
         main = "Heat map - parameter 4")

And this is my map visualization :

enter image description here

The map is OK. My question is: is there a way to find out which data points were assigned to the white node? I looked at getCodes(som_model), but it only contains the numbers for the map units. So I know that the white node is V4, but which input rows ended up in node V4? I looked through all the components of som_model, but that did not help. Any ideas?

> getCodes(som_model)[,4]
          V1           V2           V3           V4           V5           V6           V7           V8 
-0.727734454 -0.183272487 -0.342681905  2.361366190 -0.343764866 -0.343764866  1.298987948 -0.343532184 
          V9          V10          V11          V12 
-0.343764307  1.350552793 -0.003492471 -0.343764866 

I would like a result like this, for example:

    > inV4
[1]  2 25  0

Is it possible to do this with the kohonen package?

UPDATE: It looks like there is no easy way to do this in the kohonen package, so I tried this:

# Find the "white" node of the heat map. In the runs shown, the white cell is
# the unit with the largest code value for the plotted variable, so take the
# maximum directly instead of testing against a hard-coded threshold (a
# threshold can match no unit, or several units, depending on the run).
q <- getCodes(som_model)[, 4]
mynode <- which.max(q)  # integer index of the unit; its name is e.g. "V4"

# unit.classif holds, for every input row, the index of the node that row was
# mapped to (one entry per row of the training data, in the same order).
mydata2 <- som_model$unit.classif
print(mydata2)

# Sanity check: there must be exactly one node assignment per input row,
# otherwise the row indices below would not line up with `data`.
stopifnot(length(mydata2) == nrow(data))

# Row numbers of the inputs that were mapped to the white node.
result2 <- which(mydata2 == mynode)
print(result2)

# Print all matching input rows at once; drop = FALSE keeps a matrix even when
# only one row (or none) matches.
print(data[result2, , drop = FALSE])

But I am not sure whether this is the right way to do it, or whether it gives me the correct input rows. Is there any way to check it?

caroline
  • 161
  • 2
  • 15

1 Answer

0

Sorry if this is too late to help you. It may help someone else.

Because you did not set the random seed, we cannot exactly reproduce your result. When I ran your code, I got the plot

SOM

and I got the codes:

getCodes(som_model)[,4]
        V1         V2         V3         V4         V5         V6         V7 
-0.3437649 -0.3437649  2.3146262  0.4037323 -0.3437649 -0.6034393 -0.3434484 
        V8         V9        V10        V11        V12 
 0.6730257  0.2089917 -0.3437649 -0.1038754  2.6302823 

For me, the white node is V12. You can get the data points that are in V12 by looking at the unit classifications that are part of the som object.

som_model$unit.classif
  [1] 12 12 12  9  9  9  9  9  9 12 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
 [26] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10  3  2  3  3  3  3  3  6  6  6
 [51]  6  6  6  6  1  1  1  1  1  1  1  1  1  1  5  5  5  5  5  5  7  5  5  5  5
 [76]  5  5  5  5  5  5  5  5  5  8  8  8  6  6  6  4  4  4  4  2  2  2  2  2  2

To get which points are in V12, you can simply use:

which(som_model$unit.classif == 12)
[1]  1  2  3 10
G5W
  • 36,531
  • 10
  • 47
  • 80