install.packages("iterators")
library(iterators)
install.packages("dplyr")
library(dplyr)
ActiveAssets <- c(0, 23, 56, 12, 69)
clusteNum <- c(1, 2, 1, 3, 2)
dbname.servername <- c(1, 2, 3, 4, 5)
dbname <- c('ABC', 'DEF', 'GHI', 'JKL', 'MNO')
Rank <- c(1, 2, 3, 4, 5)
mean.value <- c(123, 456, 789, 159, 357)
NewServer <- c('0', '0', '0', '0', '0')
df <- as.data.frame(cbind(ActiveAssets, clusteNum, dbname.servername,
dbname, Rank, mean.value, NewServer))
servermeanAsset <-aggregate(df$ActiveAssets ~ df$dbname.servername, df ,
mean)
overallmean <- mean(servermeanAsset$`df$ActiveAssets`)
compare <- iter(df, by='row')
R <- {
currentedit <- nextElem(compare)
filtered <- filter(df, currentedit$dbname.servername !=
df$dbname.servername & currentedit$ActiveAssets < df$ActiveAssets))
ordered <- filtered[with(filtered, order(filtered$mean.value,
filtered$ActiveAssets)),]
slice <- slice(ordered, 1)
df %>% mutate(NewServer = replace(slice$dbname.servername,
currentedit$dbname == df$dbname & currentedit$Rank == df$Rank,
slice$dbname.servername))
}
Which returns the output for the first run:
ActiveAssets clusteNum dbname.servername dbname Rank mean.value NewServer
0 1 1 ABC 1 123 1
23 2 2 DEF 2 456 <NA>
56 1 3 GHI 3 789 <NA>
12 3 4 JKL 4 159 <NA>
69 2 5 MNO 5 357 <NA>
Which returns the output for the second run:
ActiveAssets clusteNum dbname.servername dbname Rank mean.value NewServer
0 1 1 ABC 1 123 1
23 2 2 DEF 2 456 <NA>
56 1 3 GHI 3 789 1
12 3 4 JKL 4 159 <NA>
69 2 5 MNO 5 357 <NA>
What I want it to return in the first run:
ActiveAssets clusteNum dbname.servername dbname Rank mean.value NewServer
0 1 1 ABC 1 123 2
23 2 2 DEF 2 456 0
56 1 3 GHI 3 789 0
12 3 4 JKL 4 159 0
69 2 5 MNO 5 357 0
What I want to return for the second run:
ActiveAssets clusteNum dbname.servername dbname Rank mean.value NewServer
0 1 1 ABC 1 123 2
23 2 2 DEF 2 456 3
56 1 3 GHI 3 789 0
12 3 4 JKL 4 159 0
69 2 5 MNO 5 357 0
What I want to return for the third run:
ActiveAssets clusteNum dbname.servername dbname Rank mean.value NewServer
0 1 1 ABC 1 123 2
23 2 2 DEF 2 456 3
56 1 3 GHI 3 789 5
12 3 4 JKL 4 159 0
69 2 5 MNO 5 357 0
What I want to return for the fourth run:
ActiveAssets clusteNum dbname.servername dbname Rank mean.value NewServer
0 1 1 ABC 1 123 2
23 2 2 DEF 2 456 3
56 1 3 GHI 3 789 5
12 3 4 JKL 4 159 1
69 2 5 MNO 5 357 0
What I want to return for the fifth run:
ActiveAssets clusteNum dbname.servername dbname Rank mean.value NewServer
0 1 1 ABC 1 123 2
23 2 2 DEF 2 456 3
56 1 3 GHI 3 789 5
12 3 4 JKL 4 159 1
69 2 5 MNO 5 357 0
-- the fifth run shouldn't move because there isn't another row with a
different server value and and ActiveAsset value more than its value.
So what I am attempting to do is to look at the data I have on a one row basis (I will put this in a loop later on that will loop through until the newserver column has no '0' fields left in it) and row by row go through the data. By row I want to grab the smallest database (determined by the active asset) that isn't in the same server as the selected row and then take the servername from that other row and write it to the newserver column in the currently selected row.
What I am finding is I can step through the dataset row by row and it filters/orders/slices the data just fine but when I try to write the servername of the slice to the newserver column of df it will overwrite what is in other rows of that column negating any return that was given in previous runs.
I have read through and tried recommendations from the following links for reference: Look for cell within a data frame and replace its value without loops R
R: conditionally replace values in loop
Populating a data frame in R in a loop
For loop with if condition on multiple R functions
R An if else statement inside a for loop
What am I doing wrong?
I need assistance finding out how to get that last row to write to the newserver column for one row only (the row in the currentedit) and not alter any other row in the newserver column.
Edit: Provided output of the code.