-1

I have a dataframe in R that looks like this:

dataframe

And then another weights dataframe that looks like this: Weights

What I want to do is apply the weights to the dataframe - which is fairly simple and I can do it by using the following code:

dataframe$month <- as.numeric(dataframe$month)
dataframe_weight<-dataframe

for (i in 1:15){
  dataframe_weight[i,]<-dataframe[i,]*weights
}

Which returns me the following dataframe: Applied weights

However, this does not take into account for the NA's. What I need to do is rescale the weights somehow, so that they equate to 1 across all rows, but each age still has the proportionate weighting. For example, in month 201408, age1 the value should still be 1 when the weights are applied, because there are no other values and so that value gets all the weight. For the second month, 201409, as there are only two values, proportionately age1 will get approx. 53% of the weight and age2 47% of the weight (0.1809143/(0.1809143+0.1590556))

I am stumped with how to automate this (began to try various ways and didn't get very far), and not do it manually (as I have many dataframes that I need to do this for). I have searched and not found any questions relating to this, or that I can work off. I hope this question makes sense. You can replicate my dataframes using the following code:

month <- c("201408", "201409", "201410", "201411", "201412", "201501", "201502", "201503", "201504", "201505", "201506", "201507", "201508", "201509", "201510")
age1 <- c(1, 0.9464432, 0.9661004, 2.2874682, 0.6786986, 0.7456758, 1.1342144, 0.9981846, 1.0592016, 0.8341938, 1.1630893, 0.9972508, 1.0716317, 1.0424335, 1.075181)
age1 <- data.frame(month, age1)

month <- c("201409", "201410", "201411", "201412", "201501", "201502", "201503", "201504", "201505", "201506", "201507", "201508", "201509", "201510")
age2 <- c(1, 0.9397603, 1.0692599, 2.2361409, 0.5877691, 0.8220721, 1.087845, 0.9934881, 1.0479094, 0.8770588, 1.107826, 1.0017968, 1.0764996,1.034393)
age2 <- data.frame(month, age2)

month <- c("201410", "201411", "201412", "201501", "201502", "201503", "201504", "201505", "201506", "201507", "201508", "201509", "201510")
age3 <- c(1, 0.9078398, 1.0619787, 1.4231532, 0.937846, 0.8444599, 1.0654393, 1.0079098, 0.994476, 0.6992733, 1.4121658, 1.025296, 1.0913576)
age3 <- data.frame(month, age3)

month <- c("201411", "201412", "201501", "201502", "201503", "201504", "201505", "201506", "201507", "201508", "201509", "201510")
age4 <- c(1, 0.8942244, 0.9099405, 1.5851158, 1.0059785, 0.8506144, 1.0508878, 0.9639585, 0.6992876, 1.0276086, 1.4123104, 1.0038351)
age4 <- data.frame(month, age4)

month <- c("201412", "201501", "201502", "201503", "201504", "201505", "201506", "201507", "201508", "201509", "201510")
age5 <- c(1, 0.7264975, 1.1133892, 1.4952122, 1.0502483, 0.8943884, 1.0049447, 0.7233516, 0.9075124, 1.1223967, 1.2951269)
age5 <- data.frame(month, age5)

month <- c("201501", "201502", "201503", "201504", "201505", "201506", "201507", "201508", "201509", "201510")
age6 <- c(1, 0.9679026, 1.0168767, 1.5844894, 1.0294516, 0.9014677, 0.6664228, 1.0717137, 0.8909056, 1.1459715)
age6 <- data.frame(month, age6)

month <- c("201502", "201503", "201504", "201505", "201506", "201507", "201508", "201509", "201510")
age7 <- c(1, 0.9403795, 1.1877307, 1.359906, 1.1427003, 0.5717126, 0.9550687, 1.1257902, 0.8886474)
age7 <- data.frame(month, age7)

month <- c("201503", "201504", "201505", "201506", "201507", "201508", "201509", "201510")
age8 <- c(1, 0.9701066, 1.1289901, 1.4153004, 0.756067, 0.7669884, 1.0004406, 1.1310102)
age8 <- data.frame(month, age8)

month <- c("201504", "201505", "201506", "201507", "201508", "201509", "201510")
age9 <- c(1, 0.8378029, 1.3229611, 0.9690153, 1.0648304, 0.7414129, 1.0042986)
age9 <- data.frame(month, age9)

month <- c("201505", "201506", "201507", "201508", "201509", "201510")
age10plus <- c(1, 0.9856009, 0.9402859, 0.9949159, 1.0224494, 0.9917433) 
age10plus <- data.frame(month, age10plus)

library(dplyr)
library(purrr)
dataframe <- list(age1, age2, age3, age4, age5, age6, age7, age8, age9, age10plus) %>% reduce(left_join, by= "month")


weights <- c(0.18091432, 0.15905558, 0.13518614, 0.11459798, 0.09552710, 0.07757876, 0.06265265, 0.05057607, 0.03761133, 0.08630005)
weights <- data.frame(cbind(c(1), t(weights)))




dataframe$month <- as.numeric(dataframe$month)
dataframe_weight<-dataframe

for (i in 1:15){
  dataframe_weight[i,]<-dataframe[i,]*weights
}
Joshua
  • 40,822
  • 8
  • 72
  • 132
Jay J
  • 155
  • 1
  • 12

2 Answers2

0
#more appropriate data structures
m <- as.matrix(dataframe[,-1])
rownames(m) <- dataframe[, 1]

weights <- as.numeric(weights)

#first value of weights seems superfluous
weights <- weights[-1]

#create matrix of normalized weights
w <- t(outer(weights, 
             c((!is.na(m)) %*% weights), #matrix multiplication to sum weights for non-NA values
             "/"))

#check that weights sum to 1
is.na(w) <- is.na(m) 
rowSums(w, na.rm = TRUE)
#[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

#multiply
m * w
#            age1      age2       age3       age4       age5       age6       age7       age8       age9  age10plus
#201408 1.0000000        NA         NA         NA         NA         NA         NA         NA         NA         NA
#201409 0.5036479 0.4678519         NA         NA         NA         NA         NA         NA         NA         NA
#201410 0.3678400 0.3145790 0.28450894         NA         NA         NA         NA         NA         NA         NA
#201411 0.7017091 0.2883774 0.20809923 0.19431488         NA         NA         NA         NA         NA         NA
#201412 0.1791765 0.5190143 0.20949767 0.14953908 0.13939841         NA         NA         NA         NA         NA
#201501 0.1768391 0.1225493 0.25219649 0.13669266 0.09097372 0.10169464         NA         NA         NA         NA
#201502 0.2485675 0.1583927 0.15358190 0.22004641 0.12883977 0.09096008 0.07589546         NA         NA         NA
#201503 0.2061274 0.1975004 0.13030563 0.13158841 0.16303521 0.09004573 0.06725035 0.05772940         NA         NA
#201504 0.2097239 0.1729450 0.15763668 0.10668567 0.10980320 0.13453292 0.08144301 0.05369835 0.04116377         NA
#201505 0.1509176 0.1666758 0.13625544 0.12042962 0.08543833 0.07986358 0.08520172 0.05709988 0.03151088 0.08630005
#201506 0.2104195 0.1395011 0.13443937 0.11046770 0.09599945 0.06993475 0.07159320 0.07158033 0.04975833 0.08505741
#201507 0.1804170 0.1762059 0.09453206 0.08013695 0.06909968 0.05170026 0.03581931 0.03823890 0.03644595 0.08114672
#201508 0.1938735 0.1593414 0.19090525 0.11776187 0.08669203 0.08314222 0.05983759 0.03879126 0.04004969 0.08586129
#201509 0.1885912 0.1712233 0.13860581 0.16184792 0.10721930 0.06911535 0.07053374 0.05059835 0.02788553 0.08823744
#201510 0.1945156 0.1645260 0.14753642 0.11503748 0.12371972 0.08890305 0.05567612 0.05720205 0.03777301 0.08558750
Roland
  • 127,288
  • 10
  • 191
  • 288
  • Hi, thanks for this but the weight per year doesn't exactly add up to 1, which it should! – Jay J Aug 24 '18 at 15:18
  • Maybe your explanation in the question is insufficient. `is.na(w) <- is.na(m); rowSums(w, na.rm = TRUE)` returns a vector of ones and in the second row we have 53 % and 47 % as required. – Roland Aug 27 '18 at 05:53
  • Thanks - yes the final output is what I should get but I was confused as to why the 'w' table was fully populated when there shouldn't be any values past the first 2 in the second row, there should be NA's. & because of this I can't QA the weights table by aggregating columns and ensuring they add to 1 (which I need to do). I did explain in the question that I needed the weights to equate to 1, apologies if I could've made it clearer. – Jay J Aug 29 '18 at 16:39
  • Well, my previous comment should help you then. I just didn't bother before because you claimed to just need the end result. – Roland Aug 29 '18 at 16:45
0
dataframe$month <- as.numeric(dataframe$month)

Creating a dataframe of 1's in the formal I need to answer the above question

dataframe_weight <- dataframe
dataframe_weight[!is.na(dataframe_weight)] <- 1
dataframe_weight[,1] <- dataframe$month

Multiply new dataframe (dataframe of 1's) by weights dataframe

rescaled_weight<-dataframe_weight

    for (i in 1:15){
         rescaled_weight[i,]<-dataframe_weight[i,]*weights
    }

Rescale/normalize the weights

rescaled_weight <- rescaled_weight[,-1]/rowSums(rescaled_weight[,-1], na.rm=T)

Check new weights sum to 1

rescaled_weight <- rescaled_weight %>%
  mutate(aggregate=rowSums(rescaled_weight[,1:10], na.rm=TRUE))

rescaled_weight <- rescaled_weight[,-11]
dataframe <- dataframe[,-1]

Apply weights to original dataframe

weightsapplied <- rescaled_weight * dataframe
Jay J
  • 155
  • 1
  • 12
  • As long as dplyr is installed and all of the code listed in the question, it works just fine. It does for me anyway. What errors are you receiving? – Jay J Aug 29 '18 at 16:40