-1

i have two dataframes

prop=structure(list(KOD_NAR.id = structure(c(1L, 1L, 2L, 9L, 15L, 
16L, 17L, 3L, 4L, 18L, 5L, 6L, 19L, 20L, 7L, 8L, 21L, 10L, 11L, 
12L, 13L, 14L), .Label = c("", "-1", "04А  ", "04Б  ", "05А  ", 
"05Б  ", "07Д  ", "07С  ", "1", "10", "11", "12Г  ", "13", "15", 
"2", "3", "4", "5", "6", "7", "9"), class = "factor"), X1000494 = structure(c(4L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 3L), .Label = c("", "0.00425531914893617", "0.0106382978723404", 
"0.848936170212766"), class = "factor"), X1000495 = structure(c(4L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 3L), .Label = c("", "0.00651465798045603", "0.0293159609120521", 
"0.892508143322475"), class = "factor"), X1000496 = structure(c(4L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
1L, 1L, 1L, 1L, 3L), .Label = c("", "0.00366300366300366", "0.0366300366300366", 
"0.835164835164835"), class = "factor"), X1000500 = structure(c(3L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L), .Label = c("", "0.0161290322580645", "1.09032258064516"
), class = "factor")), .Names = c("KOD_NAR.id", "X1000494", "X1000495", 
"X1000496", "X1000500"), class = "data.frame", row.names = c(NA, 
-22L))

The second

mash=structure(list(KOD_NAR.id = structure(c(1L, 8L, 16L, 17L, 18L, 
2L, 3L, 19L, 4L, 5L, 20L, 21L, 6L, 7L, 22L, 9L, 10L, 11L, 12L, 
13L, 14L, 15L), .Label = c("-1", "04А  ", "04Б  ", "05А  ", "05Б  ", 
"07Д  ", "07С  ", "1", "10", "11", "12Г  ", "13", "15", "16", 
"16А  ", "2", "3", "4", "5", "6", "7", "9"), class = "factor"), 
    X1000494 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, 0L, 0L, NA), X1000495 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, 1L, 1L, NA), X1000496 = c(NA, NA, NA, NA, NA, 
    NA, NA, 0L, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, 2L, 
    0L, NA), X1000500 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0L, 0L, NA)), .Names = c("KOD_NAR.id", 
"X1000494", "X1000495", "X1000496", "X1000500"), class = "data.frame", row.names = c(NA, 
-22L))

i want merge these dataframes but in a special way, namely: the file structure is such that by the rows the error's code of the driver, and by the columns the driver's ID. How can i join to the metric data of prop dataframe the nominal data of mash dataframe by corresponding KOD_NAR.id. and then after joining load excel file( write.xlsx) with marked cells by color . If in the cell in brackets 0, then color is green, if 1, then color is yellow, if 2, then color of cells is red

So output: enter image description here

is there way to do it?

TobiO
  • 1,335
  • 1
  • 9
  • 24
psysky
  • 3,037
  • 5
  • 28
  • 64
  • Exporting to Excel and applying color formatting and joining two data.frames are two very unrelated tasks. Probably best suited for two questions. Also: What have you tried, where are you stuck? – s_baldur Jul 10 '19 at 13:51

1 Answers1

1

I'm sure there are more elegant ways to do it (I'm bad with apply), but this one works.

There was a little problem with the datasets you provided

library(openxlsx)   
library(tidyr)

style0=createStyle(fgFill = "green") #here you can change colors
style1=createStyle(fgFill = "yellow")
style2=createStyle(fgFill = "red")

prop$KOD_NAR.id=as.character(prop$KOD_NAR.id)

prop$KOD_NAR.id[1:2]=c("16","16a") #there must have been something wrong with the data, these two codes found in mash are not in prop
prop$KOD_NAR.id=factor(prop$KOD_NAR.id)
mash_long=mash %>% gather(key="ID",value="mash",-KOD_NAR.id)
prop_long=prop %>% gather(key="ID",value="prop",-KOD_NAR.id)

df=full_join(mash_long,prop_long) # KOD_NAR.id in prop had empty cells
df$mash[is.na(df$mash)]=""
df$prop[is.na(df$prop)]=""
df$cellcontent=ifelse(df$mash=="",df$prop,paste0(df$prop," (",df$mash,")"))

df_write=df %>% select(ID,cellcontent,KOD_NAR.id,starts_with("X")) %>% spread(ID,cellcontent)

wb=createWorkbook("workbook")
addWorksheet(wb,"Info")
writeData(wb,sheet = 1,df_write)
for(i in 2:ncol(df_write)){
  for(k in 1:nrow(df_write)){
    if(grepl("\\(",df_write[k,i])){
      addStyle(wb=wb,sheet = 1,rows = k+1,cols = i, #there will be a header row
               style=get(paste0("style",gsub(".*\\((\\d+)\\)","\\1",df_write[k,i]))))
    }
  }
}
saveWorkbook(wb,"example.xlsx",overwrite = T)
TobiO
  • 1,335
  • 1
  • 9
  • 24
  • thanx, good answer, but what does mean Error: Each row of output must be identified by a unique combination of keys. Keys are shared for 258350 rows: When i use my full datasets? – psysky Jul 12 '19 at 09:04
  • 1
    I guess this error comes from the line with the `%>%spread` ? This means, that you have multiple cases, where the same combination of KOD_NAR and ID appears. You would need another Identifier, or you could do the merging of the dataframes differently (directly by cell without melting it). In this case you would do everything in the for loops, but this should actually be done with apply-functions. Or directly in excel with a vba-macro – TobiO Jul 12 '19 at 12:20
  • 1
    What you also could do is to carry the row-numbers as a dummy variable for the spread – TobiO Jul 12 '19 at 12:41