1

Suppose you have a data frame:

TF_name  L  R
      A  1  5
      B 10 15
      C 17 18

What would be the best way to manipulate this in such a way to make it look like this?

TF_name  Position
      A         1
      A         2
      A         3
      A         4
      A         5
      B        10
      B        11
      B        12
      B        13
      B        14
      B        15
      C        17
      C        18

EDIT: I've been trying out all the answers, however none seem to work on this particular data frame v. Is the way I am creating the data frame why these methods are not working?

f <- 'GADANCGGCCTAGATGTGAT'

motifs = c('GA','GC','CT','AG','AT')

v <- na.omit(data.frame(do.call(rbind, lapply(stri_locate_all_regex(f, motifs), unlist))))
v <- data.frame(v,Legend=na.omit(unlist(stri_extract_all_regex(f,motifs))))
v <- v[order(v[,1]),]
v <- v[c(3,1,2)]

organizer <- function(df,tracknom) {
  names(df)<-c("V1","V2","V3")
  newdf <- data.frame(
    Name=rep(df$V1, df$V3-df$V2 + 1),
    Track=tracknom,
    Position=unlist(mapply(seq, df$V2, df$V3))
  )
  newdf
}

v <- organizer(v,1)
v
   Name Track Position.1 Position.2 Position.3 Position.4 Position.5 Position.6 Position.7 Position.8
1    GA     1          1          8         10         12         13         14         18         19
2    GA     1          2          9         11         13         14         15         19         20
3    GC     1          1          8         10         12         13         14         18         19
4    GC     1          2          9         11         13         14         15         19         20
5    CT     1          1          8         10         12         13         14         18         19
6    CT     1          2          9         11         13         14         15         19         20
7    AG     1          1          8         10         12         13         14         18         19
8    AG     1          2          9         11         13         14         15         19         20
9    GA     1          1          8         10         12         13         14         18         19
10   GA     1          2          9         11         13         14         15         19         20
11   AT     1          1          8         10         12         13         14         18         19
12   AT     1          2          9         11         13         14         15         19         20
13   GA     1          1          8         10         12         13         14         18         19
14   GA     1          2          9         11         13         14         15         19         20
15   AT     1          1          8         10         12         13         14         18         19
alki
  • 3,334
  • 5
  • 22
  • 45

2 Answers2

2

Try

library(data.table)#v1.9.4+
setDT(df1)[, list(Position=L:R), TF_name] 
#   TF_name Position
# 1:       A        1
# 2:       A        2
# 3:       A        3
# 4:       A        4
# 5:       A        5
# 6:       B       10
# 7:       B       11
# 8:       B       12
# 9:       B       13
#10:       B       14
#11:       B       15
#12:       C       17
#13:       C       18

Update

 setDT(v)[, list(Position=start:end), .(Legend, 1:nrow(v))][,nrow:= NULL][]
 #    Legend Position
 #1:     GA        1
 #2:     GA        2
 #3:     GC        8
 #4:     GC        9
 #5:     CT       10
 #6:     CT       11
 #7:     AG       12
 #8:     AG       13
 #9:     GA       13
 #10:    GA       14
 #11:    AT       14
 #12:    AT       15
 #13:    GA       18
 #14:    GA       19
 #15:    AT       19
 #16:    AT       20
akrun
  • 874,273
  • 37
  • 540
  • 662
1

one way in base R (use mapply to give the start/end values to seq; join together; use rep to make the TF_names to the right length).

newdf <- data.frame(
  TF_name=rep(df$TF_name, df$R-df$L+1),
  Position=unlist(mapply(seq, df$L, df$R, SIMPLIFY=F))
)

Another way using plyr:

library(plyr)
ddply(df, .(TF_name), summarize, Position=seq(L, R))

I'm sure there are more - your pick really.


Update in response to question change....

Same thing, just change the column names. Also, use an explicit SIMPLIFY=F in the mapply call, it just so happens that all your ends are exactly 1 more than your starts so mapply simplifies the result to a matrix rather than a list, causing unlist to do nothing.

(This time I added a with, but that's because I'm too lazy to type v$ in front of everything. You could omit and do v$start etc instead if you liked).

with(v,
  data.frame(
    Legend=rep(Legend, end-start+1),
    Position=unlist(mapply(seq, start, end, SIMPLIFY=F))
  )
)
mathematical.coffee
  • 55,977
  • 11
  • 154
  • 194