3

I would like to modify an input function, so that the expressions always call `{`(), and doing so, keep the comments at the right place.

Here is an example :

input_fun <- function(){

  if(TRUE)
    foo
  else
    # bar
    bar

  if(FALSE) {
    this
    # baz
    baz
    that
  }

  repeat
    while(condition)
      # qux
      qux
}

cat(deparse(input_fun, control = "useSource"),sep ="\n")
#> function(){
#>   
#>   if(TRUE)
#>     foo
#>   else
#>     # bar
#>     bar
#>   
#>   if(FALSE) {
#>     this
#>     # baz
#>     baz
#>     that
#>   }
#>   
#>   repeat
#>     while(condition)
#>       # qux
#>       qux
#> }

The output would be the following output_fun or similar, where similar means that inserting or removing new lines before/after { or } is not important, and nor is indentation.

I also don't mind losing comments that are not on their own line (though I slightly better to keep them).

output_fun <- function(){
  if(TRUE){
    foo
  } else {
    # bar
    bar
  }

  if(FALSE) {
    this
    # baz
    baz
    that
  }

  repeat {
    while(condition){
    # qux
    qux
    }
  }
}

cat(deparse(output_fun, control = "useSource"),sep ="\n")
#> function(){
#>   if(TRUE){
#>     foo
#>   } else {
#>     # bar
#>     bar
#>   }
#>     
#>   if(FALSE) {
#>     this
#>     # baz
#>     baz
#>     that
#>   }
#>   
#>   repeat {
#>     while(condition){
#>     # qux
#>     qux
#>     }
#>   }
#> }

Maybe something can be done by keeping a count of control flow constructs and opened brackets, or maybe we should go through the parse tree of the input function, edit to add the { and find a way to plug back the comments from the original srcref at the right place, but I'm a bit stuck, any method will do.


edit :

We might be able to use this :

repair <- function(call){
  if(!is.call(call)) {
    return(call)
  }

  # if
  if(call[[1]] == quote(`if`)) {
    if(!is.call(call[[3]]) || call[[3]][[1]] != quote(`{`)){
      call[[3]] <- as.call(list(quote(`{`), call[[3]]))
    } 
    if(length(call) == 4 && (!is.call(call[[4]]) || call[[4]][[1]] != quote(`{`))){
      call[[4]] <- as.call(list(quote(`{`), call[[4]]))
    }
    call[-1] <- lapply(as.list(call[-1]), repair)
    return(call)
  } 

  # for
  if(call[[1]] == quote(`for`)) {
    if(!is.call(call[[4]]) || call[[4]][[1]] != quote(`{`)){
      call[[4]] <- as.call(list(quote(`{`), call[[4]]))
    } 
    call[-1] <- lapply(as.list(call[-1]), repair)
    return(call)
  } 

  # repeat
  if(call[[1]] == quote(`repeat`)) {
    if(!is.call(call[[2]]) || call[[2]][[1]] != quote(`{`)){
      call[[2]] <- as.call(list(quote(`{`), call[[2]]))
    } 
    call[-1] <- lapply(as.list(call[-1]), repair)
    return(call)
  } 

  # while
  if(call[[1]] == quote(`while`)) {
    if(!is.call(call[[3]]) || call[[3]][[1]] != quote(`{`)){
      call[[3]] <- as.call(list(quote(`{`), call[[3]]))
    } 
    call[-1] <- lapply(as.list(call[-1]), repair)
    return(call)
  } 

  #
  call[] <- lapply(call, repair)
  call  
}

output_fun0 <- input_fun
body(output_fun0) <- repair(body(input_fun))
output_fun0
#> function () 
#> {
#>     if (TRUE) {
#>         foo
#>     }
#>     else {
#>         bar
#>     }
#>     if (FALSE) {
#>         this
#>         baz
#>         that
#>     }
#>     repeat {
#>         while (condition) {
#>             qux
#>         }
#>     }
#> }
moodymudskipper
  • 46,417
  • 11
  • 121
  • 167
  • Proof of concept: `sourcecode <- attr(input_fun, "srcref"); body(input_fun)[[2]][[3]] <- quote({foo}); attr(input_fun, "srcref") <- sourcecode; attr(attr(input_fun, "srcref"), "srcfile")$lines <- gsub("foo", "{\nfoo\n}", attr(attr(input_fun, "srcref"), "srcfile")$lines)` – Roland Oct 18 '19 at 12:45
  • but finding `foo` is the hard part :) – moodymudskipper Oct 18 '19 at 12:52
  • I think it might be done by moving the comment into the call that follows them, so baz will become '#' ("# baz", baz). We loop through the source, record a comment block when we encounter one, then replace the first symbol we meet by a call containing the comment as text. Then apply my repair function, then go through the parse tree again or use regex to free the comments from '#' () – moodymudskipper Oct 18 '19 at 15:37

1 Answers1

0

Disclaimer: this is going to be long and twisted

I provide here an improved example including corner cases, and show the main steps.

The functions I used are at the bottom. They're not very well commented so shoot if you need them edited with clarifications.

data

input_fun <- function(){

  if(TRUE)
    foo
  else
    # bar_com1
    # bar_com2
    bar({
      x({y})
    }) %in% z

  # if
  if(
    FALSE) {
    this
    # baz_com
    baz
    that
  }

  repeat
    while(condition)
      # qux_com
      qux
}

Solution

We nest comment in the code by hiding them in the next call as the first argument of a `#`() function

output_fun <- nest_comments(input_fun)
output_fun
#> function () 
#> {
#>     if (TRUE) 
#>         foo
#>     else `#`("    # bar_com1\n    # bar_com2", bar)({
#>         x({
#>             y
#>         })
#>     }) %in% z
#>     `#`("  # if", if (FALSE) {
#>         this
#>         `#`("    # baz_com", baz)
#>         that
#>     })
#>     repeat while (condition) `#`("      # qux_com", qux)
#> }

We "repair" the function, adding explicit { calls where missing in control flow constructs

body(output_fun) <- repair_call(body(output_fun))
output_fun
#> function () 
#> {
#>     if (TRUE) {
#>         foo
#>     }
#>     else {
#>         `#`("    # bar_com1\n    # bar_com2", bar)({
#>             x({
#>                 y
#>             })
#>         }) %in% z
#>     }
#>     `#`("  # if", if (FALSE) {
#>         this
#>         `#`("    # baz_com", baz)
#>         that
#>     })
#>     repeat {
#>         while (condition) {
#>             `#`("      # qux_com", qux)
#>         }
#>     }
#> }

We go back to the new parse tree and extract the #() calls into independent calls above the "host" call

body(output_fun) <- unnest_comments(body(output_fun))
output_fun
#> function () 
#> {
#>     if (TRUE) {
#>         foo
#>     }
#>     else {
#>         `#`("    # bar_com1\n    # bar_com2")
#>         bar({
#>             x({
#>                 y
#>             })
#>         }) %in% z
#>     }
#>     `#`("  # if")
#>     if (FALSE) {
#>         this
#>         `#`("    # baz_com")
#>         baz
#>         that
#>     }
#>     repeat {
#>         while (condition) {
#>             `#`("      # qux_com")
#>             qux
#>         }
#>     }
#> }

Now we can use regex to set the comments back to their standard form.

output_fun <- regularize_comments(output_fun)
output_fun
#> function () 
#> {
#>     if (TRUE) {
#>         foo
#>     }
#>     else {
#>         # bar_com1
#>     # bar_com2
#>         bar({
#>             x({
#>                 y
#>             })
#>         }) %in% z
#>     }
#>     # if
#>     if (FALSE) {
#>         this
#>         # baz_com
#>         baz
#>         that
#>     }
#>     repeat {
#>         while (condition) {
#>             # qux_com
#>             qux
#>         }
#>     }
#> }

functions

regularize_comments <- function(fun) {
  env <- environment(fun)
  fun <- deparse(fun)
  #fun <- gsub("(\\s*`#`\\(\")(.*?)\\\"\\)$","\\2", fun)
  fun <- gsub("(\\s*)`#`\\(\"(\\s*)(.*?)\\\"\\)$","\\1\\3", fun)
  fun <- gsub("\\\\n","\n",fun)
  eval(parse(text=paste(fun, collapse = "\n"))[[1]],envir = env)
}
unnest_comments <- function(call) {
  if(!is.call(call)) {
    return(call)
  }

  call0 <- lapply(call, function(x) {
    call_str <- paste(deparse(x), collapse ="\n")
    if(startsWith(call_str, "`#`(")){
      #is.call(x) && x[[1]] == quote(`#`) && length(x) == 3){
      # browser()
      x <- list(extract_comment(x),
                clean_call(x))
    }
    x
  })
  call <- as.call(unlist(call0))
  call[] <- lapply(call, unnest_comments)
  call
}
# helper for unnest_comments
extract_comment <- function(call){
  if(!is.call(call)) {
    return(NULL)
  }
  if(identical(call[[1]], quote(`#`))){
    return(call[1:2])
  }
  unlist(lapply(call, extract_comment))[[1]]
}
# helper for unnest_comments
clean_call <- function(call){
  if(!is.call(call)) {
    return(call)
  }
  if(identical(call[[1]], quote(`#`))){
    return(call[[3]])
  }
  call[] <- lapply(call, clean_call)
  call
}
is_syntactic <- function(x){
  tryCatch({str2lang(x); TRUE},
           error = function(e) FALSE)
}
nest_comments <- function(fun){
  src <- deparse(fun, control = "useSource")
  # positions of comments
  commented_lgl <- grepl("^\\s*#",src)
  # positions of 1st comments of comment blocks
  first_comments_lgl <- diff(c(FALSE, commented_lgl)) == 1
  # ids of comment blocks along the lines
  comment_ids <- cumsum(first_comments_lgl) * commented_lgl
  # positions of 1st lines after comment blocks
  first_lines_lgl <- diff(!c(FALSE, commented_lgl)) == 1
  first_lines_ids <- cumsum(first_lines_lgl) * first_lines_lgl

  # we iterate through these ids, taking max from lines so if code ends with a
  # comment it will be ignored
  for(i in seq(max(first_lines_ids))){
    comments <- src[comment_ids == i]
    line_num <- which(first_lines_ids == i)
    line <- src[line_num]
    # we move forward character by character until we get a syntactic replacement
    # the code replacement starts with "`#`(" and we try all positions of 2nd
    # parenthese until something works, then deal with next code block

    j <- 0
    repeat {
      break_ <- FALSE
      j <- j+1
      line <- src[line_num]
      if(j == 1) code <- paste0("`#`('", paste(comments,collapse="\n"),"', ") else code[j] <- ""
      for(n_chr in seq(nchar(src[line_num]))){
        code[j] <- paste0(code[j], substr(line, n_chr, n_chr))
        if (n_chr < nchar(line))
          code_last_line <- paste0(code[j],")", substr(line, n_chr+1, nchar(line)))
        else
          code_last_line <- paste0(code[j],")")
        #print(code_last_line)
        src_copy <- src
        src_copy[(line_num-j+1):line_num] <- c(head(code,-1), code_last_line)
        if (is_syntactic(paste(src_copy,collapse="\n"))){
          src <- src_copy
          break_ <- TRUE
          break}
      }
      if(break_ || j == 7) break
      line_num <- line_num + 1
    }
  }
  eval(str2lang(paste(src, collapse = "\n")),envir = environment(fun))
}
repair_call <- function(call){
  if(!is.call(call)) {
    return(call)
  }
  # if
  if(call[[1]] == quote(`if`)) {
    if(!is.call(call[[3]]) || call[[3]][[1]] != quote(`{`))
      call[[3]] <- as.call(list(quote(`{`), call[[3]]))
    if(length(call) == 4 && (!is.call(call[[4]]) || call[[4]][[1]] != quote(`{`)))
      call[[4]] <- as.call(list(quote(`{`), call[[4]]))
    call[-1] <- lapply(as.list(call[-1]), repair_call)
    return(call)}
  # for
  if(call[[1]] == quote(`for`)) {
    if(!is.call(call[[4]]) || call[[4]][[1]] != quote(`{`))
      call[[4]] <- as.call(list(quote(`{`), call[[4]]))
    call[-1] <- lapply(as.list(call[-1]), repair_call)
    return(call)}
  # repeat
  if(call[[1]] == quote(`repeat`)) {
    if(!is.call(call[[2]]) || call[[2]][[1]] != quote(`{`))
      call[[2]] <- as.call(list(quote(`{`), call[[2]]))
    call[-1] <- lapply(as.list(call[-1]), repair_call)
    return(call)}
  # while
  if(call[[1]] == quote(`while`)) {
    if(!is.call(call[[3]]) || call[[3]][[1]] != quote(`{`)){
      call[[3]] <- as.call(list(quote(`{`), call[[3]]))
    }
    call[-1] <- lapply(as.list(call[-1]), repair_call)
    return(call)}
  call[] <- lapply(call, repair_call)
  call
}

moodymudskipper
  • 46,417
  • 11
  • 121
  • 167