0

I'm working on building a phylogeny from scratch with FASTA sequences downloaded from GenBank. I think I'm doing alright up until the multiple sequence alignment with the msa package, where I get the error `Error in convertAlnRows(result$msa, type) : There is an invalid aln file!`. As always, thanks in advance for taking a look. Here is the reproducible example:

library (ape)
library (rentrez)
library (msa)

#Download Species Data 
# B_terrestris
B_terrestris <- entrez_fetch(db = "nucleotide", 
                                  id = "NC_045179.1", 
                                  rettype = "fasta")

# B_hypocrita
B_hypocrita <- entrez_fetch(db = "nucleotide", 
                             id = "NC_011923.1", 
                             rettype = "fasta")

#Vespa
Vespa <- entrez_fetch(db = "nucleotide", 
                      id = " MT137096.1", 
                      rettype = "fasta")

seq<- c(B_terrestris, B_hypocrita, Vespa) # gotta clean it 

#FASTA Cleaner Function 
fasta_cleaner <- function(fasta_object, parse = TRUE)
{
  fasta_object <- sub("^(>)(.*?)(\\n)(.*)(\\n\\n)","\\4", fasta_object)
  fasta_object <- gsub("\n", "", fasta_object)
  
  if(parse == TRUE){
    fasta_object <- stringr::str_split(fasta_object,
                                       pattern = "",
                                       simplify = FALSE)
  }
  
  return(fasta_object[[1]])
}


#This should be ready to go! 
clean.seq<-fasta_cleaner (seq, parse = FALSE)

str (clean.seq)

msa (clean.seq,  type="dna", method="Muscle", cluster="neighborjoining")
I Del Toro
  • 913
  • 4
  • 15
  • 36

0 Answers0