-2

I am trying to convert a multi fasta parser from Python to C#. For the input

>header1
ACTG
GCTA

>header2
GATTACA

it would return the dictionary {'header2': 'GATTACA', 'header1': 'ACTGGCTA'}

The original Python code looks like:

def fastaParser(handle):
    """Parse multi-FASTA text into a dict mapping each header to its sequence.

    Adapted from https://github.com/biopython/biopython/blob/master/Bio/SeqIO/FastaIO.py#L39

    Args:
        handle: any object with a readline() method that returns an empty
            string at end of input (for example an open text file).

    Returns:
        A dict keyed by header text (the leading '>' removed and trailing
        whitespace stripped). Each value is the record's sequence lines joined
        together with spaces and carriage returns removed. The dict is empty
        when the input contains no line starting with '>'. When a header
        occurs more than once, the last record with that header wins.
    """
    fastaDict = {}

    # Skip any text before the first record (e.g. blank lines, comments).
    # readline() returns "" only at end of input; a blank line is "\n".
    line = handle.readline()
    while line and line[0] != ">":
        line = handle.readline()

    # Each pass consumes one record; on entry `line` is that record's header.
    while line:
        title = line[1:].rstrip()
        lines = []
        line = handle.readline()
        # Collect sequence lines until the next header or end of input.
        while line and line[0] != ">":
            lines.append(line.rstrip())
            line = handle.readline()

        # Remove trailing whitespace, and any internal spaces
        fastaDict[title] = "".join(lines).replace(" ", "").replace("\r", "")

    return fastaDict

if __name__ == '__main__':
    # Parse 'fasta.txt' (resolved against the current working directory) and
    # print the resulting header -> sequence dict.
    with open('fasta.txt') as f:
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print(fastaParser(f))

What I have as C# code is (my code expects a string instead of an open filehandle):

    // Draft C# port of the Python fastaParser: reads the multi-FASTA text in
    // multiFasta line by line through a StringReader and collects one dictionary
    // entry per record.
    // NOTE(review): this draft does not compile as written; the individual
    // problems are marked inline.
    // NOTE(review): the dictionary is keyed by int, but Add(title, sequence)
    // further down passes a string title.
    public Dictionary<int, string> parseFasta(string multiFasta)
    {
        Dictionary<int, string> fastaDict = new Dictionary<int, string>();
        using (System.IO.StringReader multiFastaReader = new System.IO.StringReader(multiFasta))
        {
            // Skip any text before the first record (e.g. blank lines, comments)
            while (true)
            {
                // NOTE(review): `line` is declared inside this loop body, so it
                // goes out of scope at the loop's closing brace.
                string line = multiFastaReader.ReadLine();
                // NOTE(review): StringReader.ReadLine() returns null (not "") at
                // end of input, so this test matches blank lines rather than EOF.
                if (line == "")
                {
                    return fastaDict; // Premature end of file, or just empty?
                }
                if (line[0] == '>')
                {
                    break;
                }
            }

            // Read one record per iteration: header line, then its sequence lines.
            while (true)
            {
                if (line[0] != '>') // <- Here I get the error: "the name 'line' does not exist in the current context
                {
                    throw new Exception("Records in Fasta files should start with '>' character");
                }

                // NOTE(review): `line[1:]` is Python slice syntax and is not valid C#.
                string title= line[1:].TrimEnd();
                List<string> lines = new List<string>();

                line = multiFastaReader.ReadLine();

                // Collect sequence lines until the next header or end of input.
                while (true)
                {
                    // NOTE(review): `!` cannot be applied to a string in C#; the
                    // Python `not line` test has no direct equivalent here.
                    if (!line)
                    {
                        break;
                    }
                    if (line[0] == '>')
                    {
                        break;
                    }
                    lines.Add(line.TrimEnd());
                    line = multiFastaReader.ReadLine();
                }

                // Remove trailing whitespace, and any internal spaces
                string sequence = String.Join("", lines).Replace(" ", "").Replace("\r", "");
                fastaDict.Add(title, sequence);

                // NOTE(review): same invalid `!line` test as in the inner loop.
                if (!line)
                {
                    return fastaDict;
                }
            }
        }
     }

The error that I'm getting is that Visual Studio says the variable `line`, wherever it is used after the second `while (true)`, does not exist in the current context.

BioGeek
  • 21,897
  • 23
  • 83
  • 145

1 Answer

-2

I finally got it to work with this code:

    /// <summary>
    /// Parses multi-FASTA text into a dictionary that maps each record header
    /// to its concatenated sequence.
    /// </summary>
    /// <param name="multiFasta">
    /// The complete multi-FASTA text. Anything before the first line starting
    /// with '&gt;' (blank lines, comments) is ignored.
    /// </param>
    /// <returns>
    /// A dictionary keyed by header text (leading '&gt;' removed, trailing
    /// whitespace trimmed). Each value is the record's sequence lines joined
    /// together with spaces and carriage returns removed. The dictionary is
    /// empty when the input contains no header line. When a header occurs more
    /// than once, the last record with that header wins, matching the Python
    /// original.
    /// </returns>
    public Dictionary<string, string> parseFasta(string multiFasta)
    {
        Dictionary<string, string> fastaDict = new Dictionary<string, string>();
        using (System.IO.StringReader multiFastaReader = new System.IO.StringReader(multiFasta))
        {
            // Skip any text before the first record (e.g. blank lines, comments).
            // ReadLine() returns null at end of input and "" for a blank line, so
            // only null may end the scan; the reader must advance on every pass.
            string line = multiFastaReader.ReadLine();
            while (line != null && !line.StartsWith(">", StringComparison.Ordinal))
            {
                line = multiFastaReader.ReadLine();
            }

            // Each pass consumes one record; on entry `line` is that record's header.
            while (line != null)
            {
                string title = line.Substring(1).TrimEnd();
                List<string> lines = new List<string>();

                // Collect sequence lines until the next header or end of input.
                // Blank lines inside or between records are kept as empty
                // strings, which contribute nothing to the joined sequence.
                line = multiFastaReader.ReadLine();
                while (line != null && !line.StartsWith(">", StringComparison.Ordinal))
                {
                    lines.Add(line.TrimEnd());
                    line = multiFastaReader.ReadLine();
                }

                // Remove trailing whitespace, and any internal spaces
                string sequence = String.Join("", lines).Replace(" ", "").Replace("\r", "");

                // Indexer assignment overwrites a repeated header instead of throwing.
                fastaDict[title] = sequence;
            }
        }

        return fastaDict;
    }
BioGeek
  • 21,897
  • 23
  • 83
  • 145