0

Hi I'm trying to implement a pairwise alignment algorithm for protein sequences. I have this the exact same algorithm working for DNA sequences but for some reason when I copy it over to and implement with protein it doesn't The problem with the traceback portion which in will post below.

    Prot1_Seq = "AAGG";
    Prot2_Seq = "AAGG";

    cout << "\nSequence 1: " << Prot1_Seq << endl;
    int Prot1_len = Prot1_Seq.length();

    cout << "\nSequence 2: " << Prot2_Seq << endl;
    int Prot2_len = Prot2_Seq.length();

     int matrix[Prot1_len+1][Prot2_len+1] {0};

     // applying the gap penalty to the outer border columns
        for (int i {1}; i <= Prot1_len; i++)
        {
                matrix[i][0] = gap_pen  * i;

        }
        for (int j {1}; j <= Prot2_len; j++)
        {
                matrix[0][j] = gap_pen * j;

        }

fstream myfile;
        myfile.open("100pam.txt", ios::in);

    if (!myfile.is_open())
        {
                cerr << "Error can not open file" << endl;
                exit(1);
        }


    const int len = 20;
        int pam_matrix[len][len];

        string line;

        getline(myfile,line); // get ing rid of the first line of pam matrix file, by using getline to grab it

        cout << "\n" << endl;

        for (int i = 0; i < len; i++)
        {
                char Row_first_letter;
                myfile >> Row_first_letter; // using the >> to grab the first char of each row leaving the pam matrix with just numbers

                for(int j = 0; j < len; j++)
                {
                        myfile >> pam_matrix[i][j];
                        cout << pam_matrix[i][j] << "\t";
                }
                cout << endl;
        }

    cout << endl;

    int match_score {};
    int mismatch {};

    char row_labels [20] {'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'};
    char col_labels [20] {'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'};

    for(int i {0}; i <= Prot1_len; i++)
        {
                for(int j {0}; j <= Prot2_len; j++)
                {
                        if (i > 0 && j > 0)
                        {
                                if (Prot1_Seq[i-1] == Prot2_Seq[j-1])
                                {

                    int row = distance(row_labels, find(row_labels, row_labels + 20, Prot1_Seq[i-1]));
                                        int col = distance(col_labels, find(col_labels, col_labels + 20, Prot2_Seq[j-1]));

                    match_score = pam_matrix[row][col];

                    matrix[i][j] = matrix[i-1][j-1] + match_score;
                                }
                else if (Prot1_Seq[i-1] != Prot2_Seq[j-1])
                {
                    //row_labels[i-1] = Prot1_Seq[i-1];
                    //col_labels[i-1] = Prot2_Seq[i-1];

                    int row = distance(row_labels, find(row_labels, row_labels + 20, Prot1_Seq[i-1]));
                    int col = distance(col_labels, find(col_labels, col_labels + 20, Prot2_Seq[j-1]));

                    mismatch = pam_matrix[row][col];

                    matrix[i][j] = max({matrix[i-1][j-1] + mismatch,
                                                matrix[i-1][j] + gap_pen ,
                                                matrix[i][j-1] + gap_pen});
                }
                        }
                        cout << matrix[i][j] << "\t";
                }
                cout << endl;
        }

 int terminal = matrix[Prot1_len+1-1][Prot2_len+1-1]; // rows and colums are length+1 long. and to get the edge value it's col/row length -1.
    cout << terminal << endl;

        int temp = terminal;
        int decrement_X = Prot1_len+1-1;
        int decrement_Y = Prot2_len+1-1;

        string Prot1_aligned = "";
        string Prot2_aligned = "";

        int inc_seq1 = Prot1_len-1;
        int inc_seq2 = Prot2_len-1;

        while (!(decrement_X == 0 || decrement_Y == 0))
        {//while
                if (terminal == matrix[decrement_X][decrement_Y-1] + gap_pen) {
                        temp = matrix[decrement_X][decrement_Y-1];
                        cout << temp << endl;
                        terminal = temp;
                        decrement_Y--;
                        Prot1_aligned += "_";
                        Prot2_aligned += Prot2_Seq[inc_seq2];
                        inc_seq2--;
                }

                else if (terminal == matrix[decrement_X-1][decrement_Y] + gap_pen) {
                        temp = matrix[decrement_X-1][decrement_Y];
                    cout << temp << endl;
                        terminal = temp;
                        decrement_X--;
                        Prot1_aligned += Prot1_Seq[inc_seq1];
                        Prot2_aligned += "_";
                        inc_seq1--;
                }

                else if (terminal == matrix[decrement_X-1][decrement_Y-1] + match_score) {
                        temp = matrix[decrement_X-1][decrement_Y-1];
                    cout << temp << endl;
                        terminal = temp;
                        decrement_X--;
                        decrement_Y--;
                        Prot1_aligned += Prot1_Seq[inc_seq1];
                        Prot2_aligned += Prot2_Seq[inc_seq2];
                        inc_seq1--;
                        inc_seq2--;
                }

                else if (terminal == matrix[decrement_X-1][decrement_Y-1] + mismatch) {
                        temp = matrix[decrement_X-1][decrement_Y-1];
                    cout << temp << endl;
                        terminal = temp;
                        decrement_X--;
                        decrement_Y--;
                        Prot1_aligned += Prot1_Seq[inc_seq1];
                        Prot2_aligned += Prot2_Seq[inc_seq2];
                        inc_seq1--;
                        inc_seq2--;
                }
        } //while

    reverse(Prot1_aligned.begin(), Prot1_aligned.end());
        reverse(Prot2_aligned.begin(), Prot2_aligned.end());

        cout << "Performing alignment" << " .................................................. 100%" << endl;
        cout << "Alignment complete! Your aligned sequences are:" << endl;

        cout << "Sequence 1: " << Prot1_aligned << endl;
        cout << "Sequence 2: " << Prot2_aligned << endl;
}

Let's say I have 2 protein sequences AAGG. This would generate a matrix:
0 -5 -10 -15 -20
-5 4 -1 -6 -11
-10 -1 8 3 -2
-15 -6 3 13 8
-20 -11 -2 8 18

using the traceback code I would expect the values: 18, 13, 8, 4, 0. But for some reason my program just stops at 8 and does not else. Not even seg faulting or quit the program. Any solutions?

TheLordRaj
  • 35
  • 7
  • Pleas extend the code to make this a minimum reproducible example. We lack ```matrix```, ```Prot1_Seq```, and ```Prot2_Seq```. – Homer512 Oct 04 '22 at 07:44
  • I made some changes/edits. Hope it helps – TheLordRaj Oct 04 '22 at 15:18
  • It doesn't help that much. Make sure the problem is actually reproducible by us (ie, code we can compile and if it needs an input file, the contents of that file as well). See also [How to create a Minimal, Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example). – G. Sliepen Oct 04 '22 at 15:25
  • If anything, your edit made the code worse by adding unrelated code and file IO. Just add compile time constants for all variables – Homer512 Oct 04 '22 at 15:35
  • My guess is you're running off the end of `Prot1_aligned` or `Prot2_aligned,` but we all need a much simpler example to help you. – JimmyNJ Oct 04 '22 at 21:03

0 Answers0