DNA Sequence Alignment using Needleman–Wunsch Algorithm

Posted Saturday, October 2, 2010

Here's an implementation of the Needleman-Wunsch algorithm to align two DNA sequences.

{{C#}}
/*Author: James Santiago
*Date: 10 October 2010
*/

using System;
using System.Collections.Generic;
using System.Text;

namespace DnaSequenceAlignment
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine("Enter the first sequence to evaluate:");
string sequence1 = Console.ReadLine();

Console.WriteLine("Enter the second sequence to evaluate:");
string sequence2 = Console.ReadLine();

Console.WriteLine(AlignSequences(sequence1, sequence2, -5));
}

/// <summary>
/// Discovers the optimal alignment of two DNA sequences according to the
/// Needleman–Wunsch algorithm.
/// </summary>
/// <param name="sequence1">The first DNA sequence to align.</param>
/// <param name="sequence2">The second DNA sequence to align.</param>
/// <param name="gapPenalty">The gap penalty.</param>
/// <returns>Returns a string denoting the optimal alignment of the two
/// sequences provided.</returns>
private static string AlignSequences(string sequence1, string sequence2,
int gapPenalty)
{
//This declares the similarity matrix and the appropriate scores.
int[,] simMatrix = new int[,] { { 10, -1, -3, -4 },
{ -1, 7, -5, -3 },
{ -3, -5, 9, 0 },
{ -4, -3, 0, 8 } };

//Sequences are stored in char arrays.
char[] sequenceArray1 = sequence1.ToCharArray();
char[] sequenceArray2 = sequence2.ToCharArray();

int length1 = sequence1.Length;
int length2 = sequence2.Length;

//The score matrix is used to score each combination of characters.
int [,] scoreMatrix = new int[length1, length2];

string matrixp = "\nScore Matrix:\n\n";

//Calculates the scores for first column in the score matrix.
for (int i = 0; i < length1; i++)
{
scoreMatrix[i, 0] = gapPenalty * i;
}

//Calulates the scores for the first row in the score matrix.
for (int i = 0; i < length2; i++)
{
scoreMatrix[0, i] = gapPenalty * i;
}

//Calculates the scores for the inner portion of the score matrix.
int match, delete, insert;
for (int i = 1; i < length1; i++)
{
for (int j = 1; j < length2; j++)
{
int seq1 = seqVal(sequence1[i]);
int seq2 = seqVal(sequence2[j]);

match = scoreMatrix[i - 1, j - 1] + simMatrix[seq1, seq2];
delete = scoreMatrix[i - 1, j] + gapPenalty;
insert = scoreMatrix[i, j - 1] + gapPenalty;

int max = Math.Max(match, delete);
scoreMatrix[i, j] = Math.Max(insert, max);
matrixp += "\t" + scoreMatrix[i, j] + " ";
}
matrixp += "\n";
}

//This prints out the inner portion of the score matrix (no first
//row/first column).
Console.WriteLine(matrixp);

string align1 = "";
string align2 = "";
int k = sequence1.Length - 1;
int l = sequence2.Length - 1;

//Creates the alignment according to the scores in the score matrix.
//The while loop works from the last cell of the matrix (last value
//in the array) then moves according to scores in the top, left and
//diagonal cells.  A score that denotes a match would move the loop
//diagonally, a score of delete would move the loop left and a score
//of insert would move the loop up.
while (k >= 0 && l >= 0)
{
int score, scoreDiag, scoreUp, scoreLeft;
score = scoreMatrix[k, l];
if (k != 0 && l != 0) scoreDiag = scoreMatrix[k - 1, l - 1];
else scoreDiag = 100;
if (l != 0) scoreUp = scoreMatrix[k, l - 1];
else scoreUp = 100;
if (k != 0) scoreLeft = scoreMatrix[k - 1, l];
else scoreLeft = 100;

int seq1 = seqVal(sequence1[k]);
int seq2 = seqVal(sequence2[l]);

if (score == scoreDiag + simMatrix[seq1, seq2])
{
align1 = sequence1[k] + align1;
align2 = sequence2[l] + align2;
k--;
l--;
}
else if (score == scoreLeft + gapPenalty)
{
align1 = sequence1[k] + align1;
align2 = "-" + align2;
k--;
}
else if (score == scoreUp + gapPenalty)
{
align1 = "-" + align1;
align2 = sequence2[l] + align2;
l--;
}
else
{
align1 = sequence1[k] + align1;
align2 = sequence2[l] + align2;
k--;
l--;
}
}
while (k >= 0)
{
align1 = sequence1[k] + align1;
align2 = "-" + align2;
k--;
}
while (l >= 0)
{
align1 = "-" + align1;
align2 = sequence1[l] + align2;
l--;
}

return "Optimal Alignment:\n\n" + align1 + "\n" + align2;
}

/// <summary>
/// Finds the value of each nucleotide.
/// </summary>
/// <param name="evalChar">The character to be replaced.</param>
/// <returns>Returns the int value of the nucleotide.</returns>
private static int seqVal(char evalChar)
{
switch (evalChar)
{
case 'A':
return 0;
case 'C':
return 1;
case 'G':
return 2;
case 'T':
return 3;
default:
return -1;
}

}

}
}