Get Longest Common Phrase in PowerShell

Posted Saturday, January 14, 2012 in Old JamesCMS Posts

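Here is a PowerShell function that gives you the longest common phrase (a run of consecutive words) shared by two pieces of text. It's adapted from the dynamic-programming solution to the longest common substring problem, working on words instead of characters.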

{{Powershell}}
function Git-LongestCommonPhrase([string]$text1, [string]$text2, $all)
{
    <#
    .SYNOPSIS
    Finds the longest run of consecutive words that two texts have in common.

    .DESCRIPTION
    Both texts are lower-cased and split into words on spaces and common
    punctuation. A word-level version of the longest-common-substring
    dynamic-programming table is then filled in, where cell [i, j] holds the
    length of the common phrase ending at word i of the first text and word j
    of the second text.

    .PARAMETER text1
    First text to compare.

    .PARAMETER text2
    Second text to compare.

    .PARAMETER all
    Pass the string 'all' to output every distinct phrase of maximal length.
    Any other value (or omitting it) outputs a single phrase.

    .OUTPUTS
    System.String. With 'all', zero or more phrases, each with its words
    joined by single spaces. Otherwise one phrase (the last maximal phrase
    found while scanning the first text), or an empty string when the texts
    share no word.
    #>

    # Built as an explicit char[] so that Split() treats every character as a
    # separate delimiter on all PowerShell/.NET versions. The literal is
    # single-quoted so '$' is not expanded; the straight (0x27) and
    # typographic (0x2019) apostrophes are appended by code point.
    [char[]]$separators = ',.(){}[]~!@#$%^&*_+-= '.ToCharArray() + [char]0x27 + [char]0x2019

    # RemoveEmptyEntries stops adjacent delimiters (e.g. ", ") from producing
    # empty "words" that would match each other.
    $S = $text1.ToLower().Split($separators, [System.StringSplitOptions]::RemoveEmptyEntries)
    $T = $text2.ToLower().Split($separators, [System.StringSplitOptions]::RemoveEmptyEntries)

    # Row 0 and column 0 stay zero and act as the "no previous match" border,
    # so word i of $S lives at $S[$i - 1] (likewise for $T).
    $L = New-Object 'int[,]' ($S.Count + 1), ($T.Count + 1)
    $results = New-Object 'System.Collections.Generic.List[string]'
    $z = 0   # length, in words, of the longest common phrase found so far

    for ($i = 1; $i -le $S.Count; $i++)
    {
        for ($j = 1; $j -le $T.Count; $j++)
        {
            if ($S[$i - 1] -eq $T[$j - 1])
            {
                $L[$i, $j] = $L[($i - 1), ($j - 1)] + 1

                if ($L[$i, $j] -gt $z)
                {
                    # A strictly longer phrase makes all earlier ones obsolete.
                    $z = $L[$i, $j]
                    $results.Clear()
                }
                if ($L[$i, $j] -eq $z)
                {
                    # The phrase is the $z words of $S ending at word i.
                    $phrase = $S[($i - $z)..($i - 1)] -join ' '
                    if (-not $results.Contains($phrase))
                    {
                        $results.Add($phrase)
                    }
                }
            }
        }
    }

    if ($all -eq 'all')
    {
        $results
    }
    elseif ($results.Count -gt 0)
    {
        $results[$results.Count - 1]
    }
    else
    {
        ''
    }
}