Posted Saturday, January 14, 2012 in Old JamesCMS Posts
Here is a PowerShell function that returns the longest common phrase (a run of consecutive words) shared by two pieces of text. It's adapted from the classic longest common substring problem, applied to words instead of characters.
{{Powershell}}
<#
.SYNOPSIS
Finds the longest phrase (run of consecutive words) common to two texts.

.DESCRIPTION
Both texts are lower-cased and split into words on a fixed set of
punctuation characters and the space character; empty tokens produced by
adjacent delimiters are discarded. The word lists are then compared with the
dynamic-programming approach used for the longest common substring problem,
operating on words instead of characters.

.PARAMETER text1
First text to compare.

.PARAMETER text2
Second text to compare.

.PARAMETER all
Pass the string "all" to receive every distinct phrase that has the maximum
length. Any other value (or omitting it) returns a single phrase.

.OUTPUTS
System.String. With -all "all": zero or more phrases, one per output object.
Otherwise: one phrase (the most recently found when several tie), or an
empty string when the texts share no word. Words in a phrase are joined by a
single space with no trailing space.
#>
function Git-LongestCommonPhrase([string]$text1, [string]$text2, $all)
{
    # Pass the delimiters as char[] so every character is its own separator on
    # all PowerShell editions (a plain string argument is treated as a single
    # multi-character separator on .NET Core). U+2019 is the typographic
    # apostrophe; '' inside the literal is an escaped ASCII apostrophe.
    $delimiters = (',.''(){}[]~!@#$%^&*_+-= ' + [char]0x2019).ToCharArray()
    $noEmpties = [System.StringSplitOptions]::RemoveEmptyEntries
    $S = @($text1.ToLower().Split($delimiters, $noEmpties))
    $T = @($text2.ToLower().Split($delimiters, $noEmpties))

    # $L[i, j] is the length of the common word run that ends at $S[i - 1] and
    # $T[j - 1]. Row 0 and column 0 stay zero and act as a border, so no
    # special case is needed for the first word of either text.
    $L = New-Object 'int[,]' ($S.Count + 1), ($T.Count + 1)
    $results = New-Object 'System.Collections.Generic.List[string]'
    $z = 0      # length (in words) of the longest run found so far
    $max = ''

    for ($i = 1; $i -le $S.Count; $i++)
    {
        for ($j = 1; $j -le $T.Count; $j++)
        {
            if ($S[$i - 1] -ne $T[$j - 1])
            {
                continue
            }

            $L[$i, $j] = $L[($i - 1), ($j - 1)] + 1

            if ($L[$i, $j] -gt $z)
            {
                # A strictly longer run makes every earlier result obsolete.
                $z = $L[$i, $j]
                $results.Clear()
            }
            if ($L[$i, $j] -eq $z)
            {
                # The run covers the $z words ending at index $i - 1.
                $phrase = $S[($i - $z)..($i - 1)] -join ' '
                if (-not $results.Contains($phrase))
                {
                    $results.Add($phrase)
                }
                $max = $phrase
            }
        }
    }

    if ($all -eq 'all')
    {
        $results
    }
    else
    {
        $max
    }
}