String Matching – Haskell
– String Matching
– Directed Studies – Assignment 3
– By: Nicholas Webb.
{- Longest Common Subsequence algorithm
– s1 subsequence s2 characters in s1 appear in s2 in same order but not necessarily contiguous
e.g., “ppl” subsequence “popular”
– s1 longestCommentSubsequence s2 length s where s subsequence s1 and s subsequence s2 and
not exists s’ where s’ subsequence s1 and s’ subsequence s2 and length s’ > length s
-}
– Task 1: Example LCS Implementations.
–
– Definitional implementation. Very slow due to redundant recursive computations.
lcsLength _ [] = 0
lcsLength [] _ = 0
lcsLength (x:xs) (y:ys)
| x == y = 1 + lcsLength xs ys
| otherwise = max (lcsLength xs (y:ys)) (lcsLength (x:xs) ys)
lcs _ [] = []
lcs [] _ = []
lcs (x:xs) (y:ys)
| x == y = x : lcs xs ys
| otherwise = longest (lcs xs (y:ys)) (lcs (x:xs) ys)
where
longest s1 s2 = if length s1 > length s2 then s1 else s2
– Efficient implementation. Uses a lazily generated table using dynamic programming.
lcsLength’ xs ys = lcsLen (length xs) (length ys)
where
lcsLen i j = lcsTable !! i !! j
lcsTable = [ [lcsEntry i j | j <- [0..]] | i <- [0..] ]
lcsEntry _ 0 = 0
lcsEntry 0 _ = 0
lcsEntry i j
| x == y = 1 + lcsLen (i-1) (j-1)
| otherwise = max (lcsLen i (j-1)) (lcsLen (i-1) j)
where
x = xs!!(i-1)
y = ys!!(j-1)
lcs’ xs ys = lcsLookup (length xs) (length ys)
where
lcsLookup i j = lcsTable !! i !! j
lcsTable = [ [lcsEntry i j | j <- [0..]] | i length s2 then s1 else s2
– Task 2: Cost Calculation.
–
– Takes the costs associated with a match, a mismatch, and a space, and two characters
– and returns the appropriate costing for those characters.
costChar :: Int -> Int -> Int -> Char -> Char -> Int
costChar m mm sp c1 c2
| c1 == c2 = m
| c1 == ‘-’ || c2 == ‘-’ = sp
| otherwise = mm
– Takes the costs associated with a match, a mismatch, and a space, and two strings
– (known to be of the same length) and returns the cost of that alignment of the strings.
costString :: Int -> Int -> Int -> String -> String -> Int
costString _ _ _ _ [] = 0
costString m mm sp (x:xs) (y:ys) = costChar m mm sp x y + costString m mm sp xs ys
– Task 3: Function minimaBy
–
– Takes a function from arbitrary values to ordinal values and a list of said values and
– returns the list of all the elements of the original list that are smallest.
minimaBy f l = filter (\x -> (f x) == (foldr1 min (map f l))) l
– Task 4: attach tails.
–
– attachHeads: Attaches new heads (x and y) to each of the lists of pair of lists.
attachHeads :: a -> a -> [([a],[a])] -> [([a],[a])]
attachHeads x y lis = [(x:xs, y:ys) | (xs, ys) a -> [([a],[a])] -> [([a],[a])]
attachTails x y lis = [(xs++[x], ys++[y]) | (xs, ys) Int -> Int -> String -> String -> Int
similarity’ m mm sp xs ys = simLookup (length xs) (length ys)
where
simLookup i j = simTable !! i !! j
simTable = [ [ simEntry i j | j <- [0..] ] | i Int -> Int -> String -> String -> [(String,String)]
optAlign m mm sp xs ys = minimaBy (\a -> costString m mm sp (fst a) (snd a)) (align xs ys)
– Task 7: Optimal alignment (slow) revisited
–
– The behaviours of align and optAlign combined into a single function optAlignment.
optAlignment :: Int -> Int -> Int -> String -> String -> [(String,String)]
optAlignment m mm sp xs ys = minimaBy (\a -> costString m mm sp (fst a) (snd a)) (align xs ys)
where
align [ ] [ ] = [("","")]
align (x:xs) [ ] = attachHeads x ‘-’ (align xs [ ])
align [ ] (y:ys) = attachHeads ‘-’ y (align [ ] ys)
align (x:xs) (y:ys) = attachHeads x y (align xs ys) ++
attachHeads x ‘-’ (align xs (y:ys)) ++
attachHeads ‘-’ y (align (x:xs) ys)
– Task 8: Optimal alignment (faster)
–
– optimalAlignment’ a faster version of the definitional solution to alignments. The
– function uses dynamic programming pattern to build up the alignment strings.
optimalAlignment’ :: Int -> Int -> Int -> String -> String -> [(String,String)]
optimalAlignment’ m mm sp xs ys = opt (length xs) (length ys)
where
opt i j = optTable !! i !! j
optTable = [[ optEntry i j | j <- [0..]] | i 0 && j == 0 = attachTails x ‘-’ (opt (i-1) 0)
| j > 0 && i == 0 = attachTails ‘-’ y (opt 0 (j-1))
| otherwise = minimaBy (\a -> costString m mm sp (fst a) (snd a))(
attachTails x y (opt (i-1) (j-1)) ++
attachTails x ‘-’ (opt (i-1) j) ++
attachTails ‘-’ y (opt i (j-1))
)
where
x = xs !! (i-1)
y = ys !! (j-1)
– Task 9: Optimal alignment (fast)
–
– The construction of optimal alignments combined with their cost calculations in a
– single dynamic programming function optimalAlignment” that determines optimal alignments.
optimalAlignment” :: Int -> Int -> Int -> String -> String -> (Int,[(String,String)])
optimalAlignment” m mm sp xs ys = opt (length xs) (length ys)
where
opt i j = optTable !! i !! j
optTable = [[ optEntry i j | j <- [0..]] | i 0 && j == 0 = (fst(opt (i-1) 0) + costChar m mm sp x ‘-’, attachTails x ‘-’ (snd(opt (i-1) 0)))
| j > 0 && i == 0 = (fst(opt 0 (j-1)) + costChar m mm sp ‘-’ y, attachTails ‘-’ y (snd(opt 0 (j-1))))
| otherwise = (\(h:t) -> (fst(h), foldr (\(a,b) c -> b ++ c) [] (h:t)))(minimaBy fst
[(fst(opt (i-1) (j-1)) + costChar m mm sp x y, attachTails x y (snd(opt (i-1) (j-1)))),
(fst(opt (i-1) j) + costChar m mm sp x '-', attachTails x '-' (snd(opt (i-1) j))),
(fst(opt i (j-1)) + costChar m mm sp '-' y, attachTails '-' y (snd(opt i (j-1))))])
where
x = xs !! (i-1)
y = ys !! (j-1)
No comments yet.
Leave a comment
-
Recent
-
Links
-
Archives
- May 2009 (3)
- December 2008 (1)
- November 2008 (13)
- September 2008 (6)
-
Categories
-
RSS
Entries RSS
Comments RSS