From 1f78f8b3adb9505b54fc986b756a13c486307a38 Mon Sep 17 00:00:00 2001 From: Sean D Gillespie Date: Sat, 16 Mar 2024 13:58:35 -0400 Subject: [PATCH] refactor(lib): Extract normalizeWord from genTrigraph --- src/Data/Gibberish/Trigraph.hs | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/Data/Gibberish/Trigraph.hs b/src/Data/Gibberish/Trigraph.hs index 38a4f08..3589e55 100644 --- a/src/Data/Gibberish/Trigraph.hs +++ b/src/Data/Gibberish/Trigraph.hs @@ -35,15 +35,11 @@ newtype TrigraphConfig = TrigraphConfig -- | Generate trigraphs from a list of words genTrigraph :: [Text] -> Trigraph -genTrigraph = Trigraph . foldr (foldWord . transform) Map.empty +genTrigraph = Trigraph . foldr (foldWord . normalizeWord) Map.empty where foldWord = Map.unionWith combine . mkTrigraph combine (Frequencies f1) (Frequencies f2) = Frequencies $ Map.unionWith (+) f1 f2 - transform word' = - Text.map (toQwertyKey . toLower) $ - Text.filter (not . isPunctuation) word' - -- | Generate a trigraph from a single word mkTrigraph :: Text -> Map Digram Frequencies mkTrigraph word = foldr insert' Map.empty $ scanTrigrams word @@ -54,6 +50,20 @@ mkTrigraph word = foldr insert' Map.empty $ scanTrigrams word Frequencies (Map.unionWith (+) m1 m2) mkFrequencies c = Frequencies $ Map.singleton (Unigram c) 1 +-- | Normalize a word before calculating the trigraph: +-- +-- 1. Remove punctuation (quotes, dashes, and so on) +-- 2. Lower case all letters +-- 3. Translate non-qwerty chars to qwerty keys (eg, à -> a) +normalizeWord :: Text -> Text +normalizeWord = Text.map transformChar . Text.filter filterChar + where + transformChar :: Char -> Char + transformChar = toQwertyKey . toLower + + filterChar :: Char -> Bool + filterChar = not . isPunctuation + scanTrigrams :: Text -> [Trigram] scanTrigrams word = case Text.take 3 word of [a, b, c] -> Trigram a b c : scanTrigrams (Text.tail word)