Skip to content

Commit

Permalink
refactor(lib): Extract normalizeWord from genTrigraph
Browse files Browse the repository at this point in the history
  • Loading branch information
sgillespie committed Mar 16, 2024
1 parent c741f55 commit 1f78f8b
Showing 1 changed file with 15 additions and 5 deletions.
20 changes: 15 additions & 5 deletions src/Data/Gibberish/Trigraph.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,11 @@ newtype TrigraphConfig = TrigraphConfig

-- | Generate trigraphs from a list of words.
--
-- Every word is preprocessed, turned into its own map with 'mkTrigraph',
-- and merged into the result built up by a right fold that starts from an
-- empty map. The merged map is wrapped in the 'Trigraph' constructor.
genTrigraph :: [Text] -> Trigraph
genTrigraph = Trigraph . foldr (foldWord . transform) Map.empty
genTrigraph = Trigraph . foldr (foldWord . normalizeWord) Map.empty
  where
    -- Build the per-word map, then union it into the accumulator; entries
    -- that share a key are merged with 'combine'.
    foldWord = Map.unionWith combine . mkTrigraph
    -- Merge two 'Frequencies' by adding the counts stored under equal keys.
    combine (Frequencies f1) (Frequencies f2) = Frequencies $ Map.unionWith (+) f1 f2

transform word' =
Text.map (toQwertyKey . toLower) $
Text.filter (not . isPunctuation) word'

-- | Generate a trigraph from a single word
mkTrigraph :: Text -> Map Digram Frequencies
mkTrigraph word = foldr insert' Map.empty $ scanTrigrams word
Expand All @@ -54,6 +50,20 @@ mkTrigraph word = foldr insert' Map.empty $ scanTrigrams word
Frequencies (Map.unionWith (+) m1 m2)
mkFrequencies c = Frequencies $ Map.singleton (Unigram c) 1

-- | Prepare a word for trigraph calculation.
--
-- The steps are applied in this order:
--
-- 1. Drop every punctuation character (quotes, dashes, and so on)
-- 2. Lower case each remaining character
-- 3. Map the lower-cased character to a qwerty key (eg, à -> a)
normalizeWord :: Text -> Text
normalizeWord word = Text.map toKey (Text.filter keep word)
  where
    -- Keep only characters that are not punctuation
    keep :: Char -> Bool
    keep ch = not (isPunctuation ch)

    -- Lower case first, then translate to the qwerty key
    toKey :: Char -> Char
    toKey ch = toQwertyKey (toLower ch)

scanTrigrams :: Text -> [Trigram]
scanTrigrams word = case Text.take 3 word of
[a, b, c] -> Trigram a b c : scanTrigrams (Text.tail word)
Expand Down

0 comments on commit 1f78f8b

Please sign in to comment.