Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support text 2 #29

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions cabal.project
Original file line number Diff line number Diff line change
@@ -1 +1,27 @@
packages: ./country, ./code-generation

allow-newer:
zigzag:base,
haskell-src-meta:template-haskell,
siphon:text,
disjoint-containers:aeson,
colonnade:bytestring,
colonnade:text,

-- https://github.com/byteverse/bytebuild/pull/29
source-repository-package
type: git
location: https://github.com/parsonsmatt/bytebuild
tag: e8dee26a4cc379a1971b428187fad51824de8e38

-- https://github.com/erikd/wide-word/pull/69
source-repository-package
type: git
location: https://github.com/parsonsmatt/wide-word
tag: 8d8447cda8b6410c262408f605d0679623367071

-- https://github.com/haskell-primitive/primitive-unaligned/pull/10
source-repository-package
type: git
location: https://github.com/parsonsmatt/primitive-unaligned
tag: 25a9ec03577e979671496ba9c8af58fbe952f3fc
2 changes: 1 addition & 1 deletion country/country.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ library
, primitive >= 0.6.4 && <0.8
, primitive-unlifted >= 0.1.3 && <1.0
, scientific >=0.3 && <0.4
, text >= 1.2 && <1.3
, text >= 1.2 && <2.2
, text-short >=0.1.3
, unordered-containers >=0.2 && <0.3
default-language: Haskell2010
Expand Down
6 changes: 3 additions & 3 deletions country/src/Continent.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import Continent.Unsafe
import Control.Monad (forM_)
import Control.Monad.ST (runST)
import Country.Unexposed.Continents (continentAList)
import Country.Unexposed.Util (mapTextArray,charToWord16,word16ToInt,timesTwo)
import Country.Unexposed.Util (mapTextArray,word16ToInt,timesTwo,charToTextWord)
import Country.Unsafe (Country(Country))
import Data.Char (toLower)
import Data.Text (Text)
Expand All @@ -51,8 +51,8 @@ allAlphaUpper = TA.run $ do
m <- TA.new (timesTwo numberOfContinents)
forM_ continentNameDb $ \(n,_,(a1,a2)) -> do
let ix = timesTwo (fromIntegral n)
TA.unsafeWrite m ix (charToWord16 a1)
TA.unsafeWrite m (ix + 1) (charToWord16 a2)
TA.unsafeWrite m ix (charToTextWord a1)
TA.unsafeWrite m (ix + 1) (charToTextWord a2)
return m
{-# NOINLINE allAlphaUpper #-}

Expand Down
18 changes: 12 additions & 6 deletions country/src/Country.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE MagicHash #-}
Expand Down Expand Up @@ -40,7 +41,7 @@ import Country.Unexposed.Names (hashMapUtf16,hashMapUtf8)
import Country.Unexposed.Names (numberOfPossibleCodes,alphaTwoHashMap,alphaThreeHashMap,decodeMap,decodeMapUtf8,decodeNumeric,encodeEnglish,encodeEnglishShort)
import Country.Unexposed.Trie (Trie,trieFromList,trieParser)
import Country.Unexposed.TrieByte (TrieByte,trieByteFromList,trieByteParser)
import Country.Unexposed.Util (mapTextArray,charToWord16,word16ToInt,timesTwo,timesThree)
import Country.Unexposed.Util (mapTextArray,charToTextWord,word16ToInt,timesTwo,timesThree)
import Country.Unsafe (Country(..))
import Data.Bytes.Types (Bytes(Bytes))
import Data.ByteString (ByteString)
Expand Down Expand Up @@ -114,10 +115,15 @@ decodeAlphaThree = flip HM.lookup alphaThreeHashMap
-- countries. It strives to handle any source language. Open an
-- issue on the issue tracker if there are names that are missing.
decode :: Text -> Maybe Country
#if MIN_VERSION_base(4,17,0)
decode txt =
decodeUtf8 (TE.encodeUtf8 txt)
#else
decode (TI.Text (TA.Array arr) off16 len16) =
case (BytesHashMap.lookup (Bytes (ByteArray arr) (off16 * 2) (len16 * 2)) hashMapUtf16) of
Nothing -> Nothing
Just w -> Just (Country (fromIntegral w))
#endif

-- | Decode a 'Country' from a UTF-8-encoded 'ByteString'.
decodeUtf8 :: ByteString -> Maybe Country
Expand Down Expand Up @@ -164,8 +170,8 @@ allAlphaTwoUpper = TA.run $ do
m <- TA.new (timesTwo numberOfCountries)
forM_ countryNameQuads $ \(n,_,(a1,a2),_) -> do
let ix = timesTwo (indexOfCountry (Country n))
TA.unsafeWrite m ix (charToWord16 a1)
TA.unsafeWrite m (ix + 1) (charToWord16 a2)
TA.unsafeWrite m ix (charToTextWord a1)
TA.unsafeWrite m (ix + 1) (charToTextWord a2)
return m
{-# NOINLINE allAlphaTwoUpper #-}

Expand All @@ -174,9 +180,9 @@ allAlphaThreeUpper = TA.run $ do
m <- TA.new (timesThree numberOfCountries)
forM_ countryNameQuads $ \(n,_,_,(a1,a2,a3)) -> do
let ix = timesThree (indexOfCountry (Country n))
TA.unsafeWrite m ix (charToWord16 a1)
TA.unsafeWrite m (ix + 1) (charToWord16 a2)
TA.unsafeWrite m (ix + 2) (charToWord16 a3)
TA.unsafeWrite m ix (charToTextWord a1)
TA.unsafeWrite m (ix + 1) (charToTextWord a2)
TA.unsafeWrite m (ix + 2) (charToTextWord a3)
return m
{-# NOINLINE allAlphaThreeUpper #-}

Expand Down
5 changes: 5 additions & 0 deletions country/src/Country/Unexposed/Names.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# language CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
Expand Down Expand Up @@ -140,7 +141,11 @@ hashMapUtf8 = BytesHashMap.fromTrustedList
hashMapUtf16 :: BytesHashMap.Map
hashMapUtf16 = BytesHashMap.fromTrustedList
( map
#if MIN_VERSION_base(4,17,0)
(\(a,Text.Text (Text.ByteArray arr) off16 len16) ->
#else
(\(a,Text.Text (Text.Array arr) off16 len16) ->
#endif
(Bytes (ByteArray arr) (off16 * 2) (len16 * 2),fromIntegral a)
) countryPairs
)
Expand Down
64 changes: 61 additions & 3 deletions country/src/Country/Unexposed/Util.hs
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
{-# language RankNTypes #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}

module Country.Unexposed.Util
( mapTextArray
, textWordToChar
, charToTextWord
, charToWord16
, word16ToChar
, charToWord8
, word8ToChar
, word16ToInt
, timesTwo
, timesThree
Expand All @@ -13,27 +19,79 @@ module Country.Unexposed.Util

import Data.Bits (unsafeShiftL,unsafeShiftR)
import Data.Char (chr,ord)
import Data.Word (Word16)
import Data.Word (Word16, Word8)
import GHC.Exts (sizeofByteArray#)
import GHC.Int (Int(I#))
import Control.Monad.ST
import Data.Text.Unsafe
import Data.Text.Internal
import qualified Data.Text.Internal.Unsafe.Char as Unsafe.Char

import qualified Data.Text.Array as TA


mapTextArray :: (Char -> Char) -> TA.Array -> TA.Array
#if MIN_VERSION_base(4,17,0)
mapTextArray f src@(TA.ByteArray arr) = runST $ do
-- this implementation is lifted from the text internals
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

marr <- TA.new (l + 4)
outer marr (l + 4) o 0
where
o = 0
l = I# (sizeofByteArray# arr)
outer :: forall s. TA.MArray s -> Int -> Int -> Int -> ST s TA.Array
outer !dst !dstLen = inner
where
inner !srcOff !dstOff
| srcOff >= l + o = do
TA.shrinkM dst dstOff
arr <- TA.unsafeFreeze dst
return arr
| dstOff + 4 > dstLen = do
let !dstLen' = dstLen + (l + o) - srcOff + 4
dst' <- TA.resizeM dst dstLen'
outer dst' dstLen' srcOff dstOff
| otherwise = do
let !(Iter c d) = iterArray src srcOff
d' <- Unsafe.Char.unsafeWrite dst dstOff (safe (f c))
inner (srcOff + d) (dstOff + d')
#else
mapTextArray f a@(TA.Array inner) = TA.run $ do
let len = half (I# (sizeofByteArray# inner))
m <- TA.new len
TA.copyI m 0 a 0 len
TA.copyI len m 0 a 0
let go !ix = if ix < len
then do
TA.unsafeWrite m ix (charToWord16 (f (word16ToChar (TA.unsafeIndex a ix))))
TA.unsafeWrite m ix (charToTextWord (f (textWordToChar (TA.unsafeIndex a ix))))
go (ix + 1)
else return ()
go 0
return m
#endif
{-# INLINE mapTextArray #-}

#if MIN_VERSION_text(2,0,0)
textWordToChar :: Word8 -> Char
textWordToChar = word8ToChar

charToTextWord :: Char -> Word8
charToTextWord = charToWord8
#else
textWordToChar :: Word16 -> Char
textWordToChar = word16ToChar

charToTextWord :: Char -> Word16
charToTextWord = charToWord16
#endif

word8ToChar :: Word8 -> Char
word8ToChar = chr . fromIntegral
{-# INLINE word8ToChar #-}

charToWord8 :: Char -> Word8
charToWord8 = fromIntegral . ord
{-# INLINE charToWord8 #-}

word16ToChar :: Word16 -> Char
word16ToChar = chr . fromIntegral
{-# INLINE word16ToChar #-}
Expand Down