From 717c2a6de66ec54795f8383e22bd83290bc9f8e7 Mon Sep 17 00:00:00 2001 From: amorphobia Date: Fri, 6 Sep 2024 15:20:32 +0800 Subject: [PATCH] feat: add escape filter support escape chars `\n`, `\t`, and `\\` --- dicts/cizu_raw.txt | 4 ++-- schema/jiandao.schema.yaml | 1 + schema/lua/jiandao/escape_filter.lua | 29 +++++++++++++++++++++++ schema/lua/jiandao/hint_filter.lua | 2 +- schema/lua/jiandao/unicode_translator.lua | 7 +----- 5 files changed, 34 insertions(+), 9 deletions(-) create mode 100644 schema/lua/jiandao/escape_filter.lua diff --git a/dicts/cizu_raw.txt b/dicts/cizu_raw.txt index 68f933c..be1f51a 100644 --- a/dicts/cizu_raw.txt +++ b/dicts/cizu_raw.txt @@ -54589,7 +54589,7 @@ 迥然 jyrf iu 1000 春雨如油 jyry vv 1000 超音速 jys vov 1000 -床前明月光,疑是地上霜。举头望明月,低头思故乡。 jys voi 899 1 《静夜思》 +床前明月光,疑是地上霜。\n举头望明月,低头思故乡。 jys voi 899 1 《静夜思》 交易所 jys oou 899 解压缩 jys uva 898 窘色 jyse ou 1000 @@ -117540,7 +117540,7 @@ 源自于 yzy auv 1000 又怎样 yzy auv 899 要怎样 yzy vuv 898 -慈母手中线,游子身上衣。临行密密缝,意恐迟迟归。谁言寸草心,报得三春晖。 yzy aa 798 1 《游子吟》 +慈母手中线,游子身上衣。\n临行密密缝,意恐迟迟归。\n谁言寸草心,报得三春晖。 yzy aa 798 1 《游子吟》 越做越 yzy viv 797 药引 yzyb ia 1000 要有 yzyd vv 1000 diff --git a/schema/jiandao.schema.yaml b/schema/jiandao.schema.yaml index b678794..79b6fb0 100644 --- a/schema/jiandao.schema.yaml +++ b/schema/jiandao.schema.yaml @@ -76,6 +76,7 @@ engine: - simplifier@emoji_suggestion - uniquifier - lua_filter@*jiandao/hint_filter + - lua_filter@*jiandao/escape_filter - simplifier@traditionalization - reverse_lookup_filter@danzi_lookup diff --git a/schema/lua/jiandao/escape_filter.lua b/schema/lua/jiandao/escape_filter.lua new file mode 100644 index 0000000..d8b6beb --- /dev/null +++ b/schema/lua/jiandao/escape_filter.lua @@ -0,0 +1,29 @@ +--[[ + Escape Filter + Copyright (C) 2024 Xuesong Peng + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or + (at 
-- Escape sequences recognised in candidate text, keyed by the character that
-- follows the backslash.
local ESCAPES = {
    n = "\n",
    t = "\t",
    ["\\"] = "\\",
}

--- Expand the escape sequences `\n`, `\t` and `\\` in candidate text.
-- Every candidate read from `input:iter()` is re-emitted through `yield` as a
-- new `Candidate` that keeps the original type and span, carries the comment
-- of the genuine candidate, and has its text unescaped.
-- @param input object exposing `iter()`, which produces the candidates
local function filter(input)
    for cand in input:iter() do
        -- Decode every escape in a single left-to-right pass. Chaining one
        -- gsub per escape would decode the tail of an escaped backslash a
        -- second time: the literal text `\\n` would turn into a backslash
        -- followed by a line feed instead of the two characters `\n`.
        -- Assigning to a single local drops gsub's second result (the count).
        local text = string.gsub(cand.text, "\\([nt\\])", ESCAPES)
        local comment = cand:get_genuine().comment
        yield(Candidate(cand.type, cand._start, cand._end, text, comment))
    end
end
nil then local ch = utf8.char(codepoint) - -- to prevent software crashing on Windows - if is_win and codepoint == 10 then - ch = "LF" - end local cand = Candidate("unicode", seg.start, seg._end, ch, " Unicode") -- input_code = string.format("%04s", input_code) -- string.format not working in Hamster