From 568cb1c07319f692f18ed2ab35f9badfadaaf3f3 Mon Sep 17 00:00:00 2001
From: ikawaha
Date: Sat, 3 Aug 2024 14:56:21 +0900
Subject: [PATCH] chore: Add drop filter

---
Review notes (text between the "---" line and the first diff header is
ignored when the patch is applied):
* Drop's doc comment now states what the method does; it takes no match
  function.
* The length check in the "drop tokens" subtest uses t.Fatalf so that a wrong
  token count cannot turn into an index-out-of-range panic on tokens[0..4].
* Hunk sizes are unchanged; the post-image blob ids on the "index" lines are
  those of the original commit.

 filter/ja/filter.go      |  6 +++++
 filter/ja/filter_test.go | 49 +++++++++++++++++++++++++++++++++-------
 2 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/filter/ja/filter.go b/filter/ja/filter.go
index f61bfa5..30f820d 100644
--- a/filter/ja/filter.go
+++ b/filter/ja/filter.go
@@ -124,3 +124,9 @@ func (f Filter) Yield(tokens []tokenizer.Token) []string {
 	}
 	return ret
 }
+
+// Drop removes from tokens, in place, every token matched by the filter's stop-tags or stop-words.
+func (f Filter) Drop(tokens *[]tokenizer.Token) {
+	f.stopTags.Drop(tokens)
+	f.stopWords.Drop(tokens)
+}
diff --git a/filter/ja/filter_test.go b/filter/ja/filter_test.go
index 99397a4..5ec3324 100644
--- a/filter/ja/filter_test.go
+++ b/filter/ja/filter_test.go
@@ -4,23 +4,56 @@ import (
 	"reflect"
 	"testing"
 
-	"github.com/ikawaha/kagome-dict/ipa"
+	"github.com/ikawaha/kagome-dict/dict"
 	"github.com/ikawaha/kagome/v2/tokenizer"
 )
 
+const testDictPath = "../../testdata/ipa.dict"
+
 func TestFilter(t *testing.T) {
-	f, err := NewFilter()
+	d, err := dict.LoadDictFile(testDictPath)
 	if err != nil {
 		t.Fatal(err)
 	}
-	tz, err := tokenizer.New(ipa.Dict(), tokenizer.OmitBosEos())
+	tz, err := tokenizer.New(d, tokenizer.OmitBosEos())
 	if err != nil {
 		t.Fatal(err)
 	}
 	tokens := tz.Tokenize("人魚は、南の方の海にばかり棲んでいるのではありません。")
-	want := []string{"人魚", "南", "方", "海", "棲む"}
-	got := f.Yield(tokens)
-	if !reflect.DeepEqual(got, want) {
-		t.Errorf("got %+v, want %+v", got, want)
-	}
+	t.Run("yield string from tokens", func(t *testing.T) {
+		f, err := NewFilter()
+		if err != nil {
+			t.Fatal(err)
+		}
+		want := []string{"人魚", "南", "方", "海", "棲む"}
+		got := f.Yield(tokens)
+		if !reflect.DeepEqual(got, want) {
+			t.Errorf("got %+v, want %+v", got, want)
+		}
+	})
+	t.Run("drop tokens", func(t *testing.T) {
+		f, err := NewFilter()
+		if err != nil {
+			t.Fatal(err)
+		}
+		f.Drop(&tokens)
+		if len(tokens) != 5 {
+			t.Fatalf("got %+v, want %+v", len(tokens), 5)
+		}
+		if tokens[0].Surface != "人魚" {
+			t.Errorf("got %+v, want %+v", tokens[0].Surface, "人魚")
+		}
+		if tokens[1].Surface != "南" {
+			t.Errorf("got %+v, want %+v", tokens[1].Surface, "南")
+		}
+		if tokens[2].Surface != "方" {
+			t.Errorf("got %+v, want %+v", tokens[2].Surface, "方")
+		}
+		if tokens[3].Surface != "海" {
+			t.Errorf("got %+v, want %+v", tokens[3].Surface, "海")
+		}
+		if tokens[4].Surface != "棲ん" {
+			t.Errorf("got %+v, want %+v", tokens[4].Surface, "棲ん")
+		}
+	})
 }