Skip to content

Commit

Permalink
chore: Add drop filter
Browse files Browse the repository at this point in the history
  • Loading branch information
ikawaha committed Aug 3, 2024
1 parent 8085186 commit 568cb1c
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 8 deletions.
6 changes: 6 additions & 0 deletions filter/ja/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,9 @@ func (f Filter) Yield(tokens []tokenizer.Token) []string {
}
return ret
}

// Drop filters the token slice that tokens points to by delegating first to the
// filter's stop-tags filter and then to its stop-words filter. It takes no match
// function and returns nothing; callers observe the result through the
// pointed-to slice (the package test expects only the surviving tokens to
// remain in it after the call).
// NOTE(review): the matching rules and any nil-pointer handling live in
// stopTags.Drop and stopWords.Drop, which are not visible here — confirm there.
func (f Filter) Drop(tokens *[]tokenizer.Token) {
f.stopTags.Drop(tokens)
f.stopWords.Drop(tokens)
}
49 changes: 41 additions & 8 deletions filter/ja/filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,56 @@ import (
"reflect"
"testing"

"github.com/ikawaha/kagome-dict/ipa"
"github.com/ikawaha/kagome-dict/dict"
"github.com/ikawaha/kagome/v2/tokenizer"
)

// testDictPath is the relative path of the dictionary file that TestFilter
// loads with dict.LoadDictFile.
const testDictPath = "../../testdata/ipa.dict"

// TestFilter tokenizes one Japanese sentence and checks Filter.Yield and
// Filter.Drop against the resulting tokens in two subtests.
// NOTE(review): this span is rendered from a commit diff, so lines from before
// and after the change appear side by side (for example the two `tz, err :=`
// declarations and the outer `f`); it does not compile as shown — confirm
// against the real file before editing the code.
func TestFilter(t *testing.T) {
f, err := NewFilter()
// Load the dictionary from the test data file.
d, err := dict.LoadDictFile(testDictPath)
if err != nil {
t.Fatal(err)
}
tz, err := tokenizer.New(ipa.Dict(), tokenizer.OmitBosEos())
tz, err := tokenizer.New(d, tokenizer.OmitBosEos())
if err != nil {
t.Fatal(err)
}
// tokens is declared once here and shared by both subtests below.
tokens := tz.Tokenize("人魚は、南の方の海にばかり棲んでいるのではありません。")
want := []string{"人魚", "南", "方", "海", "棲む"}
got := f.Yield(tokens)
if !reflect.DeepEqual(got, want) {
t.Errorf("got %+v, want %+v", got, want)
}
// Yield is expected to return five strings for the sentence.
t.Run("yield string from tokens", func(t *testing.T) {
f, err := NewFilter()
if err != nil {
t.Fatal(err)
}
want := []string{"人魚", "南", "方", "海", "棲む"}
got := f.Yield(tokens)
if !reflect.DeepEqual(got, want) {
t.Errorf("got %+v, want %+v", got, want)
}
})
// Drop is expected to leave five tokens in the slice, checked by Surface.
// NOTE(review): Drop receives &tokens and modifies the slice shared with the
// subtest above, so the result of this test depends on the subtests running
// in this order; consider giving this subtest its own copy.
t.Run("drop tokens", func(t *testing.T) {
f, err := NewFilter()
if err != nil {
t.Fatal(err)
}
f.Drop(&tokens)
// NOTE(review): this length check uses t.Errorf, so the index accesses below
// still run (and could go out of range) if fewer than five tokens remain.
if len(tokens) != 5 {
t.Errorf("got %+v, want %+v", len(tokens), 5)
}
if tokens[0].Surface != "人魚" {
t.Errorf("got %+v, want %+v", tokens[0].Surface, "人魚")
}
if tokens[1].Surface != "南" {
t.Errorf("got %+v, want %+v", tokens[1].Surface, "南")
}
if tokens[2].Surface != "方" {
t.Errorf("got %+v, want %+v", tokens[2].Surface, "方")
}
if tokens[3].Surface != "海" {
t.Errorf("got %+v, want %+v", tokens[3].Surface, "海")
}
// The last token's Surface is expected to be "棲ん", whereas the Yield
// subtest expects the string "棲む" in the same position.
if tokens[4].Surface != "棲ん" {
t.Errorf("got %+v, want %+v", tokens[4].Surface, "棲ん")
}
})
}

0 comments on commit 568cb1c

Please sign in to comment.