From 887d627d2fe07071bebf9d97728cd2f822f81d7c Mon Sep 17 00:00:00 2001 From: Hugo-ter-Doest Date: Fri, 9 Aug 2024 11:21:01 +0200 Subject: [PATCH] Syntax --- examples/tokenizer/testSentenceTokenizer.js | 2 +- lib/natural/tokenizers/sentence_tokenizer.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/tokenizer/testSentenceTokenizer.js b/examples/tokenizer/testSentenceTokenizer.js index ebe5a005..649280cc 100644 --- a/examples/tokenizer/testSentenceTokenizer.js +++ b/examples/tokenizer/testSentenceTokenizer.js @@ -21,4 +21,4 @@ Experts believe that with continued global cooperation and investment, renewable Stay tuned for more updates on this developing story.` const result = tokenizer.tokenize(testData) -console.log(result) \ No newline at end of file +console.log(result) diff --git a/lib/natural/tokenizers/sentence_tokenizer.js b/lib/natural/tokenizers/sentence_tokenizer.js index 3cba5fd5..bf75cc37 100644 --- a/lib/natural/tokenizers/sentence_tokenizer.js +++ b/lib/natural/tokenizers/sentence_tokenizer.js @@ -82,7 +82,7 @@ class SentenceTokenizer extends Tokenizer { // Regular expression for sentence delimiters optionally followed by a bracket or quote // Multiple delimiters with spaces in between are allowed // The expression makes sure that the sentence delimiter group ends with a sentence delimiter - const delimiterPattern = /([.?!… ]*)([.?!…])(["'”’)}\]]?)/g; + const delimiterPattern = /([.?!… ]*)([.?!…])(["'”’)}\]]?)/g const modifiedText = text.replace(delimiterPattern, (match, p1, p2, p3) => { const placeholder = generateUniqueCode(DELIM, this.replacementCounter++) this.delimiterMap.set(placeholder, p1 + p2 + p3)