diff --git a/client/src/utils/file.ts b/client/src/utils/file.ts index 57ee3dac356..32735e2800e 100644 --- a/client/src/utils/file.ts +++ b/client/src/utils/file.ts @@ -173,9 +173,16 @@ export const splitText2Chunks = ({ text, maxLen }: { text: string; maxLen: numbe chunks.push(chunk); } - const enc = getOpenAiEncMap(); - const encodeText = enc.encode(chunks.join('')); - const tokens = encodeText.length; + const tokens = (() => { + try { + const enc = getOpenAiEncMap(); + const encodeText = enc.encode(chunks.join('')); + const tokens = encodeText.length; + return tokens; + } catch (error) { + return chunks.join('').length; + } + })(); return { chunks, @@ -274,5 +281,6 @@ export const simpleText = (text: string) => { text = text.replace(/([\u4e00-\u9fa5])\s+([\u4e00-\u9fa5])/g, '$1$2'); text = text.replace(/\n{2,}/g, '\n'); text = text.replace(/\s{2,}/g, ' '); + text = text.replace(/[^\x00-\x7F]/g, ' '); return text; };