Skip to content

Commit

Permalink
perf: read file token error
Browse files — browse the repository at this point in the history
  • Loading branch information
c121914yu committed Aug 26, 2023
1 parent 93030af commit 13439c5
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions client/src/utils/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,16 @@ export const splitText2Chunks = ({ text, maxLen }: { text: string; maxLen: numbe
chunks.push(chunk);
}

const enc = getOpenAiEncMap();
const encodeText = enc.encode(chunks.join(''));
const tokens = encodeText.length;
const tokens = (() => {
try {
const enc = getOpenAiEncMap();
const encodeText = enc.encode(chunks.join(''));
const tokens = encodeText.length;
return tokens;
} catch (error) {
return chunks.join('').length;
}
})();

return {
chunks,
Expand Down Expand Up @@ -274,5 +281,6 @@ export const simpleText = (text: string) => {
text = text.replace(/([\u4e00-\u9fa5])\s+([\u4e00-\u9fa5])/g, '$1$2');
text = text.replace(/\n{2,}/g, '\n');
text = text.replace(/\s{2,}/g, ' ');
text = text.replace(/[^\x00-\x7F]/g, ' ');
return text;
};

0 comments on commit 13439c5

Please sign in to comment.