Skip to content

Commit 2c8cbb4

Browse files
committed
fix: use provider-specific token estimation for manual chunks
Match the document-processing heuristic: Ollama estimates tokens as length/3, while OpenAI uses estimateTokenCount
1 parent 5638d3a commit 2c8cbb4

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

apps/sim/lib/knowledge/chunks/service.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,9 @@ export async function createChunk(
185185
kbOllamaBaseUrl
186186
)
187187

188-
const tokenCount = estimateTokenCount(chunkData.content, 'openai')
188+
const tokenCount = isOllama
189+
? { count: Math.ceil(chunkData.content.length / 3) }
190+
: estimateTokenCount(chunkData.content, 'openai')
189191
const chunkId = generateId()
190192
const now = new Date()
191193

@@ -549,7 +551,9 @@ export async function updateChunk(
549551
kbOllamaBaseUrl
550552
)
551553
newEmbedding = embeddings[0]
552-
newTokenCount = estimateTokenCount(content, 'openai').count
554+
newTokenCount = isOllama
555+
? Math.ceil(content.length / 3)
556+
: estimateTokenCount(content, 'openai').count
553557
}
554558

555559
const newHash = createHash('sha256').update(content).digest('hex')

0 commit comments

Comments (0)