Skip to content

Commit f1108d2

Browse files
committed
Fix token estimation
1 parent dd0d7b4 commit f1108d2

File tree

2 files changed

+11
-10
lines changed

2 files changed

+11
-10
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ fastapi[standard]
 uvicorn
 fastapi-analytics
 slowapi
-tokencost
+tiktoken
 pytest
 pytest-asyncio
 click>=8.0.0

src/gitingest/ingest_from_query.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
 import os
 from fnmatch import fnmatch
 from typing import Dict, List, Union
-import asyncio
+import tiktoken
 
-from tokencost import count_string_tokens
-from gitingest.parse_query import parse_query
 
 MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB
 MAX_DIRECTORY_DEPTH = 20 # Maximum depth of directory traversal
@@ -267,18 +265,21 @@ def create_tree_structure(query: dict, node: Dict, prefix: str = "", is_last: bo
     return tree
 
 def generate_token_string(context_string: str) -> str:
+    """Returns the number of tokens in a text string."""
     formatted_tokens = ""
     try:
-        total_gpt_tokens = count_string_tokens(prompt=context_string, model="gpt-4o")
+        encoding = tiktoken.get_encoding("cl100k_base", )
+        total_tokens = len(encoding.encode(context_string, disallowed_special=()))
+
     except Exception as e:
         print(e)
         return None
-    if total_gpt_tokens > 1000000:
-        formatted_tokens = f"{total_gpt_tokens/1000000:.1f}M"
-    elif total_gpt_tokens > 1000:
-        formatted_tokens = f"{total_gpt_tokens/1000:.1f}k"
+    if total_tokens > 1000000:
+        formatted_tokens = f"{total_tokens/1000000:.1f}M"
+    elif total_tokens > 1000:
+        formatted_tokens = f"{total_tokens/1000:.1f}k"
     else:
-        formatted_tokens = f"{total_gpt_tokens}"
+        formatted_tokens = f"{total_tokens}"
     return formatted_tokens
 
 def ingest_single_file(path: str, query: dict) -> Dict:

0 commit comments

Comments
 (0)