@@ -225,32 +225,38 @@ def detokenize(self, tokens: List[int], special: bool = False) -> bytes:
225225 # Extra
def metadata(self) -> Dict[str, str]:
    """Return the model's metadata as a dict mapping key strings to value strings.

    Every index in ``range(llama_model_meta_count(self.model))`` is queried
    for its key and its string-rendered value through a shared ctypes
    buffer that is grown on demand.

    Returns:
        A dict with one entry per metadata index whose key and value were
        both read successfully.

    Raises:
        UnicodeDecodeError: if a key or value is not valid UTF-8.
    """
    metadata: Dict[str, str] = {}
    # 16 KiB fits nearly every metadata string (large chat templates of
    # ~15 KB included) in a single call, so the grow-and-retry path is rare.
    buffer_size = 16384
    buffer = ctypes.create_string_buffer(buffer_size)

    def read_string(getter, index):
        """Read one metadata string via *getter*; return None on failure."""
        nonlocal buffer, buffer_size
        nbytes = getter(self.model, index, buffer, buffer_size)
        # Per llama.h the getters return the string length (snprintf-style,
        # NUL terminator excluded).  A result equal to the buffer size
        # therefore means the last byte was dropped to make room for the
        # terminator, so the comparison must be ``>=`` rather than ``>``.
        if nbytes >= buffer_size:
            # Grow with headroom so slightly longer later entries do not
            # trigger another reallocation.
            buffer_size = nbytes + 1024
            buffer = ctypes.create_string_buffer(buffer_size)
            nbytes = getter(self.model, index, buffer, buffer_size)
        if nbytes < 0:
            # Negative return is the documented failure signal.
            return None
        return buffer.value.decode("utf-8")

    # Bind the C entry points once instead of resolving the module
    # attributes on every iteration.
    get_key_by_index = llama_cpp.llama_model_meta_key_by_index
    get_val_by_index = llama_cpp.llama_model_meta_val_str_by_index
    metadata_count = llama_cpp.llama_model_meta_count(self.model)

    # iterate over model keys
    for i in range(metadata_count):
        key = read_string(get_key_by_index, i)
        if key is None:
            continue
        value = read_string(get_val_by_index, i)
        if value is None:
            continue
        metadata[key] = value
    return metadata
256262
0 commit comments