diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
index 1da89515f7..33a76dba40 100644
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -1086,10 +1086,10 @@ bool llama_model_loader::load_all_data(
             } else {
                 // If upload_backend is valid load the tensor in chunks to pinned memory and upload the buffers asynchronously to the GPU.
                 if (upload_backend) {
-                    auto offset = (off_t) weight->offs;
+                    size_t offset = weight->offs;
                     alignment = file->read_alignment();
-                    off_t aligned_offset = offset & ~(alignment - 1);
-                    off_t offset_from_alignment = offset - aligned_offset;
+                    size_t aligned_offset = offset & ~(alignment - 1);
+                    size_t offset_from_alignment = offset - aligned_offset;
                     file->seek(aligned_offset, SEEK_SET);
 
                     // Calculate aligned read boundaries
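
For reference, the align-down arithmetic kept by this hunk (`offset & ~(alignment - 1)`) can be shown in isolation. This is a minimal standalone sketch, not the loader code itself: only the variable names `offset`, `alignment`, `aligned_offset`, and `offset_from_alignment` come from the diff, and the concrete values are illustrative. It assumes the alignment is a power of two, which is what the bit trick relies on.

```cpp
#include <cassert>
#include <cstdio>

int main() {
    // Illustrative values (not from the patch): a tensor offset within the file
    // and a power-of-two read alignment such as a page or sector size.
    size_t offset    = 4096 * 100 + 100;
    size_t alignment = 4096;

    // The mask trick only rounds down correctly for power-of-two alignments.
    assert((alignment & (alignment - 1)) == 0 && "alignment must be a power of two");

    // Same arithmetic as the hunk: round the offset down to the nearest alignment
    // boundary, then record how far into that aligned block the tensor data starts.
    size_t aligned_offset        = offset & ~(alignment - 1);
    size_t offset_from_alignment = offset - aligned_offset;

    // A reader would seek to aligned_offset and skip the first
    // offset_from_alignment bytes of the aligned read.
    std::printf("offset=%zu aligned_offset=%zu skip=%zu\n",
                offset, aligned_offset, offset_from_alignment);
    return 0;
}
```

Doing the computation in `size_t` rather than `off_t` keeps the mask arithmetic in the same unsigned type as `weight->offs` and `alignment`, avoiding signed/unsigned conversions in the bitwise expression.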