1313#ifdef __has_include
1414 #if __has_include(<unistd.h>)
1515 #include < unistd.h>
16+ #include < fcntl.h>
17+ #include < sys/stat.h>
1618 #if defined(_POSIX_MAPPED_FILES)
1719 #include < sys/mman.h>
18- #include < fcntl.h>
1920 #endif
2021 #if defined(_POSIX_MEMLOCK_RANGE)
2122 #include < sys/resource.h>
@@ -74,7 +75,7 @@ struct llama_file::impl {
7475 return ret;
7576 }
7677
77- impl (const char * fname, const char * mode) {
78+ impl (const char * fname, const char * mode, [[maybe_unused]] const bool use_direct_io = false ) {
7879 fp = ggml_fopen (fname, mode);
7980 if (fp == NULL ) {
8081 throw std::runtime_error (format (" failed to open %s: %s" , fname, strerror (errno)));
@@ -153,13 +154,40 @@ struct llama_file::impl {
153154 write_raw (&val, sizeof (val));
154155 }
155156
157+ void read_aligned_chunk (size_t offset, void * dest, size_t size) const {
158+ throw std::runtime_error (" DirectIO is not implemented on Windows." );
159+ }
160+
156161 ~impl () {
157162 if (fp) {
158163 std::fclose (fp);
159164 }
160165 }
161166#else
162- impl (const char * fname, const char * mode) {
167+ impl (const char * fname, const char * mode, [[maybe_unused]] const bool use_direct_io = false ) {
168+ #ifdef __linux__
169+ // Try unbuffered I/O for read only
170+ if (use_direct_io && std::strcmp (mode, " rb" ) == 0 ) {
171+ fd = open (fname, O_RDONLY | O_DIRECT);
172+
173+ if (fd != -1 ) {
174+ struct stat file_stats{};
175+ fstat (fd, &file_stats);
176+
177+ size = file_stats.st_size ;
178+ alignment = file_stats.st_blksize ;
179+
180+ off_t ret = lseek (fd, 0 , SEEK_SET);
181+ if (ret == -1 ) {
182+ throw std::runtime_error (format (" seek error: %s" , strerror (errno)));
183+ }
184+ return ;
185+ }
186+
187+ LLAMA_LOG_WARN (" Failed to open model %s with error: %s. Falling back to buffered I/O" ,
188+ fname, strerror (errno));
189+ }
190+ #endif
163191 fp = ggml_fopen (fname, mode);
164192 if (fp == NULL ) {
165193 throw std::runtime_error (format (" failed to open %s: %s" , fname, strerror (errno)));
@@ -170,27 +198,30 @@ struct llama_file::impl {
170198 }
171199
172200 size_t tell () const {
173- // TODO: this ifdef is never true?
174- #ifdef _WIN32
175- __int64 ret = _ftelli64 (fp);
176- #else
177- long ret = std::ftell (fp);
178- #endif
179- if (ret == -1 ) {
180- throw std::runtime_error (format (" ftell error: %s" , strerror (errno)));
201+ if (fd == -1 ) {
202+ long ret = std::ftell (fp);
203+ if (ret == -1 ) {
204+ throw std::runtime_error (format (" ftell error: %s" , strerror (errno)));
205+ }
206+
207+ return (size_t ) ret;
181208 }
182209
183- return (size_t ) ret;
210+ off_t pos = lseek (fd, 0 , SEEK_CUR);
211+ if (pos == -1 ) {
212+ throw std::runtime_error (format (" lseek error: %s" , strerror (errno)));
213+ }
214+ return (size_t ) pos;
184215 }
185216
186217 void seek (size_t offset, int whence) const {
187- // TODO: this ifdef is never true?
188- # ifdef _WIN32
189- int ret = _fseeki64 (fp, (__int64 ) offset, whence);
190- # else
191- int ret = std::fseek (fp, ( long ) offset, whence);
192- # endif
193- if (ret != 0 ) {
218+ off_t ret = 0 ;
219+ if (fd == - 1 ) {
220+ ret = std::fseek (fp, (long ) offset, whence);
221+ } else {
222+ ret = lseek (fd, offset, whence);
223+ }
224+ if (ret == - 1 ) {
194225 throw std::runtime_error (format (" seek error: %s" , strerror (errno)));
195226 }
196227 }
@@ -200,13 +231,55 @@ struct llama_file::impl {
200231 return ;
201232 }
202233 errno = 0 ;
203- std::size_t ret = std::fread (ptr, len, 1 , fp);
204- if (ferror (fp)) {
205- throw std::runtime_error (format (" read error: %s" , strerror (errno)));
234+ if (fd == -1 ) {
235+ std::size_t ret = std::fread (ptr, len, 1 , fp);
236+ if (ferror (fp)) {
237+ throw std::runtime_error (format (" read error: %s" , strerror (errno)));
238+ }
239+ if (ret != 1 ) {
240+ throw std::runtime_error (" unexpectedly reached end of file" );
241+ }
242+ } else {
243+ bool successful = false ;
244+ while (!successful) {
245+ off_t ret = read (fd, ptr, len);
246+
247+ if (ret == -1 ) {
248+ if (errno == EINTR) {
249+ continue ; // Interrupted by signal, retry
250+ }
251+ throw std::runtime_error (format (" read error: %s" , strerror (errno)));
252+ }
253+ if (ret == 0 ) {
254+ throw std::runtime_error (" unexpectedly reached end of file" );
255+ }
256+
257+ successful = true ;
258+ }
206259 }
207- if (ret != 1 ) {
208- throw std::runtime_error (" unexpectedly reached end of file" );
260+ }
261+
262+ void read_aligned_chunk (size_t offset, void * dest, size_t size) const {
263+ off_t aligned_offset = offset & ~(alignment - 1 );
264+ off_t offset_from_alignment = offset - aligned_offset;
265+ size_t bytes_to_read = (offset_from_alignment + size + alignment - 1 ) & ~(alignment - 1 );
266+
267+ void * raw_buffer = nullptr ;
268+ int ret = posix_memalign (&raw_buffer, alignment, bytes_to_read);
269+ if (ret != 0 ) {
270+ throw std::runtime_error (format (" posix_memalign failed with error %d" , ret));
209271 }
272+
273+ struct aligned_buffer_deleter {
274+ void operator ()(void * p) const { free (p); }
275+ };
276+ std::unique_ptr<void , aligned_buffer_deleter> buffer (raw_buffer);
277+
278+ seek (aligned_offset, SEEK_SET);
279+ read_raw (buffer.get (), bytes_to_read);
280+
281+ uintptr_t actual_data = reinterpret_cast <uintptr_t >(buffer.get ()) + offset_from_alignment;
282+ memcpy (dest, reinterpret_cast <void *>(actual_data), size);
210283 }
211284
212285 uint32_t read_u32 () const {
@@ -231,22 +304,43 @@ struct llama_file::impl {
231304 }
232305
233306 ~impl () {
234- if (fp) {
307+ if (fd != -1 ) {
308+ close (fd);
309+ } else {
235310 std::fclose (fp);
236311 }
237312 }
313+ int fd = -1 ;
238314#endif
239315
240- FILE * fp;
241- size_t size;
316+ void read_raw_at (void * ptr, size_t len, size_t offset) const {
317+ if (alignment != 1 ) {
318+ read_aligned_chunk (offset, ptr, len);
319+ } else {
320+ seek (offset, SEEK_SET);
321+ read_raw (ptr, len);
322+ }
323+ }
324+
325+ size_t read_alignment () const {
326+ return alignment;
327+ }
328+
329+ size_t alignment = 1 ;
330+
331+ FILE * fp{};
332+ size_t size{};
242333};
243334
244- llama_file::llama_file (const char * fname, const char * mode) : pimpl(std::make_unique<impl>(fname, mode)) {}
335+ llama_file::llama_file (const char * fname, const char * mode, const bool use_direct_io) :
336+ pimpl(std::make_unique<impl>(fname, mode, use_direct_io)) {}
245337llama_file::~llama_file () = default ;
246338
247339size_t llama_file::tell () const { return pimpl->tell (); }
248340size_t llama_file::size () const { return pimpl->size ; }
249341
342+ size_t llama_file::read_alignment () const { return pimpl->read_alignment (); }
343+
250344int llama_file::file_id () const {
251345#ifdef _WIN32
252346 return _fileno (pimpl->fp );
@@ -261,6 +355,7 @@ int llama_file::file_id() const {
261355
262356void llama_file::seek (size_t offset, int whence) const { pimpl->seek (offset, whence); }
263357void llama_file::read_raw (void * ptr, size_t len) const { pimpl->read_raw (ptr, len); }
358+ void llama_file::read_raw_at (void * ptr, size_t len, size_t offset) const { pimpl->read_raw_at (ptr, len, offset); }
264359
265360uint32_t llama_file::read_u32 () const { return pimpl->read_u32 (); }
266361
0 commit comments