hpcaitech · thecaptain789 · Feb 7, 2026
@@ -28,10 +28,10 @@ def jload(f, mode="r"):
 
 def read_string_by_schema(data: Dict[str, Any], schema: str) -> str:
     """
-    Read a feild of the dataset be schema
+    Read a field of the dataset by schema
     Args:
         data: Dict[str, Any]
-        schema: cascaded feild names seperated by '.'. e.g. person.name.first will access data['person']['name']['first']
+        schema: cascaded field names separated by '.'. e.g. person.name.first will access data['person']['name']['first']
     """
     keys = schema.split(".")
     result = data

@@ -69,7 +69,7 @@ def disambiguity(input):
         data_name = input("Enter a short description of the data:")
         separator = input(
             "Enter a separator to force separating text into chunks, if no separator is given, the default separator is '\\n\\n'. Note that"
-            + "we use neural text spliter to split texts into chunks, the seperator only serves as a delimiter to force split long passage into"
+            + "we use neural text splitter to split texts into chunks, the separator only serves as a delimiter to force split long passage into"
             + " chunks before passing to the neural network. Press ENTER directly to skip:"
         )
         separator = separator if separator != "" else "\n\n"

@@ -227,7 +227,7 @@ def save_quantized(
                             new_value = str(value)
                         except Exception as e:
                             raise TypeError(
-                                f"safetensors_metadata: both keys and values must be strings and an error occured when trying to convert them: {e}"
+                                f"safetensors_metadata: both keys and values must be strings and an error occurred when trying to convert them: {e}"
                             )
                         if new_key in new_safetensors_metadata:
                             print(

@@ -142,7 +142,7 @@ def preprocess(self, requests):
 
             input_ids = inputs["input_ids"].to(self.device)
             attention_mask = inputs["attention_mask"].to(self.device)
-            # making a batch out of the recieved requests
+            # making a batch out of the received requests
             # attention masks are passed for cases where input tokens are padded.
             if input_ids.shape is not None:
                 if input_ids_batch is None: