Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions applications/ColossalChat/coati/dataset/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ def jload(f, mode="r"):

def read_string_by_schema(data: Dict[str, Any], schema: str) -> str:
"""
Read a feild of the dataset be schema
Read a field of the dataset by schema
Args:
data: Dict[str, Any]
schema: cascaded feild names seperated by '.'. e.g. person.name.first will access data['person']['name']['first']
schema: cascaded field names separated by '.'. e.g. person.name.first will access data['person']['name']['first']
"""
keys = schema.split(".")
result = data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def disambiguity(input):
data_name = input("Enter a short description of the data:")
separator = input(
"Enter a separator to force separating text into chunks, if no separator is given, the default separator is '\\n\\n'. Note that"
+ "we use neural text spliter to split texts into chunks, the seperator only serves as a delimiter to force split long passage into"
+ "we use neural text splitter to split texts into chunks, the separator only serves as a delimiter to force split long passage into"
+ " chunks before passing to the neural network. Press ENTER directly to skip:"
)
separator = separator if separator != "" else "\n\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def save_quantized(
new_value = str(value)
except Exception as e:
raise TypeError(
f"safetensors_metadata: both keys and values must be strings and an error occured when trying to convert them: {e}"
f"safetensors_metadata: both keys and values must be strings and an error occurred when trying to convert them: {e}"
)
if new_key in new_safetensors_metadata:
print(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def preprocess(self, requests):

input_ids = inputs["input_ids"].to(self.device)
attention_mask = inputs["attention_mask"].to(self.device)
# making a batch out of the recieved requests
# making a batch out of the received requests
# attention masks are passed for cases where input tokens are padded.
if input_ids.shape is not None:
if input_ids_batch is None:
Expand Down