From 34051369baf569b0309aa00355d5346f1683ec63 Mon Sep 17 00:00:00 2001
From: Alberto Mannari
Date: Fri, 10 Oct 2025 13:46:53 +0000
Subject: [PATCH] Fix parsing of escaped and unicode characters from custom prompts

Signed-off-by: Alberto Mannari
---
 aiu_fms_testing_utils/scripts/drive_paged_programs.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/aiu_fms_testing_utils/scripts/drive_paged_programs.py b/aiu_fms_testing_utils/scripts/drive_paged_programs.py
index 2dcd0216..b573ae02 100644
--- a/aiu_fms_testing_utils/scripts/drive_paged_programs.py
+++ b/aiu_fms_testing_utils/scripts/drive_paged_programs.py
@@ -195,11 +195,12 @@
             "Using custom prompts from user, programs parameter will be ignored as it will be determined by user prompt"
         )
     result = []
-    with open(DATASET_PATH, "r") as file:
+    with open(DATASET_PATH, "rb") as file:
         for line in file:
-            res_line = line.strip()
+            res_line = line.decode("unicode_escape").strip()
             result.append((res_line, get_pad_size(len(tokenizer.encode(res_line)))))
     custom_shape = (len(result), max([_[1] for _ in result]))
+    dprint(f"Custom shape: {custom_shape}")
 
     def __custom_line_sampler(*args, **kwargs):
         return_key = kwargs.get("return_key", False)