From fd3cc028d45429c3f1ada92dbd6c429183c098d7 Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Wed, 3 Jul 2024 22:51:31 -0400 Subject: [PATCH] Fix reuse of pipelines including IterBlock When running `sdg.generate()` more than once, a pipeline including an `IterBlock` would fail because the `block_kwargs` were removed the first time the block was used. This change removes the special handling for `IterBlock` because it does not appear to be necessary. The new code ends up calling the block's constructor with all of the same arguments, but now `block_kwargs` doesn't disappear. Signed-off-by: Russell Bryant --- src/instructlab/sdg/pipeline.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/instructlab/sdg/pipeline.py b/src/instructlab/sdg/pipeline.py index fc93f78d..bc570a83 100644 --- a/src/instructlab/sdg/pipeline.py +++ b/src/instructlab/sdg/pipeline.py @@ -3,7 +3,6 @@ from datasets import Dataset # Local -from .iterblock import IterBlock from .logger_config import setup_logger logger = setup_logger(__name__) @@ -39,12 +38,6 @@ def generate(self, dataset) -> Dataset: drop_duplicates_cols = block_prop.get("drop_duplicates", False) block = block_type(**block_config) - if block_type == IterBlock: - block_kwargs = block_config.pop("block_kwargs") - block = block_type(**block_config, block_kwargs=block_kwargs) - else: - block = block_type(**block_config) - logger.info("Running block: %s", block_config["block_name"]) logger.info(dataset)