From 2c0b4591ae4a13a89a73fb29a170adf6e52b3903 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Tue, 25 Feb 2025 10:27:56 +0100 Subject: [PATCH 1/4] feat: add refactoring of merge and parse --- scrapegraphai/nodes/generate_answer_node.py | 31 +++++++++++++++++++++ uv.lock | 2 +- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index f7f20cf8..d50b72db 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -87,6 +87,37 @@ def invoke_with_timeout(self, chain, inputs, timeout): self.logger.error(f"Error during chain execution: {str(e)}") raise + def process(self, state: dict) -> dict: + """Process the input state and generate an answer.""" + user_prompt = state.get("user_prompt") + # Check for content in different possible state keys + content = ( + state.get("relevant_chunks") + or state.get("parsed_doc") + or state.get("doc") + or state.get("content") + ) + + if not content: + raise ValueError("No content found in state to generate answer from") + + if not user_prompt: + raise ValueError("No user prompt found in state") + + # Create the chain input with both content and question keys + chain_input = { + "content": content, + "question": user_prompt + } + + try: + response = self.invoke_with_timeout(self.chain, chain_input, self.timeout) + state.update({self.output[0]: response}) + return state + except Exception as e: + self.logger.error(f"Error in GenerateAnswerNode: {str(e)}") + raise + def execute(self, state: dict) -> dict: """ Executes the GenerateAnswerNode. diff --git a/uv.lock b/uv.lock index 000a667c..415aade0 100644 --- a/uv.lock +++ b/uv.lock @@ -3446,7 +3446,7 @@ wheels = [ [[package]] name = "scrapegraphai" -version = "1.36.0" +version = "1.39.0b1" source = { editable = "." } dependencies = [ { name = "async-timeout", version = "4.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, From ac2fcd66ce2603153877e3141b3ff862a348e335 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 25 Feb 2025 09:29:03 +0000 Subject: [PATCH 2/4] ci(release): 1.39.0-beta.2 [skip ci] ## [1.39.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.39.0-beta.1...v1.39.0-beta.2) (2025-02-25) ### Features * add refactoring of merge and parse ([2c0b459](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2c0b4591ae4a13a89a73fb29a170adf6e52b3903)) ### CI * **release:** 1.38.1 [skip ci] ([5c3d62d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c3d62d55b5c6dcbb304b5879a19ca09bc18b153)) --- CHANGELOG.md | 12 ++++++++++++ pyproject.toml | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cad2238..6b833bf7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +## [1.39.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.39.0-beta.1...v1.39.0-beta.2) (2025-02-25) + + +### Features + +* add refactoring of merge and parse ([2c0b459](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2c0b4591ae4a13a89a73fb29a170adf6e52b3903)) + + +### CI + +* **release:** 1.38.1 [skip ci] ([5c3d62d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c3d62d55b5c6dcbb304b5879a19ca09bc18b153)) + ## [1.39.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.38.1-beta.1...v1.39.0-beta.1) (2025-02-17) diff --git a/pyproject.toml b/pyproject.toml index 3ce4c882..10a776a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.39.0b1" +version = "1.39.0b2" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." From 8cf96857a000eada6d1c9ce1a357ee3d1f2bd003 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Tue, 25 Feb 2025 12:09:58 +0100 Subject: [PATCH 3/4] feat: update parse node --- scrapegraphai/nodes/parse_node.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scrapegraphai/nodes/parse_node.py b/scrapegraphai/nodes/parse_node.py index fbc9ba31..cb61a643 100644 --- a/scrapegraphai/nodes/parse_node.py +++ b/scrapegraphai/nodes/parse_node.py @@ -78,7 +78,6 @@ def execute(self, state: dict) -> dict: self.logger.info(f"--- Executing {self.node_name} Node ---") input_keys = self.get_input_keys(state) - input_data = [state[key] for key in input_keys] docs_transformed = input_data[0] source = input_data[1] if self.parse_urls else None @@ -121,6 +120,9 @@ def execute(self, state: dict) -> dict: ) state.update({self.output[0]: chunks}) + state.update({"parsed_doc": chunks}) + state.update({"content": chunks}) + if self.parse_urls: state.update({self.output[1]: link_urls}) state.update({self.output[2]: img_urls}) From 71053bc7586b0e723272d0eb7e668c07aa666eae Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 25 Feb 2025 11:12:29 +0000 Subject: [PATCH 4/4] ci(release): 1.40.0-beta.1 [skip ci] ## [1.40.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.39.0...v1.40.0-beta.1) (2025-02-25) ### Features * add refactoring of merge and parse ([2c0b459](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2c0b4591ae4a13a89a73fb29a170adf6e52b3903)) * update parse node ([8cf9685](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/8cf96857a000eada6d1c9ce1a357ee3d1f2bd003)) ### CI * **release:** 1.39.0-beta.2 [skip ci] ([ac2fcd6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ac2fcd66ce2603153877e3141b3ff862a348e335)) --- CHANGELOG.md | 13 +++++++++++++ pyproject.toml | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e3c0f40f..13c9e588 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +## [1.40.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.39.0...v1.40.0-beta.1) (2025-02-25) + + +### Features + +* add refactoring of merge and parse ([2c0b459](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2c0b4591ae4a13a89a73fb29a170adf6e52b3903)) +* update parse node ([8cf9685](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/8cf96857a000eada6d1c9ce1a357ee3d1f2bd003)) + + +### CI + +* **release:** 1.39.0-beta.2 [skip ci] ([ac2fcd6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ac2fcd66ce2603153877e3141b3ff862a348e335)) + ## [1.39.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.39.0-beta.1...v1.39.0-beta.2) (2025-02-25) diff --git a/pyproject.toml b/pyproject.toml index fcecc70b..7617f99e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.39.0b2" +version = "1.40.0b1"