Skip to content

Commit 91ede93

Browse files
committed
refactoring of the code
1 parent eb25725 commit 91ede93

File tree

8 files changed

+49
-41
lines changed

8 files changed

+49
-41
lines changed

scrapegraphai/graphs/code_generator_graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def _create_graph(self) -> BaseGraph:
6666
BaseGraph: A graph instance representing the web scraping workflow.
6767
"""
6868

69-
if self.schema is None:
69+
if self.schema is None:
7070
raise KeyError("The schema is required for CodeGeneratorGraph")
7171

7272
fetch_node = FetchNode(

scrapegraphai/integrations/burr_bridge.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
try:
1212
import burr
1313
from burr import tracking
14-
from burr.core import Application, ApplicationBuilder, State, Action, default, ApplicationContext
14+
from burr.core import (Application, ApplicationBuilder,
15+
State, Action, default, ApplicationContext)
1516
from burr.lifecycle import PostRunStepHook, PreRunStepHook
1617
except ImportError:
1718
raise ImportError("""burr package is not installed.

scrapegraphai/integrations/indexify_node.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,21 +50,13 @@ def execute(self, state: dict) -> dict:
5050

5151
self.logger.info(f"--- Executing {self.node_name} Node ---")
5252

53-
# Interpret input keys based on the provided input expression
54-
# input_keys length matches the min_input_len parameter in the __init__ method
55-
# e.g. "answer & parsed_doc" or "answer | img_urls"
56-
5753
input_keys = self.get_input_keys(state)
5854

59-
# Fetching data from the state based on the input keys
6055
input_data = [state[key] for key in input_keys]
6156

6257
answer = input_data[0]
6358
img_urls = input_data[1]
6459

65-
# Indexify the content
66-
# ...
67-
6860
isIndexified = True
6961
state.update({self.output[0]: isIndexified})
7062

scrapegraphai/nodes/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
__init__.py file for node folder
2+
__init__.py file for node folder module
33
"""
44

55
from .base_node import BaseNode
@@ -19,7 +19,7 @@
1919
from .graph_iterator_node import GraphIteratorNode
2020
from .merge_answers_node import MergeAnswersNode
2121
from .generate_answer_omni_node import GenerateAnswerOmniNode
22-
from .merge_generated_scripts import MergeGeneratedScriptsNode
22+
from .merge_generated_scripts_node import MergeGeneratedScriptsNode
2323
from .fetch_screen_node import FetchScreenNode
2424
from .generate_answer_from_image_node import GenerateAnswerFromImageNode
2525
from .concat_answers_node import ConcatAnswersNode
@@ -32,4 +32,4 @@
3232
from .fetch_node_level_k import FetchNodeLevelK
3333
from .generate_answer_node_k_level import GenerateAnswerNodeKLevel
3434
from .description_node import DescriptionNode
35-
from .parse_node_depth_k import ParseNodeDepthK
35+
from .parse_node_depth_k_node import ParseNodeDepthK

scrapegraphai/nodes/merge_generated_scripts.py renamed to scrapegraphai/nodes/merge_generated_scripts_node.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
MergeAnswersNode Module
33
"""
44
from typing import List, Optional
5-
from tqdm import tqdm
65
from langchain.prompts import PromptTemplate
7-
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
6+
from langchain_core.output_parsers import StrOutputParser
7+
from ..prompts import TEMPLATE_MERGE_SCRIPTS_PROMPT
88
from ..utils.logging import get_logger
99
from .base_node import BaseNode
1010

@@ -51,10 +51,8 @@ def execute(self, state: dict) -> dict:
5151

5252
self.logger.info(f"--- Executing {self.node_name} Node ---")
5353

54-
# Interpret input keys based on the provided input expression
5554
input_keys = self.get_input_keys(state)
5655

57-
# Fetching data from the state based on the input keys
5856
input_data = [state[key] for key in input_keys]
5957

6058
user_prompt = input_data[0]
@@ -67,20 +65,8 @@ def execute(self, state: dict) -> dict:
6765
scripts_str += "-----------------------------------\n"
6866
scripts_str += script
6967

70-
TEMPLATE_MERGE = """
71-
You are a python expert in web scraping and you have just generated multiple scripts to scrape different URLs.\n
72-
The scripts are generated based on a user question and the content of the websites.\n
73-
You need to create one single script that merges the scripts generated for each URL.\n
74-
The scraped contents are in a JSON format and you need to merge them based on the context and providing a correct JSON structure.\n
75-
The output should be just in python code without any comment and should implement the main function.\n
76-
The python script, when executed, should format the extracted information sticking to the user question and scripts output format.\n
77-
USER PROMPT: {user_prompt}\n
78-
SCRIPTS:\n
79-
{scripts}
80-
"""
81-
8268
prompt_template = PromptTemplate(
83-
template=TEMPLATE_MERGE,
69+
template=TEMPLATE_MERGE_SCRIPTS_PROMPT,
8470
input_variables=["user_prompt"],
8571
partial_variables={
8672
"scripts": scripts_str,
@@ -90,6 +76,5 @@ def execute(self, state: dict) -> dict:
9076
merge_chain = prompt_template | self.llm_model | StrOutputParser()
9177
answer = merge_chain.invoke({"user_prompt": user_prompt})
9278

93-
# Update the state with the generated answer
9479
state.update({self.output[0]: answer})
9580
return state

scrapegraphai/prompts/__init__.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,37 @@
22
__init__.py for the prompts folder
33
"""
44

5-
from .generate_answer_node_prompts import TEMPLATE_CHUNKS, TEMPLATE_NO_CHUNKS, TEMPLATE_MERGE, TEMPLATE_CHUNKS_MD, TEMPLATE_NO_CHUNKS_MD, TEMPLATE_MERGE_MD
6-
from .generate_answer_node_csv_prompts import TEMPLATE_CHUKS_CSV, TEMPLATE_NO_CHUKS_CSV, TEMPLATE_MERGE_CSV
7-
from .generate_answer_node_pdf_prompts import TEMPLATE_CHUNKS_PDF, TEMPLATE_NO_CHUNKS_PDF, TEMPLATE_MERGE_PDF
8-
from .generate_answer_node_omni_prompts import TEMPLATE_CHUNKS_OMNI, TEMPLATE_NO_CHUNKS_OMNI, TEMPLATE_MERGE_OMNI
5+
from .generate_answer_node_prompts import (TEMPLATE_CHUNKS,
6+
TEMPLATE_NO_CHUNKS,
7+
TEMPLATE_MERGE, TEMPLATE_CHUNKS_MD,
8+
TEMPLATE_NO_CHUNKS_MD, TEMPLATE_MERGE_MD)
9+
from .generate_answer_node_csv_prompts import (TEMPLATE_CHUKS_CSV,
10+
TEMPLATE_NO_CHUKS_CSV,
11+
TEMPLATE_MERGE_CSV)
12+
from .generate_answer_node_pdf_prompts import (TEMPLATE_CHUNKS_PDF,
13+
TEMPLATE_NO_CHUNKS_PDF,
14+
TEMPLATE_MERGE_PDF)
15+
from .generate_answer_node_omni_prompts import (TEMPLATE_CHUNKS_OMNI,
16+
TEMPLATE_NO_CHUNKS_OMNI,
17+
TEMPLATE_MERGE_OMNI)
918
from .merge_answer_node_prompts import TEMPLATE_COMBINED
1019
from .robots_node_prompts import TEMPLATE_ROBOT
1120
from .search_internet_node_prompts import TEMPLATE_SEARCH_INTERNET
1221
from .search_link_node_prompts import TEMPLATE_RELEVANT_LINKS
13-
from .search_node_with_context_prompts import TEMPLATE_SEARCH_WITH_CONTEXT_CHUNKS, TEMPLATE_SEARCH_WITH_CONTEXT_NO_CHUNKS
22+
from .search_node_with_context_prompts import (TEMPLATE_SEARCH_WITH_CONTEXT_CHUNKS,
23+
TEMPLATE_SEARCH_WITH_CONTEXT_NO_CHUNKS)
1424
from .prompt_refiner_node_prompts import TEMPLATE_REFINER, TEMPLATE_REFINER_WITH_CONTEXT
1525
from .html_analyzer_node_prompts import TEMPLATE_HTML_ANALYSIS, TEMPLATE_HTML_ANALYSIS_WITH_CONTEXT
1626
from .generate_code_node_prompts import (TEMPLATE_INIT_CODE_GENERATION,
17-
TEMPLATE_SYNTAX_ANALYSIS, TEMPLATE_SYNTAX_CODE_GENERATION,
18-
TEMPLATE_EXECUTION_ANALYSIS, TEMPLATE_EXECUTION_CODE_GENERATION,
19-
TEMPLATE_VALIDATION_ANALYSIS, TEMPLATE_VALIDATION_CODE_GENERATION,
20-
TEMPLATE_SEMANTIC_COMPARISON, TEMPLATE_SEMANTIC_ANALYSIS,
27+
TEMPLATE_SYNTAX_ANALYSIS,
28+
TEMPLATE_SYNTAX_CODE_GENERATION,
29+
TEMPLATE_EXECUTION_ANALYSIS,
30+
TEMPLATE_EXECUTION_CODE_GENERATION,
31+
TEMPLATE_VALIDATION_ANALYSIS,
32+
TEMPLATE_VALIDATION_CODE_GENERATION,
33+
TEMPLATE_SEMANTIC_COMPARISON,
34+
TEMPLATE_SEMANTIC_ANALYSIS,
2135
TEMPLATE_SEMANTIC_CODE_GENERATION)
22-
from .reasoning_node_prompts import TEMPLATE_REASONING, TEMPLATE_REASONING_WITH_CONTEXT
36+
from .reasoning_node_prompts import (TEMPLATE_REASONING,
37+
TEMPLATE_REASONING_WITH_CONTEXT)
38+
from .merge_generated_scripts_prompts import TEMPLATE_MERGE_SCRIPTS_PROMPT
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
"""
2+
merge_generated_scripts_prompts module
3+
"""
4+
TEMPLATE_MERGE_SCRIPTS_PROMPT = """
5+
You are a python expert in web scraping and you have just generated multiple scripts to scrape different URLs.\n
6+
The scripts are generated based on a user question and the content of the websites.\n
7+
You need to create one single script that merges the scripts generated for each URL.\n
8+
The scraped contents are in a JSON format and you need to merge them based on the context and providing a correct JSON structure.\n
9+
The output should be just in python code without any comment and should implement the main function.\n
10+
The python script, when executed, should format the extracted information sticking to the user question and scripts output format.\n
11+
USER PROMPT: {user_prompt}\n
12+
SCRIPTS:\n
13+
{scripts}
14+
"""

0 commit comments

Comments (0)