Skip to content

Commit 68f58cc

Browse files
committed
refactoring of generate answer node
1 parent 602dd00 commit 68f58cc

File tree

5 files changed

+21
-32
lines changed

5 files changed

+21
-32
lines changed

scrapegraphai/nodes/generate_answer_csv_node.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -132,8 +132,9 @@ def execute(self, state):
132132

133133
chain = prompt | self.llm_model | output_parser
134134
answer = chain.invoke({"question": user_prompt})
135-
else:
136-
prompt = PromptTemplate(
135+
break
136+
137+
prompt = PromptTemplate(
137138
template=template_chunks_csv_prompt,
138139
input_variables=["question"],
139140
partial_variables={

scrapegraphai/nodes/generate_answer_node.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -119,9 +119,9 @@ def execute(self, state: dict) -> dict:
119119
"format_instructions": format_instructions})
120120
chain = prompt | self.llm_model | output_parser
121121
answer = chain.invoke({"question": user_prompt})
122+
break
122123

123-
else:
124-
prompt = PromptTemplate(
124+
prompt = PromptTemplate(
125125
template=template_chunks_prompt,
126126
input_variables=["question"],
127127
partial_variables={"context": chunk,

scrapegraphai/nodes/generate_answer_omni_node.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -118,8 +118,9 @@ def execute(self, state: dict) -> dict:
118118

119119
chain = prompt | self.llm_model | output_parser
120120
answer = chain.invoke({"question": user_prompt})
121-
else:
122-
prompt = PromptTemplate(
121+
break
122+
123+
prompt = PromptTemplate(
123124
template=template_chunks_omni_prompt,
124125
input_variables=["question"],
125126
partial_variables={

scrapegraphai/nodes/generate_answer_pdf_node.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -131,8 +131,8 @@ def execute(self, state):
131131
chain = prompt | self.llm_model | output_parser
132132
answer = chain.invoke({"question": user_prompt})
133133

134-
else:
135-
prompt = PromptTemplate(
134+
break
135+
prompt = PromptTemplate(
136136
template=template_chunks_pdf_prompt,
137137
input_variables=["question"],
138138
partial_variables={

scrapegraphai/nodes/parse_node.py

Lines changed: 11 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -50,48 +50,35 @@ def execute(self, state: dict) -> dict:
5050
5151
Args:
5252
state (dict): The current state of the graph. The input keys will be used to fetch the
53-
correct data from the state.
53+
correct data from the state.
5454
5555
Returns:
5656
dict: The updated state with the output key containing the parsed content chunks.
5757
5858
Raises:
59-
KeyError: If the input keys are not found in the state, indicating that the
60-
necessary information for parsing the content is missing.
59+
KeyError: If the input keys are not found in the state.
6160
"""
6261

6362
self.logger.info(f"--- Executing {self.node_name} Node ---")
6463

65-
# Interpret input keys based on the provided input expression
64+
# Fetch data using input keys
6665
input_keys = self.get_input_keys(state)
67-
68-
# Fetching data from the state based on the input keys
6966
input_data = [state[key] for key in input_keys]
70-
# Parse the document
7167
docs_transformed = input_data[0]
68+
69+
# Parse HTML if enabled
7270
if self.parse_html:
7371
docs_transformed = Html2TextTransformer().transform_documents(input_data[0])
7472
docs_transformed = docs_transformed[0]
7573

76-
chunks = chunk(text=docs_transformed.page_content,
77-
chunk_size= self.node_config.get("chunk_size", 4096)-250,
78-
token_counter=lambda x: len(x.split()),
79-
memoize=False)
80-
else:
81-
docs_transformed = docs_transformed[0]
82-
83-
if type(docs_transformed) == Document:
84-
chunks = chunk(text=docs_transformed.page_content,
85-
chunk_size= self.node_config.get("chunk_size", 4096)-250,
86-
token_counter=lambda x: len(x.split()),
87-
memoize=False)
88-
else:
74+
# Get text content
75+
text_content = docs_transformed.page_content if type(docs_transformed) == Document else docs_transformed
8976

90-
chunks = chunk(text=docs_transformed,
91-
chunk_size= self.node_config.get("chunk_size", 4096)-250,
92-
token_counter=lambda x: len(x.split()),
93-
memoize=False)
77+
# Chunk the text
78+
chunk_size = self.node_config.get("chunk_size", 4096) - 250
79+
chunks = chunk(text=text_content, chunk_size=chunk_size, token_counter=lambda x: len(x.split()), memoize=False)
9480

81+
# Update state with chunks
9582
state.update({self.output[0]: chunks})
9683

9784
return state

0 commit comments

Comments
 (0)