Skip to content

Commit 2ae26e9

Browse files
authored
Merge pull request #664 from LorenzoPaleari/598-fix-pydantic-validation-error
598 - Fix pydantic validation error
2 parents 7ad6f21 + a92dddb commit 2ae26e9

15 files changed

+204
-113
lines changed

scrapegraphai/graphs/csv_scraper_multi_graph.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
CSVScraperMultiGraph Module
33
"""
44

5+
from copy import deepcopy
56
from typing import List, Optional
67
from pydantic import BaseModel
78
from .base_graph import BaseGraph
@@ -48,6 +49,8 @@ def __init__(self, prompt: str, source: List[str],
4849

4950
self.copy_config = safe_deepcopy(config)
5051

52+
self.copy_schema = deepcopy(schema)
53+
5154
super().__init__(prompt, config, source, schema)
5255

5356
def _create_graph(self) -> BaseGraph:
@@ -58,17 +61,18 @@ def _create_graph(self) -> BaseGraph:
5861
BaseGraph: A graph instance representing the web scraping and searching workflow.
5962
"""
6063

61-
smart_scraper_instance = CSVScraperGraph(
62-
prompt="",
63-
source="",
64-
config=self.copy_config,
65-
)
64+
# smart_scraper_instance = CSVScraperGraph(
65+
# prompt="",
66+
# source="",
67+
# config=self.copy_config,
68+
# )
6669

6770
graph_iterator_node = GraphIteratorNode(
6871
input="user_prompt & jsons",
6972
output=["results"],
7073
node_config={
71-
"graph_instance": smart_scraper_instance,
74+
"graph_instance": CSVScraperGraph,
75+
"scraper_config": self.copy_config,
7276
}
7377
)
7478

@@ -77,7 +81,7 @@ def _create_graph(self) -> BaseGraph:
7781
output=["answer"],
7882
node_config={
7983
"llm_model": self.llm_model,
80-
"schema": self.schema
84+
"schema": self.copy_schema
8185
}
8286
)
8387

scrapegraphai/graphs/json_scraper_multi_graph.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,27 +61,29 @@ def _create_graph(self) -> BaseGraph:
6161
BaseGraph: A graph instance representing the web scraping and searching workflow.
6262
"""
6363

64-
smart_scraper_instance = JSONScraperGraph(
65-
prompt="",
66-
source="",
67-
config=self.copy_config,
68-
schema=self.copy_schema
69-
)
64+
# smart_scraper_instance = JSONScraperGraph(
65+
# prompt="",
66+
# source="",
67+
# config=self.copy_config,
68+
# schema=self.copy_schema
69+
# )
7070

7171
graph_iterator_node = GraphIteratorNode(
7272
input="user_prompt & jsons",
7373
output=["results"],
7474
node_config={
75-
"graph_instance": smart_scraper_instance,
76-
}
75+
"graph_instance": JSONScraperGraph,
76+
"scraper_config": self.copy_config,
77+
},
78+
schema=self.copy_schema
7779
)
7880

7981
merge_answers_node = MergeAnswersNode(
8082
input="user_prompt & results",
8183
output=["answer"],
8284
node_config={
8385
"llm_model": self.llm_model,
84-
"schema": self.schema
86+
"schema": self.copy_schema
8587
}
8688
)
8789

scrapegraphai/graphs/markdown_scraper_multi_graph.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,28 +55,30 @@ def _create_graph(self) -> BaseGraph:
5555
Returns:
5656
BaseGraph: A graph instance representing the web scraping and searching workflow.
5757
"""
58-
smart_scraper_instance = MDScraperGraph(
59-
prompt="",
60-
source="",
61-
config=self.copy_config,
62-
schema=self.copy_schema
63-
)
58+
# smart_scraper_instance = MDScraperGraph(
59+
# prompt="",
60+
# source="",
61+
# config=self.copy_config,
62+
# schema=self.copy_schema
63+
# )
6464

6565
# Define the graph nodes
6666
graph_iterator_node = GraphIteratorNode(
6767
input="user_prompt & jsons",
6868
output=["results"],
6969
node_config={
70-
"graph_instance": smart_scraper_instance,
71-
}
70+
"graph_instance": MDScraperGraph,
71+
"scraper_config": self.copy_config,
72+
},
73+
schema=self.copy_schema
7274
)
7375

7476
merge_answers_node = MergeAnswersNode(
7577
input="user_prompt & results",
7678
output=["answer"],
7779
node_config={
7880
"llm_model": self.llm_model,
79-
"schema": self.schema
81+
"schema": self.copy_schema
8082
}
8183
)
8284

scrapegraphai/graphs/omni_search_graph.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,12 @@ def _create_graph(self) -> BaseGraph:
6161
BaseGraph: A graph instance representing the web scraping and searching workflow.
6262
"""
6363

64-
omni_scraper_instance = OmniScraperGraph(
65-
prompt="",
66-
source="",
67-
config=self.copy_config,
68-
schema=self.copy_schema
69-
)
64+
# omni_scraper_instance = OmniScraperGraph(
65+
# prompt="",
66+
# source="",
67+
# config=self.copy_config,
68+
# schema=self.copy_schema
69+
# )
7070

7171
search_internet_node = SearchInternetNode(
7272
input="user_prompt",
@@ -81,16 +81,18 @@ def _create_graph(self) -> BaseGraph:
8181
input="user_prompt & urls",
8282
output=["results"],
8383
node_config={
84-
"graph_instance": omni_scraper_instance,
85-
}
84+
"graph_instance": OmniScraperGraph,
85+
"scraper_config": self.copy_config,
86+
},
87+
schema=self.copy_schema
8688
)
8789

8890
merge_answers_node = MergeAnswersNode(
8991
input="user_prompt & results",
9092
output=["answer"],
9193
node_config={
9294
"llm_model": self.llm_model,
93-
"schema": self.schema
95+
"schema": self.copy_schema
9496
}
9597
)
9698

scrapegraphai/graphs/pdf_scraper_multi_graph.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,27 +59,29 @@ def _create_graph(self) -> BaseGraph:
5959
BaseGraph: A graph instance representing the web scraping and searching workflow.
6060
"""
6161

62-
pdf_scraper_instance = PDFScraperGraph(
63-
prompt="",
64-
source="",
65-
config=self.copy_config,
66-
schema=self.copy_schema
67-
)
62+
# pdf_scraper_instance = PDFScraperGraph(
63+
# prompt="",
64+
# source="",
65+
# config=self.copy_config,
66+
# schema=self.copy_schema
67+
# )
6868

6969
graph_iterator_node = GraphIteratorNode(
7070
input="user_prompt & pdfs",
7171
output=["results"],
7272
node_config={
73-
"graph_instance": pdf_scraper_instance,
74-
}
73+
"graph_instance": PDFScraperGraph,
74+
"scraper_config": self.copy_config,
75+
},
76+
schema=self.copy_schema
7577
)
7678

7779
merge_answers_node = MergeAnswersNode(
7880
input="user_prompt & results",
7981
output=["answer"],
8082
node_config={
8183
"llm_model": self.llm_model,
82-
"schema": self.schema
84+
"schema": self.copy_schema
8385
}
8486
)
8587

scrapegraphai/graphs/search_graph.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,12 @@ def _create_graph(self) -> BaseGraph:
6262
BaseGraph: A graph instance representing the web scraping and searching workflow.
6363
"""
6464

65-
smart_scraper_instance = SmartScraperGraph(
66-
prompt="",
67-
source="",
68-
config=self.copy_config,
69-
schema=self.copy_schema
70-
)
65+
# smart_scraper_instance = SmartScraperGraph(
66+
# prompt="",
67+
# source="",
68+
# config=self.copy_config,
69+
# schema=self.copy_schema
70+
# )
7171

7272
search_internet_node = SearchInternetNode(
7373
input="user_prompt",
@@ -82,16 +82,18 @@ def _create_graph(self) -> BaseGraph:
8282
input="user_prompt & urls",
8383
output=["results"],
8484
node_config={
85-
"graph_instance": smart_scraper_instance,
86-
}
85+
"graph_instance": SmartScraperGraph,
86+
"scraper_config": self.copy_config
87+
},
88+
schema=self.copy_schema
8789
)
8890

8991
merge_answers_node = MergeAnswersNode(
9092
input="user_prompt & results",
9193
output=["answer"],
9294
node_config={
9395
"llm_model": self.llm_model,
94-
"schema": self.schema
96+
"schema": self.copy_schema
9597
}
9698
)
9799

scrapegraphai/graphs/smart_scraper_multi_graph.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,27 +61,29 @@ def _create_graph(self) -> BaseGraph:
6161
BaseGraph: A graph instance representing the web scraping and searching workflow.
6262
"""
6363

64-
smart_scraper_instance = SmartScraperGraph(
65-
prompt="",
66-
source="",
67-
config=self.copy_config,
68-
schema=self.copy_schema
69-
)
64+
# smart_scraper_instance = SmartScraperGraph(
65+
# prompt="",
66+
# source="",
67+
# config=self.copy_config,
68+
# schema=self.copy_schema
69+
# )
7070

7171
graph_iterator_node = GraphIteratorNode(
7272
input="user_prompt & urls",
7373
output=["results"],
7474
node_config={
75-
"graph_instance": smart_scraper_instance,
76-
}
75+
"graph_instance": SmartScraperGraph,
76+
"scraper_config": self.copy_config,
77+
},
78+
schema=self.copy_schema
7779
)
7880

7981
merge_answers_node = MergeAnswersNode(
8082
input="user_prompt & results",
8183
output=["answer"],
8284
node_config={
8385
"llm_model": self.llm_model,
84-
"schema": self.schema
86+
"schema": self.copy_schema
8587
}
8688
)
8789

scrapegraphai/graphs/xml_scraper_multi_graph.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,27 +59,29 @@ def _create_graph(self) -> BaseGraph:
5959
BaseGraph: A graph instance representing the web scraping and searching workflow.
6060
"""
6161

62-
smart_scraper_instance = XMLScraperGraph(
63-
prompt="",
64-
source="",
65-
config=self.copy_config,
66-
schema=self.copy_schema
67-
)
62+
# smart_scraper_instance = XMLScraperGraph(
63+
# prompt="",
64+
# source="",
65+
# config=self.copy_config,
66+
# schema=self.copy_schema
67+
# )
6868

6969
graph_iterator_node = GraphIteratorNode(
7070
input="user_prompt & jsons",
7171
output=["results"],
7272
node_config={
73-
"graph_instance": smart_scraper_instance,
74-
}
73+
"graph_instance": XMLScraperGraph,
74+
"scraper_config": self.copy_config,
75+
},
76+
schema=self.copy_schema
7577
)
7678

7779
merge_answers_node = MergeAnswersNode(
7880
input="user_prompt & results",
7981
output=["answer"],
8082
node_config={
8183
"llm_model": self.llm_model,
82-
"schema": self.schema
84+
"schema": self.copy_schema
8385
}
8486
)
8587

scrapegraphai/nodes/generate_answer_csv_node.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
from typing import List, Optional
6+
from pydantic.v1 import BaseModel as BaseModelV1
67
from langchain.prompts import PromptTemplate
78
from langchain_core.output_parsers import JsonOutputParser
89
from langchain_core.runnables import RunnableParallel
@@ -12,6 +13,7 @@
1213
from tqdm import tqdm
1314
from ..utils.logging import get_logger
1415
from .base_node import BaseNode
16+
from ..utils.llm_output_parser import typed_dict_output_parser, base_model_v2_output_parser, base_model_v1_output_parser
1517
from ..prompts import TEMPLATE_CHUKS_CSV, TEMPLATE_NO_CHUKS_CSV, TEMPLATE_MERGE_CSV
1618

1719
class GenerateAnswerCSVNode(BaseNode):
@@ -97,13 +99,13 @@ def execute(self, state):
9799

98100
if isinstance(self.llm_model, (ChatOpenAI, ChatMistralAI)):
99101
self.llm_model = self.llm_model.with_structured_output(
100-
schema = self.node_config["schema"],
101-
method="function_calling") # json schema works only on specific models
102-
103-
# default parser to empty lambda function
104-
output_parser = lambda x: x
102+
schema = self.node_config["schema"]) # json schema works only on specific models
103+
104+
output_parser = typed_dict_output_parser
105105
if is_basemodel_subclass(self.node_config["schema"]):
106-
output_parser = dict
106+
output_parser = base_model_v2_output_parser
107+
if issubclass(self.node_config["schema"], BaseModelV1):
108+
output_parser = base_model_v1_output_parser
107109
format_instructions = "NA"
108110
else:
109111
output_parser = JsonOutputParser(pydantic_object=self.node_config["schema"])

0 commit comments

Comments
 (0)