5 files changed, +81 -6 lines.

New example script (file added):
+"""
+Basic example of scraping pipeline using SmartScraper
+"""
+
+import os
+import json
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+
+load_dotenv()
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "api_key": os.getenv("OPENAI_API_KEY"),
+        "model": "openai/gpt-4o",
+    },
+    "reasoning": True,
+    "verbose": True,
+    "headless": False,
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me what does the company do, the name and a contact email.",
+    source="https://scrapegraphai.com/",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(json.dumps(result, indent=4))
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
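A note on the new "reasoning" flag: as the graph changes below show, setting "reasoning": True routes execution through a ReasoningNode whose node_config receives the graph's schema, and the node later calls .schema() on it, so a Pydantic model is expected. The snippet below is a minimal, hypothetical sketch (not part of this changeset) of the same example with a structured schema attached; the CompanyInfo model and its fields are illustrative assumptions, and the schema= keyword on SmartScraperGraph is assumed from the library's usual constructor rather than confirmed by this diff.

# Hypothetical usage sketch, not part of this PR: reasoning flag plus a structured schema.
import os
from pydantic import BaseModel, Field
from scrapegraphai.graphs import SmartScraperGraph

class CompanyInfo(BaseModel):
    # Illustrative fields; names are assumptions, not taken from the diff.
    name: str = Field(description="Company name")
    description: str = Field(description="What the company does")
    contact_email: str = Field(description="Contact email address")

graph = SmartScraperGraph(
    prompt="List me what does the company do, the name and a contact email.",
    source="https://scrapegraphai.com/",
    config={
        "llm": {"api_key": os.getenv("OPENAI_API_KEY"), "model": "openai/gpt-4o"},
        "reasoning": True,  # routes the run through the ReasoningNode added in this PR
    },
    schema=CompanyInfo,  # assumed keyword; the ReasoningNode reads it via node_config["schema"]
)

print(graph.run())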
Graph builder changes (register the new ReasoningNode and wire it in when reasoning is enabled):

 from ..nodes import (
     FetchNode,
     ParseNode,
+    ReasoningNode,
     GenerateAnswerNode
 )

@@ -88,6 +89,33 @@ def _create_graph(self) -> BaseGraph:
            }
        )

+        if self.config.get("reasoning"):
+            reasoning_node = ReasoningNode(
+                input="user_prompt & (relevant_chunks | parsed_doc | doc)",
+                output=["answer"],
+                node_config={
+                    "llm_model": self.llm_model,
+                    "additional_info": self.config.get("additional_info"),
+                    "schema": self.schema,
+                }
+            )
+
+            return BaseGraph(
+                nodes=[
+                    fetch_node,
+                    parse_node,
+                    reasoning_node,
+                    generate_answer_node,
+                ],
+                edges=[
+                    (fetch_node, parse_node),
+                    (parse_node, reasoning_node),
+                    (reasoning_node, generate_answer_node)
+                ],
+                entry_point=fetch_node,
+                graph_name=self.__class__.__name__
+            )
+
        return BaseGraph(
            nodes=[
                fetch_node,
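For quick reference, a sketch (not from the diff) of the two node orders _create_graph can now produce, assuming the original path keeps its fetch, parse, generate-answer sequence:

# Summary sketch of the two pipelines implied by the hunk above; names only, not graph objects.
WITHOUT_REASONING = ["FetchNode", "ParseNode", "GenerateAnswerNode"]
WITH_REASONING = ["FetchNode", "ParseNode", "ReasoningNode", "GenerateAnswerNode"]
print(" -> ".join(WITH_REASONING))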
Node exports (the ReasoningNode import line is rewritten; the change appears to be whitespace/newline only):

 from .prompt_refiner_node import PromptRefinerNode
 from .html_analyzer_node import HtmlAnalyzerNode
 from .generate_code_node import GenerateCodeNode
-from .reasoning_node import ReasoningNode
+from .reasoning_node import ReasoningNode
ReasoningNode (docstring re-wrapped and trailing whitespace stripped from blank lines):

@@ -50,12 +50,13 @@ def __init__(
        )

        self.additional_info = node_config.get("additional_info", None)
-
+
        self.output_schema = node_config.get("schema")

    def execute(self, state: dict) -> dict:
        """
-        Generate a refined prompt for the reasoning task based on the user's input and the JSON schema.
+        Generate a refined prompt for the reasoning task based
+        on the user's input and the JSON schema.

        Args:
            state (dict): The current state of the graph. The input keys will be used
@@ -70,11 +71,11 @@ def execute(self, state: dict) -> dict:
        """

        self.logger.info(f"--- Executing {self.node_name} Node ---")
-
+
        user_prompt = state['user_prompt']

        self.simplefied_schema = transform_schema(self.output_schema.schema())
-
+
        if self.additional_info is not None:
            prompt = PromptTemplate(
                template=TEMPLATE_REASONING_WITH_CONTEXT,
Reasoning prompt templates (trailing whitespace stripped; context shown around TEMPLATE_REASONING_WITH_CONTEXT):

 **Reasoning Output**:
 [Your detailed analysis based on the above instructions]
 """
-
+
 TEMPLATE_REASONING_WITH_CONTEXT = """
 **Task**: Analyze the user's request and the provided JSON schema to guide an LLM in extracting information directly from a markdown file previously parsed from an HTML file.