"""
SmartScraperGraph module, Burr version.
"""
from burr.core import ApplicationBuilder, State, default
from burr.core.action import action

from langchain_community.document_loaders import AsyncChromiumLoader
from langchain_core.documents import Document

from ..utils.remover import remover
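
# The scraper is modeled as a linear Burr state machine:
#
#     fetch_node -> parse_node -> rag_node -> generate_answer_node
#
# Each node is a Burr @action that declares the state keys it reads and
# writes; the transitions between them are wired up in run() below.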


@action(reads=["url", "local_dir"], writes=["doc"])
def fetch_node(
    state: State,
    headless: bool = True,
    verbose: bool = False,
    input_type: str = "url",
    node_config: dict = None,
) -> tuple[dict, State]:
    """Fetch the source document and store it in state as `doc`.

    The original node referenced `self.input` and `self.node_config`, which
    do not exist in a plain function action; they are taken here as the
    optional `input_type` and `node_config` parameters instead.
    """
    if verbose:
        print("--- Executing Fetch Node ---")

    source = state.get("url", state.get("local_dir"))

    if input_type in ("json_dir", "xml_dir", "csv_dir"):
        # structured local files are wrapped as-is
        compressed_document = [
            Document(page_content=source, metadata={"source": "local_dir"})
        ]
    # if it is a local directory
    elif not source.startswith("http"):
        compressed_document = [
            Document(page_content=remover(source), metadata={"source": "local_dir"})
        ]
    else:
        if node_config is not None and node_config.get("endpoint") is not None:
            # route requests through the configured proxy endpoint
            loader = AsyncChromiumLoader(
                [source],
                proxies={"http": node_config["endpoint"]},
                headless=headless,
            )
        else:
            loader = AsyncChromiumLoader(
                [source],
                headless=headless,
            )

        document = loader.load()
        # strip HTML boilerplate from the fetched page before storing it
        compressed_document = [
            Document(page_content=remover(str(document[0].page_content)))
        ]

    return {"doc": compressed_document}, state.update(doc=compressed_document)


@action(reads=["doc"], writes=["parsed_doc"])
def parse_node(state: State, chunk_size: int) -> tuple[dict, State]:
    # placeholder: will split `doc` into chunks of at most `chunk_size` tokens
    return {}, state


@action(reads=["user_prompt", "parsed_doc", "doc"],
        writes=["relevant_chunks"])
def rag_node(state: State, llm_model: object, embedder_model: object) -> tuple[dict, State]:
    # placeholder: will embed the chunks and retrieve those relevant to the prompt
    return {}, state


@action(reads=["user_prompt", "relevant_chunks", "parsed_doc", "doc"],
        writes=["answer"])
def generate_answer_node(state: State, llm_model: object) -> tuple[dict, State]:
    # placeholder: will ask the LLM to answer the prompt from the retrieved chunks
    return {}, state

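# As a sketch of where parse_node is headed (assuming LangChain's text
# splitter is an acceptable dependency), its body might eventually be:
#
#     from langchain_text_splitters import RecursiveCharacterTextSplitter
#     splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0)
#     chunks = splitter.split_text(state["doc"][0].page_content)
#     return {"parsed_doc": chunks}, state.update(parsed_doc=chunks)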


def run(prompt: str, input_key: str, source: str, config: dict) -> str:
    """Build the Burr application, render its graph, and (eventually) run it."""
    llm_model = config["llm_model"]
    embedder_model = config["embedder_model"]
    chunk_size = config["model_token"]

    initial_state = {
        "user_prompt": prompt,
        input_key: source,
    }
    app = (
        ApplicationBuilder()
        .with_actions(
            fetch_node=fetch_node,
            parse_node=parse_node,
            rag_node=rag_node,
            generate_answer_node=generate_answer_node,
        )
        .with_transitions(
            ("fetch_node", "parse_node", default),
            ("parse_node", "rag_node", default),
            ("rag_node", "generate_answer_node", default),
        )
        .with_entrypoint("fetch_node")
        .with_state(**initial_state)
        .build()
    )
    app.visualize(
        output_file_path="smart_scraper_graph",
        include_conditions=False, view=True, format="png",
    )
    # last_action, result, state = app.run(
    #     halt_after=["generate_answer_node"],
    #     inputs={
    #         "llm_model": llm_model,
    #         "embedder_model": embedder_model,
    #         # inputs are bound to action parameters by name, so the key must
    #         # be "chunk_size" (parse_node's parameter), not "model_token"
    #         "chunk_size": chunk_size,
    #     },
    # )
    # return result.get("answer", "No answer found.")
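    # NOTE: app.run is left commented out until the placeholder nodes are
    # implemented; once enabled, run() returns the generated answer string,
    # matching its -> str annotation.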


if __name__ == '__main__':
    prompt = "What is the capital of France?"
    source = "https://en.wikipedia.org/wiki/Paris"
    input_key = "url"
    # placeholder config values; swap in real model configurations to run
    config = {
        "llm_model": "rag-token",
        "embedder_model": "foo",
        "model_token": "bar",
    }
    run(prompt, input_key, source, config)