Skip to content

Commit 080a318

Browse files
committed
feat(telemetry): add telemetry module
1 parent 93342b4 commit 080a318

22 files changed

+277
-30
lines changed

examples/openai/smart_scraper_openai.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Basic example of scraping pipeline using SmartScraper
33
"""
44

5-
import os
5+
import os, json
66
from dotenv import load_dotenv
77
from scrapegraphai.graphs import SmartScraperGraph
88
from scrapegraphai.utils import prettify_exec_info
@@ -37,7 +37,7 @@
3737
)
3838

3939
result = smart_scraper_graph.run()
40-
print(result)
40+
print(json.dumps(result, indent=4))
4141

4242
# ************************************************
4343
# Get graph execution info

scrapegraphai/graphs/abstract_graph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
OneApi
2727
)
2828
from ..models.ernie import Ernie
29-
from ..utils.logging import set_verbosity_debug, set_verbosity_warning
29+
from ..utils.logging import set_verbosity_debug, set_verbosity_warning, set_verbosity_info
3030

3131
from ..helpers import models_tokens
3232
from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic, DeepSeek
@@ -90,7 +90,7 @@ def __init__(self, prompt: str, config: dict,
9090
verbose = bool(config and config.get("verbose"))
9191

9292
if verbose:
93-
set_verbosity_debug()
93+
set_verbosity_info()
9494
else:
9595
set_verbosity_warning()
9696

scrapegraphai/graphs/base_graph.py

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
1-
"""
2-
BaseGraph Module
3-
"""
4-
51
import time
62
import warnings
73
from langchain_community.callbacks import get_openai_callback
84
from typing import Tuple
95

6+
# Import telemetry functions
7+
from ..telemetry import log_graph_execution, log_event
108

119
class BaseGraph:
1210
"""
@@ -46,12 +44,12 @@ class BaseGraph:
4644
... )
4745
"""
4846

49-
def __init__(self, nodes: list, edges: list, entry_point: str, use_burr: bool = False, burr_config: dict = None):
50-
47+
def __init__(self, nodes: list, edges: list, entry_point: str, use_burr: bool = False, burr_config: dict = None, graph_name: str = "Custom"):
5148
self.nodes = nodes
5249
self.raw_edges = edges
5350
self.edges = self._create_edges({e for e in edges})
5451
self.entry_point = entry_point.node_name
52+
self.graph_name = graph_name
5553
self.initial_state = {}
5654

5755
if nodes[0].node_name != entry_point.node_name:
@@ -103,12 +101,46 @@ def _execute_standard(self, initial_state: dict) -> Tuple[dict, list]:
103101
"total_cost_USD": 0.0,
104102
}
105103

104+
start_time = time.time()
105+
error_node = None
106+
source_type = None
107+
llm_model = None
108+
embedder_model = None
109+
106110
while current_node_name:
107111
curr_time = time.time()
108112
current_node = next(node for node in self.nodes if node.node_name == current_node_name)
109113

114+
# if the current node is a FetchNode, infer the source type from the state
115+
if current_node.__class__.__name__ == "FetchNode":
116+
# get the second key name of the state dictionary
117+
source_type = list(state.keys())[1]
118+
# quick fix for local_dir source type
119+
if source_type == "local_dir":
120+
source_type = "html_dir"
121+
122+
# check if there is an "llm_model" variable in the class
123+
if hasattr(current_node, "llm_model") and llm_model is None:
124+
llm_model = current_node.llm_model
125+
if hasattr(llm_model, "model_name"):
126+
llm_model = llm_model.model_name
127+
elif hasattr(llm_model, "model"):
128+
llm_model = llm_model.model
129+
130+
# check if there is an "embedder_model" variable in the class
131+
if hasattr(current_node, "embedder_model") and embedder_model is None:
132+
embedder_model = current_node.embedder_model
133+
if hasattr(embedder_model, "model_name"):
134+
embedder_model = embedder_model.model_name
135+
elif hasattr(embedder_model, "model"):
136+
embedder_model = embedder_model.model
137+
110138
with get_openai_callback() as cb:
111-
result = current_node.execute(state)
139+
try:
140+
result = current_node.execute(state)
141+
except Exception as e:
142+
error_node = current_node.node_name
143+
raise e
112144
node_exec_time = time.time() - curr_time
113145
total_exec_time += node_exec_time
114146

@@ -147,6 +179,17 @@ def _execute_standard(self, initial_state: dict) -> Tuple[dict, list]:
147179
"exec_time": total_exec_time,
148180
})
149181

182+
# Log the graph execution telemetry
183+
graph_execution_time = time.time() - start_time
184+
log_graph_execution(
185+
graph_name=self.graph_name,
186+
llm_model=llm_model,
187+
embedder_model=embedder_model,
188+
source_type=source_type,
189+
execution_time=graph_execution_time,
190+
error_node=error_node
191+
)
192+
150193
return state, exec_info
151194

152195
def execute(self, initial_state: dict) -> Tuple[dict, list]:
@@ -162,7 +205,6 @@ def execute(self, initial_state: dict) -> Tuple[dict, list]:
162205

163206
self.initial_state = initial_state
164207
if self.use_burr:
165-
166208
from ..integrations import BurrBridge
167209

168210
bridge = BurrBridge(self, self.burr_config)
@@ -190,4 +232,4 @@ def append_node(self, node):
190232
# add the node to the list of nodes
191233
self.nodes.append(node)
192234
# update the edges connecting the last node to the new node
193-
self.edges = self._create_edges({e for e in self.raw_edges})
235+
self.edges = self._create_edges({e for e in self.raw_edges})

scrapegraphai/graphs/csv_scraper_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ def _create_graph(self):
6464
(fetch_node, rag_node),
6565
(rag_node, generate_answer_node)
6666
],
67-
entry_point=fetch_node
67+
entry_point=fetch_node,
68+
graph_name=self.__class__.__name__
6869
)
6970

7071
def run(self) -> str:

scrapegraphai/graphs/csv_scraper_multi_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ def _create_graph(self) -> BaseGraph:
100100
edges=[
101101
(graph_iterator_node, merge_answers_node),
102102
],
103-
entry_point=graph_iterator_node
103+
entry_point=graph_iterator_node,
104+
graph_name=self.__class__.__name__
104105
)
105106

106107
def run(self) -> str:

scrapegraphai/graphs/deep_scraper_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,8 @@ def _create_repeated_graph(self) -> BaseGraph:
141141
(search_node, graph_iterator_node),
142142
(graph_iterator_node, merge_answers_node)
143143
],
144-
entry_point=fetch_node
144+
entry_point=fetch_node,
145+
graph_name=self.__class__.__name__
145146
)
146147

147148

scrapegraphai/graphs/json_scraper_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ def _create_graph(self) -> BaseGraph:
8989
(fetch_node, rag_node),
9090
(rag_node, generate_answer_node)
9191
],
92-
entry_point=fetch_node
92+
entry_point=fetch_node,
93+
graph_name=self.__class__.__name__
9394
)
9495

9596
def run(self) -> str:

scrapegraphai/graphs/json_scraper_multi_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ def _create_graph(self) -> BaseGraph:
104104
edges=[
105105
(graph_iterator_node, merge_answers_node),
106106
],
107-
entry_point=graph_iterator_node
107+
entry_point=graph_iterator_node,
108+
graph_name=self.__class__.__name__
108109
)
109110

110111
def run(self) -> str:

scrapegraphai/graphs/omni_scraper_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ def _create_graph(self) -> BaseGraph:
122122
(image_to_text_node, rag_node),
123123
(rag_node, generate_answer_omni_node)
124124
],
125-
entry_point=fetch_node
125+
entry_point=fetch_node,
126+
graph_name=self.__class__.__name__
126127
)
127128

128129
def run(self) -> str:

scrapegraphai/graphs/omni_search_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ def _create_graph(self) -> BaseGraph:
115115
(search_internet_node, graph_iterator_node),
116116
(graph_iterator_node, merge_answers_node)
117117
],
118-
entry_point=search_internet_node
118+
entry_point=search_internet_node,
119+
graph_name=self.__class__.__name__
119120
)
120121

121122
def run(self) -> str:

scrapegraphai/graphs/pdf_scraper_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ def _create_graph(self) -> BaseGraph:
105105
(parse_node, rag_node),
106106
(rag_node, generate_answer_node_pdf)
107107
],
108-
entry_point=fetch_node
108+
entry_point=fetch_node,
109+
graph_name=self.__class__.__name__
109110
)
110111

111112
def run(self) -> str:

scrapegraphai/graphs/pdf_scraper_multi_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ def _create_graph(self) -> BaseGraph:
105105
edges=[
106106
(graph_iterator_node, merge_answers_node),
107107
],
108-
entry_point=graph_iterator_node
108+
entry_point=graph_iterator_node,
109+
graph_name=self.__class__.__name__
109110
)
110111

111112
def run(self) -> str:

scrapegraphai/graphs/script_creator_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ def _create_graph(self) -> BaseGraph:
9595
(fetch_node, parse_node),
9696
(parse_node, generate_scraper_node),
9797
],
98-
entry_point=fetch_node
98+
entry_point=fetch_node,
99+
graph_name=self.__class__.__name__
99100
)
100101

101102
def run(self) -> str:

scrapegraphai/graphs/script_creator_multi_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ def _create_graph(self) -> BaseGraph:
9999
edges=[
100100
(graph_iterator_node, merge_scripts_node),
101101
],
102-
entry_point=graph_iterator_node
102+
entry_point=graph_iterator_node,
103+
graph_name=self.__class__.__name__
103104
)
104105

105106
def run(self) -> str:

scrapegraphai/graphs/search_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,8 @@ def _create_graph(self) -> BaseGraph:
114114
(search_internet_node, graph_iterator_node),
115115
(graph_iterator_node, merge_answers_node)
116116
],
117-
entry_point=search_internet_node
117+
entry_point=search_internet_node,
118+
graph_name=self.__class__.__name__
118119
)
119120

120121
def run(self) -> str:

scrapegraphai/graphs/smart_scraper_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ def _create_graph(self) -> BaseGraph:
104104
(parse_node, rag_node),
105105
(rag_node, generate_answer_node)
106106
],
107-
entry_point=fetch_node
107+
entry_point=fetch_node,
108+
graph_name=self.__class__.__name__
108109
)
109110

110111
def run(self) -> str:

scrapegraphai/graphs/smart_scraper_multi_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ def _create_graph(self) -> BaseGraph:
104104
edges=[
105105
(graph_iterator_node, merge_answers_node),
106106
],
107-
entry_point=graph_iterator_node
107+
entry_point=graph_iterator_node,
108+
graph_name=self.__class__.__name__
108109
)
109110

110111
def run(self) -> str:

scrapegraphai/graphs/speech_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,8 @@ def _create_graph(self) -> BaseGraph:
109109
(rag_node, generate_answer_node),
110110
(generate_answer_node, text_to_speech_node)
111111
],
112-
entry_point=fetch_node
112+
entry_point=fetch_node,
113+
graph_name=self.__class__.__name__
113114
)
114115

115116
def run(self) -> str:

scrapegraphai/graphs/xml_scraper_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ def _create_graph(self) -> BaseGraph:
9191
(fetch_node, rag_node),
9292
(rag_node, generate_answer_node)
9393
],
94-
entry_point=fetch_node
94+
entry_point=fetch_node,
95+
graph_name=self.__class__.__name__
9596
)
9697

9798
def run(self) -> str:

scrapegraphai/graphs/xml_scraper_multi_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ def _create_graph(self) -> BaseGraph:
105105
edges=[
106106
(graph_iterator_node, merge_answers_node),
107107
],
108-
entry_point=graph_iterator_node
108+
entry_point=graph_iterator_node,
109+
graph_name=self.__class__.__name__
109110
)
110111

111112
def run(self) -> str:

scrapegraphai/telemetry/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""
2+
Telemetry package for scrapegraphai; re-exports the public telemetry helpers.
3+
"""
4+
5+
from .telemetry import log_graph_execution, log_event, disable_telemetry

0 commit comments

Comments
 (0)