Commit b913b51

Merge branch 'logger-integration' into pre/beta

2 parents 00a392b + 3d0f671

35 files changed: +1403 additions, −342 deletions

.python-version

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+3.10.14

examples/local_models/smart_scraper_ollama.py

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@
         # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
     },
     "verbose": True,
+    "headless": False
 }

 # ************************************************
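
With this addition the local-model example drives a visible browser window instead of a headless one. For reference, the updated graph_config reads roughly as follows (a sketch: the model name is illustrative, and only the commented base_url line and the verbose/headless keys are taken from the diff):

    # Sketch of the example's configuration after this commit.
    graph_config = {
        "llm": {
            "model": "ollama/mistral",  # illustrative; see the example file for the real value
            # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
        },
        "verbose": True,
        "headless": False  # new: the scraping browser runs with a visible window
    }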

examples/openai/smart_scraper_openai.py

Lines changed: 2 additions & 2 deletions
@@ -18,10 +18,10 @@

 graph_config = {
     "llm": {
-        "api_key":openai_key,
+        "api_key": openai_key,
         "model": "gpt-3.5-turbo",
     },
-    "verbose": True,
+    "verbose": False,
     "headless": False,
 }

examples/single_node/robot_node.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@

 graph_config = {
     "llm": {
-        "model": "ollama/llama3",
+        "model_name": "ollama/llama3",
         "temperature": 0,
         "streaming": True
     },

scrapegraphai/docloaders/chromium.py

Lines changed: 2 additions & 3 deletions
@@ -1,14 +1,13 @@
 import asyncio
-import logging
 from typing import Any, AsyncIterator, Iterator, List, Optional

 from langchain_community.document_loaders.base import BaseLoader
 from langchain_core.documents import Document

-from ..utils import Proxy, dynamic_import, parse_or_search_proxy
+from ..utils import Proxy, dynamic_import, get_logger, parse_or_search_proxy


-logger = logging.getLogger(__name__)
+logger = get_logger("web-loader")


 class ChromiumLoader(BaseLoader):
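
The loader now obtains its logger from the library's own logging utilities instead of calling logging.getLogger(__name__) directly, so the "web-loader" logger is governed by the same verbosity switch as the rest of the package. A minimal sketch of what such a helper can look like (the real implementation lives in scrapegraphai/utils/logging.py and may differ in detail):

    import logging
    from typing import Optional

    _ROOT = "scrapegraphai"  # assumed name of the library's root logger

    def get_logger(name: Optional[str] = None) -> logging.Logger:
        # Children of the root logger inherit its level, so a single
        # set_verbosity_* call governs every module's output.
        return logging.getLogger(_ROOT + (f".{name}" if name else ""))

    def set_verbosity(level: int) -> None:
        logging.getLogger(_ROOT).setLevel(level)

    def set_verbosity_debug() -> None:
        set_verbosity(logging.DEBUG)

    def set_verbosity_warning() -> None:
        set_verbosity(logging.WARNING)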

scrapegraphai/graphs/abstract_graph.py

Lines changed: 56 additions & 40 deletions
@@ -1,15 +1,28 @@
 """
 AbstractGraph Module
 """
+
 from abc import ABC, abstractmethod
 from typing import Optional
+
 from langchain_aws import BedrockEmbeddings
-from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
 from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings
 from langchain_google_genai import GoogleGenerativeAIEmbeddings
-from ..helpers import models_tokens
-from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic, DeepSeek
 from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
+from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
+
+from ..helpers import models_tokens
+from ..models import (
+    Anthropic,
+    AzureOpenAI,
+    Bedrock,
+    Gemini,
+    Groq,
+    HuggingFace,
+    Ollama,
+    OpenAI,
+)
+from ..utils.logging import set_verbosity_debug, set_verbosity_warning

 from ..helpers import models_tokens
 from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic, DeepSeek

@@ -67,10 +80,15 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None, sche
         self.execution_info = None

         # Set common configuration parameters
-        self.verbose = False if config is None else config.get(
-            "verbose", False)
-        self.headless = True if config is None else config.get(
-            "headless", True)
+
+        verbose = bool(config and config.get("verbose"))
+
+        if verbose:
+            set_verbosity_debug()
+        else:
+            set_verbosity_warning()
+
+        self.headless = True if config is None else config.get("headless", True)
         self.loader_kwargs = config.get("loader_kwargs", {})

         common_params = {

@@ -96,22 +114,22 @@ def set_common_params(self, params: dict, overwrite=False):

     def _set_model_token(self, llm):

-        if 'Azure' in str(type(llm)):
+        if "Azure" in str(type(llm)):
             try:
                 self.model_token = models_tokens["azure"][llm.model_name]
             except KeyError:
                 raise KeyError("Model not supported")

-        elif 'HuggingFaceEndpoint' in str(type(llm)):
-            if 'mistral' in llm.repo_id:
+        elif "HuggingFaceEndpoint" in str(type(llm)):
+            if "mistral" in llm.repo_id:
                 try:
-                    self.model_token = models_tokens['mistral'][llm.repo_id]
+                    self.model_token = models_tokens["mistral"][llm.repo_id]
                 except KeyError:
                     raise KeyError("Model not supported")
-        elif 'Google' in str(type(llm)):
+        elif "Google" in str(type(llm)):
             try:
-                if 'gemini' in llm.model:
-                    self.model_token = models_tokens['gemini'][llm.model]
+                if "gemini" in llm.model:
+                    self.model_token = models_tokens["gemini"][llm.model]
             except KeyError:
                 raise KeyError("Model not supported")

@@ -129,17 +147,14 @@ def _create_llm(self, llm_config: dict, chat=False) -> object:
             KeyError: If the model is not supported.
         """

-        llm_defaults = {
-            "temperature": 0,
-            "streaming": False
-        }
+        llm_defaults = {"temperature": 0, "streaming": False}
         llm_params = {**llm_defaults, **llm_config}

         # If model instance is passed directly instead of the model details
-        if 'model_instance' in llm_params:
+        if "model_instance" in llm_params:
             if chat:
-                self._set_model_token(llm_params['model_instance'])
-            return llm_params['model_instance']
+                self._set_model_token(llm_params["model_instance"])
+            return llm_params["model_instance"]

         # Instantiate the language model based on the model name
         if "gpt-" in llm_params["model"]:

@@ -205,18 +220,20 @@ def _create_llm(self, llm_config: dict, chat=False) -> object:
         elif "bedrock" in llm_params["model"]:
             llm_params["model"] = llm_params["model"].split("/")[-1]
             model_id = llm_params["model"]
-            client = llm_params.get('client', None)
+            client = llm_params.get("client", None)
             try:
                 self.model_token = models_tokens["bedrock"][llm_params["model"]]
             except KeyError as exc:
                 raise KeyError("Model not supported") from exc
-            return Bedrock({
-                "client": client,
-                "model_id": model_id,
-                "model_kwargs": {
-                    "temperature": llm_params["temperature"],
+            return Bedrock(
+                {
+                    "client": client,
+                    "model_id": model_id,
+                    "model_kwargs": {
+                        "temperature": llm_params["temperature"],
+                    },
                 }
-            })
+            )
         elif "claude-3-" in llm_params["model"]:
             self.model_token = models_tokens["claude"]["claude3"]
             return Anthropic(llm_params)

@@ -227,8 +244,7 @@ def _create_llm(self, llm_config: dict, chat=False) -> object:
                 raise KeyError("Model not supported") from exc
             return DeepSeek(llm_params)
         else:
-            raise ValueError(
-                "Model provided by the configuration not supported")
+            raise ValueError("Model provided by the configuration not supported")

     def _create_default_embedder(self, llm_config=None) -> object:
         """

@@ -241,8 +257,9 @@ def _create_default_embedder(self, llm_config=None) -> object:
             ValueError: If the model is not supported.
         """
         if isinstance(self.llm_model, Gemini):
-            return GoogleGenerativeAIEmbeddings(google_api_key=llm_config['api_key'],
-                                                model="models/embedding-001")
+            return GoogleGenerativeAIEmbeddings(
+                google_api_key=llm_config["api_key"], model="models/embedding-001"
+            )
         if isinstance(self.llm_model, OpenAI):
             return OpenAIEmbeddings(api_key=self.llm_model.openai_api_key)
         elif isinstance(self.llm_model, DeepSeek):

@@ -279,8 +296,8 @@ def _create_embedder(self, embedder_config: dict) -> object:
         Raises:
             KeyError: If the model is not supported.
         """
-        if 'model_instance' in embedder_config:
-            return embedder_config['model_instance']
+        if "model_instance" in embedder_config:
+            return embedder_config["model_instance"]
         # Instantiate the embedding model based on the model name
         if "openai" in embedder_config["model"]:
             return OpenAIEmbeddings(api_key=embedder_config["api_key"])

@@ -297,28 +314,27 @@ def _create_embedder(self, embedder_config: dict) -> object:
             try:
                 models_tokens["hugging_face"][embedder_config["model"]]
             except KeyError as exc:
-                raise KeyError("Model not supported")from exc
+                raise KeyError("Model not supported") from exc
             return HuggingFaceHubEmbeddings(model=embedder_config["model"])
         elif "gemini" in embedder_config["model"]:
             try:
                 models_tokens["gemini"][embedder_config["model"]]
             except KeyError as exc:
-                raise KeyError("Model not supported")from exc
+                raise KeyError("Model not supported") from exc
             return GoogleGenerativeAIEmbeddings(model=embedder_config["model"])
         elif "bedrock" in embedder_config["model"]:
             embedder_config["model"] = embedder_config["model"].split("/")[-1]
-            client = embedder_config.get('client', None)
+            client = embedder_config.get("client", None)
             try:
                 models_tokens["bedrock"][embedder_config["model"]]
             except KeyError as exc:
                 raise KeyError("Model not supported") from exc
             return BedrockEmbeddings(client=client, model_id=embedder_config["model"])
         else:
-            raise ValueError(
-                "Model provided by the configuration not supported")
+            raise ValueError("Model provided by the configuration not supported")

     def get_state(self, key=None) -> dict:
-        """""
+        """ ""
         Get the final state of the graph.

         Args:
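
The most consequential change in this file is that the graph's verbose flag no longer sets a boolean attribute; it now drives the shared logging verbosity for the whole library. In other words (a sketch that mirrors the new __init__ logic, using the imports shown in the diff):

    from scrapegraphai.utils.logging import set_verbosity_debug, set_verbosity_warning

    config = {"verbose": True, "headless": True}

    # Any truthy "verbose" enables debug-level logs package-wide;
    # otherwise only warnings and errors are emitted.
    if bool(config and config.get("verbose")):
        set_verbosity_debug()
    else:
        set_verbosity_warning()

Note that this hunk drops the self.verbose assignment entirely; downstream code that relied on reading it from the graph instance would need to switch to the new verbosity helpers instead.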

scrapegraphai/helpers/models_tokens.py

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@
 models_tokens = {
     "openai": {
         "gpt-3.5-turbo-0125": 16385,
+        "gpt-3.5": 4096,
         "gpt-3.5-turbo": 4096,
         "gpt-3.5-turbo-1106": 16385,
         "gpt-3.5-turbo-instruct": 4096,
