Skip to content

Commit 0cda020

Browse files
authored
Merge branch 'pre/beta' into temp
2 parents 2333b51 + 8b2c266 commit 0cda020

40 files changed

+58
-92
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
* conditional node ([ce00345](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ce003454953e5785d4746223c252de38cd5d07ea))
2626

2727
## [1.13.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.12.2...v1.13.0) (2024-08-09)
28+
## [1.13.0-beta.9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.8...v1.13.0-beta.9) (2024-08-10)
29+
2830

2931

3032
### Features
@@ -65,6 +67,15 @@
6567
* **release:** 1.13.0-beta.5 [skip ci] ([2eba73b](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2eba73b784ee443260117e98ab7c943934b3018d)), closes [#513](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/513)
6668
* **release:** 1.13.0-beta.6 [skip ci] ([e75b574](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/e75b574b67040e127599da9ee1b0eee13d234cb9))
6769
* **release:** 1.13.0-beta.7 [skip ci] ([6e56925](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/6e56925355c424edae290c70fd98646ab5f420ee))
70+
* add refactoring of default temperature ([6c3b37a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/6c3b37ab001b80c09ea9ffb56d4c3df338e33a7a))
71+
72+
## [1.13.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.7...v1.13.0-beta.8) (2024-08-09)
73+
74+
75+
### Bug Fixes
76+
77+
* broken node ([1272273](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/127227349915deeb0dede34aa575ad269ed7cbe3))
78+
6879

6980
## [1.13.0-beta.7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.6...v1.13.0-beta.7) (2024-08-09)
7081

examples/local_models/smart_scraper_ollama.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
"format": "json", # Ollama needs the format to be specified explicitly
1515
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
1616
},
17-
1817
"verbose": True,
1918
"headless": False
2019
}
@@ -23,8 +22,13 @@
2322
# Create the SmartScraperGraph instance and run it
2423
# ************************************************
2524
smart_scraper_graph = SmartScraperGraph(
25+
<<<<<<< Updated upstream
2626
prompt="Find some information about what does the company do, the name and a contact email.",
2727
source="https://scrapegraphai.com/",
28+
=======
29+
prompt="List all the projects with their descriptions",
30+
source="https://perinim.github.io/projects/",
31+
>>>>>>> Stashed changes
2832
config=graph_config
2933
)
3034

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ name = "scrapegraphai"
33

44

55
version = "1.13.3"
6+
version = "1.13.0b9"
7+
68

79

810
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."

scrapegraphai/graphs/abstract_graph.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,16 @@
77
import uuid
88
import warnings
99
from pydantic import BaseModel
10-
1110
from langchain_community.chat_models import ErnieBotChat
1211
from langchain_nvidia_ai_endpoints import ChatNVIDIA
1312
from langchain.chat_models import init_chat_model
14-
1513
from ..helpers import models_tokens
1614
from ..models import (
1715
OneApi,
1816
DeepSeek
1917
)
2018
from ..utils.logging import set_verbosity_warning, set_verbosity_info
2119

22-
23-
2420
class AbstractGraph(ABC):
2521
"""
2622
Scaffolding class for creating a graph representation and executing it.
@@ -53,6 +49,9 @@ class AbstractGraph(ABC):
5349
def __init__(self, prompt: str, config: dict,
5450
source: Optional[str] = None, schema: Optional[BaseModel] = None):
5551

52+
if config.get("llm").get("temperature") is None:
53+
config["llm"]["temperature"] = 0
54+
5655
self.prompt = prompt
5756
self.source = source
5857
self.config = config
@@ -212,7 +211,7 @@ def handle_model(model_name, provider, token_key, default_token=8192):
212211
print("model not found, using default token size (8192)")
213212
self.model_token = 8192
214213
return ErnieBotChat(llm_params)
215-
214+
216215
if "oneapi" in llm_params["model"]:
217216
# take the model after the last slash
218217
llm_params["model"] = llm_params["model"].split("/")[-1]
@@ -221,7 +220,7 @@ def handle_model(model_name, provider, token_key, default_token=8192):
221220
except KeyError as exc:
222221
raise KeyError("Model not supported") from exc
223222
return OneApi(llm_params)
224-
223+
225224
if "nvidia" in llm_params["model"]:
226225
try:
227226
self.model_token = models_tokens["nvidia"][llm_params["model"].split("/")[-1]]

scrapegraphai/graphs/base_graph.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
from typing import Tuple
77
from langchain_community.callbacks import get_openai_callback
88
from ..integrations import BurrBridge
9-
10-
# Import telemetry functions
11-
from ..telemetry import log_graph_execution, log_event
9+
from ..telemetry import log_graph_execution
1210

1311
class BaseGraph:
1412
"""

scrapegraphai/graphs/csv_scraper_graph.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,13 @@
44

55
from typing import Optional
66
from pydantic import BaseModel
7-
87
from .base_graph import BaseGraph
98
from .abstract_graph import AbstractGraph
10-
119
from ..nodes import (
1210
FetchNode,
1311
GenerateAnswerCSVNode
1412
)
1513

16-
1714
class CSVScraperGraph(AbstractGraph):
1815
"""
1916
SmartScraper is a comprehensive web scraping tool that automates the process of extracting

scrapegraphai/graphs/csv_scraper_multi_graph.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,19 @@
44

55
from copy import copy, deepcopy
66
from typing import List, Optional
7-
87
from pydantic import BaseModel
9-
108
from .base_graph import BaseGraph
119
from .abstract_graph import AbstractGraph
1210
from .csv_scraper_graph import CSVScraperGraph
13-
1411
from ..nodes import (
1512
GraphIteratorNode,
1613
MergeAnswersNode
1714
)
1815

19-
2016
class CSVScraperMultiGraph(AbstractGraph):
2117
"""
22-
CSVScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt.
18+
CSVScraperMultiGraph is a scraping pipeline that
19+
scrapes a list of URLs and generates answers to a given prompt.
2320
It only requires a user prompt and a list of URLs.
2421
2522
Attributes:
@@ -44,7 +41,8 @@ class CSVScraperMultiGraph(AbstractGraph):
4441
>>> result = search_graph.run()
4542
"""
4643

47-
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
44+
def __init__(self, prompt: str, source: List[str],
45+
config: dict, schema: Optional[BaseModel] = None):
4846

4947
self.max_results = config.get("max_results", 3)
5048

scrapegraphai/graphs/deep_scraper_graph.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@
44

55
from typing import Optional
66
from pydantic import BaseModel
7-
87
from .base_graph import BaseGraph
98
from .abstract_graph import AbstractGraph
10-
119
from ..nodes import (
1210
FetchNode,
1311
SearchLinkNode,
@@ -18,7 +16,6 @@
1816
MergeAnswersNode
1917
)
2018

21-
2219
class DeepScraperGraph(AbstractGraph):
2320
"""
2421
[WIP]
@@ -87,7 +84,6 @@ def _create_repeated_graph(self) -> BaseGraph:
8784
output=["relevant_chunks"],
8885
node_config={
8986
"llm_model": self.llm_model,
90-
"embedder_model": self.embedder_model
9187
}
9288
)
9389
generate_answer_node = GenerateAnswerNode(
@@ -104,7 +100,6 @@ def _create_repeated_graph(self) -> BaseGraph:
104100
output=["relevant_links"],
105101
node_config={
106102
"llm_model": self.llm_model,
107-
"embedder_model": self.embedder_model
108103
}
109104
)
110105
graph_iterator_node = GraphIteratorNode(

scrapegraphai/graphs/json_scraper_graph.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,13 @@
44

55
from typing import Optional
66
from pydantic import BaseModel
7-
87
from .base_graph import BaseGraph
98
from .abstract_graph import AbstractGraph
10-
119
from ..nodes import (
1210
FetchNode,
1311
GenerateAnswerNode
1412
)
1513

16-
1714
class JSONScraperGraph(AbstractGraph):
1815
"""
1916
JSONScraperGraph defines a scraping pipeline for JSON files.

scrapegraphai/graphs/json_scraper_multi_graph.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,18 @@
55
from copy import copy, deepcopy
66
from typing import List, Optional
77
from pydantic import BaseModel
8-
98
from .base_graph import BaseGraph
109
from .abstract_graph import AbstractGraph
1110
from .json_scraper_graph import JSONScraperGraph
12-
1311
from ..nodes import (
1412
GraphIteratorNode,
1513
MergeAnswersNode
1614
)
1715

18-
1916
class JSONScraperMultiGraph(AbstractGraph):
2017
"""
21-
JSONScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt.
18+
JSONScraperMultiGraph is a scraping pipeline that scrapes a
19+
list of URLs and generates answers to a given prompt.
2220
It only requires a user prompt and a list of URLs.
2321
2422
Attributes:

scrapegraphai/graphs/markdown_scraper_multi_graph.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,14 @@
55
from copy import copy, deepcopy
66
from typing import List, Optional
77
from pydantic import BaseModel
8-
98
from .base_graph import BaseGraph
109
from .abstract_graph import AbstractGraph
1110
from .markdown_scraper_graph import MDScraperGraph
12-
1311
from ..nodes import (
1412
GraphIteratorNode,
1513
MergeAnswersNode
1614
)
1715

18-
1916
class MDScraperMultiGraph(AbstractGraph):
2017
"""
2118
MDScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and

scrapegraphai/graphs/omni_scraper_graph.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,14 @@
44

55
from typing import Optional
66
from pydantic import BaseModel
7-
87
from .base_graph import BaseGraph
98
from .abstract_graph import AbstractGraph
10-
119
from ..nodes import (
1210
FetchNode,
1311
ParseNode,
1412
ImageToTextNode,
1513
GenerateAnswerOmniNode
1614
)
17-
1815
from ..models import OpenAIImageToText
1916

2017
class OmniScraperGraph(AbstractGraph):

scrapegraphai/graphs/pdf_scraper_graph.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,14 @@
55

66
from typing import Optional
77
from pydantic import BaseModel
8-
98
from .base_graph import BaseGraph
109
from .abstract_graph import AbstractGraph
11-
1210
from ..nodes import (
1311
FetchNode,
1412
ParseNode,
1513
GenerateAnswerPDFNode
1614
)
1715

18-
1916
class PDFScraperGraph(AbstractGraph):
2017
"""
2118
PDFScraperGraph is a scraping pipeline that extracts information from pdf files using a natural

scrapegraphai/graphs/pdf_scraper_multi_graph.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,14 @@
55
from copy import copy, deepcopy
66
from typing import List, Optional
77
from pydantic import BaseModel
8-
98
from .base_graph import BaseGraph
109
from .abstract_graph import AbstractGraph
1110
from .pdf_scraper_graph import PDFScraperGraph
12-
1311
from ..nodes import (
1412
GraphIteratorNode,
1513
MergeAnswersNode
1614
)
1715

18-
1916
class PdfScraperMultiGraph(AbstractGraph):
2017
"""
2118
PdfScraperMultiGraph is a scraping pipeline that scrapes a

scrapegraphai/graphs/script_creator_graph.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,14 @@
44

55
from typing import Optional
66
from pydantic import BaseModel
7-
87
from .base_graph import BaseGraph
98
from .abstract_graph import AbstractGraph
10-
119
from ..nodes import (
1210
FetchNode,
1311
ParseNode,
1412
GenerateScraperNode
1513
)
1614

17-
1815
class ScriptCreatorGraph(AbstractGraph):
1916
"""
2017
ScriptCreatorGraph defines a scraping pipeline for generating web scraping scripts.

scrapegraphai/graphs/script_creator_multi_graph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@
1616
MergeGeneratedScriptsNode
1717
)
1818

19-
2019
class ScriptCreatorMultiGraph(AbstractGraph):
2120
"""
22-
ScriptCreatorMultiGraph is a scraping pipeline that scrapes a list of URLs generating web scraping scripts.
21+
ScriptCreatorMultiGraph is a scraping pipeline that scrapes a list
22+
of URLs generating web scraping scripts.
2323
It only requires a user prompt and a list of URLs.
2424
Attributes:
2525
prompt (str): The user prompt to search the internet.

scrapegraphai/graphs/search_graph.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616
MergeAnswersNode
1717
)
1818

19-
20-
2119
class SearchGraph(AbstractGraph):
2220
"""
2321
SearchGraph is a scraping pipeline that searches the internet for answers to a given prompt.

scrapegraphai/graphs/search_link_graph.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
from pydantic import BaseModel
55
from .base_graph import BaseGraph
66
from .abstract_graph import AbstractGraph
7-
8-
97
from ..nodes import ( FetchNode, ParseNode, SearchLinkNode )
108

119
class SearchLinkGraph(AbstractGraph):
1210
"""
13-
SearchLinkGraph is a scraping pipeline that automates the process of extracting information from web pages using a natural language model to interpret and answer prompts.
11+
SearchLinkGraph is a scraping pipeline that automates the process of
12+
extracting information from web pages using a natural language model
13+
to interpret and answer prompts.
1414
1515
Attributes:
1616
prompt (str): The prompt for the graph.

scrapegraphai/graphs/smart_scraper_graph.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
GenerateAnswerNode
1515
)
1616

17-
1817
class SmartScraperGraph(AbstractGraph):
1918
"""
2019
SmartScraper is a scraping pipeline that automates the process of

0 commit comments

Comments
 (0)