Skip to content

Commit 7783bfb

Browse files
authored
Merge pull request #706 from ScrapeGraphAI/pre/beta
Pre/beta
2 parents b2822f6 + 6d8f543 commit 7783bfb

File tree

4 files changed

+87
-21
lines changed

4 files changed

+87
-21
lines changed

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
## [1.22.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.22.0-beta.4...v1.22.0-beta.5) (2024-09-27)
2+
3+
4+
### Features
5+
6+
* add reasoning integration ([b2822f6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/b2822f620a610e61d295cbf4b670aa08fde9de24))
7+
8+
## [1.22.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.22.0-beta.3...v1.22.0-beta.4) (2024-09-27)
9+
10+
11+
### Features
12+
13+
* add html_mode to smart_scraper ([bdcffd6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/bdcffd6360237b27797546a198ceece55ce4bc81))
14+
115
## [1.22.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.22.0-beta.2...v1.22.0-beta.3) (2024-09-25)
216

317

examples/extras/html_mode.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""
2+
Basic example of scraping pipeline using SmartScraper
3+
By default, SmartScraper converts the fetched page's code to
4+
Markdown. If you want to use the original HTML code instead, you have
5+
to set "html_mode": True in the graph config.
6+
"""
7+
8+
import os
9+
import json
10+
from dotenv import load_dotenv
11+
from scrapegraphai.graphs import SmartScraperGraph
12+
from scrapegraphai.utils import prettify_exec_info
13+
14+
load_dotenv()
15+
16+
# ************************************************
17+
# Define the configuration for the graph
18+
# ************************************************
19+
20+
21+
graph_config = {
22+
"llm": {
23+
"api_key": os.getenv("OPENAI_API_KEY"),
24+
"model": "openai/gpt-4o",
25+
},
26+
"html_mode": True,
27+
"verbose": True,
28+
"headless": False,
29+
}
30+
31+
# ************************************************
32+
# Create the SmartScraperGraph instance and run it
33+
# ************************************************
34+
35+
smart_scraper_graph = SmartScraperGraph(
36+
prompt="List me what does the company do, the name and a contact email.",
37+
source="https://scrapegraphai.com/",
38+
config=graph_config
39+
)
40+
41+
result = smart_scraper_graph.run()
42+
print(json.dumps(result, indent=4))
43+
44+
# ************************************************
45+
# Get graph execution info
46+
# ************************************************
47+
48+
graph_exec_info = smart_scraper_graph.get_execution_info()
49+
print(prettify_exec_info(graph_exec_info))

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "scrapegraphai"
33

4-
version = "1.22.0b3"
4+
version = "1.22.0b5"
55

66
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
77
authors = [

scrapegraphai/graphs/smart_scraper_graph.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,7 @@ def _create_graph(self) -> BaseGraph:
7070
"scrape_do": self.config.get("scrape_do")
7171
}
7272
)
73-
parse_node = ParseNode(
74-
input="doc",
75-
output=["parsed_doc"],
76-
node_config={
77-
"llm_model": self.llm_model,
78-
"chunk_size": self.model_token
79-
}
80-
)
73+
8174

8275
generate_answer_node = GenerateAnswerNode(
8376
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
@@ -89,6 +82,15 @@ def _create_graph(self) -> BaseGraph:
8982
}
9083
)
9184

85+
if self.config.get("html_mode") is not True:
86+
87+
parse_node = ParseNode(
88+
input="doc",
89+
output=["parsed_doc"],
90+
node_config={
91+
"llm_model": self.llm_model,
92+
"chunk_size": self.model_token
93+
9294
if self.config.get("reasoning"):
9395
reasoning_node = ReasoningNode(
9496
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
@@ -104,11 +106,13 @@ def _create_graph(self) -> BaseGraph:
104106
nodes=[
105107
fetch_node,
106108
parse_node,
109+
107110
reasoning_node,
108111
generate_answer_node,
109112
],
110113
edges=[
111114
(fetch_node, parse_node),
115+
(parse_node, generate_answer_node)
112116
(parse_node, reasoning_node),
113117
(reasoning_node, generate_answer_node)
114118
],
@@ -117,18 +121,17 @@ def _create_graph(self) -> BaseGraph:
117121
)
118122

119123
return BaseGraph(
120-
nodes=[
121-
fetch_node,
122-
parse_node,
123-
generate_answer_node,
124-
],
125-
edges=[
126-
(fetch_node, parse_node),
127-
(parse_node, generate_answer_node)
128-
],
129-
entry_point=fetch_node,
130-
graph_name=self.__class__.__name__
131-
)
124+
nodes=[
125+
fetch_node,
126+
generate_answer_node,
127+
],
128+
edges=[
129+
(fetch_node, generate_answer_node)
130+
],
131+
entry_point=fetch_node,
132+
graph_name=self.__class__.__name__
133+
)
134+
132135

133136
def run(self) -> str:
134137
"""

0 commit comments

Comments
 (0)