
Commit 0048ba1

Merge branch 'pre/beta' of https://github.com/ScrapeGraphAI/Scrapegraph-ai into pre/beta
2 parents 71ae384 + 2eba73b commit 0048ba1

40 files changed: +1682 -26 lines changed

CHANGELOG.md

Lines changed: 35 additions & 0 deletions
@@ -1,3 +1,38 @@
## [1.13.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.4...v1.13.0-beta.5) (2024-08-08)

### Bug Fixes

* **chunking:** count tokens from words instead of characters ([5ec2de9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5ec2de9e1a14def5596738b6cdf769f5039a246d)), closes [#513](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/513)

## [1.13.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.3...v1.13.0-beta.4) (2024-08-07)

### Bug Fixes

* refactoring of merge_answer_node ([898e5a7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/898e5a7af504fbf4c1cabb14103e66184037de49))

## [1.13.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.2...v1.13.0-beta.3) (2024-08-07)

### Features

* add mistral support ([17f2707](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/17f2707313f65a1e96443b3c8a1f5137892f2c5a))

### Bug Fixes

* **FetchNode:** handling of missing browser_base key ([07720b6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/07720b6e0ca10ba6ce3c1359706a09baffcc4ad0))
* **AbstractGraph:** LangChain warnings handling, Mistral tokens ([786af99](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/786af992f8fbdadfdc3d2d6a06c0cfd81289f8f2))

### chore

* **models_tokens:** add mistral models ([5e82432](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5e824327c3acb69d53f3519344d0f8c2e3defa8b))
* **mistral:** create examples ([f8ad616](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f8ad616e10c271443e2dcb4123c8ddb91de2ff69))
* **examples:** fix Mistral examples ([b0ffc51](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/b0ffc51e5415caec562a565710f5195afe1fbcb2))
* update requirements for mistral ([9868555](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/986855512319541d1d02356df9ad61ab7fc5d807))

## [1.13.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.1...v1.13.0-beta.2) (2024-08-07)
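For orientation, a rough sketch of the idea behind the chunking fix listed in 1.13.0-beta.5 above: estimate tokens from the word count rather than the character count. The helper name and the 1.3 tokens-per-word ratio below are illustrative assumptions, not the library's actual code (see commit 5ec2de9 for that).

def approx_tokens(text: str) -> int:
    # Illustrative only: estimate tokens from the number of words rather than
    # from len(text); the 1.3 tokens-per-word ratio is an assumed rule of thumb,
    # not the value used in Scrapegraph-ai.
    return int(len(text.split()) * 1.3)

chunk = "count tokens from words instead of characters"
print(approx_tokens(chunk))  # 9, versus len(chunk) == 45 if characters were counted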

examples/mistral/.env.example

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
MISTRAL_API_KEY="YOUR MISTRAL API KEY"
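The Mistral example scripts added in this commit read this key through python-dotenv. A minimal sketch of that wiring, mirroring what the example files below already do (nothing here goes beyond those files):

import os
from dotenv import load_dotenv

load_dotenv()  # loads MISTRAL_API_KEY from the .env file shown above

graph_config = {
    "llm": {
        "api_key": os.getenv("MISTRAL_API_KEY"),
        "model": "mistral/open-mistral-nemo",
    },
}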
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
"""

import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()
# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# ************************************************
# Define the configuration for the graph
# ************************************************
mistral_key = os.getenv("MISTRAL_API_KEY")

graph_config = {
    "llm": {
        "api_key": mistral_key,
        "model": "mistral/open-mistral-nemo",
    },
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    source=[str(text), str(text)],
    config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
"""
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
"""

import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# ************************************************
# Define the configuration for the graph
# ************************************************

mistral_key = os.getenv("MISTRAL_API_KEY")

graph_config = {
    "llm": {
        "api_key": mistral_key,
        "model": "mistral/open-mistral-nemo",
    },
}

# ************************************************
# Create the CSVScraperGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperGraph(
    prompt="List me all the last names",
    source=str(text),  # Pass the content of the file, not the file object
    config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
"""
Example of custom graph using existing nodes
"""

import os
from dotenv import load_dotenv

from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

mistral_key = os.getenv("MISTRAL_API_KEY")
graph_config = {
    "llm": {
        "api_key": mistral_key,
        "model": "mistral/open-mistral-nemo",
    },
}

# ************************************************
# Define the graph nodes
# ************************************************

llm_model = ChatMistralAI(**graph_config["llm"])
embedder = MistralAIEmbeddings(api_key=llm_model.mistral_api_key)

# define the nodes for the graph
robot_node = RobotsNode(
    input="url",
    output=["is_scrapable"],
    node_config={
        "llm_model": llm_model,
        "force_scraping": True,
        "verbose": True,
    }
)

fetch_node = FetchNode(
    input="url | local_dir",
    output=["doc", "link_urls", "img_urls"],
    node_config={
        "verbose": True,
        "headless": True,
    }
)
parse_node = ParseNode(
    input="doc",
    output=["parsed_doc"],
    node_config={
        "chunk_size": 4096,
        "verbose": True,
    }
)
rag_node = RAGNode(
    input="user_prompt & (parsed_doc | doc)",
    output=["relevant_chunks"],
    node_config={
        "llm_model": llm_model,
        "embedder_model": embedder,
        "verbose": True,
    }
)
generate_answer_node = GenerateAnswerNode(
    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
    output=["answer"],
    node_config={
        "llm_model": llm_model,
        "verbose": True,
    }
)

# ************************************************
# Create the graph by defining the connections
# ************************************************

graph = BaseGraph(
    nodes=[
        robot_node,
        fetch_node,
        parse_node,
        rag_node,
        generate_answer_node,
    ],
    edges=[
        (robot_node, fetch_node),
        (fetch_node, parse_node),
        (parse_node, rag_node),
        (rag_node, generate_answer_node)
    ],
    entry_point=robot_node
)

# ************************************************
# Execute the graph
# ************************************************

result, execution_info = graph.execute({
    "user_prompt": "Describe the content",
    "url": "https://example.com/"
})

# get the answer from the result
result = result.get("answer", "No answer found.")
print(result)
Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
"""
Basic example of scraping pipeline using DeepScraperGraph
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import DeepScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

mistral_key = os.getenv("MISTRAL_API_KEY")

graph_config = {
    "llm": {
        "api_key": mistral_key,
        "model": "mistral/open-mistral-nemo",
    },
    "verbose": True,
    "max_depth": 1
}

# ************************************************
# Create the DeepScraperGraph instance and run it
# ************************************************

deep_scraper_graph = DeepScraperGraph(
    prompt="List me all the job titles and detailed job description.",
    # also accepts a string with the already downloaded HTML code
    source="https://www.google.com/about/careers/applications/jobs/results/?location=Bangalore%20India",
    config=graph_config
)

result = deep_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = deep_scraper_graph.get_execution_info()
print(deep_scraper_graph.get_state("relevant_links"))
print(prettify_exec_info(graph_exec_info))

0 commit comments
