Skip to content

Commit 46d72a3

Browse files
authored
Merge pull request #481 from ScrapeGraphAI/479-using-nemotron-from-nvidia
feat: add nvidia connection
2 parents 72f18d1 + c7bac98 commit 46d72a3

37 files changed

+1776
-118
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
"""
Basic example of a scraping pipeline using CSVScraperMultiGraph on CSV documents.

The script reads ``inputs/username.csv`` (resolved relative to this file),
passes its text twice as the list of sources, prints the answer and the
execution info, and finally exports the answer with the CSV/JSON helpers.
"""

import os
import pandas as pd
from dotenv import load_dotenv
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# ``str(DataFrame)`` is the display repr: pandas elides rows/columns once the
# frame exceeds its display options, so larger CSVs would reach the LLM with
# "..." placeholders instead of data. ``to_string()`` renders every row and
# column.
csv_text = text.to_string()

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "api_key": os.getenv("NEMOTRON_APIKEY"),
        "model": "nvidia/meta/llama3-70b-instruct",
    }
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    # The same document is supplied twice to exercise the multi-source graph.
    source=[csv_text, csv_text],
    config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
"""
Basic example of a scraping pipeline using CSVScraperGraph on a CSV document.

The script reads ``inputs/username.csv`` (resolved relative to this file),
passes its text to the graph, prints the answer and the execution info, and
finally exports the answer with the CSV/JSON helpers.
"""

import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# ``str(DataFrame)`` is the display repr: pandas elides rows/columns once the
# frame exceeds its display options, so larger CSVs would reach the LLM with
# "..." placeholders instead of data. ``to_string()`` renders every row and
# column.
csv_text = text.to_string()

# ************************************************
# Define the configuration for the graph
# ************************************************

nemotron_key = os.getenv("NEMOTRON_APIKEY")

graph_config = {
    "llm": {
        "api_key": nemotron_key,
        "model": "nvidia/meta/llama3-70b-instruct",
    },
}

# ************************************************
# Create the CSVScraperGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperGraph(
    prompt="List me all the last names",
    source=csv_text,  # Pass the content of the file, not the file object
    config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
"""
Example of a custom graph assembled from existing nodes.

The pipeline wires five nodes in sequence (robots check -> fetch -> parse ->
RAG -> answer generation), executes it against a single URL and prints the
value stored under the ``answer`` key of the final state.
"""

import os
from dotenv import load_dotenv

from langchain_openai import OpenAIEmbeddings
from scrapegraphai.models import OpenAI
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        # The sibling Nemotron examples read NEMOTRON_APIKEY; the previous
        # name NEMOTRON_KEY is still honoured as a fallback so existing
        # .env files keep working.
        "api_key": os.getenv("NEMOTRON_APIKEY") or os.getenv("NEMOTRON_KEY"),
        # NOTE(review): this model id and the OpenAI wrappers below look like
        # leftovers from another provider's example - confirm the intended
        # NVIDIA model/class before relying on this script.
        "model": "claude-3-haiku-20240307",
    },
}

# ************************************************
# Define the graph nodes
# ************************************************

llm_model = OpenAI(graph_config["llm"])
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)

# define the nodes for the graph
robot_node = RobotsNode(
    input="url",
    output=["is_scrapable"],
    node_config={
        "llm_model": llm_model,
        "force_scraping": True,
        "verbose": True,
    }
)

fetch_node = FetchNode(
    input="url | local_dir",
    output=["doc", "link_urls", "img_urls"],
    node_config={
        "verbose": True,
        "headless": True,
    }
)

parse_node = ParseNode(
    input="doc",
    output=["parsed_doc"],
    node_config={
        "chunk_size": 4096,
        "verbose": True,
    }
)

rag_node = RAGNode(
    input="user_prompt & (parsed_doc | doc)",
    output=["relevant_chunks"],
    node_config={
        "llm_model": llm_model,
        "embedder_model": embedder,
        "verbose": True,
    }
)

generate_answer_node = GenerateAnswerNode(
    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
    output=["answer"],
    node_config={
        "llm_model": llm_model,
        "verbose": True,
    }
)

# ************************************************
# Create the graph by defining the connections
# ************************************************

graph = BaseGraph(
    nodes=[
        robot_node,
        fetch_node,
        parse_node,
        rag_node,
        generate_answer_node,
    ],
    edges=[
        (robot_node, fetch_node),
        (fetch_node, parse_node),
        (parse_node, rag_node),
        (rag_node, generate_answer_node)
    ],
    entry_point=robot_node
)

# ************************************************
# Execute the graph
# ************************************************

result, execution_info = graph.execute({
    "user_prompt": "Describe the content",
    "url": "https://example.com/"
})

# get the answer from the result
result = result.get("answer", "No answer found.")
print(result)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
"""
Basic example of a scraping pipeline using DeepScraperGraph.

Builds the graph configuration, runs the graph against a careers page and
prints the result, the ``relevant_links`` state entry and the execution info.
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import DeepScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

llm_settings = {
    "api_key": os.getenv("NEMOTRON_APIKEY"),
    "model": "nvidia/meta/llama3-70b-instruct",
}

graph_config = {"llm": llm_settings, "verbose": True, "max_depth": 1}

# ************************************************
# Create the DeepScraperGraph instance and run it
# ************************************************

# The source also accepts a string with the already downloaded HTML code.
TARGET_URL = (
    "https://www.google.com/about/careers/applications/jobs/results/"
    "?location=Bangalore%20India"
)

deep_scraper_graph = DeepScraperGraph(
    prompt="List me all the job titles and detailed job description.",
    source=TARGET_URL,
    config=graph_config,
)

result = deep_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

exec_info = deep_scraper_graph.get_execution_info()
relevant_links = deep_scraper_graph.get_state("relevant_links")
print(relevant_links)
print(prettify_exec_info(exec_info))

examples/nemotron/inputs/books.xml

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
<?xml version="1.0"?>
2+
<catalog>
3+
<book id="bk101">
4+
<author>Gambardella, Matthew</author>
5+
<title>XML Developer's Guide</title>
6+
<genre>Computer</genre>
7+
<price>44.95</price>
8+
<publish_date>2000-10-01</publish_date>
9+
<description>An in-depth look at creating applications
10+
with XML.</description>
11+
</book>
12+
<book id="bk102">
13+
<author>Ralls, Kim</author>
14+
<title>Midnight Rain</title>
15+
<genre>Fantasy</genre>
16+
<price>5.95</price>
17+
<publish_date>2000-12-16</publish_date>
18+
<description>A former architect battles corporate zombies,
19+
an evil sorceress, and her own childhood to become queen
20+
of the world.</description>
21+
</book>
22+
<book id="bk103">
23+
<author>Corets, Eva</author>
24+
<title>Maeve Ascendant</title>
25+
<genre>Fantasy</genre>
26+
<price>5.95</price>
27+
<publish_date>2000-11-17</publish_date>
28+
<description>After the collapse of a nanotechnology
29+
society in England, the young survivors lay the
30+
foundation for a new society.</description>
31+
</book>
32+
<book id="bk104">
33+
<author>Corets, Eva</author>
34+
<title>Oberon's Legacy</title>
35+
<genre>Fantasy</genre>
36+
<price>5.95</price>
37+
<publish_date>2001-03-10</publish_date>
38+
<description>In post-apocalypse England, the mysterious
39+
agent known only as Oberon helps to create a new life
40+
for the inhabitants of London. Sequel to Maeve
41+
Ascendant.</description>
42+
</book>
43+
<book id="bk105">
44+
<author>Corets, Eva</author>
45+
<title>The Sundered Grail</title>
46+
<genre>Fantasy</genre>
47+
<price>5.95</price>
48+
<publish_date>2001-09-10</publish_date>
49+
<description>The two daughters of Maeve, half-sisters,
50+
battle one another for control of England. Sequel to
51+
Oberon's Legacy.</description>
52+
</book>
53+
<book id="bk106">
54+
<author>Randall, Cynthia</author>
55+
<title>Lover Birds</title>
56+
<genre>Romance</genre>
57+
<price>4.95</price>
58+
<publish_date>2000-09-02</publish_date>
59+
<description>When Carla meets Paul at an ornithology
60+
conference, tempers fly as feathers get ruffled.</description>
61+
</book>
62+
<book id="bk107">
63+
<author>Thurman, Paula</author>
64+
<title>Splish Splash</title>
65+
<genre>Romance</genre>
66+
<price>4.95</price>
67+
<publish_date>2000-11-02</publish_date>
68+
<description>A deep sea diver finds true love twenty
69+
thousand leagues beneath the sea.</description>
70+
</book>
71+
<book id="bk108">
72+
<author>Knorr, Stefan</author>
73+
<title>Creepy Crawlies</title>
74+
<genre>Horror</genre>
75+
<price>4.95</price>
76+
<publish_date>2000-12-06</publish_date>
77+
<description>An anthology of horror stories about roaches,
78+
centipedes, scorpions and other insects.</description>
79+
</book>
80+
<book id="bk109">
81+
<author>Kress, Peter</author>
82+
<title>Paradox Lost</title>
83+
<genre>Science Fiction</genre>
84+
<price>6.95</price>
85+
<publish_date>2000-11-02</publish_date>
86+
<description>After an inadvertent trip through a Heisenberg
87+
Uncertainty Device, James Salway discovers the problems
88+
of being quantum.</description>
89+
</book>
90+
<book id="bk110">
91+
<author>O'Brien, Tim</author>
92+
<title>Microsoft .NET: The Programming Bible</title>
93+
<genre>Computer</genre>
94+
<price>36.95</price>
95+
<publish_date>2000-12-09</publish_date>
96+
<description>Microsoft's .NET initiative is explored in
97+
detail in this deep programmer's reference.</description>
98+
</book>
99+
<book id="bk111">
100+
<author>O'Brien, Tim</author>
101+
<title>MSXML3: A Comprehensive Guide</title>
102+
<genre>Computer</genre>
103+
<price>36.95</price>
104+
<publish_date>2000-12-01</publish_date>
105+
<description>The Microsoft MSXML3 parser is covered in
106+
detail, with attention to XML DOM interfaces, XSLT processing,
107+
SAX and more.</description>
108+
</book>
109+
<book id="bk112">
110+
<author>Galos, Mike</author>
111+
<title>Visual Studio 7: A Comprehensive Guide</title>
112+
<genre>Computer</genre>
113+
<price>49.95</price>
114+
<publish_date>2001-04-16</publish_date>
115+
<description>Microsoft Visual Studio 7 is explored in depth,
116+
looking at how Visual Basic, Visual C++, C#, and ASP+ are
117+
integrated into a comprehensive development
118+
environment.</description>
119+
</book>
120+
</catalog>

0 commit comments

Comments
 (0)