Skip to content

Commit 58a257f

Browse files
committed
update model tokens
1 parent 79b8326 commit 58a257f

11 files changed

+172
-109
lines changed

examples/azure/pdf_scraper_azure.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -34,28 +34,10 @@
3434
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
3535
"""
3636

37-
schema = """
38-
{
39-
"type": "object",
40-
"properties": {
41-
"summary": {
42-
"type": "string"
43-
},
44-
"topics": {
45-
"type": "array",
46-
"items": {
47-
"type": "string"
48-
}
49-
}
50-
}
51-
}
52-
"""
53-
5437
pdf_scraper_graph = PDFScraperGraph(
5538
prompt="Summarize the text and find the main topics",
5639
source=source,
5740
config=graph_config,
58-
schema=schema,
5941
)
6042
result = pdf_scraper_graph.run()
6143

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""
2+
Basic example of a multi-source scraping pipeline using SmartScraperMultiGraph
3+
"""
4+
5+
import os, json
6+
from dotenv import load_dotenv
7+
from langchain_openai import AzureChatOpenAI
8+
from langchain_openai import AzureOpenAIEmbeddings
9+
from scrapegraphai.graphs import SmartScraperMultiGraph
10+
11+
load_dotenv()
12+
13+
# ************************************************
14+
# Define the configuration for the graph
15+
# ************************************************
16+
llm_model_instance = AzureChatOpenAI(
17+
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
18+
azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"]
19+
)
20+
21+
embedder_model_instance = AzureOpenAIEmbeddings(
22+
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"],
23+
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
24+
)
25+
26+
# ************************************************
27+
# Build the graph configuration from the model instances
28+
# ************************************************
29+
30+
graph_config = {
31+
"llm": {"model_instance": llm_model_instance},
32+
"embeddings": {"model_instance": embedder_model_instance}
33+
}
34+
# *******************************************************
35+
# Create the SmartScraperMultiGraph instance and run it
36+
# *******************************************************
37+
38+
multiple_search_graph = SmartScraperMultiGraph(
39+
prompt="Who is Marco Perini?",
40+
source= [
41+
"https://perinim.github.io/",
42+
"https://perinim.github.io/cv/"
43+
],
44+
schema=None,
45+
config=graph_config
46+
)
47+
48+
result = multiple_search_graph.run()
49+
print(json.dumps(result, indent=4))

examples/bedrock/pdf_scraper_graph_bedrock.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -35,28 +35,10 @@
3535
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
3636
"""
3737

38-
schema = """
39-
{
40-
"type": "object",
41-
"properties": {
42-
"summary": {
43-
"type": "string"
44-
},
45-
"topics": {
46-
"type": "array",
47-
"items": {
48-
"type": "string"
49-
}
50-
}
51-
}
52-
}
53-
"""
54-
5538
pdf_scraper_graph = PDFScraperGraph(
5639
prompt="Summarize the text and find the main topics",
5740
source=source,
5841
config=graph_config,
59-
schema=schema,
6042
)
6143
result = pdf_scraper_graph.run()
6244

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""
2+
Example showing how PdfScraperMultiGraph works
3+
"""
4+
import os
5+
import json
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import PdfScraperMultiGraph
8+
9+
graph_config = {
10+
"llm": {
11+
"client": "client_name",
12+
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
13+
"temperature": 0.0
14+
},
15+
"embeddings": {
16+
"model": "bedrock/cohere.embed-multilingual-v3"
17+
}
18+
}
19+
# ***************
20+
# Convert to list
21+
# ***************
22+
23+
sources = [
24+
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
25+
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
26+
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
27+
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
28+
]
29+
30+
prompt = """
31+
You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements:
32+
33+
Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables.
34+
Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.
35+
Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.
36+
Response Format: For each abstract, present your response in the following structured format:
37+
38+
Independent Variable (IV):
39+
Dependent Variable (DV):
40+
Exogenous Shock:
41+
42+
Example Queries and Responses:
43+
44+
Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.
45+
46+
Response:
47+
48+
Independent Variable (IV): Employee happiness.
49+
Dependent Variable (DV): Overall firm productivity.
50+
Exogenous Shock: Sudden company-wide increase in bonus payments.
51+
52+
Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons.
53+
54+
Response:
55+
56+
Independent Variable (IV): Exposure to social media.
57+
Dependent Variable (DV): Mental health outcomes.
58+
Exogenous Shock: staggered introduction of Facebook across U.S. colleges.
59+
"""
60+
# *******************************************************
61+
# Create the PdfScraperMultiGraph instance and run it
62+
# *******************************************************
63+
64+
multiple_search_graph = PdfScraperMultiGraph(
65+
prompt=prompt,
66+
source= sources,
67+
schema=None,
68+
config=graph_config
69+
)
70+
71+
result = multiple_search_graph.run()
72+
print(json.dumps(result, indent=4))

examples/deepseek/pdf_scraper_graph_deepseek.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -40,28 +40,10 @@
4040
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
4141
"""
4242

43-
schema = """
44-
{
45-
"type": "object",
46-
"properties": {
47-
"summary": {
48-
"type": "string"
49-
},
50-
"topics": {
51-
"type": "array",
52-
"items": {
53-
"type": "string"
54-
}
55-
}
56-
}
57-
}
58-
"""
59-
6043
pdf_scraper_graph = PDFScraperGraph(
6144
prompt="Summarize the text and find the main topics",
6245
source=source,
6346
config=graph_config,
64-
schema=schema,
6547
)
6648
result = pdf_scraper_graph.run()
6749

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""
2+
Basic example of a multi-source scraping pipeline using SmartScraperMultiGraph
3+
"""
4+
5+
import os, json
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import SmartScraperMultiGraph
8+
9+
load_dotenv()
10+
11+
# ************************************************
12+
# Define the configuration for the graph
13+
# ************************************************
14+
15+
deepseek_key = os.getenv("DEEPSEEK_APIKEY")
16+
17+
graph_config = {
18+
"llm": {
19+
"model": "deepseek-chat",
20+
"openai_api_key": deepseek_key,
21+
"openai_api_base": 'https://api.deepseek.com/v1',
22+
},
23+
"embeddings": {
24+
"model": "ollama/nomic-embed-text",
25+
"temperature": 0,
26+
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
27+
},
28+
"verbose": True,
29+
}
30+
31+
32+
# *******************************************************
33+
# Create the SmartScraperMultiGraph instance and run it
34+
# *******************************************************
35+
36+
multiple_search_graph = SmartScraperMultiGraph(
37+
prompt="Who is Marco Perini?",
38+
source= [
39+
"https://perinim.github.io/",
40+
"https://perinim.github.io/cv/"
41+
],
42+
schema=None,
43+
config=graph_config
44+
)
45+
46+
result = multiple_search_graph.run()
47+
print(json.dumps(result, indent=4))

examples/gemini/pdf_scraper_graph_gemini.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -34,28 +34,10 @@
3434
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
3535
"""
3636

37-
schema = """
38-
{
39-
"type": "object",
40-
"properties": {
41-
"summary": {
42-
"type": "string"
43-
},
44-
"topics": {
45-
"type": "array",
46-
"items": {
47-
"type": "string"
48-
}
49-
}
50-
}
51-
}
52-
"""
53-
5437
pdf_scraper_graph = PDFScraperGraph(
5538
prompt="Summarize the text and find the main topics",
5639
source=source,
5740
config=graph_config,
58-
schema=schema,
5941
)
6042
result = pdf_scraper_graph.run()
6143

examples/groq/pdf_scraper_graph_groq.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -39,28 +39,10 @@
3939
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
4040
"""
4141

42-
schema = """
43-
{
44-
"type": "object",
45-
"properties": {
46-
"summary": {
47-
"type": "string"
48-
},
49-
"topics": {
50-
"type": "array",
51-
"items": {
52-
"type": "string"
53-
}
54-
}
55-
}
56-
}
57-
"""
58-
5942
pdf_scraper_graph = PDFScraperGraph(
6043
prompt="Summarize the text and find the main topics",
6144
source=source,
6245
config=graph_config,
63-
schema=schema,
6446
)
6547
result = pdf_scraper_graph.run()
6648

examples/huggingfacehub/pdf_scraper_graph_huggingfacehub.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -39,28 +39,10 @@
3939
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
4040
"""
4141

42-
schema = """
43-
{
44-
"type": "object",
45-
"properties": {
46-
"summary": {
47-
"type": "string"
48-
},
49-
"topics": {
50-
"type": "array",
51-
"items": {
52-
"type": "string"
53-
}
54-
}
55-
}
56-
}
57-
"""
58-
5942
pdf_scraper_graph = PDFScraperGraph(
6043
prompt="Summarize the text and find the main topics",
6144
source=source,
6245
config=graph_config,
63-
schema=schema,
6446
)
6547
result = pdf_scraper_graph.run()
6648

examples/oneapi/pdf_scraper_graph_oneapi.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,10 @@
2424
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
2525
"""
2626

27+
<<<<<<< Updated upstream
2728

29+
=======
30+
>>>>>>> Stashed changes
2831
pdf_scraper_graph = PDFScraperGraph(
2932
prompt="Summarize the text and find the main topics",
3033
source=source,

scrapegraphai/helpers/models_tokens.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"openai": {
77
"gpt-3.5-turbo-0125": 16385,
88
"gpt-3.5": 4096,
9-
"gpt-3.5-turbo": 4096,
9+
"gpt-3.5-turbo": 16385,
1010
"gpt-3.5-turbo-1106": 16385,
1111
"gpt-3.5-turbo-instruct": 4096,
1212
"gpt-4-0125-preview": 128000,

0 commit comments

Comments
 (0)