Skip to content

Commit 88f04bf

Browse files
authored
Merge pull request #161 from cemkod/main
Support for Anthropic Claude 3 models
2 parents ac6d200 + 5a67bca commit 88f04bf

File tree

7 files changed

+84
-169
lines changed

7 files changed

+84
-169
lines changed
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
"""
Basic example of a scraping pipeline using SmartScraperGraph with an
Anthropic Claude 3 model as the LLM and Hugging Face Inference API embeddings.
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
# NOTE(review): HuggingFaceEndpoint is not referenced below; only the
# embeddings class is used by this example.
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings


# required environment variables in .env
# HUGGINGFACEHUB_API_TOKEN
# ANTHROPIC_API_KEY
load_dotenv()

HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')

# ************************************************
# Initialize the model instances
# ************************************************

# Embeddings are served by the Hugging Face Inference API; the LLM itself
# is configured separately in graph_config below.
embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
    api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
)

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "claude-3-haiku-20240307",
        "max_tokens": 4000},
    "embeddings": {"model_instance": embedder_model_instance}
}

smart_scraper_graph = SmartScraperGraph(
    prompt="""Don't say anything else. Output JSON only. List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link""",
    # also accepts a string with the already downloaded HTML code
    source="https://www.hmhco.com/event",
    config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
langchain==0.1.14
22
langchain-openai==0.1.1
33
langchain-google-genai==1.0.1
4+
langchain-anthropic==0.1.11
45
html2text==2020.1.16
56
faiss-cpu==1.8.0
67
beautifulsoup4==4.12.3

scrapegraphai/graphs/abstract_graph.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
77
from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings
88
from ..helpers import models_tokens
9-
from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Claude
10-
from langchain_aws.embeddings.bedrock import BedrockEmbeddings
11-
from langchain_google_genai import GoogleGenerativeAIEmbeddings
9+
from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic
1210

1311

1412
class AbstractGraph(ABC):
@@ -200,6 +198,9 @@ def _create_llm(self, llm_config: dict, chat=False) -> object:
200198
"temperature": llm_params["temperature"],
201199
}
202200
})
201+
elif "claude-3-" in llm_params["model"]:
202+
self.model_token = models_tokens["claude"]["claude3"]
203+
return Anthropic(llm_params)
203204
else:
204205
raise ValueError(
205206
"Model provided by the configuration not supported")

scrapegraphai/graphs/base_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from langchain_community.callbacks import get_openai_callback
88
from typing import Tuple
99

10+
1011
class BaseGraph:
1112
"""
1213
BaseGraph manages the execution flow of a graph composed of interconnected nodes.
@@ -82,7 +83,7 @@ def execute(self, initial_state: dict) -> Tuple[dict, list]:
8283
Returns:
8384
Tuple[dict, list]: A tuple containing the final state and a list of execution info.
8485
"""
85-
86+
8687
current_node_name = self.nodes[0]
8788
state = initial_state
8889

scrapegraphai/models/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@
1111
from .hugging_face import HuggingFace
1212
from .groq import Groq
1313
from .bedrock import Bedrock
14-
from .claude import Claude
14+
from .anthropic import Anthropic

scrapegraphai/models/anthropic.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
"""
2+
Anthropic Module
3+
"""
4+
from langchain_anthropic import ChatAnthropic
5+
6+
7+
class Anthropic(ChatAnthropic):
    """
    Thin subclass of ChatAnthropic that is built from a single configuration
    dictionary rather than from individual keyword arguments. It adds no
    behaviour of its own and can be extended with extra methods if needed.

    Args:
        llm_config (dict): Configuration parameters for the language model;
            every entry is forwarded to ChatAnthropic as a keyword argument.
    """

    def __init__(self, llm_config: dict) -> None:
        # Unpack the dict so each key reaches the parent constructor by name.
        super().__init__(**llm_config)

scrapegraphai/nodes/generate_answer_node_csv.py

Lines changed: 0 additions & 164 deletions
This file was deleted.

0 commit comments

Comments
 (0)