Commit e264e92

Added support for Claude 3 models from Anthropic
1 parent 7cc1eda commit e264e92

File tree

5 files changed: +93 -15 lines changed

Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
+"""
+Basic example of a scraping pipeline using SmartScraperGraph with the Anthropic API (Claude 3 Haiku)
+"""
+
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+from langchain_community.llms import HuggingFaceEndpoint
+from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
+
+
+# required environment variables in .env
+# HUGGINGFACEHUB_API_TOKEN
+# ANTHROPIC_API_KEY
+load_dotenv()
+
+HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
+# ************************************************
+# Initialize the model instances
+# ************************************************
+
+
+embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
+    api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
+)
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "api_key": os.getenv("ANTHROPIC_API_KEY"),
+        "model": "claude-3-haiku-20240307",
+        "max_tokens": 4000},
+    "embeddings": {"model_instance": embedder_model_instance}
+}
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="""Don't say anything else. Output JSON only. List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
+    event_end_date, event_end_time, location, event_mode, event_category,
+    third_party_redirect, no_of_days,
+    time_in_hours, hosted_or_attending, refreshments_type,
+    registration_available, registration_link""",
+    # also accepts a string with the already downloaded HTML code
+    source="https://www.hmhco.com/event",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))

requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 langchain==0.1.14
 langchain-openai==0.1.1
 langchain-google-genai==1.0.1
+langchain-anthropic==0.1.11
 html2text==2020.1.16
 faiss-cpu==1.8.0
 beautifulsoup4==4.12.3
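
The pinned langchain-anthropic package is what the new Anthropic wrapper (added below) builds on. For orientation only, a direct use of the package could look like the sketch below; it is not part of this commit and assumes ANTHROPIC_API_KEY is set in the environment, as in the example script above.

import os
from langchain_anthropic import ChatAnthropic

# Minimal sketch: call Claude 3 Haiku directly through langchain-anthropic.
llm = ChatAnthropic(
    model="claude-3-haiku-20240307",
    anthropic_api_key=os.getenv("ANTHROPIC_API_KEY"),
    max_tokens=4000,
)
print(llm.invoke("Reply with one word: ready").content)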

scrapegraphai/graphs/abstract_graph.py

Lines changed: 15 additions & 15 deletions
@@ -10,7 +10,7 @@
 from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
 
 from ..helpers import models_tokens
-from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI
+from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic
 
 
 class AbstractGraph(ABC):
@@ -47,8 +47,8 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
         self.source = source
         self.config = config
         self.llm_model = self._create_llm(config["llm"], chat=True)
-        self.embedder_model = self._create_default_embedder(
-        ) if "embeddings" not in config else self._create_embedder(
+        self.embedder_model = self._create_default_embedder(
+        ) if "embeddings" not in config else self._create_embedder(
             config["embeddings"])
 
         # Set common configuration parameters
@@ -61,23 +61,21 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
         self.final_state = None
         self.execution_info = None
 
-
     def _set_model_token(self, llm):
 
         if 'Azure' in str(type(llm)):
             try:
                 self.model_token = models_tokens["azure"][llm.model_name]
             except KeyError:
                 raise KeyError("Model not supported")
-
+
         elif 'HuggingFaceEndpoint' in str(type(llm)):
             if 'mistral' in llm.repo_id:
                 try:
                     self.model_token = models_tokens['mistral'][llm.repo_id]
                 except KeyError:
                     raise KeyError("Model not supported")
 
-
     def _create_llm(self, llm_config: dict, chat=False) -> object:
         """
         Create a large language model instance based on the configuration provided.
@@ -103,7 +101,7 @@ def _create_llm(self, llm_config: dict, chat=False) -> object:
             if chat:
                 self._set_model_token(llm_params['model_instance'])
             return llm_params['model_instance']
-
+
         # Instantiate the language model based on the model name
         if "gpt-" in llm_params["model"]:
             try:
@@ -174,10 +172,13 @@ def _create_llm(self, llm_config: dict, chat=False) -> object:
                     "temperature": llm_params["temperature"],
                 }
             })
+        elif "claude-3-" in llm_params["model"]:
+            self.model_token = models_tokens["claude"]["claude3"]
+            return Anthropic(llm_params)
         else:
             raise ValueError(
                 "Model provided by the configuration not supported")
-
+
     def _create_default_embedder(self) -> object:
         """
         Create an embedding model instance based on the chosen llm model.
@@ -208,7 +209,7 @@ def _create_default_embedder(self) -> object:
             return BedrockEmbeddings(client=None, model_id=self.llm_model.model_id)
         else:
             raise ValueError("Embedding Model missing or not supported")
-
+
     def _create_embedder(self, embedder_config: dict) -> object:
         """
         Create an embedding model instance based on the configuration provided.
@@ -225,7 +226,7 @@ def _create_embedder(self, embedder_config: dict) -> object:
 
         if 'model_instance' in embedder_config:
             return embedder_config['model_instance']
-
+
         # Instantiate the embedding model based on the model name
         if "openai" in embedder_config["model"]:
             return OpenAIEmbeddings(api_key=embedder_config["api_key"])
@@ -240,14 +241,14 @@ def _create_embedder(self, embedder_config: dict) -> object:
             except KeyError:
                 raise KeyError("Model not supported")
             return OllamaEmbeddings(**embedder_config)
-
+
         elif "hugging_face" in embedder_config["model"]:
             try:
                 models_tokens["hugging_face"][embedder_config["model"]]
             except KeyError:
                 raise KeyError("Model not supported")
             return HuggingFaceHubEmbeddings(model=embedder_config["model"])
-
+
         elif "bedrock" in embedder_config["model"]:
             embedder_config["model"] = embedder_config["model"].split("/")[-1]
             try:
@@ -257,7 +258,7 @@ def _create_embedder(self, embedder_config: dict) -> object:
             return BedrockEmbeddings(client=None, model_id=embedder_config["model"])
         else:
             raise ValueError(
-                "Model provided by the configuration not supported")
+                "Model provided by the configuration not supported")
 
     def get_state(self, key=None) -> dict:
         """""
@@ -281,7 +282,7 @@ def get_execution_info(self):
         Returns:
             dict: The execution information of the graph.
         """
-
+
         return self.execution_info
 
     @abstractmethod
@@ -297,4 +298,3 @@ def run(self) -> str:
         Abstract method to execute the graph and return the result.
         """
         pass
-
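
Net effect of this file's changes: any llm config whose "model" value contains "claude-3-" is now dispatched to the new Anthropic wrapper, with the token limit looked up under models_tokens["claude"]["claude3"]. The sketch below shows a config that reaches the new branch; it is adapted from the example script in this commit, and the prompt and URL are placeholders rather than part of the change.

import os
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from scrapegraphai.graphs import SmartScraperGraph

# Explicit embedder instance, since this commit adds no Anthropic-specific
# default to _create_default_embedder.
embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
    api_key=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    model_name="sentence-transformers/all-MiniLM-l6-v2",
)

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "claude-3-haiku-20240307",  # any "claude-3-*" id selects the Anthropic branch
        "max_tokens": 4000,
    },
    "embeddings": {"model_instance": embedder_model_instance},
}

graph = SmartScraperGraph(
    prompt="Return the page title as JSON.",
    source="https://example.com",
    config=graph_config,
)
print(graph.run())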

scrapegraphai/models/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -11,3 +11,4 @@
 from .hugging_face import HuggingFace
 from .groq import Groq
 from .bedrock import Bedrock
+from .anthropic import Anthropic
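
With this re-export in place, the wrapper is importable alongside the other model classes; a one-line check, not part of the commit:

from scrapegraphai.models import Anthropic  # resolves to the new ChatAnthropic wrapper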

scrapegraphai/models/anthropic.py

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+"""
+Anthropic Module
+"""
+from langchain_anthropic import ChatAnthropic
+
+
+class Anthropic(ChatAnthropic):
+    """
+    A wrapper for the ChatAnthropic class that provides default configuration
+    and could be extended with additional methods if needed.
+
+    Args:
+        llm_config (dict): Configuration parameters for the language model.
+    """
+
+    def __init__(self, llm_config: dict):
+        super().__init__(**llm_config)
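
The wrapper adds no behaviour of its own yet: it unpacks llm_config straight into ChatAnthropic, so every key in the dict must be a field ChatAnthropic accepts. A hedged usage sketch follows (not from the commit; "anthropic_api_key" is spelled out here because whether the shorter "api_key" alias works depends on the langchain-anthropic version).

import os

from scrapegraphai.models import Anthropic

# Sketch: the dict is forwarded verbatim to ChatAnthropic via **llm_config.
llm = Anthropic({
    "model": "claude-3-haiku-20240307",
    "anthropic_api_key": os.getenv("ANTHROPIC_API_KEY"),
    "max_tokens": 4000,
})
print(llm.invoke("Say hi in one word.").content)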
