ScrapeGraphAI · PeriniM · May 5, 2024 · May 3, 2024 · May 3, 2024
diff --git a/pyproject.toml b/pyproject.toml
@@ -41,7 +41,8 @@ free-proxy = "1.1.1"
 langchain-groq = "0.1.3"
 playwright = "^1.43.0"
 langchain-aws = "^0.1.2"
-
+langchain-anthropic = "^0.1.11"
+yahoo-search-py = "^0.3"
 
 [tool.poetry.dev-dependencies]
 pytest = "8.0.0"

diff --git a/requirements.txt b/requirements.txt
@@ -15,3 +15,5 @@ free-proxy==1.1.1
 langchain-groq==0.1.3
 playwright==1.43.0
 langchain-aws==0.1.2
+langchain-anthropic==0.1.11
+yahoo-search-py==0.3
diff --git a/scrapegraphai/utils/research_web.py b/scrapegraphai/utils/research_web.py
@@ -1,10 +1,11 @@
-""" 
+"""
 Module for making the request on the web
 """
 import re
 from typing import List
 from langchain_community.tools import DuckDuckGoSearchResults
-from googlesearch import search
+from googlesearch import search as google_search
+from yahoo_search import search as yahoo_search
 
 
 def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10) -> List[str]:
@@ -29,18 +30,29 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int =
     This function allows switching between Google and DuckDuckGo to perform internet searches, returning a list of result URLs.
     """
 
-    if search_engine == "Google":
+    if search_engine.lower() == "google":
         res = []
 
-        for url in search(query, stop=max_results):
+        for url in google_search(query, stop=max_results):
             res.append(url)
         return res
-    elif search_engine == "DuckDuckGo":
+    elif search_engine.lower() == "duckduckgo":
         research = DuckDuckGoSearchResults(max_results=max_results)
         res = research.run(query)
 
         links = re.findall(r'https?://[^\s,\]]+', res)
 
         return links
+    elif search_engine.lower() == "yahoo":
+        list_result = yahoo_search(query)
+        results = []
+        for page in list_result.pages:
+            if len(results) >= max_results:  # Check if max_results has already been reached
+                break  # Exit loop if max_results has been reached
+            try:
+                results.append(page.link)
+            except AttributeError:
+                continue
+        return results
     raise ValueError(
-        "The only search engines avaiable are DuckDuckGo or Google")
+        "The only search engines available are DuckDuckGo, Google or Yahoo")