1
- """
2
- research web module
3
- """
4
1
import re
5
2
from typing import List
6
3
from langchain_community .tools import DuckDuckGoSearchResults
7
4
from googlesearch import search as google_search
8
5
import requests
9
6
from bs4 import BeautifulSoup
10
7
11
- def search_on_web (query : str , search_engine : str = "Google" , max_results : int = 10 ) -> List [str ]:
8
+ def search_on_web (query : str , search_engine : str = "Google" , max_results : int = 10 , port : int = 8080 ) -> List [str ]:
12
9
"""
13
10
Searches the web for a given query using specified search engine options.
14
11
15
12
Args:
16
13
query (str): The search query to find on the internet.
17
- search_engine (str, optional): Specifies the search engine to use, options include 'Google', 'DuckDuckGo', or 'Bing '. Default is 'Google'.
14
+ search_engine (str, optional): Specifies the search engine to use, options include 'Google', 'DuckDuckGo', 'Bing', or 'SearXNG '. Default is 'Google'.
18
15
max_results (int, optional): The maximum number of search results to return.
16
+ port (int, optional): The port number to use when searching with 'SearXNG'. Default is 8080.
19
17
20
18
Returns:
21
19
List[str]: A list of URLs as strings that are the search results.
22
20
23
21
Raises:
24
- ValueError: If the search engine specified is neither 'Google', 'DuckDuckGo', nor 'Bing' .
22
+ ValueError: If the search engine specified is not supported .
25
23
26
24
Example:
27
25
>>> search_on_web("example query", search_engine="Google", max_results=5)
28
26
['http://example.com', 'http://example.org', ...]
29
-
30
- This function allows switching between Google, DuckDuckGo, and Bing to perform
31
- internet searches, returning a list of result URLs.
32
27
"""
33
-
28
+
34
29
if search_engine .lower () == "google" :
35
30
res = []
36
31
for url in google_search (query , stop = max_results ):
37
32
res .append (url )
38
33
return res
39
-
34
+
40
35
elif search_engine .lower () == "duckduckgo" :
41
36
research = DuckDuckGoSearchResults (max_results = max_results )
42
37
res = research .run (query )
43
38
links = re .findall (r'https?://[^\s,\]]+' , res )
44
39
return links
45
-
40
+
46
41
elif search_engine .lower () == "bing" :
47
42
headers = {
48
43
"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
@@ -51,11 +46,24 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int =
51
46
response = requests .get (search_url , headers = headers )
52
47
response .raise_for_status ()
53
48
soup = BeautifulSoup (response .text , "html.parser" )
54
-
49
+
55
50
search_results = []
56
51
for result in soup .find_all ('li' , class_ = 'b_algo' , limit = max_results ):
57
52
link = result .find ('a' )['href' ]
58
53
search_results .append (link )
59
54
return search_results
60
-
61
- raise ValueError ("The only search engines available are DuckDuckGo, Google, or Bing" )
55
+
56
+ elif search_engine .lower () == "searxng" :
57
+ url = f"http://localhost:{ port } "
58
+ params = {"q" : query , "format" : "json" }
59
+
60
+ # Send the GET request to the server
61
+ response = requests .get (url , params = params )
62
+
63
+ # Parse the response and limit to the specified max_results
64
+ data = response .json ()
65
+ limited_results = data ["results" ][:max_results ]
66
+ return limited_results
67
+
68
+ else :
69
+ raise ValueError ("The only search engines available are DuckDuckGo, Google, Bing, or SearXNG" )
0 commit comments