Commit 46195d6

Update: proxy integration in googlesearch
1 parent 95d00e9 commit 46195d6

7 files changed (+312 −7 lines)

=1.2.5

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+Requirement already satisfied: googlesearch-python in /home/funavry/anaconda3/envs/colscrap-env/lib/python3.11/site-packages (1.2.5)
+Requirement already satisfied: beautifulsoup4>=4.9 in /home/funavry/anaconda3/envs/colscrap-env/lib/python3.11/site-packages (from googlesearch-python) (4.12.3)
+Requirement already satisfied: requests>=2.20 in /home/funavry/anaconda3/envs/colscrap-env/lib/python3.11/site-packages (from googlesearch-python) (2.32.3)
+Requirement already satisfied: soupsieve>1.2 in /home/funavry/anaconda3/envs/colscrap-env/lib/python3.11/site-packages (from beautifulsoup4>=4.9->googlesearch-python) (2.6)
+Requirement already satisfied: charset-normalizer<4,>=2 in /home/funavry/anaconda3/envs/colscrap-env/lib/python3.11/site-packages (from requests>=2.20->googlesearch-python) (3.3.2)
+Requirement already satisfied: idna<4,>=2.5 in /home/funavry/anaconda3/envs/colscrap-env/lib/python3.11/site-packages (from requests>=2.20->googlesearch-python) (3.10)
+Requirement already satisfied: urllib3<3,>=1.21.1 in /home/funavry/anaconda3/envs/colscrap-env/lib/python3.11/site-packages (from requests>=2.20->googlesearch-python) (2.2.3)
+Requirement already satisfied: certifi>=2017.4.17 in /home/funavry/anaconda3/envs/colscrap-env/lib/python3.11/site-packages (from requests>=2.20->googlesearch-python) (2024.8.30)
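
The file's odd name points to how it was likely created: in an unquoted shell command, ">=1.2.5" is parsed as an output redirection, so pip's stdout lands in a file literally named "=1.2.5", which matches this file's contents. A sketch of the mistake and the fix, assuming a POSIX shell:

    pip install googlesearch-python >=1.2.5    # ">" redirects; creates a file named "=1.2.5"
    pip install "googlesearch-python>=1.2.5"   # quoted: ">=1.2.5" stays part of the requirement

This file looks like an accidental commit rather than intended content.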

notebook.ipynb

Lines changed: 274 additions & 0 deletions
New file. The raw JSON diff is shown here as the notebook's cells and their recorded outputs:

In [ ]:
    pip install -e .

In [2]:
    from scrapegraphai.graphs import SearchGraph

stderr:
    /home/funavry/anaconda3/envs/colscrap-env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
      from .autonotebook import tqdm as notebook_tqdm

In [3]:
    import os
    os.environ['AZURE_OPENAI_GPT4O_SERVICE']="dwtc-openai-gpt4o"
    os.environ['AZURE_OPENAI_GPT4O_DEPLOYMENT']="gpt4o"
    os.environ['AZURE_OPENAI_GPT4O_KEY']="3cb3875145ec425880c6974d74e10cd7"
    os.environ['AZURE_OPENAI_GPT4O_API_VERSION']="2024-02-15-preview"

    graph_config = {
        "llm": {
            "model": "azure_openai/gpt-4o",
            "api_key": os.environ['AZURE_OPENAI_GPT4O_KEY'],
            "azure_endpoint": f"https://{os.environ['AZURE_OPENAI_GPT4O_SERVICE']}.openai.azure.com",
            "azure_deployment": os.environ['AZURE_OPENAI_GPT4O_DEPLOYMENT'],
            "api_version": os.environ['AZURE_OPENAI_GPT4O_API_VERSION'],
            "temperature": 0.0,
        },
        "loader_kwargs": {
            "proxy": {
                "server": '63.141.62.30:6323',
                "username": "vzktqema",
                "password": "btngo4nn7n6l",
            },
        },
        "verbose": False,
        "headless": True,
        "max_sites": 1
    }

    prompt = "List the top 5 companies in the world by market capitalization."
    search_graph = SearchGraph(
        prompt=prompt,
        config=graph_config
    )
    result = search_graph.run()

stdout:
    PROXY: https://vzktqema:[email protected]:6323

TypeError traceback:
    Cell In[3], line 36
    ---> 36 result = search_graph.run()

    File ~/AUK/Colloborations_Scrapegraphai/Scrapegraph-ai/scrapegraphai/graphs/search_graph.py:121, in SearchGraph.run(self)
        120 inputs = {"user_prompt": self.prompt}
    --> 121 self.final_state, self.execution_info = self.graph.execute(inputs)

    File ~/AUK/Colloborations_Scrapegraphai/Scrapegraph-ai/scrapegraphai/graphs/base_graph.py:281, in BaseGraph.execute(self, initial_state)
    --> 281 return self._execute_standard(initial_state)

    File ~/AUK/Colloborations_Scrapegraphai/Scrapegraph-ai/scrapegraphai/graphs/base_graph.py:197, in BaseGraph._execute_standard(self, initial_state)
    --> 197 raise e

    File ~/AUK/Colloborations_Scrapegraphai/Scrapegraph-ai/scrapegraphai/graphs/base_graph.py:181, in BaseGraph._execute_standard(self, initial_state)
    --> 181 result = current_node.execute(state)

    File ~/AUK/Colloborations_Scrapegraphai/Scrapegraph-ai/scrapegraphai/nodes/search_internet_node.py:97, in SearchInternetNode.execute(self, state)
    ---> 97 answer = search_on_web(query=search_query, max_results=self.max_results,
         98                        search_engine=self.search_engine, proxy=self.proxy)

    File ~/AUK/Colloborations_Scrapegraphai/Scrapegraph-ai/scrapegraphai/utils/research_web.py:74, in search_on_web(query, search_engine, max_results, port, timeout, proxy)
         72 print("PROXY: ", proxy)
         73 res = []
    ---> 74 for url in google_search(query, num_results=max_results, proxy=proxy):
         75     res.append(url)
         76 return filter_pdf_links(res)

    TypeError: search() got an unexpected keyword argument 'num_results'

In [1]:
    from googlesearch import search

ImportError traceback:
    Cell In[1], line 1
    ----> 1 from googlesearch import search

    ImportError: cannot import name 'search' from 'googlesearch' (unknown location)

In [8]:
    !pip uninstall google -y

stderr:
    102.77s - pydevd: Sending message related to process being replaced timed-out after 5 seconds

stdout:
    Found existing installation: google 3.0.0
    Uninstalling google-3.0.0:
      Successfully uninstalled google-3.0.0

In [ ]:
    search()

In [4]:
    import inspect
    inspect.getfile(search)

Out[4]:
    '/home/funavry/anaconda3/envs/colscrap-env/lib/python3.11/site-packages/googlesearch/__init__.py'

In [1]:
    from googlesearch import search
    search("Google", num_results=100)

ImportError traceback:
    Cell In[1], line 1
    ----> 1 from googlesearch import search
          2 search("Google", num_results=100)

    ImportError: cannot import name 'search' from 'googlesearch' (unknown location)

In [10]:
    from scrapegraphai.utils import research_web

In [ ]:
    import concurrent.futures
    import time
    from googlesearch import search  # Ensure you have the googlesearch package installed

    def fetch_url(query):
        # Fetch the URLs from the search query
        return list(search(query, stop=10))  # Fetch 10 URLs for each query

    def main():
        query = "Weather in Pakistan"
        batch_size = 50  # Number of requests to send concurrently

        res = []
        # Create a ThreadPoolExecutor to manage threads
        with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
            # Submit multiple fetch requests to the executor
            future_to_url = {executor.submit(fetch_url, query): i for i in range(batch_size)}

            for future in concurrent.futures.as_completed(future_to_url):
                try:
                    urls = future.result()
                    res.append(urls)  # Extend the results with the fetched URLs
                except Exception as e:
                    print(f"Error fetching data: {e}")

        return res

    if __name__ == "__main__":
        result = main()
        print(len(result))

Notebook metadata: kernel "colscrap-env", Python 3.11.10, nbformat 4.
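
The notebook's failures all trace back to the old google package. Its googlesearch.search() takes num/stop rather than num_results (hence the TypeError above), and after "pip uninstall google" the leftover googlesearch directory appears to leave a broken namespace (hence the "unknown location" ImportError) until googlesearch-python is reinstalled. Note also that the final cell's search(query, stop=10) still uses the old package's stop keyword. For reference, a minimal call against the googlesearch-python interface this commit relies on (query and proxy values are placeholders):

    from googlesearch import search  # googlesearch-python >= 1.2.5

    # num_results and proxy are the keywords the updated search_on_web passes.
    for url in search("top companies by market capitalization",
                      num_results=5,
                      proxy="http://USER:PASS@HOST:PORT"):
        print(url)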

pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -28,13 +28,13 @@ dependencies = [
     "free-proxy>=1.1.1",
     "playwright>=1.43.0",
     "undetected-playwright>=0.3.0",
-    "google>=3.0.0",
     "langchain-ollama>=0.1.3",
     "simpleeval>=1.0.0",
     "semchunk==2.2.0",
     "transformers==4.44.2",
     "qdrant-client>=1.11.3",
-    "fastembed>=0.3.6"
+    "fastembed>=0.3.6",
+    "googlesearch-python>=1.2.5"
 ]

 license = "MIT"

requirements.txt

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@ minify-html>=0.15.0
 free-proxy>=1.1.1
 playwright>=1.43.0
 undetected-playwright>=0.3.0
-google>=3.0.0
 semchunk>=1.0.1
 langchain-ollama>=0.1.3
-simpleeval>=0.9.13
+simpleeval>=0.9.13
+googlesearch-python>=1.2.5
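
Both dependency files make the same swap: the old google package is dropped in favor of googlesearch-python, whose search() accepts the num_results and proxy keywords this commit depends on. Since both packages install a top-level googlesearch module, an existing environment likely needs the explicit cleanup the notebook performs; a sketch, assuming a POSIX shell (the quotes prevent the ">=1.2.5" redirection that produced the stray "=1.2.5" file above):

    pip uninstall -y google
    pip install "googlesearch-python>=1.2.5"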

scrapegraphai/graphs/search_graph.py

Lines changed: 1 addition & 0 deletions
@@ -72,6 +72,7 @@ def _create_graph(self) -> BaseGraph:
             node_config={
                 "llm_model": self.llm_model,
                 "max_results": self.max_results,
+                "loader_kwargs": self.loader_kwargs,
                 "search_engine": self.copy_config.get("search_engine")
             }
         )
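
Forwarding loader_kwargs into the node config is what lets a proxy declared once in the graph config reach the search step as well as the page loader. A minimal sketch of the config shape this expects, mirroring the notebook above (server and credentials are placeholders):

    graph_config = {
        "llm": {...},                      # LLM settings as usual
        "loader_kwargs": {
            "proxy": {
                "server": "HOST:PORT",     # proxy endpoint
                "username": "USER",        # proxy auth
                "password": "PASS",
            },
        },
    }
    # SearchGraph._create_graph() now copies loader_kwargs into the
    # SearchInternetNode's node_config, where the proxy is read back out.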

scrapegraphai/nodes/search_internet_node.py

Lines changed: 2 additions & 1 deletion
@@ -41,6 +41,7 @@ def __init__(
         self.verbose = (
             False if node_config is None else node_config.get("verbose", False)
         )
+        self.proxy = node_config.get("loader_kwargs", {}).get("proxy", None)
         self.search_engine = (
             node_config["search_engine"]
             if node_config.get("search_engine")
@@ -94,7 +95,7 @@ def execute(self, state: dict) -> dict:
         self.logger.info(f"Search Query: {search_query}")

         answer = search_on_web(query=search_query, max_results=self.max_results,
-                               search_engine=self.search_engine)
+                               search_engine=self.search_engine, proxy=self.proxy)

         if len(answer) == 0:
             raise ValueError("Zero results found for the search query.")
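
Two observations on this hunk. First, unlike the guarded verbose lookup just above it, the new self.proxy line assumes node_config is not None. Second, the notebook's traceback shows where the proxy ends up: research_web.py:74 iterates google_search(query, num_results=max_results, proxy=proxy). A sketch of that consuming side, under the assumption (not shown in this diff) that search_on_web first collapses the loader_kwargs proxy dict into the single URL string seen in the notebook's "PROXY:" printout:

    from googlesearch import search as google_search  # googlesearch-python >= 1.2.5

    def proxy_url(proxy: dict | None) -> str | None:
        # Hypothetical helper: turn the loader_kwargs proxy dict into the
        # "scheme://user:pass@host:port" string googlesearch-python expects.
        if not proxy:
            return None
        return f"https://{proxy['username']}:{proxy['password']}@{proxy['server']}"

    def search_on_web(query: str, max_results: int = 10, proxy: dict | None = None) -> list[str]:
        # Mirrors research_web.py:72-76 as captured in the traceback;
        # the real function also filters PDF links before returning.
        proxy_str = proxy_url(proxy)
        print("PROXY: ", proxy_str)
        res = []
        for url in google_search(query, num_results=max_results, proxy=proxy_str):
            res.append(url)
        return res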

0 commit comments
