Skip to content

Commit 8883bce

Browse files
committed
add proxy integration
1 parent f5e7a8b commit 8883bce

File tree

1 file changed

+27
-9
lines changed

1 file changed

+27
-9
lines changed

scrapegraphai/docloaders/scrape_do.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,41 @@
11
"""
2-
scrape_do module
2+
Scrape_do module
33
"""
44
import urllib.parse
55
import requests
6+
import urllib3
67

7-
def scrape_do_fetch(token, target_url):
8+
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
9+
10+
def scrape_do_fetch(token, target_url, use_proxy=False, geoCode=None, super_proxy=False):
811
"""
9-
This function takes a token and a URL as inputs.
10-
It returns the IP address of the machine associated with the given URL.
12+
Fetches the IP address of the machine associated with the given URL using Scrape.do.
1113
1214
Args:
13-
token (str): The API token for scrape.do service.
15+
token (str): The API token for Scrape.do service.
1416
target_url (str): A valid web page URL to fetch its associated IP address.
17+
use_proxy (bool): Whether to use Scrape.do proxy mode. Default is False.
18+
geoCode (str, optional): Specify the country code for
19+
geolocation-based proxies. Default is None.
20+
super_proxy (bool): If True, use Residential & Mobile Proxy Networks. Default is False.
1521
1622
Returns:
17-
str: The IP address of the machine associated with the target URL.
23+
str: The raw response from the target URL.
1824
"""
19-
2025
encoded_url = urllib.parse.quote(target_url)
21-
url = f"http://api.scrape.do?token={token}&url={encoded_url}"
22-
response = requests.request("GET", url)
26+
if use_proxy:
27+
# Create proxy mode URL
28+
proxyModeUrl = f"http://{token}:@proxy.scrape.do:8080"
29+
proxies = {
30+
"http": proxyModeUrl,
31+
"https": proxyModeUrl,
32+
}
33+
# Add optional geoCode and super proxy parameters if provided
34+
params = {"geoCode": geoCode, "super": str(super_proxy).lower()} if geoCode else {}
35+
response = requests.get(target_url, proxies=proxies, verify=False, params=params)
36+
else:
37+
# API Mode URL
38+
url = f"http://api.scrape.do?token={token}&url={encoded_url}"
39+
response = requests.get(url)
40+
2341
return response.text

0 commit comments

Comments
 (0)