Skip to content

Commit f6077d1

Browse files
committed
feat: add new proxy rotation function
1 parent 44bc919 commit f6077d1

File tree

3 files changed

+43
-2
lines changed

3 files changed

+43
-2
lines changed

scrapegraphai/nodes/fetch_node.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from langchain_core.documents import Document
88
from .base_node import BaseNode
99
from ..utils.remover import remover
10+
from ..utils.proxy_rotation import proxy_rotation
1011

1112

1213
class FetchNode(BaseNode):
@@ -37,13 +38,16 @@ class FetchNode(BaseNode):
3738
to succeed.
3839
"""
3940

40-
def __init__(self, input: str, output: List[str], node_name: str = "Fetch"):
41+
def __init__(self, input: str, output: List[str], num_prox: int = 1,
             node_name: str = "Fetch"):
    """
    Initializes the FetchNode with its input, output and proxy settings.

    Arguments:
        input (str): passed through unchanged to the base node constructor
        output (List[str]): passed through unchanged to the base node
            constructor
        num_prox (int): number of proxies to rotate through when a URL is
            fetched; proxies are only used when this is greater than 1,
            so the default of 1 fetches without any proxy
        node_name (str): name of the node
    """
    # NOTE(review): the trailing 1 is presumably the minimum number of
    # inputs expected by BaseNode -- confirm against BaseNode.__init__.
    super().__init__(node_name, "node", input, output, 1)
    # Stored as-is; execute() compares it against 1 to decide whether to
    # request rotated proxies.
    self.num_prox = num_prox
4751

4852
def execute(self, state):
4953
"""
@@ -78,7 +82,11 @@ def execute(self, state):
7882

7983
# if it is a URL
8084
else:
81-
loader = AsyncHtmlLoader(source)
85+
if self.num_prox > 1:
86+
loader = AsyncHtmlLoader(
87+
source, proxies=proxy_rotation(self.num_prox))
88+
else:
89+
loader = AsyncHtmlLoader(source)
8290
document = loader.load()
8391
compressed_document = [
8492
Document(page_content=remover(str(document)))]

scrapegraphai/utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@
55
from .convert_to_csv import convert_to_csv
66
from .convert_to_json import convert_to_json
77
from .prettify_exec_info import prettify_exec_info
8+
from .proxy_rotation import proxy_rotation

scrapegraphai/utils/proxy_rotation.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""
2+
Module for rotating proxies
3+
"""
4+
from fp.fp import FreeProxy
5+
6+
7+
def proxy_rotation(num_ips: int):
    """
    Collects a number of proxy addresses using the FreeProxy library.

    Args:
        num_ips (int): The number of proxy addresses to collect. Zero or a
            negative value yields an empty dictionary.

    Returns:
        dict: A dictionary mapping each position in the rotation
            (0 .. num_ips - 1) to the value returned by ``FreeProxy.get()``
            for that lookup.

    Example:
        The addresses below are illustrative only; real values depend on
        the live proxy list and use whatever string format
        ``FreeProxy.get()`` returns.

        >>> proxy_rotation(3)  # doctest: +SKIP
        {
            0: '103.10.63.135:8080',
            1: '176.9.75.42:8080',
            2: '37.57.216.2:8080'
        }
    """
    # Nothing to look up: return early instead of relying on an empty range.
    if num_ips <= 0:
        return {}

    # rand=True asks FreeProxy to shuffle its candidate list. With the
    # default ordering every lookup walks the same list from the top and
    # tends to return the same first working proxy, which defeats rotation.
    return {index: FreeProxy(rand=True).get() for index in range(num_ips)}

0 commit comments

Comments
 (0)