|
1 | 1 | """
|
2 | 2 | Module for rotating proxies
|
3 | 3 | """
|
| 4 | + |
| 5 | +import ipaddress |
| 6 | +import random |
| 7 | +from typing import List, Optional, Set, TypedDict |
| 8 | + |
| 9 | +import requests |
| 10 | +from fp.errors import FreeProxyException |
4 | 11 | from fp.fp import FreeProxy
|
5 | 12 |
|
6 | 13 |
|
7 |
| -def proxy_generator(num_ips: int) -> list: |
8 |
| - """ |
9 |
| - Generates a specified number of proxy IP addresses using the FreeProxy library. |
class ProxyBrokerCriteria(TypedDict, total=False):
    """Search filters handed to the proxy broker.

    Declared with ``total=False``, so every key may be left out.
    """

    # Whether the proxy servers should be anonymous.
    anonymous: bool
    # Admissible proxy server locations.
    countryset: Set[str]
    # Whether the proxy servers should support HTTPS rather than HTTP.
    secure: bool
    # Maximum timeout for proxy responses.
    timeout: float
    # Whether the search may be extended beyond ``countryset`` when it
    # does not yield enough servers.
    search_outside_if_empty: bool
| 22 | + |
| 23 | + |
class ProxySettings(TypedDict, total=False):
    """Settings describing a single proxy server.

    Declared with ``total=False``, so every key may be left out.
    """

    # Address of the proxy server.
    server: str
    # NOTE(review): presumably the hosts that should bypass the proxy;
    # confirm against the consumer of these settings.
    bypass: str
    # Credentials for the proxy server.
    username: str
    password: str
| 31 | + |
| 32 | + |
class Proxy(ProxySettings, total=False):
    """Proxy server information, optionally carrying broker criteria.

    ``total=False`` is repeated here on purpose: totality is not inherited
    for keys declared in a subclass body, so without it ``criteria`` would
    be a required key for type checkers.
    """

    # Criteria forwarded to the proxy broker search; may be omitted.
    criteria: ProxyBrokerCriteria
| 37 | + |
| 38 | + |
def search_proxy_servers(
    anonymous: bool = True,
    countryset: Optional[Set[str]] = None,
    secure: bool = False,
    timeout: float = 5.0,
    max_shape: int = 5,
    search_outside_if_empty: bool = True,
) -> List[str]:
    """search for working proxy servers matching the given broker criteria

    Candidates are fetched from the broker, shuffled and probed one by one
    until ``max_shape`` working servers have been collected or the
    candidates run out. The broker is always created with ``elite=True``.

    Args:
        anonymous: whether proxy servers should have minimum level-1 anonymity.
        countryset: admissible proxy servers locations.
        secure: whether proxy servers should support HTTPS instead of HTTP;
            defaults to HTTP.
        timeout: The maximum timeout for proxy responses; defaults to 5.0 seconds.
        max_shape: The maximum number of proxy servers to return; defaults to 5.
        search_outside_if_empty: whether the search should be repeated without
            the country restriction when too few servers were found.

    Returns:
        A list of proxy server URLs matching the criteria.

    Raises:
        FreeProxyException: If no working proxy server was found, or if the
            initial candidate lookup itself raises it.

    Example:
        >>> search_proxy_servers(
        ...     anonymous=True,
        ...     countryset={"GB", "US"},
        ...     secure=True,
        ...     timeout=1.0,
        ...     max_shape=2,
        ... )
        [
            "http://103.10.63.135:8080",
            "http://113.20.31.250:8080",
        ]
    """
    broker = FreeProxy(
        anonym=anonymous,
        country_id=countryset,
        elite=True,
        https=secure,
        timeout=timeout,
    )

    def collect(source: FreeProxy, wanted: int, allow_outside: bool) -> List[str]:
        # Probe the candidates in random order.
        candidates = source.get_proxy_list(allow_outside)
        random.shuffle(candidates)

        working = set()

        for address in candidates:
            probe = {source.schema: f"http://{address}"}

            try:
                # Name-mangled access to FreeProxy's private health check.
                url = source._FreeProxy__check_if_proxy_is_working(probe)
            except requests.exceptions.RequestException:
                # An unreachable candidate is simply skipped.
                continue

            if url:
                working.add(url)

                # Stop probing as soon as enough servers were collected.
                if len(working) >= wanted:
                    return list(working)

        shortfall = wanted - len(working)

        if shortfall > 0 and allow_outside:
            # Drop the country restriction and look for the missing servers;
            # the nested call must not widen the search a second time.
            source.country_id = None

            try:
                working |= set(collect(source, shortfall, False))
            except FreeProxyException:
                # Nothing found outside either: keep what we already have.
                pass

        if working:
            return list(working)

        raise FreeProxyException("missing proxy servers for criteria")

    return collect(broker, max_shape, search_outside_if_empty)
| 124 | + |
| 125 | + |
def _parse_proxy(proxy: "ProxySettings") -> "ProxySettings":
    """parses a proxy configuration with known server

    Validation raises ``AssertionError`` explicitly instead of relying on
    ``assert`` statements, so the checks are still enforced when Python
    runs with ``-O`` while callers keep seeing the same exception type.

    Args:
        proxy: The proxy configuration to parse.

    Returns:
        A 'playwright' compliant proxy configuration holding the server,
        the bypass value when it is non-empty, and the credentials when
        both were provided.

    Raises:
        AssertionError: If the server is missing, or if only one of
            username and password is provided.
    """
    if "server" not in proxy:
        raise AssertionError("missing server in the proxy configuration")

    has_username = "username" in proxy
    has_password = "password" in proxy

    # Credentials only make sense as a pair.
    if has_username != has_password:
        message = "username and password must be provided in pairs or not at all"
        raise AssertionError(message)

    parsed = {"server": proxy["server"]}

    # An empty bypass value is dropped rather than forwarded.
    if proxy.get("bypass"):
        parsed["bypass"] = proxy["bypass"]

    if has_username:
        parsed["username"] = proxy["username"]
        parsed["password"] = proxy["password"]

    return parsed
| 153 | + |
| 154 | + |
def _search_proxy(proxy: Proxy) -> ProxySettings:
    """looks up one proxy server through the broker

    The optional ``criteria`` entry of ``proxy`` is forwarded as keyword
    arguments to ``search_proxy_servers``; a single server is requested.

    Args:
        proxy: The proxy configuration to search for.

    Returns:
        A 'playwright' compliant proxy configuration.
    """
    criteria = proxy.get("criteria", {})
    servers = search_proxy_servers(max_shape=1, **criteria)

    return {"server": servers[0]}
| 167 | + |
| 168 | + |
def is_ipv4_address(address: str) -> bool:
    """Tell whether a proxy address parses as an IPv4 address"""
    try:
        ipaddress.IPv4Address(address)
    except ipaddress.AddressValueError:
        return False
    else:
        return True
| 176 | + |
| 177 | + |
def parse_or_search_proxy(proxy: Proxy) -> ProxySettings:
    """parses a proxy configuration or searches for a new one matching
    the specified broker criteria

    Validation raises ``AssertionError`` explicitly instead of relying on
    ``assert`` statements, so the checks are still enforced when Python
    runs with ``-O`` while callers keep seeing the same exception type.

    Args:
        proxy: The proxy configuration to parse or search for.

    Returns:
        A 'playwright' compliant proxy configuration.

    Raises:
        AssertionError: If the server is missing, or if it is neither an
            IPv4 address (optionally followed by ``:port``) nor 'broker'.

    Notes:
        - If the proxy server is a IP address, it is assumed to be
          a proxy server address.
        - If the proxy server is 'broker', a proxy server is searched for
          based on the provided broker criteria.
        - Only the text before the first ':' is checked, so a server
          written with a scheme prefix (e.g. 'http://...') is not
          recognised as an IP address.

    Example:
        >>> proxy = {
        ...     "server": "broker",
        ...     "criteria": {
        ...         "anonymous": True,
        ...         "countryset": {"GB", "US"},
        ...         "secure": True,
        ...         "timeout": 5.0,
        ...         "search_outside_if_empty": False
        ...     }
        ... }

        >>> parse_or_search_proxy(proxy)
        {
            "server": "<proxy-server-matching-criteria>",
        }

    Example:
        >>> proxy = {
        ...     "server": "192.168.1.1:8080",
        ...     "username": "<username>",
        ...     "password": "<password>"
        ... }

        >>> parse_or_search_proxy(proxy)
        {
            "server": "192.168.1.1:8080",
            "username": "<username>",
            "password": "<password>"
        }
    """
    if "server" not in proxy:
        raise AssertionError("missing server in the proxy configuration")

    # Strip an optional ':port' suffix before testing for an IP address.
    server_address = proxy["server"].split(":", maxsplit=1)[0]

    if is_ipv4_address(server_address):
        return _parse_proxy(proxy)

    if proxy["server"] != "broker":
        raise AssertionError("unknown proxy server")

    return _search_proxy(proxy)
0 commit comments