Skip to content

Commit bb851d7

Browse files
committed
feat: add optional headers to request
1 parent 503dbd1 commit bb851d7

File tree

5 files changed

+63
-4
lines changed

5 files changed

+63
-4
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
"""Example: send a SmartScraper request with custom HTTP headers.

The ``headers`` mapping is passed to ``Client.smartscraper`` as the optional
``headers`` argument. Replace the placeholder API key before running.
"""

from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

sgai_logger.set_logging(level="INFO")

# Initialize the client with explicit API key
sgai_client = Client(api_key="your-api-key-here")

try:
    # SmartScraper request
    response = sgai_client.smartscraper(
        website_url="https://example.com",
        user_prompt="Extract the main heading, description, and summary of the webpage",
        headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
        },
    )

    # Print the response
    print(f"Request ID: {response['request_id']}")
    print(f"Result: {response['result']}")
finally:
    # Close the client on every exit path: a failed request or a missing
    # response key must not leave the client open.
    sgai_client.close()

scrapegraph-py/scrapegraph_py/async_client.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,11 +132,15 @@ async def _make_request(self, method: str, url: str, **kwargs) -> Any:
132132
logger.info(f"⏳ Waiting {retry_delay}s before retry {attempt + 2}")
133133
await asyncio.sleep(retry_delay)
134134

135-
async def markdownify(self, website_url: str):
135+
async def markdownify(
136+
self, website_url: str, headers: Optional[dict[str, str]] = None
137+
):
136138
"""Send a markdownify request"""
137139
logger.info(f"🔍 Starting markdownify request for {website_url}")
140+
if headers:
141+
logger.debug("🔧 Using custom headers")
138142

139-
request = MarkdownifyRequest(website_url=website_url)
143+
request = MarkdownifyRequest(website_url=website_url, headers=headers)
140144
logger.debug("✅ Request validation passed")
141145

142146
result = await self._make_request(
@@ -164,6 +168,7 @@ async def smartscraper(
164168
user_prompt: str,
165169
website_url: Optional[str] = None,
166170
website_html: Optional[str] = None,
171+
headers: Optional[dict[str, str]] = None,
167172
output_schema: Optional[BaseModel] = None,
168173
):
169174
"""Send a smartscraper request"""
@@ -172,11 +177,14 @@ async def smartscraper(
172177
logger.debug(f"🌐 URL: {website_url}")
173178
if website_html:
174179
logger.debug("📄 Using provided HTML content")
180+
if headers:
181+
logger.debug("🔧 Using custom headers")
175182
logger.debug(f"📝 Prompt: {user_prompt}")
176183

177184
request = SmartScraperRequest(
178185
website_url=website_url,
179186
website_html=website_html,
187+
headers=headers,
180188
user_prompt=user_prompt,
181189
output_schema=output_schema,
182190
)

scrapegraph-py/scrapegraph_py/client.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,11 +144,13 @@ def _make_request(self, method: str, url: str, **kwargs) -> Any:
144144
logger.error(f"🔴 Connection Error: {str(e)}")
145145
raise ConnectionError(f"Failed to connect to API: {str(e)}")
146146

147-
def markdownify(self, website_url: str):
147+
def markdownify(self, website_url: str, headers: Optional[dict[str, str]] = None):
148148
"""Send a markdownify request"""
149149
logger.info(f"🔍 Starting markdownify request for {website_url}")
150+
if headers:
151+
logger.debug("🔧 Using custom headers")
150152

151-
request = MarkdownifyRequest(website_url=website_url)
153+
request = MarkdownifyRequest(website_url=website_url, headers=headers)
152154
logger.debug("✅ Request validation passed")
153155

154156
result = self._make_request(
@@ -174,6 +176,7 @@ def smartscraper(
174176
user_prompt: str,
175177
website_url: Optional[str] = None,
176178
website_html: Optional[str] = None,
179+
headers: Optional[dict[str, str]] = None,
177180
output_schema: Optional[BaseModel] = None,
178181
):
179182
"""Send a smartscraper request"""
@@ -182,11 +185,14 @@ def smartscraper(
182185
logger.debug(f"🌐 URL: {website_url}")
183186
if website_html:
184187
logger.debug("📄 Using provided HTML content")
188+
if headers:
189+
logger.debug("🔧 Using custom headers")
185190
logger.debug(f"📝 Prompt: {user_prompt}")
186191

187192
request = SmartScraperRequest(
188193
website_url=website_url,
189194
website_html=website_html,
195+
headers=headers,
190196
user_prompt=user_prompt,
191197
output_schema=output_schema,
192198
)

scrapegraph-py/scrapegraph_py/models/markdownify.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,21 @@
11
# Models for markdownify endpoint
22

3+
from typing import Optional
34
from uuid import UUID
45

56
from pydantic import BaseModel, Field, model_validator
67

78

89
class MarkdownifyRequest(BaseModel):
910
website_url: str = Field(..., example="https://scrapegraphai.com/")
11+
headers: Optional[dict[str, str]] = Field(
12+
None,
13+
example={
14+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
15+
"Cookie": "cookie1=value1; cookie2=value2",
16+
},
17+
description="Optional headers to send with the request, including cookies and user agent",
18+
)
1019

1120
@model_validator(mode="after")
1221
def validate_url(self) -> "MarkdownifyRequest":

scrapegraph-py/scrapegraph_py/models/smartscraper.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ class SmartScraperRequest(BaseModel):
2020
example="<html><body><h1>Title</h1><p>Content</p></body></html>",
2121
description="HTML content, maximum size 2MB",
2222
)
23+
headers: Optional[dict[str, str]] = Field(
24+
None,
25+
example={
26+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
27+
"Cookie": "cookie1=value1; cookie2=value2",
28+
},
29+
description="Optional headers to send with the request, including cookies and user agent",
30+
)
2331
output_schema: Optional[Type[BaseModel]] = None
2432

2533
@model_validator(mode="after")

0 commit comments

Comments
 (0)