Skip to content

Commit 167f970

Browse files
committed
feat: fetch_node improved
1 parent 8883bce commit 167f970

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

scrapegraphai/nodes/fetch_node.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -285,8 +285,14 @@ def handle_web_source(self, state, source):
285285
metadata={"source": source}) for content in data]
286286
elif self.scrape_do is not None:
287287
from ..docloaders.scrape_do import scrape_do_fetch
288-
data = scrape_do_fetch(self.scrape_do.get("api_key"),
289-
source)
288+
if self.scrape_do.get("use_proxy") is None or self.scrape_do.get("geoCode") is None or self.scrape_do.get("super_proxy") is None:
289+
data = scrape_do_fetch(self.scrape_do.get("api_key"),
290+
source)
291+
else:
292+
data = scrape_do_fetch(self.scrape_do.get("api_key"),
293+
source, self.scrape_do.get("use_proxy"),
294+
self.scrape_do.get("geoCode"),
295+
self.scrape_do.get("super_proxy"))
290296

291297
document = [Document(page_content=data,
292298
metadata={"source": source})]
@@ -295,7 +301,7 @@ def handle_web_source(self, state, source):
295301
document = loader.load()
296302

297303
if not document or not document[0].page_content.strip():
298-
raise ValueError("""No HTML body content found in
304+
raise ValueError("""No HTML body content found in
299305
the document fetched by ChromiumLoader.""")
300306
parsed_content = document[0].page_content
301307

0 commit comments

Comments
 (0)