Skip to content

Commit 80ece21

Browse files
committed
feat: undetected_chromedriver support
1 parent ea9ed1a commit 80ece21

File tree

2 files changed

+25
-2
lines changed

2 files changed

+25
-2
lines changed

examples/groq/smart_scraper_groq.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
"api_key": groq_key,
2222
"temperature": 0
2323
},
24-
"headless": False
24+
"headless": False,
25+
"backend": "undetected_chromedriver"
2526
}
2627

2728
# ************************************************

scrapegraphai/docloaders/chromium.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,28 @@ def __init__(
6161
self.urls = urls
6262
self.load_state = load_state
6363

64+
async def ascrape_undetected_chromedriver(self, url: str) -> str:
    """
    Asynchronously scrape the content of a given URL using undetected chrome with Selenium.

    Args:
        url (str): The URL to scrape.

    Returns:
        str: The scraped HTML content or an error message if an exception occurs.

    """
    # Imported lazily so the dependency is only required when this backend is used.
    import undetected_chromedriver as uc

    logger.info(f"Starting scraping with {self.backend}...")
    results = ""
    driver = None
    try:
        driver = uc.Chrome()
        # Selenium's ``get`` navigates and returns None; the rendered HTML
        # is exposed afterwards through the ``page_source`` property.
        driver.get(url)
        results = driver.page_source
    except Exception as e:
        # Best-effort contract: failures are reported in the returned string
        # rather than raised to the caller.
        results = f"Error: {e}"
    finally:
        # Always shut the browser down so a Chrome process is not leaked
        # on every call (including when navigation fails).
        if driver is not None:
            driver.quit()
    return results
85+
6486
async def ascrape_playwright(self, url: str) -> str:
6587
"""
6688
Asynchronously scrape the content of a given URL using Playwright's async API.
@@ -75,7 +97,7 @@ async def ascrape_playwright(self, url: str) -> str:
7597
from playwright.async_api import async_playwright
7698
from undetected_playwright import Malenia
7799

78-
logger.info("Starting scraping...")
100+
logger.info(f"Starting scraping with {self.backend}...")
79101
results = ""
80102
async with async_playwright() as p:
81103
browser = await p.chromium.launch(

0 commit comments

Comments
 (0)