Skip to content

Commit b115f96

Browse files
committed
refactoring of code and documentation
1 parent 203ee2c commit b115f96

File tree

6 files changed

+39
-7
lines changed

6 files changed

+39
-7
lines changed

scrapegraphai/graphs/csv_scraper_graph.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,38 @@
1313

1414
class CSVScraperGraph(AbstractGraph):
1515
"""
16-
SmartScraper is a comprehensive web scraping tool that automates the process of extracting
17-
information from web pages using a natural language model to interpret and answer prompts.
16+
A class representing a graph for extracting information from CSV files.
17+
18+
Attributes:
19+
prompt (str): The prompt used to generate an answer.
20+
source (str): The source of the data, which can be either a CSV
21+
file or a directory containing multiple CSV files.
22+
config (dict): Additional configuration parameters needed by some nodes in the graph.
23+
24+
Methods:
25+
__init__ (prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None):
26+
Initializes the CSVScraperGraph with a prompt, source, and configuration.
27+
28+
__init__ initializes the CSVScraperGraph class. It requires the user's prompt as input,
29+
along with the source of the data (which can be either a single CSV file or a directory
30+
containing multiple CSV files), and any necessary configuration parameters.
31+
32+
Methods:
33+
_create_graph (): Creates the graph of nodes representing the workflow for web scraping.
34+
35+
_create_graph generates the web scraping process workflow
36+
represented by a directed acyclic graph.
37+
This method is used internally to create the scraping pipeline
38+
without having to execute it immediately. The result is a BaseGraph instance
39+
containing nodes that fetch and process data from a source, and other helper functions.
40+
41+
Methods:
42+
run () -> str: Executes the web scraping process and returns
43+
the answer to the prompt as a string.
44+
run executes the CSVScraperGraph to extract information from a CSV file based
45+
on the user's prompt. It requires no additional arguments since all necessary data
46+
is stored within the class instance. The method fetches the relevant chunks of text,
47+
generates an answer based on these chunks, and returns this answer as a string.
1848
"""
1949

2050
def __init__(self, prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None):

scrapegraphai/graphs/pdf_scraper_multi_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ class PdfScraperMultiGraph(AbstractGraph):
4141
>>> result = search_graph.run()
4242
"""
4343

44-
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
44+
def __init__(self, prompt: str, source: List[str],
45+
config: dict, schema: Optional[BaseModel] = None):
4546

4647
if all(isinstance(value, str) for value in config.values()):
4748
self.copy_config = copy(config)

scrapegraphai/graphs/xml_scraper_multi_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ class XMLScraperMultiGraph(AbstractGraph):
4343
>>> result = search_graph.run()
4444
"""
4545

46-
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
46+
def __init__(self, prompt: str, source: List[str],
47+
config: dict, schema: Optional[BaseModel] = None):
4748

4849
if all(isinstance(value, str) for value in config.values()):
4950
self.copy_config = copy(config)

scrapegraphai/telemetry/telemetry.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,4 +188,4 @@ def wrapped_fn(*args, **kwargs):
188188
log_event("function_usage", {"function_name": function_name})
189189
except Exception as e:
190190
logger.debug(f"Failed to send telemetry for function usage. Encountered: {e}")
191-
return wrapped_fn
191+
return wrapped_fn

scrapegraphai/utils/cleanup_html.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
"""
22
Module for minimizing the code
33
"""
4+
from urllib.parse import urljoin
45
from bs4 import BeautifulSoup
56
from minify_html import minify
6-
from urllib.parse import urljoin
77

88
def cleanup_html(html_content: str, base_url: str) -> str:
99
"""

scrapegraphai/utils/convert_to_md.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
convert_to_md modul
2+
convert_to_md module
33
"""
44
from urllib.parse import urlparse
55
import html2text

0 commit comments

Comments
 (0)