Commit fe3aa28

refactoring of the code
1 parent 7621a7c commit fe3aa28

40 files changed: +84 additions, -193 deletions

scrapegraphai/graphs/abstract_graph.py (11 additions, 8 deletions)

@@ -1,7 +1,6 @@
 """
 AbstractGraph Module
 """
-
 from abc import ABC, abstractmethod
 from typing import Optional
 import uuid
@@ -122,7 +121,7 @@ def _create_llm(self, llm_config: dict) -> object:
         llm_defaults = {"temperature": 0, "streaming": False}
         llm_params = {**llm_defaults, **llm_config}
         rate_limit_params = llm_params.pop("rate_limit", {})
-
+
         if rate_limit_params:
             requests_per_second = rate_limit_params.get("requests_per_second")
             max_retries = rate_limit_params.get("max_retries")
@@ -138,7 +137,7 @@ def _create_llm(self, llm_config: dict) -> object:
                 self.model_token = llm_params["model_tokens"]
             except KeyError as exc:
                 raise KeyError("model_tokens not specified") from exc
-            return llm_params["model_instance"]
+            return llm_params["model_instance"]

         known_providers = {"openai", "azure_openai", "google_genai", "google_vertexai",
                            "ollama", "oneapi", "nvidia", "groq", "anthropic", "bedrock", "mistralai",
@@ -149,16 +148,18 @@ def _create_llm(self, llm_config: dict) -> object:
             llm_params["model"] = split_model_provider[1]

         if llm_params["model_provider"] not in known_providers:
-            raise ValueError(f"Provider {llm_params['model_provider']} is not supported. If possible, try to use a model instance instead.")
+            raise ValueError(f"""Provider {llm_params['model_provider']} is not supported.
+                             If possible, try to use a model instance instead.""")

         try:
             self.model_token = models_tokens[llm_params["model_provider"]][llm_params["model"]]
         except KeyError:
-            print(f"Model {llm_params['model_provider']}/{llm_params['model']} not found, using default token size (8192)")
+            print(f"""Model {llm_params['model_provider']}/{llm_params['model']} not found,
+                  using default token size (8192)""")
             self.model_token = 8192

         try:
-            if llm_params["model_provider"] not in {"oneapi", "nvidia", "ernie", "deepseek", "togetherai"}:
+            if llm_params["model_provider"] not in {"oneapi","nvidia","ernie","deepseek","togetherai"}:
                 if llm_params["model_provider"] == "bedrock":
                     llm_params["model_kwargs"] = { "temperature" : llm_params.pop("temperature") }
                 with warnings.catch_warnings():
@@ -181,14 +182,16 @@ def _create_llm(self, llm_config: dict) -> object:
             try:
                 from langchain_together import ChatTogether
             except ImportError:
-                raise ImportError("The langchain_together module is not installed. Please install it using `pip install scrapegraphai[other-language-models]`.")
+                raise ImportError("""The langchain_together module is not installed.
+                                  Please install it using `pip install scrapegraphai[other-language-models]`.""")
             return ChatTogether(**llm_params)

         elif model_provider == "nvidia":
             try:
                 from langchain_nvidia_ai_endpoints import ChatNVIDIA
             except ImportError:
-                raise ImportError("The langchain_nvidia_ai_endpoints module is not installed. Please install it using `pip install scrapegraphai[other-language-models]`.")
+                raise ImportError("""The langchain_nvidia_ai_endpoints module is not installed.
+                                  Please install it using `pip install scrapegraphai[other-language-models]`.""")
             return ChatNVIDIA(**llm_params)

         except Exception as e:
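
For orientation, the keys _create_llm pops here ("rate_limit", "model_tokens", "model_instance", and the "model" string that is split on "/" into model_provider and model) come straight from the user-facing graph configuration. A minimal sketch of an "llm" block that exercises the rate_limit branch above; the model name and API key are placeholders, not taken from this commit:

    # Sketch of a graph config as consumed by _create_llm. "model" is split
    # on "/" into model_provider and model; "rate_limit" feeds the branch
    # shown in the hunk above. Model name and api_key are placeholders.
    graph_config = {
        "llm": {
            "model": "openai/gpt-4o-mini",   # "<provider>/<model>"
            "api_key": "sk-...",             # placeholder
            "rate_limit": {
                "requests_per_second": 1,
                "max_retries": 3,
            },
        },
    }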

scrapegraphai/graphs/base_graph.py (2 additions, 15 deletions)

@@ -116,36 +116,28 @@ def _execute_standard(self, initial_state: dict) -> Tuple[dict, list]:
             curr_time = time.time()
             current_node = next(node for node in self.nodes if node.node_name == current_node_name)

-            # check if there is a "source" key in the node config
             if current_node.__class__.__name__ == "FetchNode":
-                # get the second key name of the state dictionary
                 source_type = list(state.keys())[1]
                 if state.get("user_prompt", None):
-                    # Set 'prompt' if 'user_prompt' is a string, otherwise None
                     prompt = state["user_prompt"] if isinstance(state["user_prompt"], str) else None

-                # Convert 'local_dir' source type to 'html_dir'
                 if source_type == "local_dir":
                     source_type = "html_dir"
                 elif source_type == "url":
-                    # If the source is a list, add string URLs to 'source'
                     if isinstance(state[source_type], list):
                         for url in state[source_type]:
                             if isinstance(url, str):
                                 source.append(url)
-                    # If the source is a single string, add it to 'source'
                     elif isinstance(state[source_type], str):
                         source.append(state[source_type])

-            # check if there is an "llm_model" variable in the class
             if hasattr(current_node, "llm_model") and llm_model is None:
                 llm_model = current_node.llm_model
                 if hasattr(llm_model, "model_name"):
                     llm_model = llm_model.model_name
                 elif hasattr(llm_model, "model"):
                     llm_model = llm_model.model

-            # check if there is an "embedder_model" variable in the class
             if hasattr(current_node, "embedder_model") and embedder_model is None:
                 embedder_model = current_node.embedder_model
                 if hasattr(embedder_model, "model_name"):
@@ -157,7 +149,6 @@ def _execute_standard(self, initial_state: dict) -> Tuple[dict, list]:
             if isinstance(current_node.node_config,dict):
                 if current_node.node_config.get("schema", None) and schema is None:
                     if not isinstance(current_node.node_config["schema"], dict):
-                        # convert to dict
                         try:
                             schema = current_node.node_config["schema"].schema()
                         except Exception as e:
@@ -220,7 +211,6 @@ def _execute_standard(self, initial_state: dict) -> Tuple[dict, list]:
                 "exec_time": total_exec_time,
             })

-            # Log the graph execution telemetry
             graph_execution_time = time.time() - start_time
             response = state.get("answer", None) if source_type == "url" else None
             content = state.get("parsed_doc", None) if response is not None else None
@@ -272,13 +262,10 @@ def append_node(self, node):

         # if node name already exists in the graph, raise an exception
         if node.node_name in {n.node_name for n in self.nodes}:
-            raise ValueError(f"Node with name '{node.node_name}' already exists in the graph. You can change it by setting the 'node_name' attribute.")
+            raise ValueError(f"""Node with name '{node.node_name}' already exists in the graph.
+                             You can change it by setting the 'node_name' attribute.""")

-        # get the last node in the list
         last_node = self.nodes[-1]
-        # add the edge connecting the last node to the new node
         self.raw_edges.append((last_node, node))
-        # add the node to the list of nodes
         self.nodes.append(node)
-        # update the edges connecting the last node to the new node
         self.edges = self._create_edges({e for e in self.raw_edges})
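
As the last hunk shows, append_node wires the current last node to the new one and rejects duplicate node names. A minimal usage sketch; the node type and its arguments are illustrative (GenerateAnswerNode and its input/output strings appear elsewhere in this commit), while `graph` and `llm_model` are assumed to exist already:

    from scrapegraphai.nodes import GenerateAnswerNode

    def extend_graph(graph, llm_model):
        """Append one more answer node to an existing BaseGraph instance."""
        extra_node = GenerateAnswerNode(
            input="user_prompt & parsed_doc",
            output=["answer"],
            node_config={"llm_model": llm_model},
        )
        graph.append_node(extra_node)  # wires previous last node -> extra_node
        return graph

Appending a second node that reuses an existing node_name raises the reformatted ValueError shown above.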

scrapegraphai/graphs/csv_scraper_graph.py (2 additions, 1 deletion)

@@ -43,7 +43,8 @@ class CSVScraperGraph(AbstractGraph):
     the answer to the prompt as a string.
     run runs the CSVScraperGraph class to extract information from a CSV file based
     on the user's prompt. It requires no additional arguments since all necessary data
-    is stored within the class instance. The method fetches the relevant chunks of text or speech,
+    is stored within the class instance.
+    The method fetches the relevant chunks of text or speech,
     generates an answer based on these chunks, and returns this answer as a string.
     """
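
Reading that docstring alongside the constructors used elsewhere in this commit, a minimal run sketch looks like this; the file name, prompt, and llm settings are placeholders, not taken from the source:

    from scrapegraphai.graphs import CSVScraperGraph

    # Placeholder source path and llm settings.
    csv_graph = CSVScraperGraph(
        prompt="List all products and their prices",
        source="products.csv",
        config={"llm": {"model": "openai/gpt-4o-mini", "api_key": "sk-..."}},
    )
    answer = csv_graph.run()  # returns the answer as a string, per the docstring
    print(answer)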

scrapegraphai/graphs/csv_scraper_multi_graph.py (0 additions, 10 deletions)

@@ -4,8 +4,6 @@

 from typing import List, Optional
 from pydantic import BaseModel
-
-
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from .csv_scraper_graph import CSVScraperGraph
@@ -60,20 +58,12 @@ def _create_graph(self) -> BaseGraph:
         BaseGraph: A graph instance representing the web scraping and searching workflow.
         """

-        # ************************************************
-        # Create a CSVScraperGraph instance
-        # ************************************************
-
         smart_scraper_instance = CSVScraperGraph(
             prompt="",
             source="",
             config=self.copy_config,
         )

-        # ************************************************
-        # Define the graph nodes
-        # ************************************************
-
         graph_iterator_node = GraphIteratorNode(
             input="user_prompt & jsons",
             output=["results"],

scrapegraphai/graphs/deep_scraper_graph.py (7 additions, 5 deletions)

@@ -1,7 +1,6 @@
 """
 DeepScraperGraph Module
 """
-
 from typing import Optional
 from pydantic import BaseModel
 from .base_graph import BaseGraph
@@ -54,7 +53,7 @@ class DeepScraperGraph(AbstractGraph):
     """

     def __init__(self, prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None):
-
+
         super().__init__(prompt, config, source, schema)

         self.input_key = "url" if source.startswith("http") else "local_dir"
@@ -79,7 +78,7 @@ def _create_repeated_graph(self) -> BaseGraph:
                 "llm_model": self.llm_model
             }
         )
-
+
         generate_answer_node = GenerateAnswerNode(
             input="user_prompt & (relevant_chunks | parsed_doc | doc)",
             output=["answer"],
@@ -89,13 +88,15 @@ def _create_repeated_graph(self) -> BaseGraph:
                 "schema": self.schema
             }
         )
+
         search_node = SearchLinkNode(
             input="user_prompt & relevant_chunks",
             output=["relevant_links"],
             node_config={
                 "llm_model": self.llm_model,
             }
         )
+
         graph_iterator_node = GraphIteratorNode(
             input="user_prompt & relevant_links",
             output=["results"],
@@ -104,6 +105,7 @@ def _create_repeated_graph(self) -> BaseGraph:
                 "batchsize": 1
             }
         )
+
         merge_answers_node = MergeAnswersNode(
             input="user_prompt & results",
             output=["answer"],
@@ -143,8 +145,8 @@ def _create_graph(self) -> BaseGraph:
         """

         base_graph = self._create_repeated_graph()
-        graph_iterator_node = list(filter(lambda x: x.node_name == "GraphIterator", base_graph.nodes))[0]
-        # Graph iterator will repeat the same graph for multiple hyperlinks found within input webpage
+        graph_iterator_node = list(filter(lambda x: x.node_name == "GraphIterator",
+                                          base_graph.nodes))[0]
         graph_iterator_node.node_config["graph_instance"] = self
         return base_graph
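
The reflowed lookup still builds a whole list just to take element zero. base_graph.py in this same commit already uses the lazier next() form for the same job; an equivalent sketch, not part of this commit:

    # Equivalent to list(filter(...))[0], but stops at the first match;
    # raises StopIteration instead of IndexError when no node matches.
    graph_iterator_node = next(node for node in base_graph.nodes
                               if node.node_name == "GraphIterator")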

scrapegraphai/graphs/json_scraper_graph.py (0 additions, 1 deletion)

@@ -1,7 +1,6 @@
 """
 JSONScraperGraph Module
 """
-
 from typing import Optional
 from pydantic import BaseModel
 from .base_graph import BaseGraph

scrapegraphai/graphs/json_scraper_multi_graph.py (2 additions, 10 deletions)

@@ -5,7 +5,6 @@
 from copy import deepcopy
 from typing import List, Optional
 from pydantic import BaseModel
-
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from .json_scraper_graph import JSONScraperGraph
@@ -43,7 +42,8 @@ class JSONScraperMultiGraph(AbstractGraph):
     >>> result = search_graph.run()
     """

-    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
+    def __init__(self, prompt: str, source: List[str],
+                 config: dict, schema: Optional[BaseModel] = None):

         self.max_results = config.get("max_results", 3)

@@ -61,21 +61,13 @@ def _create_graph(self) -> BaseGraph:
         BaseGraph: A graph instance representing the web scraping and searching workflow.
         """

-        # ************************************************
-        # Create a JSONScraperGraph instance
-        # ************************************************
-
         smart_scraper_instance = JSONScraperGraph(
             prompt="",
             source="",
             config=self.copy_config,
             schema=self.copy_schema
         )

-        # ************************************************
-        # Define the graph nodes
-        # ************************************************
-
         graph_iterator_node = GraphIteratorNode(
             input="user_prompt & jsons",
             output=["results"],

scrapegraphai/graphs/markdown_scraper_graph.py (5 additions, 1 deletion)

@@ -1,3 +1,6 @@
+"""
+md_scraper module
+"""
 from typing import Optional
 import logging
 from pydantic import BaseModel
@@ -17,7 +20,8 @@ class MDScraperGraph(AbstractGraph):
     config (dict): Configuration parameters for the graph.
     schema (BaseModel): The schema for the graph output.
     llm_model: An instance of a language model client, configured for generating answers.
-    embedder_model: An instance of an embedding model client, configured for generating embeddings.
+    embedder_model: An instance of an embedding model client,
+    configured for generating embeddings.
     verbose (bool): A flag indicating whether to show print statements during execution.
     headless (bool): A flag indicating whether to run the graph in headless mode.

scrapegraphai/graphs/markdown_scraper_multi_graph.py (2 additions, 2 deletions)

@@ -1,7 +1,6 @@
 """
 MDScraperMultiGraph Module
 """
-
 from copy import copy, deepcopy
 from typing import List, Optional
 from pydantic import BaseModel
@@ -42,7 +41,8 @@ class MDScraperMultiGraph(AbstractGraph):
     >>> result = search_graph.run()
     """

-    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
+    def __init__(self, prompt: str, source: List[str],
+                 config: dict, schema: Optional[BaseModel] = None):
         self.copy_config = safe_deepcopy(config)
         self.copy_schema = deepcopy(schema)
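
Note the asymmetry this hunk keeps: the config goes through safe_deepcopy while the schema uses the standard deepcopy. A hypothetical sketch of what such a helper might do; this is NOT the repo's actual implementation, which lives in its utils and may differ:

    from copy import deepcopy

    # Hypothetical fallback-style copy: keep the original reference for
    # members deepcopy cannot handle (e.g. live model clients).
    def safe_deepcopy_sketch(obj):
        try:
            return deepcopy(obj)
        except TypeError:
            return obj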

scrapegraphai/graphs/omni_search_graph.py (0 additions, 10 deletions)

@@ -5,11 +5,9 @@
 from copy import deepcopy
 from typing import Optional
 from pydantic import BaseModel
-
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from .omni_scraper_graph import OmniScraperGraph
-
 from ..nodes import (
     SearchInternetNode,
     GraphIteratorNode,
@@ -63,21 +61,13 @@ def _create_graph(self) -> BaseGraph:
         BaseGraph: A graph instance representing the web scraping and searching workflow.
         """

-        # ************************************************
-        # Create a OmniScraperGraph instance
-        # ************************************************
-
         omni_scraper_instance = OmniScraperGraph(
             prompt="",
             source="",
             config=self.copy_config,
             schema=self.copy_schema
         )

-        # ************************************************
-        # Define the graph nodes
-        # ************************************************
-
         search_internet_node = SearchInternetNode(
             input="user_prompt",
             output=["urls"],

scrapegraphai/graphs/pdf_scraper_graph.py (0 additions, 1 deletion)

@@ -2,7 +2,6 @@
 """
 PDFScraperGraph Module
 """
-
 from typing import Optional
 from pydantic import BaseModel
 from .base_graph import BaseGraph

scrapegraphai/graphs/pdf_scraper_multi_graph.py (0 additions, 8 deletions)

@@ -59,21 +59,13 @@ def _create_graph(self) -> BaseGraph:
         BaseGraph: A graph instance representing the web scraping and searching workflow.
         """

-        # ************************************************
-        # Create a PDFScraperGraph instance
-        # ************************************************
-
         pdf_scraper_instance = PDFScraperGraph(
             prompt="",
             source="",
             config=self.copy_config,
             schema=self.copy_schema
         )

-        # ************************************************
-        # Define the graph nodes
-        # ************************************************
-
         graph_iterator_node = GraphIteratorNode(
             input="user_prompt & pdfs",
             output=["results"],

scrapegraphai/graphs/script_creator_graph.py (0 additions, 1 deletion)

@@ -1,7 +1,6 @@
 """
 ScriptCreatorGraph Module
 """
-
 from typing import Optional
 from pydantic import BaseModel
 from .base_graph import BaseGraph
