
Commit 47645d9

removed unused files

1 parent 5211cbe · commit 47645d9

23 files changed: 42 additions and 169 deletions

manual deployment/autorequirements.py

Lines changed: 0 additions & 30 deletions
This file was deleted.

manual deployment/commit_and_push.sh

Lines changed: 0 additions & 36 deletions
This file was deleted.

manual deployment/commit_and_push_with_tests.sh

Lines changed: 0 additions & 36 deletions
This file was deleted.

manual deployment/deploy_on_pip.sh

Lines changed: 0 additions & 15 deletions
This file was deleted.

manual deployment/installation.sh

Lines changed: 0 additions & 8 deletions
This file was deleted.

scrapegraphai/builders/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 """
-__init__.py file for builders folder
+__init__.py file for builders folder
 """
 
 from .graph_builder import GraphBuilder

(The removed and added lines render identically here; the change appears to be whitespace-only.)

scrapegraphai/builders/graph_builder.py

Lines changed: 0 additions & 1 deletion
@@ -6,7 +6,6 @@
 from langchain_community.chat_models import ErnieBotChat
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_openai import ChatOpenAI
-
 from ..helpers import nodes_metadata, graph_schema
 
 class GraphBuilder:

scrapegraphai/helpers/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -1,7 +1,6 @@
 """
 __init__.py for the helpers folder
 """
-
 from .nodes_metadata import nodes_metadata
 from .schemas import graph_schema
 from .models_tokens import models_tokens

scrapegraphai/integrations/burr_bridge.py

Lines changed: 0 additions & 1 deletion
@@ -2,7 +2,6 @@
 Bridge class to integrate Burr into ScrapeGraphAI graphs
 [Burr](https://github.com/DAGWorks-Inc/burr)
 """
-
 import re
 import uuid
 from hashlib import md5

scrapegraphai/nodes/fetch_node_level_k.py

Lines changed: 14 additions & 10 deletions
@@ -1,11 +1,12 @@
+"""
+fetch_node_level_k module
+"""
 from typing import List, Optional
-from .base_node import BaseNode
-from ..docloaders import ChromiumLoader
-from ..utils.cleanup_html import cleanup_html
-from ..utils.convert_to_md import convert_to_md
+from urllib.parse import urljoin
 from langchain_core.documents import Document
 from bs4 import BeautifulSoup
-from urllib.parse import quote, urljoin
+from .base_node import BaseNode
+from ..docloaders import ChromiumLoader
 
 class FetchNodeLevelK(BaseNode):
     """
@@ -102,17 +103,18 @@ def fetch_content(self, source: str, loader_kwargs) -> Optional[str]:
         Optional[str]: The fetched HTML content or None if fetching failed.
         """
         self.logger.info(f"--- (Fetching HTML from: {source}) ---")
-
+
         if self.browser_base is not None:
             try:
                 from ..docloaders.browser_base import browser_base_fetch
             except ImportError:
                 raise ImportError("""The browserbase module is not installed.
                                   Please install it using `pip install browserbase`.""")
 
-            data = browser_base_fetch(self.browser_base.get("api_key"),
+            data = browser_base_fetch(self.browser_base.get("api_key"),
                                       self.browser_base.get("project_id"), [source])
-            document = [Document(page_content=content, metadata={"source": source}) for content in data]
+            document = [Document(page_content=content,
+                                 metadata={"source": source}) for content in data]
         else:
             loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs)
             document = loader.load()
@@ -179,7 +181,8 @@ def obtain_content(self, documents: List, loader_kwargs) -> List:
             full_links = self.get_full_links(source, links)
 
             for link in full_links:
-                if not any(d.get('source', '') == link for d in documents) and not any(d.get('source', '') == link for d in new_documents):
+                if not any(d.get('source', '') == link for d in documents) \
+                    and not any(d.get('source', '') == link for d in new_documents):
                     new_documents.append({"source": link})
 
         documents.extend(new_documents)
@@ -208,7 +211,8 @@ def process_links(self, base_url: str, links: list,
 
             if current_depth < depth:
                 new_links = self.extract_links(link_content)
-                content_dict.update(self.process_links(full_link, new_links, loader_kwargs, depth, current_depth + 1))
+                content_dict.update(self.process_links(full_link, new_links,
+                                                       loader_kwargs, depth, current_depth + 1))
             else:
                 self.logger.warning(f"Failed to fetch content for {full_link}")
         return content_dict
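
Note: the depth-limited crawl that process_links implements can be illustrated with a minimal, self-contained sketch. fetch_page and extract_links below are hypothetical stand-ins for the node's ChromiumLoader-based fetching and BeautifulSoup link extraction; this is not the node's exact code.

from urllib.parse import urljoin

def process_links(base_url, links, fetch_page, extract_links, depth, current_depth=1):
    """Depth-limited recursive crawl: returns {absolute_url: content}."""
    content_dict = {}
    for link in links:
        full_link = urljoin(base_url, link)   # resolve relative hrefs
        link_content = fetch_page(full_link)  # None signals a failed fetch
        if link_content is None:
            continue
        content_dict[full_link] = link_content
        if current_depth < depth:             # stop recursing at max depth
            new_links = extract_links(link_content)
            content_dict.update(process_links(full_link, new_links,
                                              fetch_page, extract_links,
                                              depth, current_depth + 1))
    return content_dict

# Stub fetcher/extractor so the sketch runs without a browser:
pages = {"https://example.com/": "<a href='/a'>a</a>",
         "https://example.com/a": "leaf"}
result = process_links("https://example.com", ["/"], pages.get,
                       lambda html: ["/a"] if "href" in html else [], depth=2)
print(result)  # {'https://example.com/': "<a href='/a'>a</a>", 'https://example.com/a': 'leaf'}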

scrapegraphai/nodes/generate_answer_omni_node.py

Lines changed: 1 addition & 6 deletions
@@ -71,10 +71,8 @@ def execute(self, state: dict) -> dict:
 
         self.logger.info(f"--- Executing {self.node_name} Node ---")
 
-        # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)
 
-        # Fetching data from the state based on the input keys
         input_data = [state[key] for key in input_keys]
 
         user_prompt = input_data[0]
@@ -85,7 +83,7 @@ def execute(self, state: dict) -> dict:
 
         if isinstance(self.llm_model, (ChatOpenAI, ChatMistralAI)):
             self.llm_model = self.llm_model.with_structured_output(
-                schema = self.node_config["schema"]) # json schema works only on specific models
+                schema = self.node_config["schema"])
 
         output_parser = get_structured_output_parser(self.node_config["schema"])
         format_instructions = "NA"
@@ -106,8 +104,6 @@ def execute(self, state: dict) -> dict:
         TEMPLATE_CHUNKS_OMNI_prompt = self.additional_info + TEMPLATE_CHUNKS_OMNI_prompt
         TEMPLATE_MERGE_OMNI_prompt = self.additional_info + TEMPLATE_MERGE_OMNI_prompt
 
-
-
         chains_dict = {}
         if len(doc) == 1:
             prompt = PromptTemplate(
@@ -139,7 +135,6 @@ def execute(self, state: dict) -> dict:
                 },
             )
 
-            # Dynamically name the chains based on their index
             chain_name = f"chunk{i+1}"
             chains_dict[chain_name] = prompt | self.llm_model | output_parser

scrapegraphai/nodes/generate_scraper_node.py

Lines changed: 0 additions & 1 deletion
@@ -27,7 +27,6 @@ class GenerateScraperNode(BaseNode):
     node_name (str): The unique identifier name for the node, defaulting to "GenerateScraper".
 
     """
-
     def __init__(
         self,
         input: str,

scrapegraphai/nodes/search_node_with_context.py

Lines changed: 2 additions & 1 deletion
@@ -23,7 +23,8 @@ class SearchLinksWithContext(BaseNode):
         input (str): Boolean expression defining the input keys needed from the state.
         output (List[str]): List of output keys to be updated in the state.
         node_config (dict): Additional configuration for the node.
-        node_name (str): The unique identifier name for the node, defaulting to "SearchLinksWithContext".
+        node_name (str): The unique identifier name for the node,
+        defaulting to "SearchLinksWithContext".
     """
 
     def __init__(

scrapegraphai/prompts/merge_generated_scripts_prompts.py

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 """
 merge_generated_scripts_prompts module
 """
+
 TEMPLATE_MERGE_SCRIPTS_PROMPT = """
 You are a python expert in web scraping and you have just generated multiple scripts to scrape different URLs.\n
 The scripts are generated based on a user question and the content of the websites.\n

scrapegraphai/telemetry/telemetry.py

Lines changed: 0 additions & 3 deletions
@@ -34,10 +34,8 @@
 TIMEOUT = 2
 DEFAULT_CONFIG_LOCATION = os.path.expanduser("~/.scrapegraphai.conf")
 
-
 logger = logging.getLogger(__name__)
 
-
 def _load_config(config_location: str) -> configparser.ConfigParser:
     config = configparser.ConfigParser()
     try:
@@ -58,7 +56,6 @@ def _load_config(config_location: str) -> configparser.ConfigParser:
         pass
     return config
 
-
 def _check_config_and_environ_for_telemetry_flag(
     telemetry_default: bool, config_obj: configparser.ConfigParser
 ) -> bool:
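
Note: _check_config_and_environ_for_telemetry_flag resolves the telemetry flag from a config file and the environment. A minimal sketch of that resolution pattern, with hypothetical section, key, and environment-variable names rather than the module's actual ones:

import configparser
import os

def telemetry_enabled(default: bool, config: configparser.ConfigParser) -> bool:
    """Resolve a boolean flag: built-in default < config file < environment."""
    enabled = default
    # a config file entry overrides the default (section/key are hypothetical)
    if config.has_section("telemetry") and config.has_option("telemetry", "enabled"):
        enabled = config.getboolean("telemetry", "enabled")
    # an environment variable overrides the config file (name is hypothetical)
    env_value = os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED")
    if env_value is not None:
        enabled = env_value.strip().lower() in ("1", "true", "yes")
    return enabled

config = configparser.ConfigParser()
config.read(os.path.expanduser("~/.scrapegraphai.conf"))  # silently skips a missing file
print(telemetry_enabled(True, config))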

scrapegraphai/utils/code_error_analysis.py

Lines changed: 8 additions & 5 deletions
@@ -3,11 +3,13 @@
 
 Functions:
 - syntax_focused_analysis: Focuses on syntax-related errors in the generated code.
-- execution_focused_analysis: Focuses on execution-related errors, including generated code and HTML analysis.
-- validation_focused_analysis: Focuses on validation-related errors, considering JSON schema and execution result.
-- semantic_focused_analysis: Focuses on semantic differences in generated code based on a comparison result.
+- execution_focused_analysis: Focuses on execution-related errors,
+including generated code and HTML analysis.
+- validation_focused_analysis: Focuses on validation-related errors,
+considering JSON schema and execution result.
+- semantic_focused_analysis: Focuses on semantic differences in
+generated code based on a comparison result.
 """
-
 from typing import Any, Dict
 import json
 from langchain.prompts import PromptTemplate
@@ -63,7 +65,8 @@ def validation_focused_analysis(state: dict, llm_model) -> str:
     Analyzes the validation errors in the generated code based on a JSON schema.
 
     Args:
-        state (dict): Contains the 'generated_code', 'errors', 'json_schema', and 'execution_result'.
+        state (dict): Contains the 'generated_code', 'errors',
+        'json_schema', and 'execution_result'.
         llm_model: The language model used for generating the analysis.
 
     Returns:

scrapegraphai/utils/code_error_correction.py

Lines changed: 0 additions & 1 deletion
@@ -9,7 +9,6 @@
 - semantic_focused_code_generation: Generates corrected code based on semantic error analysis,
 comparing generated and reference results.
 """
-
 import json
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser

scrapegraphai/utils/copy.py

Lines changed: 0 additions & 1 deletion
@@ -4,7 +4,6 @@
 import copy
 from typing import Any
 
-
 class DeepCopyError(Exception):
     """
     Custom exception raised when an object cannot be deep-copied.

scrapegraphai/utils/dict_content_compare.py

Lines changed: 0 additions & 1 deletion
@@ -8,7 +8,6 @@
 converting strings to lowercase and stripping whitespace.
 - are_content_equal: Compares two dictionaries for semantic equality after normalization.
 """
-
 from typing import Any, Dict, List
 
 def normalize_dict(d: Dict[str, Any]) -> Dict[str, Any]:

scrapegraphai/utils/model_costs.py

Lines changed: 1 addition & 5 deletions
@@ -1,11 +1,7 @@
-"""
-This file contains the cost of models per 1k tokens for input and output.
-The file is on a best effort basis and may not be up to date. Any contributions are welcome.
-"""
-
 """
 Cost for 1k tokens in input
 """
+
 MODEL_COST_PER_1K_TOKENS_INPUT = {
     ### MistralAI
     # General Purpose
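
Note: a table like MODEL_COST_PER_1K_TOKENS_INPUT (and its output counterpart) is typically used to turn token counts into a dollar estimate. A minimal sketch with placeholder model names and prices, not values from the actual table:

MODEL_COST_PER_1K_TOKENS_INPUT = {"example-model": 0.25}   # USD per 1k input tokens
MODEL_COST_PER_1K_TOKENS_OUTPUT = {"example-model": 0.75}  # USD per 1k output tokens

def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    """Estimate the USD cost of one request from its token counts."""
    input_cost = MODEL_COST_PER_1K_TOKENS_INPUT[model] * input_tokens / 1000
    output_cost = MODEL_COST_PER_1K_TOKENS_OUTPUT[model] * output_tokens / 1000
    return input_cost + output_cost

print(f"${estimate_cost('example-model', 1200, 300):.4f}")  # $0.5250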
