
Commit 47645d9

removed unused files

1 parent 5211cbe · commit 47645d9

23 files changed: 42 additions and 169 deletions

manual deployment/autorequirements.py

Lines changed: 0 additions & 30 deletions
This file was deleted.

manual deployment/commit_and_push.sh

Lines changed: 0 additions & 36 deletions
This file was deleted.

manual deployment/commit_and_push_with_tests.sh

Lines changed: 0 additions & 36 deletions
This file was deleted.

manual deployment/deploy_on_pip.sh

Lines changed: 0 additions & 15 deletions
This file was deleted.

manual deployment/installation.sh

Lines changed: 0 additions & 8 deletions
This file was deleted.

scrapegraphai/builders/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 """
-__init__.py file for builders folder
+__init__.py file for builders folder
 """
 
 from .graph_builder import GraphBuilder

(The removed and added lines render identically here; the change appears to be whitespace-only.)

scrapegraphai/builders/graph_builder.py

Lines changed: 0 additions & 1 deletion
@@ -6,7 +6,6 @@
 from langchain_community.chat_models import ErnieBotChat
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_openai import ChatOpenAI
-
 from ..helpers import nodes_metadata, graph_schema
 
 class GraphBuilder:

scrapegraphai/helpers/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -1,7 +1,6 @@
 """
 __init__.py for the helpers folder
 """
-
 from .nodes_metadata import nodes_metadata
 from .schemas import graph_schema
 from .models_tokens import models_tokens

scrapegraphai/integrations/burr_bridge.py

Lines changed: 0 additions & 1 deletion
@@ -2,7 +2,6 @@
 Bridge class to integrate Burr into ScrapeGraphAI graphs
 [Burr](https://github.com/DAGWorks-Inc/burr)
 """
-
 import re
 import uuid
 from hashlib import md5

scrapegraphai/nodes/fetch_node_level_k.py

Lines changed: 14 additions & 10 deletions
@@ -1,11 +1,12 @@
+"""
+fetch_node_level_k module
+"""
 from typing import List, Optional
-from .base_node import BaseNode
-from ..docloaders import ChromiumLoader
-from ..utils.cleanup_html import cleanup_html
-from ..utils.convert_to_md import convert_to_md
+from urllib.parse import urljoin
 from langchain_core.documents import Document
 from bs4 import BeautifulSoup
-from urllib.parse import quote, urljoin
+from .base_node import BaseNode
+from ..docloaders import ChromiumLoader
 
 class FetchNodeLevelK(BaseNode):
     """
@@ -102,17 +103,18 @@ def fetch_content(self, source: str, loader_kwargs) -> Optional[str]:
         Optional[str]: The fetched HTML content or None if fetching failed.
         """
         self.logger.info(f"--- (Fetching HTML from: {source}) ---")
-
+
         if self.browser_base is not None:
             try:
                 from ..docloaders.browser_base import browser_base_fetch
             except ImportError:
                 raise ImportError("""The browserbase module is not installed.
                                   Please install it using `pip install browserbase`.""")
 
-            data = browser_base_fetch(self.browser_base.get("api_key"),
+            data = browser_base_fetch(self.browser_base.get("api_key"),
                                       self.browser_base.get("project_id"), [source])
-            document = [Document(page_content=content, metadata={"source": source}) for content in data]
+            document = [Document(page_content=content,
+                                 metadata={"source": source}) for content in data]
         else:
             loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs)
             document = loader.load()
@@ -179,7 +181,8 @@ def obtain_content(self, documents: List, loader_kwargs) -> List:
             full_links = self.get_full_links(source, links)
 
             for link in full_links:
-                if not any(d.get('source', '') == link for d in documents) and not any(d.get('source', '') == link for d in new_documents):
+                if not any(d.get('source', '') == link for d in documents) \
+                    and not any(d.get('source', '') == link for d in new_documents):
                     new_documents.append({"source": link})
 
         documents.extend(new_documents)
@@ -208,7 +211,8 @@ def process_links(self, base_url: str, links: list,
 
             if current_depth < depth:
                 new_links = self.extract_links(link_content)
-                content_dict.update(self.process_links(full_link, new_links, loader_kwargs, depth, current_depth + 1))
+                content_dict.update(self.process_links(full_link, new_links,
+                                                       loader_kwargs, depth, current_depth + 1))
             else:
                 self.logger.warning(f"Failed to fetch content for {full_link}")
         return content_dict
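
Note: the depth-limited crawl that process_links implements can be illustrated with a minimal, self-contained sketch. fetch_page and extract_links below are hypothetical stand-ins for the node's ChromiumLoader-based fetching and BeautifulSoup link extraction; this is not the node's exact code.

from urllib.parse import urljoin

def process_links(base_url, links, fetch_page, extract_links, depth, current_depth=1):
    """Depth-limited recursive crawl: returns {absolute_url: content}."""
    content_dict = {}
    for link in links:
        full_link = urljoin(base_url, link)   # resolve relative hrefs
        link_content = fetch_page(full_link)  # None signals a failed fetch
        if link_content is None:
            continue
        content_dict[full_link] = link_content
        if current_depth < depth:             # stop recursing at max depth
            new_links = extract_links(link_content)
            content_dict.update(process_links(full_link, new_links,
                                              fetch_page, extract_links,
                                              depth, current_depth + 1))
    return content_dict

# Stub fetcher/extractor so the sketch runs without a browser:
pages = {"https://example.com/": "<a href='/a'>a</a>",
         "https://example.com/a": "leaf"}
result = process_links("https://example.com", ["/"], pages.get,
                       lambda html: ["/a"] if "href" in html else [], depth=2)
print(result)  # {'https://example.com/': "<a href='/a'>a</a>", 'https://example.com/a': 'leaf'}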

scrapegraphai/nodes/generate_answer_omni_node.py

Lines changed: 1 addition & 6 deletions
@@ -71,10 +71,8 @@ def execute(self, state: dict) -> dict:
 
         self.logger.info(f"--- Executing {self.node_name} Node ---")
 
-        # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)
 
-        # Fetching data from the state based on the input keys
         input_data = [state[key] for key in input_keys]
 
         user_prompt = input_data[0]
@@ -85,7 +83,7 @@ def execute(self, state: dict) -> dict:
 
         if isinstance(self.llm_model, (ChatOpenAI, ChatMistralAI)):
             self.llm_model = self.llm_model.with_structured_output(
-                schema = self.node_config["schema"]) # json schema works only on specific models
+                schema = self.node_config["schema"])
 
         output_parser = get_structured_output_parser(self.node_config["schema"])
         format_instructions = "NA"
@@ -106,8 +104,6 @@ def execute(self, state: dict) -> dict:
         TEMPLATE_CHUNKS_OMNI_prompt = self.additional_info + TEMPLATE_CHUNKS_OMNI_prompt
         TEMPLATE_MERGE_OMNI_prompt = self.additional_info + TEMPLATE_MERGE_OMNI_prompt
 
-
-
         chains_dict = {}
         if len(doc) == 1:
             prompt = PromptTemplate(
@@ -139,7 +135,6 @@ def execute(self, state: dict) -> dict:
                 },
             )
 
-            # Dynamically name the chains based on their index
             chain_name = f"chunk{i+1}"
             chains_dict[chain_name] = prompt | self.llm_model | output_parser

scrapegraphai/nodes/generate_scraper_node.py

Lines changed: 0 additions & 1 deletion
@@ -27,7 +27,6 @@ class GenerateScraperNode(BaseNode):
     node_name (str): The unique identifier name for the node, defaulting to "GenerateScraper".
 
     """
-
     def __init__(
         self,
         input: str,

scrapegraphai/nodes/search_node_with_context.py

Lines changed: 2 additions & 1 deletion
@@ -23,7 +23,8 @@ class SearchLinksWithContext(BaseNode):
         input (str): Boolean expression defining the input keys needed from the state.
         output (List[str]): List of output keys to be updated in the state.
         node_config (dict): Additional configuration for the node.
-        node_name (str): The unique identifier name for the node, defaulting to "SearchLinksWithContext".
+        node_name (str): The unique identifier name for the node,
+        defaulting to "SearchLinksWithContext".
     """
 
     def __init__(

scrapegraphai/prompts/merge_generated_scripts_prompts.py

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 """
 merge_generated_scripts_prompts module
 """
+
 TEMPLATE_MERGE_SCRIPTS_PROMPT = """
 You are a python expert in web scraping and you have just generated multiple scripts to scrape different URLs.\n
 The scripts are generated based on a user question and the content of the websites.\n

scrapegraphai/telemetry/telemetry.py

Lines changed: 0 additions & 3 deletions
@@ -34,10 +34,8 @@
 TIMEOUT = 2
 DEFAULT_CONFIG_LOCATION = os.path.expanduser("~/.scrapegraphai.conf")
 
-
 logger = logging.getLogger(__name__)
 
-
 def _load_config(config_location: str) -> configparser.ConfigParser:
     config = configparser.ConfigParser()
     try:
@@ -58,7 +56,6 @@ def _load_config(config_location: str) -> configparser.ConfigParser:
         pass
     return config
 
-
 def _check_config_and_environ_for_telemetry_flag(
     telemetry_default: bool, config_obj: configparser.ConfigParser
 ) -> bool:
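
Note: _check_config_and_environ_for_telemetry_flag resolves the telemetry flag from a config file and the environment. A minimal sketch of that resolution pattern, with hypothetical section, key, and environment-variable names rather than the module's actual ones:

import configparser
import os

def telemetry_enabled(default: bool, config: configparser.ConfigParser) -> bool:
    """Resolve a boolean flag: built-in default < config file < environment."""
    enabled = default
    # a config file entry overrides the default (section/key are hypothetical)
    if config.has_section("telemetry") and config.has_option("telemetry", "enabled"):
        enabled = config.getboolean("telemetry", "enabled")
    # an environment variable overrides the config file (name is hypothetical)
    env_value = os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED")
    if env_value is not None:
        enabled = env_value.strip().lower() in ("1", "true", "yes")
    return enabled

config = configparser.ConfigParser()
config.read(os.path.expanduser("~/.scrapegraphai.conf"))  # silently skips a missing file
print(telemetry_enabled(True, config))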

scrapegraphai/utils/code_error_analysis.py

Lines changed: 8 additions & 5 deletions
@@ -3,11 +3,13 @@
 
 Functions:
 - syntax_focused_analysis: Focuses on syntax-related errors in the generated code.
-- execution_focused_analysis: Focuses on execution-related errors, including generated code and HTML analysis.
-- validation_focused_analysis: Focuses on validation-related errors, considering JSON schema and execution result.
-- semantic_focused_analysis: Focuses on semantic differences in generated code based on a comparison result.
+- execution_focused_analysis: Focuses on execution-related errors,
+including generated code and HTML analysis.
+- validation_focused_analysis: Focuses on validation-related errors,
+considering JSON schema and execution result.
+- semantic_focused_analysis: Focuses on semantic differences in
+generated code based on a comparison result.
 """
-
 from typing import Any, Dict
 import json
 from langchain.prompts import PromptTemplate
@@ -63,7 +65,8 @@ def validation_focused_analysis(state: dict, llm_model) -> str:
     Analyzes the validation errors in the generated code based on a JSON schema.
 
     Args:
-        state (dict): Contains the 'generated_code', 'errors', 'json_schema', and 'execution_result'.
+        state (dict): Contains the 'generated_code', 'errors',
+        'json_schema', and 'execution_result'.
         llm_model: The language model used for generating the analysis.
 
     Returns:

scrapegraphai/utils/code_error_correction.py

Lines changed: 0 additions & 1 deletion
@@ -9,7 +9,6 @@
 - semantic_focused_code_generation: Generates corrected code based on semantic error analysis,
 comparing generated and reference results.
 """
-
 import json
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import StrOutputParser

scrapegraphai/utils/copy.py

Lines changed: 0 additions & 1 deletion
@@ -4,7 +4,6 @@
 import copy
 from typing import Any
 
-
 class DeepCopyError(Exception):
     """
     Custom exception raised when an object cannot be deep-copied.

scrapegraphai/utils/dict_content_compare.py

Lines changed: 0 additions & 1 deletion
@@ -8,7 +8,6 @@
 converting strings to lowercase and stripping whitespace.
 - are_content_equal: Compares two dictionaries for semantic equality after normalization.
 """
-
 from typing import Any, Dict, List
 
 def normalize_dict(d: Dict[str, Any]) -> Dict[str, Any]:

scrapegraphai/utils/model_costs.py

Lines changed: 1 addition & 5 deletions
@@ -1,11 +1,7 @@
-"""
-This file contains the cost of models per 1k tokens for input and output.
-The file is on a best effort basis and may not be up to date. Any contributions are welcome.
-"""
-
 """
 Cost for 1k tokens in input
 """
+
 MODEL_COST_PER_1K_TOKENS_INPUT = {
     ### MistralAI
     # General Purpose
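
Note: a table like MODEL_COST_PER_1K_TOKENS_INPUT (and its output counterpart) is typically used to turn token counts into a dollar estimate. A minimal sketch with placeholder model names and prices, not values from the actual table:

MODEL_COST_PER_1K_TOKENS_INPUT = {"example-model": 0.25}   # USD per 1k input tokens
MODEL_COST_PER_1K_TOKENS_OUTPUT = {"example-model": 0.75}  # USD per 1k output tokens

def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    """Estimate the USD cost of one request from its token counts."""
    input_cost = MODEL_COST_PER_1K_TOKENS_INPUT[model] * input_tokens / 1000
    output_cost = MODEL_COST_PER_1K_TOKENS_OUTPUT[model] * output_tokens / 1000
    return input_cost + output_cost

print(f"${estimate_cost('example-model', 1200, 300):.4f}")  # $0.5250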
