Skip to content

Commit 8b8d8f0

Browse files
committed
refactoring of the code according to pylint style
1 parent 609316c commit 8b8d8f0

33 files changed

+227
-158
lines changed

scrapegraphai/helpers/models_tokens.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
"""
2+
List of model tokens
3+
"""
4+
15
models_tokens = {
26
"openai": {
37
"gpt-3.5-turbo-0125": 16385,

scrapegraphai/nodes/base_node.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55
import re
66
from abc import ABC, abstractmethod
77
from typing import List, Optional
8-
98
from ..utils import get_logger
109

1110

1211
class BaseNode(ABC):
1312
"""
14-
An abstract base class for nodes in a graph-based workflow, designed to perform specific actions when executed.
13+
An abstract base class for nodes in a graph-based workflow,
14+
designed to perform specific actions when executed.
1515
1616
Attributes:
1717
node_name (str): The unique identifier name for the node.

scrapegraphai/nodes/fetch_node.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
""""
22
FetchNode Module
33
"""
4-
54
import json
65
from typing import List, Optional
76
from langchain_openai import ChatOpenAI, AzureChatOpenAI
@@ -16,10 +15,6 @@
1615
from ..utils.logging import get_logger
1716
from .base_node import BaseNode
1817

19-
20-
""""
21-
FetchNode Module
22-
"""
2318
class FetchNode(BaseNode):
2419
"""
2520
A node responsible for fetching the HTML content of a specified URL and updating
@@ -218,7 +213,7 @@ def handle_local_source(self, state, source):
218213
self.logger.info(f"--- (Fetching HTML from: {source}) ---")
219214
if not source.strip():
220215
raise ValueError("No HTML body content found in the local source.")
221-
216+
222217
parsed_content = source
223218

224219
if (isinstance(self.llm_model, ChatOpenAI) or isinstance(self.llm_model, AzureChatOpenAI)) and not self.script_creator or self.force and not self.script_creator:

scrapegraphai/nodes/generate_answer_csv_node.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
"""
2-
gg
32
Module for generating the answer node
43
"""
54

@@ -10,8 +9,7 @@
109
from tqdm import tqdm
1110
from ..utils.logging import get_logger
1211
from .base_node import BaseNode
13-
from ..prompts.generate_answer_node_csv_prompts import template_chunks_csv, template_no_chunks_csv, template_merge_csv
14-
12+
from ..prompts.generate_answer_node_csv_prompts import TEMPLATE_CHUKS_CSV, TEMPLATE_NO_CHUKS_CSV, TEMPLATE_MERGE_CSV
1513

1614
class GenerateAnswerCSVNode(BaseNode):
1715
"""
@@ -97,22 +95,22 @@ def execute(self, state):
9795
else:
9896
output_parser = JsonOutputParser()
9997

100-
template_no_chunks_csv_prompt = template_no_chunks_csv
101-
template_chunks_csv_prompt = template_chunks_csv
102-
template_merge_csv_prompt = template_merge_csv
98+
TEMPLATE_NO_CHUKS_CSV_prompt = TEMPLATE_NO_CHUKS_CSV
99+
TEMPLATE_CHUKS_CSV_prompt = TEMPLATE_CHUKS_CSV
100+
TEMPLATE_MERGE_CSV_prompt = TEMPLATE_MERGE_CSV
103101

104102
if self.additional_info is not None:
105-
template_no_chunks_csv_prompt = self.additional_info + template_no_chunks_csv
106-
template_chunks_csv_prompt = self.additional_info + template_chunks_csv
107-
template_merge_csv_prompt = self.additional_info + template_merge_csv
103+
TEMPLATE_NO_CHUKS_CSV_prompt = self.additional_info + TEMPLATE_NO_CHUKS_CSV
104+
TEMPLATE_CHUKS_CSV_prompt = self.additional_info + TEMPLATE_CHUKS_CSV
105+
TEMPLATE_MERGE_CSV_prompt = self.additional_info + TEMPLATE_MERGE_CSV
108106

109107
format_instructions = output_parser.get_format_instructions()
110108

111109
chains_dict = {}
112110

113111
if len(doc) == 1:
114112
prompt = PromptTemplate(
115-
template=template_no_chunks_csv_prompt,
113+
template=TEMPLATE_NO_CHUKS_CSV_prompt,
116114
input_variables=["question"],
117115
partial_variables={
118116
"context": doc,
@@ -129,7 +127,7 @@ def execute(self, state):
129127
tqdm(doc, desc="Processing chunks", disable=not self.verbose)
130128
):
131129
prompt = PromptTemplate(
132-
template=template_chunks_csv_prompt,
130+
template=TEMPLATE_CHUKS_CSV_prompt,
133131
input_variables=["question"],
134132
partial_variables={
135133
"context": chunk,
@@ -146,7 +144,7 @@ def execute(self, state):
146144
batch_results = async_runner.invoke({"question": user_prompt})
147145

148146
merge_prompt = PromptTemplate(
149-
template = template_merge_csv_prompt,
147+
template = TEMPLATE_MERGE_CSV_prompt,
150148
input_variables=["context", "question"],
151149
partial_variables={"format_instructions": format_instructions},
152150
)

scrapegraphai/nodes/generate_answer_node.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from tqdm import tqdm
1111
from ..utils.logging import get_logger
1212
from .base_node import BaseNode
13-
from ..prompts import template_chunks, template_no_chunks, template_merge, template_chunks_md, template_no_chunks_md, template_merge_md
13+
from ..prompts import TEMPLATE_CHUNKS, TEMPLATE_NO_CHUNKS, TEMPLATE_MERGE, TEMPLATE_CHUNKS_MD, TEMPLATE_NO_CHUNKS_MD, TEMPLATE_MERGE_MD
1414

1515
class GenerateAnswerNode(BaseNode):
1616
"""
@@ -94,22 +94,22 @@ def execute(self, state: dict) -> dict:
9494
format_instructions = output_parser.get_format_instructions()
9595

9696
if isinstance(self.llm_model, ChatOpenAI) and not self.script_creator or self.force and not self.script_creator or self.is_md_scraper:
97-
template_no_chunks_prompt = template_no_chunks_md
98-
template_chunks_prompt = template_chunks_md
99-
template_merge_prompt = template_merge_md
97+
TEMPLATE_NO_CHUNKS_prompt = TEMPLATE_NO_CHUNKS_MD
98+
TEMPLATE_CHUNKS_prompt = TEMPLATE_CHUNKS_MD
99+
TEMPLATE_MERGE_prompt = TEMPLATE_MERGE_MD
100100
else:
101-
template_no_chunks_prompt = template_no_chunks
102-
template_chunks_prompt = template_chunks
103-
template_merge_prompt = template_merge
101+
TEMPLATE_NO_CHUNKS_prompt = TEMPLATE_NO_CHUNKS
102+
TEMPLATE_CHUNKS_prompt = TEMPLATE_CHUNKS
103+
TEMPLATE_MERGE_prompt = TEMPLATE_MERGE
104104

105105
if self.additional_info is not None:
106-
template_no_chunks_prompt = self.additional_info + template_no_chunks_prompt
107-
template_chunks_prompt = self.additional_info + template_chunks_prompt
108-
template_merge_prompt = self.additional_info + template_merge_prompt
106+
TEMPLATE_NO_CHUNKS_prompt = self.additional_info + TEMPLATE_NO_CHUNKS_prompt
107+
TEMPLATE_CHUNKS_prompt = self.additional_info + TEMPLATE_CHUNKS_prompt
108+
TEMPLATE_MERGE_prompt = self.additional_info + TEMPLATE_MERGE_prompt
109109

110110
if len(doc) == 1:
111111
prompt = PromptTemplate(
112-
template=template_no_chunks_prompt,
112+
template=TEMPLATE_NO_CHUNKS_prompt,
113113
input_variables=["question"],
114114
partial_variables={"context": doc,
115115
"format_instructions": format_instructions})
@@ -123,7 +123,7 @@ def execute(self, state: dict) -> dict:
123123
for i, chunk in enumerate(tqdm(doc, desc="Processing chunks", disable=not self.verbose)):
124124

125125
prompt = PromptTemplate(
126-
template=template_chunks,
126+
template=TEMPLATE_CHUNKS,
127127
input_variables=["question"],
128128
partial_variables={"context": chunk,
129129
"chunk_id": i + 1,
@@ -136,7 +136,7 @@ def execute(self, state: dict) -> dict:
136136
batch_results = async_runner.invoke({"question": user_prompt})
137137

138138
merge_prompt = PromptTemplate(
139-
template = template_merge_prompt,
139+
template = TEMPLATE_MERGE_prompt,
140140
input_variables=["context", "question"],
141141
partial_variables={"format_instructions": format_instructions},
142142
)

scrapegraphai/nodes/generate_answer_omni_node.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
from tqdm import tqdm
99
from langchain_community.chat_models import ChatOllama
1010
from .base_node import BaseNode
11-
from ..prompts.generate_answer_node_omni_prompts import template_no_chunk_omni, template_chunks_omni, template_merge_omni
12-
11+
from ..prompts.generate_answer_node_omni_prompts import TEMPLATE_NO_CHUNKS_OMNI, TEMPLATE_CHUNKS_OMNI, TEMPLATE_MERGE_OMNI
1312

1413
class GenerateAnswerOmniNode(BaseNode):
1514
"""
@@ -82,22 +81,22 @@ def execute(self, state: dict) -> dict:
8281
output_parser = JsonOutputParser(pydantic_object=self.node_config["schema"])
8382
else:
8483
output_parser = JsonOutputParser()
85-
template_no_chunk_omni_prompt = template_no_chunk_omni
86-
template_chunks_omni_prompt = template_chunks_omni
87-
template_merge_omni_prompt= template_merge_omni
84+
TEMPLATE_NO_CHUNKS_OMNI_prompt = TEMPLATE_NO_CHUNKS_OMNI
85+
TEMPLATE_CHUNKS_OMNI_prompt = TEMPLATE_CHUNKS_OMNI
86+
TEMPLATE_MERGE_OMNI_prompt= TEMPLATE_MERGE_OMNI
8887

8988
if self.additional_info is not None:
90-
template_no_chunk_omni_prompt = self.additional_info + template_no_chunk_omni_prompt
91-
template_chunks_omni_prompt = self.additional_info + template_chunks_omni_prompt
92-
template_merge_omni_prompt = self.additional_info + template_merge_omni_prompt
89+
TEMPLATE_NO_CHUNKS_OMNI_prompt = self.additional_info + TEMPLATE_NO_CHUNKS_OMNI_prompt
90+
TEMPLATE_CHUNKS_OMNI_prompt = self.additional_info + TEMPLATE_CHUNKS_OMNI_prompt
91+
TEMPLATE_MERGE_OMNI_prompt = self.additional_info + TEMPLATE_MERGE_OMNI_prompt
9392

9493
format_instructions = output_parser.get_format_instructions()
9594

9695

9796
chains_dict = {}
9897
if len(doc) == 1:
9998
prompt = PromptTemplate(
100-
template=template_no_chunk_omni_prompt,
99+
template=TEMPLATE_NO_CHUNKS_OMNI_prompt,
101100
input_variables=["question"],
102101
partial_variables={
103102
"context": doc,
@@ -116,7 +115,7 @@ def execute(self, state: dict) -> dict:
116115
tqdm(doc, desc="Processing chunks", disable=not self.verbose)
117116
):
118117
prompt = PromptTemplate(
119-
template=template_chunks_omni_prompt,
118+
template=TEMPLATE_CHUNKS_OMNI_prompt,
120119
input_variables=["question"],
121120
partial_variables={
122121
"context": chunk,
@@ -134,7 +133,7 @@ def execute(self, state: dict) -> dict:
134133
batch_results = async_runner.invoke({"question": user_prompt})
135134

136135
merge_prompt = PromptTemplate(
137-
template = template_merge_omni_prompt,
136+
template = TEMPLATE_MERGE_OMNI_prompt,
138137
input_variables=["context", "question"],
139138
partial_variables={"format_instructions": format_instructions},
140139
)

scrapegraphai/nodes/generate_answer_pdf_node.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""
22
Module for generating the answer node
33
"""
4-
54
from typing import List, Optional
65
from langchain.prompts import PromptTemplate
76
from langchain_core.output_parsers import JsonOutputParser
@@ -10,8 +9,7 @@
109
from langchain_community.chat_models import ChatOllama
1110
from ..utils.logging import get_logger
1211
from .base_node import BaseNode
13-
from ..prompts.generate_answer_node_pdf_prompts import template_chunks_pdf, template_no_chunks_pdf, template_merge_pdf
14-
12+
from ..prompts.generate_answer_node_pdf_prompts import TEMPLATE_CHUNKS_PDF, TEMPLATE_NO_CHUNKS_PDF, TEMPLATE_MERGE_PDF
1513

1614
class GenerateAnswerPDFNode(BaseNode):
1715
"""
@@ -98,20 +96,20 @@ def execute(self, state):
9896
output_parser = JsonOutputParser(pydantic_object=self.node_config["schema"])
9997
else:
10098
output_parser = JsonOutputParser()
101-
template_no_chunks_pdf_prompt = template_no_chunks_pdf
102-
template_chunks_pdf_prompt = template_chunks_pdf
103-
template_merge_pdf_prompt = template_merge_pdf
99+
TEMPLATE_NO_CHUNKS_PDF_prompt = TEMPLATE_NO_CHUNKS_PDF
100+
TEMPLATE_CHUNKS_PDF_prompt = TEMPLATE_CHUNKS_PDF
101+
TEMPLATE_MERGE_PDF_prompt = TEMPLATE_MERGE_PDF
104102

105103
if self.additional_info is not None:
106-
template_no_chunks_pdf_prompt = self.additional_info + template_no_chunks_pdf_prompt
107-
template_chunks_pdf_prompt = self.additional_info + template_chunks_pdf_prompt
108-
template_merge_pdf_prompt = self.additional_info + template_merge_pdf_prompt
104+
TEMPLATE_NO_CHUNKS_PDF_prompt = self.additional_info + TEMPLATE_NO_CHUNKS_PDF_prompt
105+
TEMPLATE_CHUNKS_PDF_prompt = self.additional_info + TEMPLATE_CHUNKS_PDF_prompt
106+
TEMPLATE_MERGE_PDF_prompt = self.additional_info + TEMPLATE_MERGE_PDF_prompt
109107

110108
format_instructions = output_parser.get_format_instructions()
111109

112110
if len(doc) == 1:
113111
prompt = PromptTemplate(
114-
template=template_no_chunks_pdf_prompt,
112+
template=TEMPLATE_NO_CHUNKS_PDF_prompt,
115113
input_variables=["question"],
116114
partial_variables={
117115
"context": doc,
@@ -130,7 +128,7 @@ def execute(self, state):
130128
for i, chunk in enumerate(
131129
tqdm(doc, desc="Processing chunks", disable=not self.verbose)):
132130
prompt = PromptTemplate(
133-
template=template_chunks_pdf_prompt,
131+
template=TEMPLATE_CHUNKS_PDF_prompt,
134132
input_variables=["question"],
135133
partial_variables={
136134
"context":chunk,
@@ -147,7 +145,7 @@ def execute(self, state):
147145
batch_results = async_runner.invoke({"question": user_prompt})
148146

149147
merge_prompt = PromptTemplate(
150-
template = template_merge_pdf_prompt,
148+
template = TEMPLATE_MERGE_PDF_prompt,
151149
input_variables=["context", "question"],
152150
partial_variables={"format_instructions": format_instructions},
153151
)

scrapegraphai/nodes/generate_scraper_node.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
"""
22
GenerateScraperNode Module
33
"""
4-
5-
# Imports from standard library
64
from typing import List, Optional
75
from langchain.prompts import PromptTemplate
86
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
@@ -85,7 +83,7 @@ def execute(self, state: dict) -> dict:
8583

8684
format_instructions = output_schema.get_format_instructions()
8785

88-
template_no_chunks = """
86+
TEMPLATE_NO_CHUNKS = """
8987
PROMPT:
9088
You are a website scraper script creator and you have just scraped the
9189
following content from a website.
@@ -103,14 +101,14 @@ def execute(self, state: dict) -> dict:
103101
SCHEMA INSTRUCTIONS: {schema_instructions}
104102
"""
105103
if self.additional_info is not None:
106-
template_no_chunks += self.additional_info
104+
TEMPLATE_NO_CHUNKS += self.additional_info
107105

108106
if len(doc) > 1:
109107
raise NotImplementedError(
110108
"Currently GenerateScraperNode cannot handle more than 1 context chunks"
111109
)
112110
else:
113-
template = template_no_chunks
111+
template = TEMPLATE_NO_CHUNKS
114112

115113
prompt = PromptTemplate(
116114
template=template,

scrapegraphai/nodes/get_probable_tags_node.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from ..utils.logging import get_logger
88
from .base_node import BaseNode
99

10-
1110
class GetProbableTagsNode(BaseNode):
1211
"""
1312
A node that utilizes a language model to identify probable HTML tags within a document that

scrapegraphai/nodes/graph_iterator_node.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""
22
GraphIterator Module
33
"""
4-
54
import asyncio
65
import copy
76
from typing import List, Optional

scrapegraphai/nodes/image_to_text_node.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
"""
22
ImageToTextNode Module
33
"""
4-
54
from typing import List, Optional
65
from ..utils.logging import get_logger
76
from .base_node import BaseNode
87

9-
108
class ImageToTextNode(BaseNode):
119
"""
1210
Retrieve images from a list of URLs and return a description of

scrapegraphai/nodes/merge_answers_node.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
"""
22
MergeAnswersNode Module
33
"""
4-
54
from typing import List, Optional
65
from langchain.prompts import PromptTemplate
76
from langchain_core.output_parsers import JsonOutputParser
87
from ..utils.logging import get_logger
98
from .base_node import BaseNode
10-
from ..prompts import template_combined
9+
from ..prompts import TEMPLATE_COMBINED
1110

1211
class MergeAnswersNode(BaseNode):
1312
"""
@@ -80,7 +79,7 @@ def execute(self, state: dict) -> dict:
8079
format_instructions = output_parser.get_format_instructions()
8180

8281
prompt_template = PromptTemplate(
83-
template=template_combined,
82+
template=TEMPLATE_COMBINED,
8483
input_variables=["user_prompt"],
8584
partial_variables={
8685
"format_instructions": format_instructions,

0 commit comments

Comments (0)