Skip to content

Commit e1b9d69

Browse files
committed
dev basic class blockindentifier
1 parent da8c72c commit e1b9d69

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
"""
2+
BlocksIndentifier Module
3+
"""
4+
5+
from typing import List, Optional
6+
from langchain_community.document_loaders import AsyncChromiumLoader
7+
from langchain_core.documents import Document
8+
from .base_node import BaseNode
9+
10+
11+
12+
class BlocksIndentifier(BaseNode):
    """
    A node responsible for identifying the blocks in a piece of HTML content,
    e.g. products on an e-commerce site, flights on a travel website, etc.

    Attributes:
        headless (bool): A flag indicating whether the browser should run in headless mode.
        verbose (bool): A flag indicating whether to print verbose output during execution.

    Args:
        input (str): Boolean expression defining the input keys needed from the state.
        output (List[str]): List of output keys to be updated in the state.
        node_config (Optional[dict]): Additional configuration for the node; the
            "headless" and "verbose" keys are read from it. Defaults to None.
        node_name (str): The unique identifier name for the node, defaulting to "BlocksIndentifier".
    """

    def __init__(
        self,
        input: str,
        output: List[str],
        node_config: Optional[dict] = None,
        node_name: str = "BlocksIndentifier",
    ):
        super().__init__(node_name, "node", input, output, 1)

        self.headless = True if node_config is None else node_config.get("headless", True)
        # NOTE(review): verbose defaults to True when no config is given but to
        # False when a config without a "verbose" key is given -- confirm this
        # asymmetry is intended before relying on it.
        self.verbose = True if node_config is None else node_config.get("verbose", False)

    def execute(self, state):
        """
        Executes the node's logic, characterized by a pre-processing of the HTML content and
        subsequent identification of the blocks in the HTML content.

        Only the input lookup is implemented so far: the input keys are resolved
        and read from the state, but no block identification is performed and no
        output key is written.

        Args:
            state (dict): The current state of the graph. The input keys will be used
                to fetch the correct data types from the state.

        Returns:
            dict: The state that was passed in, currently unchanged.

        Raises:
            KeyError: If the input key is not found in the state, indicating that the
                necessary information to perform the operation is missing.
        """
        if self.verbose:
            print(f"--- Executing {self.node_name} Node ---")

        # Interpret input keys based on the provided input expression
        input_keys = self.get_input_keys(state)

        # Fetching data from the state based on the input keys; a missing key
        # raises KeyError here. The fetched values are not consumed yet.
        # TODO: run the block identification on input_data and store the result
        # under the configured output key.
        input_data = [state[key] for key in input_keys]  # noqa: F841

        # Hand the state back so the documented dict return value is honoured
        # instead of implicitly returning None.
        return state

0 commit comments

Comments
 (0)