1
1
"""
2
2
FetchNode Module
3
3
"""
4
-
4
+ import pandas as pd
5
5
from typing import List , Optional
6
6
from langchain_community .document_loaders import AsyncChromiumLoader
7
7
from langchain_core .documents import Document
@@ -22,19 +22,21 @@ class FetchNode(BaseNode):
22
22
Attributes:
23
23
headless (bool): A flag indicating whether the browser should run in headless mode.
24
24
verbose (bool): A flag indicating whether to print verbose output during execution.
25
-
25
+
26
26
Args:
27
27
input (str): Boolean expression defining the input keys needed from the state.
28
28
output (List[str]): List of output keys to be updated in the state.
29
29
node_config (Optional[dict]): Additional configuration for the node.
30
30
node_name (str): The unique identifier name for the node, defaulting to "Fetch".
31
31
"""
32
32
33
def __init__(
    self,
    input: str,
    output: List[str],
    node_config: Optional[dict] = None,
    node_name: str = "Fetch",
):
    """
    Set up the fetch node and read its browser/logging flags.

    Args:
        input (str): Boolean expression defining the input keys needed from the state.
        output (List[str]): List of output keys to be updated in the state.
        node_config (Optional[dict]): Additional configuration for the node; only the
            "headless" and "verbose" keys are read here.
        node_name (str): The unique identifier name for the node, defaulting to "Fetch".
    """
    super().__init__(node_name, "node", input, output, 1)

    # A missing configuration behaves exactly like an empty one: every
    # lookup below falls back to its default.
    settings = {} if node_config is None else node_config

    self.headless = settings.get("headless", True)
    self.verbose = settings.get("verbose", False)
    # NOTE(review): execute() reads self.node_config, which is not assigned
    # here -- confirm the base class provides that attribute.
38
40
39
41
def execute (self , state ):
40
42
"""
@@ -72,6 +74,16 @@ def execute(self, state):
72
74
loader = PyPDFLoader (source )
73
75
compressed_document = loader .load ()
74
76
77
+ elif self .input == "csv" :
78
+ compressed_document = [Document (page_content = pd .read_csv (source ), metadata = {
79
+ "source" : "xml"
80
+ })]
81
+ elif self .input == "xml" :
82
+ with open (source , 'r' , encoding = 'utf-8' ) as f :
83
+ data = f .read ()
84
+ compressed_document = [Document (page_content = data , metadata = {
85
+ "source" : "xml"
86
+ })]
75
87
elif self .input == "pdf_dir" :
76
88
pass
77
89
@@ -82,7 +94,7 @@ def execute(self, state):
82
94
83
95
else :
84
96
if self .node_config is not None and self .node_config .get ("endpoint" ) is not None :
85
-
97
+
86
98
loader = AsyncChromiumLoader (
87
99
[source ],
88
100
proxies = {"http" : self .node_config ["endpoint" ]},
0 commit comments