Skip to content

Commit 7a13a68

Browse files
committed
feat: refactoring of rag node
1 parent dde0c7e commit 7a13a68

File tree

2 files changed

+18
-2
lines changed

2 files changed

+18
-2
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ docs/source/_static/
23 23
venv/
24 24
.venv/
25 25
.vscode/
26+
.conda/
26 27

27 28
# exclude pdf, mp3
28 29
*.pdf
@@ -38,3 +39,6 @@ lib/
38 39
*.html
39 40
.idea
40 41

42+
# extras
43+
cache/
44+
run_smart_scraper.py

scrapegraphai/nodes/rag_node.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
3 3
"""
4 4

5 5
from typing import List, Optional
6+
import os
6 7

7 8
from langchain.docstore.document import Document
8 9
from langchain.retrievers import ContextualCompressionRetriever
@@ -98,7 +99,18 @@ def execute(self, state: dict) -> dict:
98 99
)
99 100
embeddings = self.embedder_model
100 101

101-
retriever = FAISS.from_documents(chunked_docs, embeddings).as_retriever()
102+
#------
103+
index = FAISS.from_documents(chunked_docs, embeddings)
104+
# Define the folder name
105+
folder_name = "cache"
106+
# Check if the folder exists, if not, create it
107+
if not os.path.exists(folder_name):
108+
os.makedirs(folder_name)
109+
# Save the index to the folder
110+
index.save_local(folder_name)
111+
112+
retriever = index.as_retriever()
113+
#------
102 114

103 115
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
104 116
# similarity_threshold could be set, now k=20
@@ -121,4 +133,4 @@ def execute(self, state: dict) -> dict:
121 133
self.logger.info("--- (tokens compressed and vector stored) ---")
122 134

123 135
state.update({self.output[0]: compressed_docs})
124-
return state
136+
return state

0 commit comments

Comments
 (0)