|
9 | 9 | from langchain.prompts import PromptTemplate
|
10 | 10 | from langchain.output_parsers import CommaSeparatedListOutputParser
|
11 | 11 |
|
12 |
| -from .base_node import BaseNode |
13 | 12 | from langchain.output_parsers import CommaSeparatedListOutputParser
|
14 | 13 | from langchain.prompts import PromptTemplate
|
15 | 14 | from langchain_community.document_loaders import AsyncChromiumLoader
|
|
18 | 17 | from ..utils.logging import get_logger
|
19 | 18 | from .base_node import BaseNode
|
20 | 19 |
|
21 |
| - |
22 | 20 | class RobotsNode(BaseNode):
|
23 | 21 | """
|
24 | 22 | A node responsible for checking if a website is scrapeable or not based on the robots.txt file.
|
@@ -48,13 +46,14 @@ def __init__(
|
48 | 46 | output: List[str],
|
49 | 47 | node_config: Optional[dict] = None,
|
50 | 48 | node_name: str = "Robots",
|
51 |
| - |
52 | 49 | ):
|
53 | 50 | super().__init__(node_name, "node", input, output, 1)
|
54 | 51 |
|
55 | 52 | self.llm_model = node_config["llm_model"]
|
56 | 53 |
|
57 |
| - self.force_scraping = False if node_config is None else node_config.get("force_scraping", False) |
| 54 | + self.force_scraping = ( |
| 55 | + False if node_config is None else node_config.get("force_scraping", False) |
| 56 | + ) |
58 | 57 | self.verbose = (
|
59 | 58 | True if node_config is None else node_config.get("verbose", False)
|
60 | 59 | )
|
@@ -111,14 +110,11 @@ def execute(self, state: dict) -> dict:
|
111 | 110 | base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
|
112 | 111 | loader = AsyncChromiumLoader(f"{base_url}/robots.txt")
|
113 | 112 | document = loader.load()
|
114 |
| - if "ollama" in self.llm_model["model_name"]: |
115 |
| - self.llm_model["model_name"] = self.llm_model["model_name"].split("/")[ |
116 |
| - -1 |
117 |
| - ] |
118 |
| - model = self.llm_model["model_name"].split("/")[-1] |
119 |
| - |
| 113 | + if "ollama" in self.llm_model.model_name: |
| 114 | + self.llm_model.model_name = self.llm_model.model_name.split("/")[-1] |
| 115 | + model = self.llm_model.model_name.split("/")[-1] |
120 | 116 | else:
|
121 |
| - model = self.llm_model["model_name"] |
| 117 | + model = self.llm_model.model_name |
122 | 118 | try:
|
123 | 119 | agent = robots_dictionary[model]
|
124 | 120 |
|
|
0 commit comments