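Refactor screenshot scraper nodes: fix image-node state keys, use the configured LLM model, and drop a redundant screenshot copy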

VinciGit00 · VinciGit00 · commit c72c077eb6bf · 2024-08-19T01:15:45.000+02:00
diff --git a/scrapegraphai/graphs/screenshot_scraper_graph.py b/scrapegraphai/graphs/screenshot_scraper_graph.py
@@ -1,22 +1,32 @@
+""" 
+ScreenshotScraperGraph Module 
 """
-ScreenshotScraperGraph Module
-"""
-
 from typing import Optional
 import logging
 from pydantic import BaseModel
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
+from ..nodes import ( FetchScreenNode, GenerateAnswerFromImageNode, )
 
-from ..nodes import (
-    FetchScreenNode,
-    GenerateAnswerFromImageNode,
-)
+class ScreenshotScraperGraph(AbstractGraph): 
+    """ 
+    A graph instance representing the web scraping workflow for images.
 
-class ScreenshotScraperGraph(AbstractGraph):
-    """
-   smart_scraper.run()
-        )
+    Attributes:
+        prompt (str): The input text to be scraped.
+        config (dict): Configuration parameters for the graph.
+        source (str): The source URL or image link to scrape from.
+
+    Methods:
+        __init__(prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None)
+            Initializes the ScreenshotScraperGraph instance with the given prompt, 
+            source, and configuration parameters.
+
+        _create_graph()
+            Creates a graph of nodes representing the web scraping workflow for images.
+
+        run()
+            Executes the scraping process and returns the answer to the prompt.
     """
 
     def __init__(self, prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None):
@@ -25,10 +35,10 @@ def __init__(self, prompt: str, source: str, config: dict, schema: Optional[Base
 
     def _create_graph(self) -> BaseGraph:
         """
-        Creates the graph of nodes representing the workflow for web scraping.
+        Creates the graph of nodes representing the workflow for web scraping with images.
 
         Returns:
-            BaseGraph: A graph instance representing the web scraping workflow.
+            BaseGraph: A graph instance representing the web scraping workflow for images.
         """
         fetch_screen_node = FetchScreenNode(
             input="url",
@@ -38,8 +48,8 @@ def _create_graph(self) -> BaseGraph:
             }
         )
         generate_answer_from_image_node = GenerateAnswerFromImageNode(
-            input="doc",
-            output=["parsed_doc"],
+            input="imgs",
+            output=["answer"],
             node_config={
                 "config": self.config
             }
diff --git a/scrapegraphai/nodes/fetch_screen_node.py b/scrapegraphai/nodes/fetch_screen_node.py
@@ -25,8 +25,6 @@ def execute(self, state: dict) -> dict:
         Captures screenshots from the input URL and stores them in the state dictionary as bytes.
         """
 
-        screenshots = []
-
         with sync_playwright() as p:
             browser = p.chromium.launch()
             page = browser.new_page()
@@ -49,10 +47,7 @@ def capture_screenshot(scroll_position, counter):
 
             browser.close()
 
-        for screenshot_data in screenshot_data_list:
-            screenshots.append(screenshot_data)
-
         state["link"] = self.url
-        state['screenshots'] = screenshots
+        state['screenshots'] = screenshot_data_list
 
         return state
diff --git a/scrapegraphai/nodes/generate_answer_from_image_node.py b/scrapegraphai/nodes/generate_answer_from_image_node.py
@@ -31,10 +31,10 @@ def execute(self, state: dict) -> dict:
 
         api_key = self.node_config.get("config", {}).get("llm", {}).get("api_key", "")
 
-        supported_models = ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo"]
+        supported_models = ("gpt-4o", "gpt-4o-mini", "gpt-4-turbo")
 
         if self.node_config["config"]["llm"]["model"] not in supported_models:
-            raise ValueError(f"""Model '{self.node_config['config']['llm']['model']}' 
+            raise ValueError(f"""Model '{self.node_config['config']['llm']['model']}'
                              is not supported. Supported models are: 
                              {', '.join(supported_models)}.""")
 
@@ -47,7 +47,7 @@ def execute(self, state: dict) -> dict:
             }
 
             payload = {
-                "model": "gpt-4o-mini",
+                "model": self.node_config["config"]["llm"]["model"],
                 "messages": [
                     {
                         "role": "user",
@@ -72,7 +72,7 @@ def execute(self, state: dict) -> dict:
             response = requests.post("https://api.openai.com/v1/chat/completions",
                                      headers=headers,
                                      json=payload,
-                                     timeout=10 )
+                                     timeout=10)
             result = response.json()
 
             response_text = result.get('choices',