Skip to content

fix: deepcopy fails when copying model_instance config #613

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions scrapegraphai/graphs/csv_scraper_multi_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@
CSVScraperMultiGraph Module
"""

from copy import copy, deepcopy
from typing import List, Optional
from pydantic import BaseModel


from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from .csv_scraper_graph import CSVScraperGraph
from ..nodes import (
GraphIteratorNode,
MergeAnswersNode
)
from ..utils.copy import safe_deepcopy

class CSVScraperMultiGraph(AbstractGraph):
"""
Expand Down Expand Up @@ -46,10 +48,7 @@ def __init__(self, prompt: str, source: List[str],

self.max_results = config.get("max_results", 3)

if all(isinstance(value, str) for value in config.values()):
self.copy_config = copy(config)
else:
self.copy_config = deepcopy(config)
self.copy_config = safe_deepcopy(config)

super().__init__(prompt, config, source, schema)

Expand Down
9 changes: 4 additions & 5 deletions scrapegraphai/graphs/json_scraper_multi_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@
JSONScraperMultiGraph Module
"""

from copy import copy, deepcopy
from copy import deepcopy
from typing import List, Optional
from pydantic import BaseModel

from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from .json_scraper_graph import JSONScraperGraph
from ..nodes import (
GraphIteratorNode,
MergeAnswersNode
)
from ..utils.copy import safe_deepcopy

class JSONScraperMultiGraph(AbstractGraph):
"""
Expand Down Expand Up @@ -45,10 +47,7 @@ def __init__(self, prompt: str, source: List[str], config: dict, schema: Optiona

self.max_results = config.get("max_results", 3)

if all(isinstance(value, str) for value in config.values()):
self.copy_config = copy(config)
else:
self.copy_config = deepcopy(config)
self.copy_config = safe_deepcopy(config)

self.copy_schema = deepcopy(schema)

Expand Down
7 changes: 2 additions & 5 deletions scrapegraphai/graphs/markdown_scraper_multi_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
GraphIteratorNode,
MergeAnswersNode
)
from ..utils.copy import safe_deepcopy

class MDScraperMultiGraph(AbstractGraph):
"""
Expand Down Expand Up @@ -42,11 +43,7 @@ class MDScraperMultiGraph(AbstractGraph):
"""

def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
if all(isinstance(value, str) for value in config.values()):
self.copy_config = copy(config)
else:
self.copy_config = deepcopy(config)

self.copy_config = safe_deepcopy(config)
self.copy_schema = deepcopy(schema)

super().__init__(prompt, config, source, schema)
Expand Down
8 changes: 3 additions & 5 deletions scrapegraphai/graphs/omni_search_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
OmniSearchGraph Module
"""

from copy import copy, deepcopy
from copy import deepcopy
from typing import Optional
from pydantic import BaseModel

Expand All @@ -15,6 +15,7 @@
GraphIteratorNode,
MergeAnswersNode
)
from ..utils.copy import safe_deepcopy


class OmniSearchGraph(AbstractGraph):
Expand Down Expand Up @@ -48,10 +49,7 @@ def __init__(self, prompt: str, config: dict, schema: Optional[BaseModel] = None

self.max_results = config.get("max_results", 3)

if all(isinstance(value, str) for value in config.values()):
self.copy_config = copy(config)
else:
self.copy_config = deepcopy(config)
self.copy_config = safe_deepcopy(config)

self.copy_schema = deepcopy(schema)

Expand Down
8 changes: 3 additions & 5 deletions scrapegraphai/graphs/pdf_scraper_multi_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
PdfScraperMultiGraph Module
"""

from copy import copy, deepcopy
from copy import deepcopy
from typing import List, Optional
from pydantic import BaseModel
from .base_graph import BaseGraph
Expand All @@ -12,6 +12,7 @@
GraphIteratorNode,
MergeAnswersNode
)
from ..utils.copy import safe_deepcopy

class PdfScraperMultiGraph(AbstractGraph):
"""
Expand Down Expand Up @@ -44,10 +45,7 @@ class PdfScraperMultiGraph(AbstractGraph):
def __init__(self, prompt: str, source: List[str],
config: dict, schema: Optional[BaseModel] = None):

if all(isinstance(value, str) for value in config.values()):
self.copy_config = copy(config)
else:
self.copy_config = deepcopy(config)
self.copy_config = safe_deepcopy(config)

self.copy_schema = deepcopy(schema)

Expand Down
7 changes: 2 additions & 5 deletions scrapegraphai/graphs/script_creator_multi_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
ScriptCreatorMultiGraph Module
"""

from copy import copy, deepcopy
from typing import List, Optional

from pydantic import BaseModel
Expand All @@ -15,6 +14,7 @@
GraphIteratorNode,
MergeGeneratedScriptsNode
)
from ..utils.copy import safe_deepcopy

class ScriptCreatorMultiGraph(AbstractGraph):
"""
Expand Down Expand Up @@ -47,10 +47,7 @@ def __init__(self, prompt: str, source: List[str], config: dict, schema: Optiona

self.max_results = config.get("max_results", 3)

if all(isinstance(value, str) for value in config.values()):
self.copy_config = copy(config)
else:
self.copy_config = deepcopy(config)
self.copy_config = safe_deepcopy(config)

super().__init__(prompt, config, source, schema)

Expand Down
8 changes: 3 additions & 5 deletions scrapegraphai/graphs/search_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
SearchGraph Module
"""

from copy import copy, deepcopy
from copy import deepcopy
from typing import Optional, List
from pydantic import BaseModel

Expand All @@ -15,6 +15,7 @@
GraphIteratorNode,
MergeAnswersNode
)
from ..utils.copy import safe_deepcopy

class SearchGraph(AbstractGraph):
"""
Expand Down Expand Up @@ -47,10 +48,7 @@ class SearchGraph(AbstractGraph):
def __init__(self, prompt: str, config: dict, schema: Optional[BaseModel] = None):
self.max_results = config.get("max_results", 3)

if all(isinstance(value, str) for value in config.values()):
self.copy_config = copy(config)
else:
self.copy_config = deepcopy(config)
self.copy_config = safe_deepcopy(config)
self.copy_schema = deepcopy(schema)
self.considered_urls = [] # New attribute to store URLs

Expand Down
8 changes: 3 additions & 5 deletions scrapegraphai/graphs/smart_scraper_multi_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
SmartScraperMultiGraph Module
"""

from copy import copy, deepcopy
from copy import deepcopy
from typing import List, Optional
from pydantic import BaseModel

Expand All @@ -14,6 +14,7 @@
GraphIteratorNode,
MergeAnswersNode
)
from ..utils.copy import safe_deepcopy

class SmartScraperMultiGraph(AbstractGraph):
"""
Expand Down Expand Up @@ -48,10 +49,7 @@ def __init__(self, prompt: str, source: List[str],

self.max_results = config.get("max_results", 3)

if all(isinstance(value, str) for value in config.values()):
self.copy_config = copy(config)
else:
self.copy_config = deepcopy(config)
self.copy_config = safe_deepcopy(config)

self.copy_schema = deepcopy(schema)

Expand Down
8 changes: 3 additions & 5 deletions scrapegraphai/graphs/xml_scraper_multi_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
XMLScraperMultiGraph Module
"""

from copy import copy, deepcopy
from copy import deepcopy
from typing import List, Optional
from pydantic import BaseModel

Expand All @@ -14,6 +14,7 @@
GraphIteratorNode,
MergeAnswersNode
)
from ..utils.copy import safe_deepcopy

class XMLScraperMultiGraph(AbstractGraph):
"""
Expand Down Expand Up @@ -46,10 +47,7 @@ class XMLScraperMultiGraph(AbstractGraph):
def __init__(self, prompt: str, source: List[str],
config: dict, schema: Optional[BaseModel] = None):

if all(isinstance(value, str) for value in config.values()):
self.copy_config = copy(config)
else:
self.copy_config = deepcopy(config)
self.copy_config = safe_deepcopy(config)

self.copy_schema = deepcopy(schema)

Expand Down
75 changes: 75 additions & 0 deletions scrapegraphai/utils/copy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import copy
from typing import Any, Dict, Optional
from pydantic.v1 import BaseModel

class DeepCopyError(Exception):
    """Signals that an object could be neither deep-copied nor shallow-copied."""

def safe_deepcopy(obj: Any) -> Any:
    """
    Create the deepest copy of `obj` that can be achieved.

    `copy.deepcopy` is tried first. If it fails with `TypeError` or
    `AttributeError`, dicts, lists, tuples and frozensets are rebuilt
    element by element with `safe_deepcopy`, and any other object is
    shallow-copied with `copy.copy`.

    Args:
        obj (Any): The object to be copied, which can be of any type.

    Returns:
        Any: A deep copy of the object when `copy.deepcopy` succeeds.
        Otherwise a new container whose elements were copied with
        `safe_deepcopy` (dict and list subclasses come back as plain
        `dict` / `list`; namedtuples keep their type), or a shallow copy
        for every other kind of object.

    Raises:
        DeepCopyError: If the object can be neither deep-copied nor
        shallow-copied.
    """
    try:
        # Fast path: most objects deep-copy without any help.
        return copy.deepcopy(obj)
    except (TypeError, AttributeError) as e:
        # Something in the object graph refuses to be deep-copied. Rebuild
        # the well-known containers so that every element that *can* be
        # deep-copied still is, instead of giving up on the whole structure.

        # Keys are reused as-is; only the values are copied.
        if isinstance(obj, dict):
            return {k: safe_deepcopy(v) for k, v in obj.items()}

        if isinstance(obj, list):
            return [safe_deepcopy(v) for v in obj]

        # Tuples are immutable, but might contain mutable objects.
        if isinstance(obj, tuple):
            items = [safe_deepcopy(v) for v in obj]
            # namedtuple classes expose `_make`; using it keeps the field
            # names instead of degrading the value to a plain tuple.
            make = getattr(type(obj), "_make", None)
            return make(items) if callable(make) else tuple(items)

        # Frozensets are immutable, but might contain mutable objects.
        if isinstance(obj, frozenset):
            return frozenset(safe_deepcopy(v) for v in obj)

        # Any other object: its attributes are not analysed, a shallow copy
        # is used directly as the last resort.
        try:
            return copy.copy(obj)
        except (TypeError, AttributeError):
            raise DeepCopyError(f"Cannot deep copy the object of type {type(obj)}") from e

Loading
Loading