Skip to content

Code Generator functionalities and graph #698

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 36 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
280dd53
Code generator graph creation
vedovati-matteo Sep 12, 2024
9862425
JsonDescriptorNode created
vedovati-matteo Sep 12, 2024
42318a1
Creation of PromptRefiner
vedovati-matteo Sep 12, 2024
2a760a1
initial promptrefiner prompt
vedovati-matteo Sep 12, 2024
545970c
html_reduction script
vedovati-matteo Sep 13, 2024
330c22f
Update prompt_refiner_node.py
vedovati-matteo Sep 19, 2024
a2490e3
html analyzer node added
vedovati-matteo Sep 19, 2024
470e768
Update code generator graph
vedovati-matteo Sep 19, 2024
0f4b011
generate code node added
vedovati-matteo Sep 19, 2024
eb9c77c
code generator graph fixed
vedovati-matteo Sep 19, 2024
3ea1f20
Update fetch_node.py
VinciGit00 Sep 20, 2024
2616289
Merge pull request #2 from VinciGit00/pre/beta
vedovati-matteo Sep 20, 2024
5b579b3
code generator v0.1
vedovati-matteo Sep 21, 2024
afa00d1
Reasoning loop created
vedovati-matteo Sep 21, 2024
3459066
Code generator updated version
vedovati-matteo Sep 21, 2024
f38c5e1
removed test code
vedovati-matteo Sep 21, 2024
2ff0f01
Added logs
vedovati-matteo Sep 21, 2024
657ef71
raise keyerror exception for the schema
VinciGit00 Sep 24, 2024
36a8a1c
refining and refactoring of the code
VinciGit00 Sep 24, 2024
5a3ece9
Merge branch 'pre/beta' into temp
VinciGit00 Sep 24, 2024
df90770
Update generate_code_node.py
vedovati-matteo Sep 24, 2024
84abaa6
Merge pull request #5 from ScrapeGraphAI/temp
vedovati-matteo Sep 24, 2024
cd55c92
Merge branch 'pre/beta' into pre/beta
vedovati-matteo Sep 24, 2024
0d77ba3
Merge pull request #3 from VinciGit00/pre/beta
vedovati-matteo Sep 24, 2024
04ac736
i don't like comments
VinciGit00 Sep 24, 2024
d38a501
Update html_analyzer_node.py
VinciGit00 Sep 24, 2024
54ebb39
Update generate_code_node.py
VinciGit00 Sep 24, 2024
9927397
Merge pull request #6 from VinciGit00/patch-3
vedovati-matteo Sep 24, 2024
b0d9b37
Merge pull request #7 from VinciGit00/patch-4
vedovati-matteo Sep 24, 2024
8578bf1
Merge pull request #8 from VinciGit00/patch-5
vedovati-matteo Sep 24, 2024
d6a7702
Validator fixed
vedovati-matteo Sep 24, 2024
2d2c719
Template refactoring
vedovati-matteo Sep 24, 2024
ce841e2
Code generation refactoring
vedovati-matteo Sep 24, 2024
bcf02e5
add possibility to save the code
VinciGit00 Sep 24, 2024
04fdb5d
Merge pull request #9 from VinciGit00/pre/beta
vedovati-matteo Sep 25, 2024
fb87901
Add code generator examples
vedovati-matteo Sep 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions examples/anthropic/code_generator_graph_anthropic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""
Basic example of scraping pipeline using Code Generator with schema
"""

import os, json
from typing import List
from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph

load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

# Schema for a single project entry: a title and a description, both strings.
# Each field attaches a human-readable description through Field(...).
class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")

# Top-level output schema: a list of Project entries stored under "projects".
# This is the class handed to the graph through its `schema` argument below.
class Projects(BaseModel):
    projects: List[Project]

# ************************************************
# Define the configuration for the graph
# ************************************************

# os.getenv returns None when ANTHROPIC_API_KEY is not set.
anthropic_key = os.getenv("ANTHROPIC_API_KEY")

# LLM backend settings.
llm_settings = {
    "api_key": anthropic_key,
    "model": "anthropic/claude-3-haiku-20240307",
}

# Iteration limits keyed by stage name
# (NOTE(review): exact semantics are defined by CodeGeneratorGraph — confirm).
iteration_limits = {
    "overall": 10,
    "syntax": 3,
    "execution": 3,
    "validation": 3,
    "semantic": 3,
}

graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
    "reduction": 2,
    "max_iterations": iteration_limits,
    "output_file_name": "extracted_data.py",
}

# ************************************************
# Create the CodeGeneratorGraph instance and run it
# ************************************************

code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

result = code_generator_graph.run()
print(result)
58 changes: 58 additions & 0 deletions examples/azure/code_generator_graph_azure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""
Basic example of scraping pipeline using Code Generator with schema
"""

import os, json
from typing import List
from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph

load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

# Schema for a single project entry: a title and a description, both strings.
# Each field attaches a human-readable description through Field(...).
class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")

# Top-level output schema: a list of Project entries stored under "projects".
# This is the class handed to the graph through its `schema` argument below.
class Projects(BaseModel):
    projects: List[Project]

# ************************************************
# Define the configuration for the graph
# ************************************************

# LLM backend settings. os.environ[...] raises KeyError when
# AZURE_OPENAI_KEY is not set, so a missing key fails right here.
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-3.5-turbo",
}

# Iteration limits keyed by stage name
# (NOTE(review): exact semantics are defined by CodeGeneratorGraph — confirm).
iteration_limits = {
    "overall": 10,
    "syntax": 3,
    "execution": 3,
    "validation": 3,
    "semantic": 3,
}

graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
    "reduction": 2,
    "max_iterations": iteration_limits,
    "output_file_name": "extracted_data.py",
}

# ************************************************
# Create the CodeGeneratorGraph instance and run it
# ************************************************

code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

result = code_generator_graph.run()
print(result)
60 changes: 60 additions & 0 deletions examples/bedrock/code_generator_graph_bedrock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""
Basic example of scraping pipeline using Code Generator with schema
"""

import os, json
from typing import List
from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph

load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

# Schema for a single project entry: a title and a description, both strings.
# Each field attaches a human-readable description through Field(...).
class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")

# Top-level output schema: a list of Project entries stored under "projects".
# This is the class handed to the graph through its `schema` argument below.
class Projects(BaseModel):
    projects: List[Project]

# ************************************************
# Define the configuration for the graph
# ************************************************


# LLM backend settings. "client_name" is the literal value used by this example
# (NOTE(review): presumably a placeholder to replace with a real client — confirm).
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}

# Iteration limits keyed by stage name
# (NOTE(review): exact semantics are defined by CodeGeneratorGraph — confirm).
iteration_limits = {
    "overall": 10,
    "syntax": 3,
    "execution": 3,
    "validation": 3,
    "semantic": 3,
}

graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
    "reduction": 2,
    "max_iterations": iteration_limits,
    "output_file_name": "extracted_data.py",
}

# ************************************************
# Create the CodeGeneratorGraph instance and run it
# ************************************************

code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

result = code_generator_graph.run()
print(result)
60 changes: 60 additions & 0 deletions examples/deepseek/code_generator_graph_deepseek.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""
Basic example of scraping pipeline using Code Generator with schema
"""

import os, json
from typing import List
from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph

load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

# Schema for a single project entry: a title and a description, both strings.
# Each field attaches a human-readable description through Field(...).
class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")

# Top-level output schema: a list of Project entries stored under "projects".
# This is the class handed to the graph through its `schema` argument below.
class Projects(BaseModel):
    projects: List[Project]

# ************************************************
# Define the configuration for the graph
# ************************************************

# os.getenv returns None when DEEPSEEK_APIKEY is not set.
deepseek_key = os.getenv("DEEPSEEK_APIKEY")

# LLM backend settings.
llm_settings = {
    "model": "deepseek/deepseek-chat",
    "api_key": deepseek_key,
}

# Iteration limits keyed by stage name
# (NOTE(review): exact semantics are defined by CodeGeneratorGraph — confirm).
iteration_limits = {
    "overall": 10,
    "syntax": 3,
    "execution": 3,
    "validation": 3,
    "semantic": 3,
}

graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
    "reduction": 2,
    "max_iterations": iteration_limits,
    "output_file_name": "extracted_data.py",
}

# ************************************************
# Create the CodeGeneratorGraph instance and run it
# ************************************************

code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

result = code_generator_graph.run()
print(result)
62 changes: 62 additions & 0 deletions examples/ernie/code_generator_graph_ernie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""
Basic example of scraping pipeline using Code Generator with schema
"""

import os, json
from typing import List
from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph

load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

# Schema for a single project entry: a title and a description, both strings.
# Each field attaches a human-readable description through Field(...).
class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")

# Top-level output schema: a list of Project entries stored under "projects".
# This is the class handed to the graph through its `schema` argument below.
class Projects(BaseModel):
    projects: List[Project]

# ************************************************
# Define the configuration for the graph
# ************************************************

# The Ernie backend is configured with a client id/secret pair, not an OpenAI
# key. The two "<...>" values below are placeholders to replace with real
# credentials before running this example.
graph_config = {
    "llm": {
        "model": "ernie/ernie-bot-turbo",
        "ernie_client_id": "<ernie_client_id>",
        "ernie_client_secret": "<ernie_client_secret>",
        "temperature": 0.1,
    },
    "verbose": True,
    "headless": False,
    "reduction": 2,
    # Iteration limits keyed by stage name
    # (NOTE(review): exact semantics are defined by CodeGeneratorGraph — confirm).
    "max_iterations": {
        "overall": 10,
        "syntax": 3,
        "execution": 3,
        "validation": 3,
        "semantic": 3,
    },
    "output_file_name": "extracted_data.py",
}

# ************************************************
# Create the CodeGeneratorGraph instance and run it
# ************************************************

code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

result = code_generator_graph.run()
print(result)
60 changes: 60 additions & 0 deletions examples/fireworks/code_generator_graph_fireworks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""
Basic example of scraping pipeline using Code Generator with schema
"""

import os, json
from typing import List
from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph

load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

# Schema for a single project entry: a title and a description, both strings.
# Each field attaches a human-readable description through Field(...).
class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")

# Top-level output schema: a list of Project entries stored under "projects".
# This is the class handed to the graph through its `schema` argument below.
class Projects(BaseModel):
    projects: List[Project]

# ************************************************
# Define the configuration for the graph
# ************************************************

# os.getenv returns None when FIREWORKS_APIKEY is not set.
fireworks_api_key = os.getenv("FIREWORKS_APIKEY")

# LLM backend settings.
llm_settings = {
    "api_key": fireworks_api_key,
    "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct",
}

# Iteration limits keyed by stage name
# (NOTE(review): exact semantics are defined by CodeGeneratorGraph — confirm).
iteration_limits = {
    "overall": 10,
    "syntax": 3,
    "execution": 3,
    "validation": 3,
    "semantic": 3,
}

graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
    "reduction": 2,
    "max_iterations": iteration_limits,
    "output_file_name": "extracted_data.py",
}

# ************************************************
# Create the CodeGeneratorGraph instance and run it
# ************************************************

code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

result = code_generator_graph.run()
print(result)
Loading