ScrapeGraphAI
diff --git a/‎.gitignore
Lines changed: 2 additions & 1 deletion b/‎.gitignore
Lines changed: 2 additions & 1 deletion
diff --git a/‎CHANGELOG.md
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/azure/smart_scraper_azure_openai.py
Lines changed: 63 additions & 0 deletions b/‎examples/azure/smart_scraper_azure_openai.py
Lines changed: 63 additions & 0 deletions
diff --git a/‎examples/gemini/csv_scraper_gemini.py
Lines changed: 60 additions & 0 deletions b/‎examples/gemini/csv_scraper_gemini.py
Lines changed: 60 additions & 0 deletions
diff --git a/‎examples/gemini/inputs/username.csv
Lines changed: 7 additions & 0 deletions b/‎examples/gemini/inputs/username.csv
Lines changed: 7 additions & 0 deletions
diff --git a/‎examples/gemini/scrape_xml_gemini.py
Lines changed: 1 addition & 0 deletions b/‎examples/gemini/scrape_xml_gemini.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/local_models/Docker/csv_scraper_docker.py
Lines changed: 54 additions & 0 deletions b/‎examples/local_models/Docker/csv_scraper_docker.py
Lines changed: 54 additions & 0 deletions
diff --git a/‎examples/local_models/Docker/inputs/username.csv
Lines changed: 7 additions & 0 deletions b/‎examples/local_models/Docker/inputs/username.csv
Lines changed: 7 additions & 0 deletions
diff --git a/‎examples/local_models/Ollama/csv_scraper_ollama.py
Lines changed: 56 additions & 0 deletions b/‎examples/local_models/Ollama/csv_scraper_ollama.py
Lines changed: 56 additions & 0 deletions
diff --git a/‎examples/local_models/Ollama/inputs/username.csv
Lines changed: 7 additions & 0 deletions b/‎examples/local_models/Ollama/inputs/username.csv
Lines changed: 7 additions & 0 deletions
diff --git a/‎examples/openai/csv_scraper_openai.py
Lines changed: 53 additions & 0 deletions b/‎examples/openai/csv_scraper_openai.py
Lines changed: 53 additions & 0 deletions
diff --git a/‎examples/openai/inputs/username.csv
Lines changed: 7 additions & 0 deletions b/‎examples/openai/inputs/username.csv
Lines changed: 7 additions & 0 deletions
diff --git a/‎examples/openai/scrape_plain_text_openai.py
Lines changed: 1 addition & 0 deletions b/‎examples/openai/scrape_plain_text_openai.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎pyproject.toml
Lines changed: 1 addition & 0 deletions b/‎pyproject.toml
Lines changed: 1 addition & 0 deletions
diff --git a/‎scrapegraphai/graphs/__init__.py
Lines changed: 1 addition & 0 deletions b/‎scrapegraphai/graphs/__init__.py
Lines changed: 1 addition & 0 deletions
@@ -28,7 +28,8 @@ venv/
 *.sqlite
 *.google-cookie
 examples/graph_examples/ScrapeGraphAI_generated_graph
-examples/**/*.csv
+examples/**/result.csv
+examples/**/result.json
 main.py
 poetry.lock
 
 
@@ -55,6 +55,7 @@
 * **release:** 0.5.0-beta.5 [skip ci] ([5ac97e2](https://github.com/VinciGit00/Scrapegraph-ai/commit/5ac97e2fb321be40c9787fbf8cb53fa62cf0ce06))
 * **release:** 0.5.0-beta.6 [skip ci] ([9356124](https://github.com/VinciGit00/Scrapegraph-ai/commit/9356124ce39568e88f7d2965181579c4ff0a5752))
 
+
 ## [0.5.0-beta.6](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.5.0-beta.5...v0.5.0-beta.6) (2024-04-30)
 
 
 
@@ -0,0 +1,63 @@
+"""
+Basic example of a scraping pipeline using SmartScraperGraph with Azure OpenAI models
+"""
+
+import os
+from dotenv import load_dotenv
+from langchain_openai import AzureChatOpenAI
+from langchain_openai import AzureOpenAIEmbeddings
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+
+## required environment variables in .env
+# AZURE_OPENAI_ENDPOINT
+# AZURE_OPENAI_CHAT_DEPLOYMENT_NAME
+# MODEL_NAME
+# AZURE_OPENAI_API_KEY
+# OPENAI_API_TYPE
+# AZURE_OPENAI_API_VERSION
+# AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME
+load_dotenv()
+
+# ************************************************
+# Initialize the model instances
+# ************************************************
+# Only the API version and the deployment names are passed explicitly below.
+# NOTE(review): endpoint and API key are presumably read from the environment by langchain_openai - confirm.
+llm_model_instance = AzureChatOpenAI(
+    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
+    azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"]
+)
+
+embedder_model_instance = AzureOpenAIEmbeddings(
+    azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"],
+    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
+)
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+graph_config = {
+    "llm": {"model_instance": llm_model_instance},
+    "embeddings": {"model_instance": embedder_model_instance}
+}
+
+smart_scraper_graph = SmartScraperGraph(
+    # The prompt is split into adjacent string literals, which Python joins into one string.
+    prompt="List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time, event_end_date, event_end_time, location, event_mode, event_category, third_party_redirect, no_of_days, "
+    "time_in_hours, hosted_or_attending, refreshments_type,  registration_available, registration_link",
+    # also accepts a string with the already downloaded HTML code
+    source="https://www.hmhco.com/event",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
@@ -0,0 +1,60 @@
+"""
+Basic example of a scraping pipeline using CSVScraperGraph on a CSV document with Google Gemini
+"""
+
+import os
+from dotenv import load_dotenv
+import pandas as pd
+from scrapegraphai.graphs import CSVScraperGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+load_dotenv()
+
+# ************************************************
+# Read the csv file
+# ************************************************
+# The sample file is semicolon-delimited and has a space after the first header delimiter.
+text = pd.read_csv("inputs/username.csv", sep=";", skipinitialspace=True)
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+# Configure the graph for Google Gemini, matching this example's location
+# under examples/gemini. The API key is read from the environment, which
+# load_dotenv() above populates from a local .env file.
+# NOTE(review): the variable name GOOGLE_APIKEY and the model name
+# "gemini-pro" are assumed to match the other Gemini examples in this
+# repository - confirm before merging.
+gemini_key = os.getenv("GOOGLE_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": gemini_key,
+        "model": "gemini-pro",
+    },
+}
+
+# ************************************************
+# Create the CSVScraperGraph instance and run it
+# ************************************************
+
+csv_scraper_graph = CSVScraperGraph(
+    prompt="List me all the last names",
+    source=str(text),  # Pass the parsed table as text, not the DataFrame object
+    config=graph_config
+)
+
+result = csv_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = csv_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save the result to both csv and json
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
@@ -0,0 +1,7 @@
+Username; Identifier;First name;Last name
+booker12;9012;Rachel;Booker
+grey07;2070;Laura;Grey
+johnson81;4081;Craig;Johnson
+jenkins46;9346;Mary;Jenkins
+smith79;5079;Jamie;Smith
+
@@ -6,6 +6,7 @@
 from dotenv import load_dotenv
 from scrapegraphai.graphs import SmartScraperGraph
 from scrapegraphai.utils import prettify_exec_info
+
 load_dotenv()
 
 # ************************************************
 
@@ -0,0 +1,54 @@
+"""
+Basic example of a scraping pipeline using CSVScraperGraph on a CSV document with Ollama models
+"""
+
+import pandas as pd
+from scrapegraphai.graphs import CSVScraperGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Read the csv file
+# ************************************************
+# The sample file is semicolon-delimited and has a space after the first header delimiter.
+text = pd.read_csv("inputs/username.csv", sep=";", skipinitialspace=True)
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "model": "ollama/mistral",
+        "temperature": 0,
+        "format": "json",  # Ollama needs the format to be specified explicitly
+        # "model_tokens": 2000, # set context length arbitrarily
+    },
+    "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+    }
+}
+
+# ************************************************
+# Create the CSVScraperGraph instance and run it
+# ************************************************
+
+csv_scraper_graph = CSVScraperGraph(
+    prompt="List me all the last names",
+    source=str(text),  # Pass the parsed table as text, not the DataFrame object
+    config=graph_config
+)
+
+result = csv_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = csv_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save the result to both csv and json
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
@@ -0,0 +1,7 @@
+Username; Identifier;First name;Last name
+booker12;9012;Rachel;Booker
+grey07;2070;Laura;Grey
+johnson81;4081;Craig;Johnson
+jenkins46;9346;Mary;Jenkins
+smith79;5079;Jamie;Smith
+
@@ -0,0 +1,56 @@
+"""
+Basic example of a scraping pipeline using CSVScraperGraph on a CSV document with a local Ollama server
+"""
+
+import pandas as pd
+from scrapegraphai.graphs import CSVScraperGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Read the csv file
+# ************************************************
+# The sample file is semicolon-delimited and has a space after the first header delimiter.
+text = pd.read_csv("inputs/username.csv", sep=";", skipinitialspace=True)
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "model": "ollama/mistral",
+        "temperature": 0,
+        "format": "json",  # Ollama needs the format to be specified explicitly
+        # "model_tokens": 2000, # set context length arbitrarily
+        "base_url": "http://localhost:11434",
+    },
+    "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        "base_url": "http://localhost:11434",
+    }
+}
+
+# ************************************************
+# Create the CSVScraperGraph instance and run it
+# ************************************************
+
+csv_scraper_graph = CSVScraperGraph(
+    prompt="List me all the last names",
+    source=str(text),  # Pass the parsed table as text, not the DataFrame object
+    config=graph_config
+)
+
+result = csv_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = csv_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save the result to both csv and json
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
@@ -0,0 +1,7 @@
+Username; Identifier;First name;Last name
+booker12;9012;Rachel;Booker
+grey07;2070;Laura;Grey
+johnson81;4081;Craig;Johnson
+jenkins46;9346;Mary;Jenkins
+smith79;5079;Jamie;Smith
+
@@ -0,0 +1,53 @@
+"""
+Basic example of a scraping pipeline using CSVScraperGraph on a CSV document with OpenAI
+"""
+
+import os
+from dotenv import load_dotenv
+import pandas as pd
+from scrapegraphai.graphs import CSVScraperGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+load_dotenv()
+# ************************************************
+# Read the csv file
+# ************************************************
+# The sample file is semicolon-delimited and has a space after the first header delimiter.
+text = pd.read_csv("inputs/username.csv", sep=";", skipinitialspace=True)
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+openai_key = os.getenv("OPENAI_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": openai_key,
+        "model": "gpt-3.5-turbo",
+    },
+}
+
+# ************************************************
+# Create the CSVScraperGraph instance and run it
+# ************************************************
+
+csv_scraper_graph = CSVScraperGraph(
+    prompt="List me all the last names",
+    source=str(text),  # Pass the parsed table as text, not the DataFrame object
+    config=graph_config
+)
+
+result = csv_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = csv_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save the result to both csv and json
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
@@ -0,0 +1,7 @@
+Username; Identifier;First name;Last name
+booker12;9012;Rachel;Booker
+grey07;2070;Laura;Grey
+johnson81;4081;Craig;Johnson
+jenkins46;9346;Mary;Jenkins
+smith79;5079;Jamie;Smith
+
@@ -6,6 +6,7 @@
 from dotenv import load_dotenv
 from scrapegraphai.graphs import SmartScraperGraph
 from scrapegraphai.utils import prettify_exec_info
+
 load_dotenv()
 
 # ************************************************
 
@@ -3,6 +3,7 @@ name = "scrapegraphai"
 
 version = "0.5.2"
 
+
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
 authors = [
     "Marco Vinciguerra <[email protected]>",
 
@@ -9,3 +9,4 @@
 from .script_creator_graph import ScriptCreatorGraph
 from .xml_scraper_graph import XMLScraperGraph
 from .json_scraper_graph import JSONScraperGraph
+from .csv_scraper_graph import CSVScraperGraph