webscraping-ai
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
Lines changed: 5 additions & 12 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
Lines changed: 3 additions & 3 deletions
diff --git a/.openapi-generator/VERSION b/.openapi-generator/VERSION
Lines changed: 1 addition & 1 deletion
diff --git a/.travis.yml b/.travis.yml
Lines changed: 2 additions & 2 deletions
diff --git a/README.md b/README.md
Lines changed: 17 additions & 17 deletions
@@ -13,26 +13,19 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install flake8 pytest
-          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-          if [ -f test-requirements.txt ]; then pip install -r test-requirements.txt; fi
-      - name: Lint with flake8
-        run: |
-          # stop the build if there are Python syntax errors or undefined names
-          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+          pip install -r requirements.txt
+          pip install -r test-requirements.txt
       - name: Test with pytest
         run: |
-          pytest
+          pytest --cov=webscraping_ai
@@ -14,9 +14,6 @@ stages:
    - pip install -r test-requirements.txt
    - pytest --cov=webscraping_ai
 
-pytest-3.7:
-  extends: .pytest
-  image: python:3.7-alpine
 pytest-3.8:
   extends: .pytest
   image: python:3.8-alpine
@@ -29,3 +26,6 @@ pytest-3.10:
 pytest-3.11:
   extends: .pytest
   image: python:3.11-alpine
+pytest-3.12:
+  extends: .pytest
+  image: python:3.12-alpine
@@ -1 +1 @@
-7.2.0
+7.11.0
@@ -1,13 +1,13 @@
 # ref: https://docs.travis-ci.com/user/languages/python
 language: python
 python:
-  - "3.7"
   - "3.8"
   - "3.9"
   - "3.10"
   - "3.11"
+  - "3.12"
   # uncomment the following if needed
-  #- "3.11-dev"  # 3.11 development branch
+  #- "3.12-dev"  # 3.12 development branch
   #- "nightly"  # nightly build
 # command to install dependencies
 install:
 
@@ -1,16 +1,17 @@
 # webscraping-ai
-WebScraping.AI scraping API provides GPT-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
+WebScraping.AI scraping API provides LLM-powered tools with Chromium JavaScript rendering, rotating proxies, and built-in HTML parsing.
 
 This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project:
 
-- API version: 3.1.3
-- Package version: 3.1.3
+- API version: 3.2.0
+- Package version: 3.2.0
+- Generator version: 7.11.0
 - Build package: org.openapitools.codegen.languages.PythonClientCodegen
 For more information, please visit [https://webscraping.ai](https://webscraping.ai)
 
 ## Requirements.
 
-Python 3.7+
+Python 3.8+
 
 ## Installation & Usage
 ### pip install
@@ -51,7 +52,6 @@ Please follow the [installation procedure](#installation--usage) and then run th
 
 ```python
 
-import time
 import webscraping_ai
 from webscraping_ai.rest import ApiException
 from pprint import pprint
@@ -79,28 +79,27 @@ with webscraping_ai.ApiClient(configuration) as api_client:
     # Create an instance of the API class
     api_instance = webscraping_ai.AIApi(api_client)
     url = 'https://example.com' # str | URL of the target page.
-    question = 'What is the summary of this page content?' # str | Question or instructions to ask the LLM model about the target page. (optional)
-    context_limit = 4000 # int | Maximum number of tokens to use as context for the LLM model (4000 by default). (optional) (default to 4000)
-    response_tokens = 100 # int | Maximum number of tokens to return in the LLM model response. The total context size (context_limit) includes the question, the target page content and the response, so this parameter reserves tokens for the response (see also on_context_limit). (optional) (default to 100)
-    on_context_limit = 'error' # str | What to do if the context_limit parameter is exceeded (truncate by default). The context is exceeded when the target page content is too long. (optional) (default to 'error')
+    fields = {'key': '{\"title\":\"Main product title\",\"price\":\"Current product price\",\"description\":\"Full product description\"}'} # Dict[str, str] | Object describing fields to extract from the page and their descriptions
     headers = {'key': '{\"Cookie\":\"session=some_id\"}'} # Dict[str, str] | HTTP headers to pass to the target page. Can be specified either via a nested query parameter (...&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (...&headers={\"One\": \"value1\", \"Another\": \"value2\"}). (optional)
     timeout = 10000 # int | Maximum web page retrieval time in ms. Increase it in case of timeout errors (10000 by default, maximum is 30000). (optional) (default to 10000)
     js = True # bool | Execute on-page JavaScript using a headless browser (true by default). (optional) (default to True)
     js_timeout = 2000 # int | Maximum JavaScript rendering time in ms. Increase it if you see a loading indicator instead of data on the target page. (optional) (default to 2000)
-    proxy = 'datacenter' # str | Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (optional) (default to 'datacenter')
-    country = 'us' # str | Country of the proxy to use (US by default). Only available on Startup and Custom plans. (optional) (default to 'us')
-    device = 'desktop' # str | Type of device emulation. (optional) (default to 'desktop')
+    wait_for = 'wait_for_example' # str | CSS selector to wait for before returning the page content. Useful for pages with dynamic content loading. Overrides js_timeout. (optional)
+    proxy = 'datacenter' # str | Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default). Note that residential proxy requests are more expensive than datacenter, see the pricing page for details. (optional) (default to 'datacenter')
+    country = 'us' # str | Country of the proxy to use (US by default). (optional) (default to 'us')
+    custom_proxy = 'custom_proxy_example' # str | Your own proxy URL to use instead of our built-in proxy pool in \"http://user:password@host:port\" format (<a target=\"_blank\" href=\"https://webscraping.ai/proxies/smartproxy\">Smartproxy</a> for example). (optional)
+    device = 'desktop' # str | Type of device emulation. (optional) (default to 'desktop')
     error_on_404 = False # bool | Return error on 404 HTTP status on the target page (false by default). (optional) (default to False)
     error_on_redirect = False # bool | Return error on redirect on the target page (false by default). (optional) (default to False)
-    js_script = 'document.querySelector('button').click();' # str | Custom JavaScript code to execute on the target page. (optional)
+    js_script = 'document.querySelector(\'button\').click();' # str | Custom JavaScript code to execute on the target page. (optional)
 
     try:
-        # Get an answer to a question about a given web page
-        api_response = api_instance.get_question(url, question=question, context_limit=context_limit, response_tokens=response_tokens, on_context_limit=on_context_limit, headers=headers, timeout=timeout, js=js, js_timeout=js_timeout, proxy=proxy, country=country, device=device, error_on_404=error_on_404, error_on_redirect=error_on_redirect, js_script=js_script)
-        print("The response of AIApi->get_question:\n")
+        # Extract structured data fields from a web page
+        api_response = api_instance.get_fields(url, fields, headers=headers, timeout=timeout, js=js, js_timeout=js_timeout, wait_for=wait_for, proxy=proxy, country=country, custom_proxy=custom_proxy, device=device, error_on_404=error_on_404, error_on_redirect=error_on_redirect, js_script=js_script)
+        print("The response of AIApi->get_fields:\n")
         pprint(api_response)
     except ApiException as e:
-        print("Exception when calling AIApi->get_question: %s\n" % e)
+        print("Exception when calling AIApi->get_fields: %s\n" % e)
 
 ```
 
@@ -110,6 +109,7 @@ All URIs are relative to *https://api.webscraping.ai*
 
 Class | Method | HTTP request | Description
 ------------ | ------------- | ------------- | -------------
+*AIApi* | [**get_fields**](docs/AIApi.md#get_fields) | **GET** /ai/fields | Extract structured data fields from a web page
 *AIApi* | [**get_question**](docs/AIApi.md#get_question) | **GET** /ai/question | Get an answer to a question about a given web page
 *AccountApi* | [**account**](docs/AccountApi.md#account) | **GET** /account | Information about your account calls quota
 *HTMLApi* | [**get_html**](docs/HTMLApi.md#get_html) | **GET** /html | Page HTML by URL