Fix numpy version and remove scipy optional (#347)

tylerhutcherson · web-flow · commit defa62a3f291 · 2025-06-05T13:51:03.000-04:00
The original goal of this task was to relax the forced `numpy` version constraint that was causing issues in Python 3.11 environments where numpy 2.x.x was required (like Google Colab).

![Screenshot 2025-06-03 at 1 58 50 PM](https://github.com/user-attachments/assets/d1f1bfca-6a81-4cad-8669-e379fca4ba75)

## Problem

However, I quickly ran into dependency-chain issues with:

- `scipy` -- the scientific computing stack is still a bit of a dinosaur
- `boto3` -- always an issue
- `ranx` -- not really sure why, but this one has 13+ separate dependencies and includes a pin on `numba` that expects certain things from `numpy`, so dependency resolution easily becomes circular

## Solution

Working together with Claude, I was able to resolve and rebuild the environment and the lockfile with one major change:

- `ranx` is still optional, but it is no longer an explicit extra.
- What does that mean? It only means that devs who use `ranx` have to install it with `pip install ranx` instead of `pip install redisvl[ranx]`.

This could be some kind of limitation of the Poetry version I was using. I spent 6 hours on this with Claude, and probably $50 in credits, peeling back the layers of Python environment issues. It now works, and our issues on Google Colab are gone. I also think the package version ranges are a bit more generous now.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -52,6 +52,7 @@ jobs:
       - name: Install dependencies
         run: |
           poetry install --all-extras
+          poetry run pip install ranx
 
       - name: Authenticate to Google Cloud
         uses: google-github-actions/auth@v1
@@ -116,19 +117,19 @@ jobs:
       - name: Install dependencies
         run: |
           poetry install --all-extras
+          poetry run pip install ranx
 
-      - name: Install specific redis-py version
-        run: |
+          # Install right redis version based on redis py
           if [[ "${{ matrix.redis-py-version }}" == "5.x" ]]; then
             poetry add "redis>=5.0.0,<6.0.0"
           else
             poetry add "redis>=6.0.0,<7.0.0"
           fi
 
-      - name: Install hiredis if needed
-        if: matrix.connection == 'hiredis'
-        run: |
-          poetry add hiredis
+          # Install hiredis if selected
+          if [[ "${{ matrix.connection }}" == "hiredis" ]]; then
+              poetry add hiredis
+          fi
 
       - name: Set Redis image name
         run: |
@@ -179,7 +180,7 @@ jobs:
           if [[ "${{ matrix.python-version }}" > "3.9" ]]; then
             make test-notebooks
           else
-            poetry run test-notebooks --ignore ./docs/user_guide/09_threshold_optimization.ipynb --ignore ./docs/user_guide/release_guide/0_5_0_release.ipynb
+            poetry run test-notebooks --ignore ./docs/user_guide/09_threshold_optimization.ipynb --ignore ./docs/user_guide/release_guide/0_5_1_release.ipynb
           fi
 
   docs:
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,42 +21,39 @@ packages = [{ include = "redisvl", from = "." }]
 
 [tool.poetry.dependencies]
 python = ">=3.9,<3.14"
-numpy = [
-  { version = "^1", python = "<3.12" },
-  { version = ">=1.26.0,<3", python = ">=3.12" },
-]
+numpy = ">=1.26.0,<3"
 pyyaml = ">=5.4,<7.0"
 redis = ">=5.0,<7.0"
-pydantic = "^2"
+pydantic = ">=2,<3"
 tenacity = ">=8.2.2"
 ml-dtypes = ">=0.4.0,<1.0.0"
-python-ulid = "^3.0.0"
+python-ulid = ">=3.0.0"
+jsonpath-ng = ">=1.5.0"
 nltk = { version = "^3.8.1", optional = true }
-jsonpath-ng = "^1.5.0"
-openai = { version = "^1.13.0", optional = true }
-sentence-transformers = { version = "^3.4.0", optional = true }
-scipy = [
-  { version = "<1.15", python = "<3.10", optional = true },
-  { version = "^1.15", python = ">=3.10", optional = true }
-]
-google-cloud-aiplatform = { version = "^1.26", optional = true }
-protobuf = { version = "^5.29.1", optional = true }
+openai = { version = ">=1.1.0", optional = true }
+google-cloud-aiplatform = { version = ">=1.26,<2.0.0", optional = true }
+protobuf = { version = ">=5.28.0,<6.0.0", optional = true }
 cohere = { version = ">=4.44", optional = true }
 mistralai = { version = ">=1.0.0", optional = true }
 voyageai = { version = ">=0.2.2", optional = true }
-ranx = { version = "^0.3.0", python=">=3.10", optional = true }
-boto3 = {version = "1.36.0", optional = true, extras = ["bedrock"]}
+sentence-transformers = { version = "^3.4.0", optional = true }
+scipy = [
+  { version = ">=1.9.0,<1.14", python = "<3.10", optional = true },
+  { version = ">=1.14.0,<1.16", python = ">=3.10", optional = true }
+]
+boto3 = { version = "^1.36.0", optional = true }
+urllib3 = { version = "<2.2.0", optional = true }
+ranx = {version = "^0.3.20", optional = true}
 
 [tool.poetry.extras]
+mistralai = ["mistralai"]
 openai = ["openai"]
-sentence-transformers = ["sentence-transformers", "scipy"]
-vertexai = ["google_cloud_aiplatform", "protobuf"]
+nltk = ["nltk"]
 cohere = ["cohere"]
-mistralai = ["mistralai"]
 voyageai = ["voyageai"]
-ranx = ["ranx"]
-bedrock = ["boto3"]
-nltk = ["nltk"]
+sentence-transformers = ["sentence-transformers", "scipy"]
+vertexai = ["google-cloud-aiplatform", "protobuf"]
+bedrock = ["boto3", "urllib3"]
 
 [tool.poetry.group.dev.dependencies]
 black = "^25.1.0"
@@ -66,7 +63,7 @@ pytest = "^8.1.1"
 pytest-asyncio = "^0.23.6"
 pytest-xdist = {extras = ["psutil"], version = "^3.6.1"}
 pre-commit = "^4.1.0"
-mypy = "1.9.0"
+mypy = "^1.11.0"
 nbval = "^0.11.0"
 types-pyyaml = "*"
 types-pyopenssl = "*"
diff --git a/redisvl/redis/connection.py b/redisvl/redis/connection.py
@@ -357,6 +357,9 @@ def sync_to_async_redis(
                 "RedisCluster is not supported for sync-to-async conversion."
             )
 
+        # At this point, redis_client is guaranteed to be Redis type
+        assert isinstance(redis_client, Redis)  # Type narrowing for MyPy
+
         # pick the right connection class
         connection_class: Type[AsyncAbstractConnection] = (
             AsyncSSLConnection
diff --git a/redisvl/utils/optimize/cache.py b/redisvl/utils/optimize/cache.py
@@ -1,9 +1,15 @@
-from typing import Any, Callable, Dict, List
+from typing import TYPE_CHECKING, Any, Callable, Dict, List
 
 from redisvl.utils.utils import lazy_import
 
+if TYPE_CHECKING:
+    from ranx import Qrels, Run, evaluate
+else:
+    Qrels = lazy_import("ranx.Qrels")
+    Run = lazy_import("ranx.Run")
+    evaluate = lazy_import("ranx.evaluate")
+
 np = lazy_import("numpy")
-from ranx import Qrels, Run, evaluate
 
 from redisvl.extensions.cache.llm.semantic import SemanticCache
 from redisvl.query import RangeQuery
@@ -12,7 +18,7 @@
 from redisvl.utils.optimize.utils import NULL_RESPONSE_KEY, _format_qrels
 
 
-def _generate_run_cache(test_data: List[LabeledData], threshold: float) -> Run:
+def _generate_run_cache(test_data: List[LabeledData], threshold: float) -> "Run":
     """Format observed data for evaluation with ranx"""
     run_dict: Dict[str, Dict[str, int]] = {}
 
@@ -32,7 +38,7 @@ def _generate_run_cache(test_data: List[LabeledData], threshold: float) -> Run:
 
 
 def _eval_cache(
-    test_data: List[LabeledData], threshold: float, qrels: Qrels, metric: str
+    test_data: List[LabeledData], threshold: float, qrels: "Qrels", metric: str
 ) -> float:
     """Formats run data and evaluates supported metric"""
     run = _generate_run_cache(test_data, threshold)
diff --git a/redisvl/utils/optimize/router.py b/redisvl/utils/optimize/router.py
@@ -1,18 +1,24 @@
 import random
-from typing import Any, Callable, Dict, List
+from typing import TYPE_CHECKING, Any, Callable, Dict, List
 
 from redisvl.utils.utils import lazy_import
 
+if TYPE_CHECKING:
+    from ranx import Qrels, Run, evaluate
+else:
+    Qrels = lazy_import("ranx.Qrels")
+    Run = lazy_import("ranx.Run")
+    evaluate = lazy_import("ranx.evaluate")
+
 np = lazy_import("numpy")
-from ranx import Qrels, Run, evaluate
 
 from redisvl.extensions.router.semantic import SemanticRouter
 from redisvl.utils.optimize.base import BaseThresholdOptimizer, EvalMetric
 from redisvl.utils.optimize.schema import LabeledData
 from redisvl.utils.optimize.utils import NULL_RESPONSE_KEY, _format_qrels
 
 
-def _generate_run_router(test_data: List[LabeledData], router: SemanticRouter) -> Run:
+def _generate_run_router(test_data: List[LabeledData], router: SemanticRouter) -> "Run":
     """Format router results into format for ranx Run"""
     run_dict: Dict[Any, Any] = {}
 
@@ -28,7 +34,10 @@ def _generate_run_router(test_data: List[LabeledData], router: SemanticRouter) -
 
 
 def _eval_router(
-    router: SemanticRouter, test_data: List[LabeledData], qrels: Qrels, eval_metric: str
+    router: SemanticRouter,
+    test_data: List[LabeledData],
+    qrels: "Qrels",
+    eval_metric: str,
 ) -> float:
     """Evaluate acceptable metric given run and qrels data"""
     run = _generate_run_router(test_data, router)
@@ -58,7 +67,7 @@ def _router_random_search(
 def _random_search_opt_router(
     router: SemanticRouter,
     test_data: List[LabeledData],
-    qrels: Qrels,
+    qrels: "Qrels",
     eval_metric: EvalMetric,
     **kwargs: Any,
 ):
diff --git a/redisvl/utils/optimize/utils.py b/redisvl/utils/optimize/utils.py
@@ -1,16 +1,20 @@
-from typing import List
+from typing import TYPE_CHECKING, List
 
 from redisvl.utils.utils import lazy_import
 
+if TYPE_CHECKING:
+    from ranx import Qrels
+else:
+    Qrels = lazy_import("ranx.Qrels")
+
 np = lazy_import("numpy")
-from ranx import Qrels
 
 from redisvl.utils.optimize.schema import LabeledData
 
 NULL_RESPONSE_KEY = "no_match"
 
 
-def _format_qrels(test_data: List[LabeledData]) -> Qrels:
+def _format_qrels(test_data: List[LabeledData]) -> "Qrels":
     """Utility function for creating qrels for evaluation with ranx"""
     qrels_dict = {}
 
diff --git a/tests/unit/test_threshold_optimizer_utility.py b/tests/unit/test_threshold_optimizer_utility.py
@@ -5,7 +5,9 @@
 if sys.version_info.major == 3 and sys.version_info.minor < 10:
     pytest.skip("Test requires Python 3.10 or higher", allow_module_level=True)
 
-from ranx import evaluate
+from redisvl.utils.utils import lazy_import
+
+evaluate = lazy_import("ranx.evaluate")
 
 from redisvl.utils.optimize import LabeledData
 from redisvl.utils.optimize.cache import _generate_run_cache

Original file line number	Diff line number	Diff line change
`@@ -357,6 +357,9 @@ def sync_to_async_redis(`
`357`	`357`	`"RedisCluster is not supported for sync-to-async conversion."`
`358`	`358`	`)`
`359`	`359`
	`360`	`+ # At this point, redis_client is guaranteed to be Redis type`
	`361`	`+ assert isinstance(redis_client, Redis) # Type narrowing for MyPy`
	`362`	`+`
`360`	`363`	`# pick the right connection class`
`361`	`364`	`connection_class: Type[AsyncAbstractConnection] = (`
`362`	`365`	`AsyncSSLConnection`