Skip to content

Commit 6ded93f

Browse files
authored
Merge pull request #52 from blink1073/INTPYTHON-416-2
INTPYTHON-416 Make AI/ML testing framework runnable locally
2 parents 05e74d4 + e1a77b5 commit 6ded93f

File tree

25 files changed

+161
-94
lines changed

25 files changed

+161
-94
lines changed

.evergreen/config.yml

Lines changed: 9 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -37,40 +37,29 @@ functions:
3737
args: [.evergreen/fetch-secrets.sh]
3838

3939
"fetch repo":
40-
- command: shell.exec
40+
- command: subprocess.exec
4141
type: setup
4242
params:
43+
include_expansions_in_env: [DIR]
4344
working_dir: "src"
44-
script: |
45-
if [ ! -d "${DIR}" ]; then
46-
echo '${REPO_NAME} could not be found' 1>&2
47-
exit 1
48-
fi
49-
# Apply patches to upstream repo if desired.
50-
cd ${DIR}
51-
git clone ${CLONE_URL}
52-
if [ -d "patches" ]; then
53-
cd ${REPO_NAME}
54-
echo "Applying patches."
55-
git apply ../patches/*
56-
fi
45+
binary: bash
46+
args: [.evergreen/fetch-repo.sh]
5747

5848
"execute tests":
5949
- command: subprocess.exec
6050
type: test
6151
params:
62-
add_expansions_to_env: true
63-
working_dir: "src/${DIR}/${REPO_NAME}"
52+
include_expansions_in_env: [DIR]
53+
working_dir: "src"
6454
binary: bash
65-
args:
66-
- ../run.sh
55+
args: [.evergreen/execute-tests.sh]
6756

6857
"setup local atlas":
6958
- command: subprocess.exec
7059
type: setup
7160
retry_on_failure: true
7261
params:
73-
add_expansions_to_env: true
62+
include_expansions_in_env: [DIR]
7463
working_dir: "src"
7564
binary: bash
7665
args:
@@ -80,7 +69,7 @@ functions:
8069
- command: subprocess.exec
8170
type: setup
8271
params:
83-
add_expansions_to_env: true
72+
include_expansions_in_env: [DIR]
8473
working_dir: "src"
8574
binary: bash
8675
args: [.evergreen/setup-remote.sh]
@@ -194,10 +183,6 @@ buildvariants:
194183
display_name: LlamaIndex RHEL KV Store
195184
expansions:
196185
DIR: llama-index-python-kvstore
197-
REPO_NAME: llama_index
198-
# TODO - Update CLONE_URL: [PYTHON-4522] [INTPYTHON-326]
199-
CLONE_URL: -b PYTHON-4522 --single-branch https://github.com/shruti-sridhar/llama_index.git
200-
DATABASE: llama_index_test_db
201186
run_on:
202187
- rhel87-small
203188
tasks:
@@ -209,9 +194,6 @@ buildvariants:
209194
display_name: Semantic-Kernel RHEL Python
210195
expansions:
211196
DIR: semantic-kernel-python
212-
REPO_NAME: semantic-kernel
213-
CLONE_URL: https://github.com/microsoft/semantic-kernel.git
214-
DATABASE: pyMSKTest
215197
run_on:
216198
- rhel87-small
217199
tasks:
@@ -224,9 +206,6 @@ buildvariants:
224206
display_name: Semantic-Kernel RHEL CSharp
225207
expansions:
226208
DIR: semantic-kernel-csharp
227-
REPO_NAME: semantic-kernel
228-
CLONE_URL: https://github.com/microsoft/semantic-kernel.git
229-
DATABASE: dotnetMSKNearestTest
230209
run_on:
231210
- rhel87-small
232211
tasks:
@@ -238,9 +217,6 @@ buildvariants:
238217
display_name: Langchain RHEL Python
239218
expansions:
240219
DIR: langchain-python
241-
REPO_NAME: langchain-mongodb
242-
CLONE_URL: https://github.com/langchain-ai/langchain-mongodb.git
243-
DATABASE: langchain_test_db
244220
run_on:
245221
- rhel87-small
246222
tasks:
@@ -252,9 +228,6 @@ buildvariants:
252228
display_name: Langgraph RHEL Python
253229
expansions:
254230
DIR: langgraph-python
255-
REPO_NAME: langchain-mongodb
256-
CLONE_URL: https://github.com/langchain-ai/langchain-mongodb.git
257-
DATABASE: langgraph-test
258231
run_on:
259232
- rhel87-small
260233
tasks:
@@ -266,9 +239,6 @@ buildvariants:
266239
display_name: ChatGPT Retrieval Plugin
267240
expansions:
268241
DIR: chatgpt-retrieval-plugin
269-
REPO_NAME: chatgpt-retrieval-plugin
270-
CLONE_URL: https://github.com/openai/chatgpt-retrieval-plugin.git
271-
DATABASE: chatgpt_retrieval_plugin_test_db
272242
run_on:
273243
- rhel87-small
274244
tasks:
@@ -280,9 +250,6 @@ buildvariants:
280250
display_name: LlamaIndex RHEL Vector Store
281251
expansions:
282252
DIR: llama-index-python-vectorstore
283-
REPO_NAME: llama_index
284-
CLONE_URL: https://github.com/run-llama/llama_index.git
285-
DATABASE: llama_index_test_db
286253
run_on:
287254
- rhel87-small
288255
tasks:
@@ -295,9 +262,6 @@ buildvariants:
295262
display_name: DocArray RHEL
296263
expansions:
297264
DIR: docarray
298-
REPO_NAME: docarray
299-
CLONE_URL: https://github.com/docarray/docarray.git
300-
DATABASE: docarray_test_db
301265
run_on:
302266
- rhel87-small
303267
tasks:

.evergreen/execute-tests.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/bash
#
# Execute the test suite for one AI/ML integration.
# Requires: DIR -- the integration subdirectory (e.g. "docarray").
# Reads:    $DIR/config.env, which must define REPO_NAME.

set -eu

# Fail early with a clear message if DIR was not provided.
: "${DIR:?DIR must be set to the integration subdirectory}"

SCRIPT_DIR=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
ROOT_DIR=$(dirname "$SCRIPT_DIR")

# Source the configuration, exporting every variable it defines.
cd "$ROOT_DIR/$DIR"
set -a
source config.env
set +a

# Run the integration's test script from inside the cloned upstream repo.
cd "$REPO_NAME"
bash "$ROOT_DIR/$DIR/run.sh"

.evergreen/fetch-repo.sh

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
#
# Clone (or re-clone) the upstream repo for one AI/ML integration.
# Requires: DIR -- the integration subdirectory (e.g. "docarray").
# Reads:    $DIR/config.env, which must define REPO_NAME and CLONE_URL.

set -eu

# Fail early with a clear message if DIR was not provided.
: "${DIR:?DIR must be set to the integration subdirectory}"

if [ ! -d "$DIR" ]; then
    # Original message referenced $REPO_NAME inside single quotes: it printed
    # the literal text '${REPO_NAME}', and REPO_NAME is not defined until
    # config.env is sourced below. The directory being checked is $DIR.
    echo "$DIR could not be found" 1>&2
    exit 1
fi

cd "$DIR"

# Source the configuration, exporting every variable it defines.
set -a
source config.env
set +a

# Start from a fresh checkout so the script is idempotent for local reruns.
rm -rf -- "$REPO_NAME"
# CLONE_URL is deliberately unquoted: it may carry extra flags that must
# word-split (e.g. "-b some-branch --single-branch <url>").
git clone ${CLONE_URL}

# Apply patches to the upstream repo if any are provided.
if [ -d "patches" ]; then
    cd "$REPO_NAME"
    echo "Applying patches."
    git apply ../patches/*
fi

.evergreen/fetch-secrets.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
set -eu
44

55
# Clone drivers-evergreen-tools.
6-
git clone https://github.com/mongodb-labs/drivers-evergreen-tools
6+
git clone https://github.com/mongodb-labs/drivers-evergreen-tools || true
77

88
# Get the secrets for drivers/ai-ml-pipeline-testing.
99
. drivers-evergreen-tools/.evergreen/secrets_handling/setup-secrets.sh drivers/ai-ml-pipeline-testing

.evergreen/provision-atlas.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@ set -eu
33

44
. .evergreen/utils.sh
55

6+
# Source the config
7+
pushd $DIR
8+
set -a
9+
. config.env
10+
set +a
11+
popd
12+
613
setup_local_atlas
714
scaffold_atlas
815

.evergreen/setup-remote.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@ if [ -z "${DIR:-}" ]; then
88
exit 1
99
fi
1010

11+
# Source the config
12+
pushd $DIR
13+
set -a
14+
. config.env
15+
set +a
16+
popd
17+
1118
# Get the correct remote URI.
1219
case $DIR in
1320
llama-index-python-kvstore)

.evergreen/utils.sh

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ setup_local_atlas() {
6868
IMAGE=artifactory.corp.mongodb.com/dockerhub/mongodb/mongodb-atlas-local:latest
6969
retry podman pull $IMAGE
7070

71-
CONTAINER_ID=$(podman run --rm -d -e DO_NOT_TRACK=1 -P --health-cmd "/usr/local/bin/runner healthcheck" mongodb/mongodb-atlas-local:latest)
71+
CONTAINER_ID=$(podman run --rm -d -e DO_NOT_TRACK=1 -P --health-cmd "/usr/local/bin/runner healthcheck" $IMAGE)
7272

7373
echo "waiting for container to become healthy..."
7474
function wait() {
@@ -104,13 +104,13 @@ setup_local_atlas() {
104104
wait "$CONTAINER_ID"
105105
EXPOSED_PORT=$(podman inspect --format='{{ (index (index .NetworkSettings.Ports "27017/tcp") 0).HostPort }}' "$CONTAINER_ID")
106106
export CONN_STRING="mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true"
107-
# shellcheck disable=SC2154
108-
echo "CONN_STRING=mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true" > $workdir/src/.evergreen/.local_atlas_uri
107+
SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})")
108+
echo "CONN_STRING=mongodb://127.0.0.1:$EXPOSED_PORT/?directConnection=true" > $SCRIPT_DIR/.local_atlas_uri
109109
}
110110

111111
fetch_local_atlas_uri() {
112-
# shellcheck disable=SC2154
113-
. $workdir/src/.evergreen/.local_atlas_uri
112+
SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})")
113+
. $SCRIPT_DIR/.local_atlas_uri
114114

115115
export CONN_STRING=$CONN_STRING
116116
echo "$CONN_STRING"
@@ -120,8 +120,7 @@ fetch_local_atlas_uri() {
120120
scaffold_atlas() {
121121
PYTHON_BINARY=$(find_python3)
122122

123-
# Should be called from src
124-
EVERGREEN_PATH=$(pwd)/.evergreen
123+
EVERGREEN_PATH=$(realpath "$(dirname ${BASH_SOURCE[0]})")
125124
TARGET_DIR=$(pwd)/$DIR
126125
SCAFFOLD_SCRIPT=$EVERGREEN_PATH/scaffold_atlas.py
127126

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ xunit-results/
5050
# Miscellaneous
5151
.DS_Store
5252
drivers-evergreen-tools
53+
atlas
54+
.evergreen/.local_atlas_uri
5355

5456
# Secrets
5557
secrets-export.sh

README.md

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ Each subdirectory is scoped to run only one AI/ML integration's suite of tests f
2222
Within each subdirectory you should expect to have:
2323

2424
- `run.sh` -- A script that should handle any additional library installations and steps for executing the test suite. This script should not populate the Atlas database with any required test data.
25+
- `config.env` - A file that defines the following environment variables:
26+
- `REPO_NAME` -- The name of the AI/ML framework repository that will get cloned
27+
- `CLONE_URL` -- The Github URL to clone into the specified `DIR`
28+
- `DATABASE` -- The optional database where the Atlas CLI will load your index configs
2529
- `database/` -- An optional directory used by `.evergreen/scaffold_atlas.py` to populate a MongoDB database with test data. Only provide this if your tests require pre-populated data.
2630
- `database/{collection}.json` -- An optional JSON file containing one or more MongoDB documents that will be uploaded to `$DATABASE.{collection}` in the local Atlas instance. Only provide this if your tests require pre-populated data.
2731
- `indexConfig.json` -- An optional file containing configuration for a specified Atlas Search Index.
@@ -40,12 +44,15 @@ The general layout of this repo looks like this:
4044
│ │ └── furthestSearch.json # Populates $DATABASE.furthestSearch
4145
│ ├── indexes # Optional Index definitions directory
4246
│ │ └── indexConfig.json # Optional Search index definition
47+
│ ├── config.env # Configuration file
4348
│ └── run.sh # Script that executes test
49+
│
4450
├── semantic-kernel-python # Folder scoped for one Integration
4551
│ ├── database # Optional database definition
4652
│ │ └── nearestSearch.json # Populates $DATABASE.nearestSearch
4753
│ │ └── furthestSearch.json # Populates $DATABASE.furthestSearch
4854
│ ├── indexConfig.json # Creates Search Index on $DATABASE
55+
│ ├── config.env # Configuration file
4956
│ └── run.sh # Script that executes test
5057
```
5158

@@ -54,13 +61,28 @@ The general layout of this repo looks like this:
5461
Each test subdirectory will automatically have its own local Atlas deployment. As a result, database and collection names will not conflict between different AI/ML integrations. To connect to your local Atlas using a connection string, `utils.sh` has a `fetch_local_atlas_uri` that you can call from the `run.sh` script within your subdirectory. For example:
5562

5663
```bash
57-
. $workdir/src/.evergreen/utils.sh
64+
. .evergreen/utils.sh
5865

5966
CONN_STRING=$(fetch_local_atlas_uri)
6067
```
6168

6269
Stores the local Atlas URI within the `CONN_STRING` var. The script can then pass `CONN_STRING` as an environment variable to the test suite.
6370

71+
#### Running tests locally.
72+
73+
We can run the tests with a local checkout of the repo.
74+
75+
For example, to run the `docarray` tests using local atlas:
76+
77+
```bash
78+
export DIR=docarray
79+
bash .evergreen/fetch-repo.sh
80+
bash .evergreen/provision-atlas.sh
81+
bash .evergreen/execute-tests.sh
82+
```
83+
84+
Use `.evergreen/setup-remote.sh` instead of `.evergreen/provision-atlas.sh` to test against the remote cluster.
85+
6486
#### Pre-populating the Local Atlas Deployment
6587

6688
You can pre-populate a test's local Atlas deployment before running the `run.sh` script by providing JSON files in the optional `database` directory of the created subdirectory. The `.evergreen/scaffold_atlas.py` file will search for every JSON file within this database directory and upload the documents to the database provided by the `DATABASE` expansion provided in the build variant of the `.evergreen/config.yml` setup. The collection the script uploads to is based on the name of your JSON file:
@@ -82,9 +104,6 @@ Test execution flow is defined in `.evergreen/config.yml`. The test pipeline's c
82104
- [`expansions`](https://docs.devprod.prod.corp.mongodb.com/evergreen/Project-Configuration/Project-Configuration-Files/#expansions) -- Build variant specific variables. Expansions that need to be maintained as secrets should be stored in [the Evergreen project settings](https://spruce.mongodb.com/project/ai-ml-pipeline-testing/settings/variables) using [variables](https://docs.devprod.prod.corp.mongodb.com/evergreen/Project-Configuration/Project-and-Distro-Settings#variables). Some common expansions needed are:
83105

84106
- `DIR` -- The subdirectory where the tasks will run
85-
- `REPO_NAME` -- The name of the AI/ML framework repository that will get cloned
86-
- `CLONE_URL` -- The Github URL to clone into the specified `DIR`
87-
- `DATABASE` -- The optional database where the Atlas CLI will load your index configs
88107

89108
- `run_on` -- Specified platform to run on. `rhel87-small` should be used by default. Any other distro may fail Atlas CLI setup.
90109
- `tasks` -- Tasks to run. See below for more details

chatgpt-retrieval-plugin/config.env

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Configuration for the chatgpt-retrieval-plugin integration tests.
REPO_NAME="chatgpt-retrieval-plugin"
# Upstream repository to clone (scrape-mangled domain restored to github.com).
CLONE_URL="https://github.com/openai/chatgpt-retrieval-plugin.git"
# Database the Atlas CLI loads index configs into.
DATABASE="chatgpt_retrieval_plugin_test_db"

chatgpt-retrieval-plugin/run.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
#!/bin/sh
1+
#!/bin/bash
22

33
# chat-gpt-retrieval-plugin is a poetry run project
44

55
set -eu
66

77
# Get the MONGODB_URI and OPENAI_API_KEY.
8-
# shellcheck disable=SC2154
9-
. $workdir/src/env.sh
8+
SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})")
9+
ROOT_DIR=$(dirname $SCRIPT_DIR)
10+
. $ROOT_DIR/env.sh
1011

11-
# shellcheck disable=SC2154
12-
. $workdir/src/.evergreen/utils.sh
12+
. $ROOT_DIR/.evergreen/utils.sh
1313

1414
PYTHON_BINARY=$(find_python3)
1515
$PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')"

docarray/config.env

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Configuration for the docarray integration tests.
REPO_NAME="docarray"
# Upstream repository to clone (scrape-mangled domain restored to github.com).
CLONE_URL="https://github.com/docarray/docarray.git"
# Database the Atlas CLI loads index configs into.
DATABASE="docarray_test_db"

docarray/run.sh

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1-
#!/bin/sh
1+
#!/bin/bash
22

33
# Sets up a virtual environment (poetry)
44
# Runs the mongodb tests of the upstream repo
55

66
set -eu
77

88
# Get the MONGODB_URI.
9-
# shellcheck disable=SC2154
10-
. $workdir/src/env.sh
9+
SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})")
10+
ROOT_DIR=$(dirname $SCRIPT_DIR)
11+
12+
. $ROOT_DIR/env.sh
13+
14+
. $ROOT_DIR/.evergreen/utils.sh
1115

12-
# shellcheck disable=SC2154
13-
. $workdir/src/.evergreen/utils.sh
1416
PYTHON_BINARY=$(find_python3)
1517
$PYTHON_BINARY -c "import sys; print(f'Python version found: {sys.version_info}')"
1618

langchain-python/config.env

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Configuration for the langchain-python integration tests.
REPO_NAME="langchain-mongodb"
# Upstream repository to clone (scrape-mangled domain restored to github.com).
CLONE_URL="https://github.com/langchain-ai/langchain-mongodb.git"
# Database the Atlas CLI loads index configs into.
DATABASE="langchain_test_db"

0 commit comments

Comments
 (0)