Skip to content

PYTHON-3464 Add FaaS platform to handshake metadata #1204

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 105 additions & 1 deletion pymongo/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@
import threading
import time
import weakref
from typing import Any, NoReturn, Optional
from typing import Any, Dict, NoReturn, Optional

import bson
from bson import DEFAULT_CODEC_OPTIONS
from bson.son import SON
from pymongo import __version__, _csot, auth, helpers
Expand Down Expand Up @@ -231,6 +232,103 @@ def _set_keepalive_times(sock):
)


def _is_lambda() -> bool:
return bool(os.getenv("AWS_EXECUTION_ENV") or os.getenv("AWS_LAMBDA_RUNTIME_API"))


def _is_azure_func() -> bool:
return bool(os.getenv("FUNCTIONS_WORKER_RUNTIME"))


def _is_gcp_func() -> bool:
return bool(os.getenv("K_SERVICE") or os.getenv("FUNCTION_NAME"))


def _is_vercel() -> bool:
return bool(os.getenv("VERCEL"))


def _getenv_int(key: str) -> Optional[int]:
"""Like os.getenv but returns an int, or None if the value is missing/malformed."""
val = os.getenv(key)
if not val:
return None
try:
return int(val)
except ValueError:
return None


def _metadata_env() -> Dict[str, Any]:
env: Dict[str, Any] = {}
# Skip if multiple (or no) envs are matched.
if (_is_lambda(), _is_azure_func(), _is_gcp_func(), _is_vercel()).count(True) != 1:
return env
if _is_lambda():
env["name"] = "aws.lambda"
region = os.getenv("AWS_REGION")
if region:
env["region"] = region
memory_mb = _getenv_int("AWS_LAMBDA_FUNCTION_MEMORY_SIZE")
if memory_mb is not None:
env["memory_mb"] = memory_mb
elif _is_azure_func():
env["name"] = "azure.func"
elif _is_gcp_func():
env["name"] = "gcp.func"
region = os.getenv("FUNCTION_REGION")
if region:
env["region"] = region
memory_mb = _getenv_int("FUNCTION_MEMORY_MB")
if memory_mb is not None:
env["memory_mb"] = memory_mb
timeout_sec = _getenv_int("FUNCTION_TIMEOUT_SEC")
if timeout_sec is not None:
env["timeout_sec"] = timeout_sec
elif _is_vercel():
env["name"] = "vercel"
region = os.getenv("VERCEL_REGION")
if region:
env["region"] = region
return env


_MAX_METADATA_SIZE = 512


# See: https://github.com/mongodb/specifications/blob/5112bcc/source/mongodb-handshake/handshake.rst#limitations
def _truncate_metadata(metadata):
"""Perform metadata truncation."""
if len(bson.encode(metadata)) <= _MAX_METADATA_SIZE:
return
# 1. Omit fields from env except env.name.
env_name = metadata.get("env", {}).get("name")
if env_name:
metadata["env"] = {"name": env_name}
if len(bson.encode(metadata)) <= _MAX_METADATA_SIZE:
return
# 2. Omit fields from os except os.type.
os_type = metadata.get("os", {}).get("type")
if os_type:
metadata["os"] = {"type": os_type}
if len(bson.encode(metadata)) <= _MAX_METADATA_SIZE:
return
# 3. Omit the env document entirely.
metadata.pop("env", None)
encoded_size = len(bson.encode(metadata))
if encoded_size <= _MAX_METADATA_SIZE:
return
# 4. Truncate platform.
overflow = encoded_size - _MAX_METADATA_SIZE
plat = metadata.get("platform", "")
if plat:
plat = plat[:-overflow]
if plat:
metadata["platform"] = plat
else:
metadata.pop("platform", None)


# If the first getaddrinfo call of this interpreter's life is on a thread,
# while the main thread holds the import lock, getaddrinfo deadlocks trying
# to import the IDNA codec. Import it here, where presumably we're on the
Expand Down Expand Up @@ -364,6 +462,12 @@ def __init__(
if driver.platform:
self.__metadata["platform"] = "%s|%s" % (_METADATA["platform"], driver.platform)

env = _metadata_env()
if env:
self.__metadata["env"] = env

_truncate_metadata(self.__metadata)

@property
def _credentials(self):
"""A :class:`~pymongo.auth.MongoCredentials` instance or None."""
Expand Down
61 changes: 60 additions & 1 deletion test/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import threading
import time
from typing import Iterable, Type, no_type_check
from unittest.mock import patch

sys.path[0:0] = [""]

Expand Down Expand Up @@ -113,7 +114,6 @@ class ClientUnitTest(unittest.TestCase):
client: MongoClient

@classmethod
@client_context.require_connection
def setUpClass(cls):
cls.client = rs_or_single_client(connect=False, serverSelectionTimeoutMS=100)

Expand Down Expand Up @@ -1867,6 +1867,65 @@ def test_exhaust_getmore_network_error(self):
self.assertNotIn(sock_info, pool.sockets)
self.assertEqual(0, pool.requests)

def _test_handshake(self, env_vars, expected_env):
mock_env = os.environ.copy()
mock_env.update(env_vars)
with patch.dict("os.environ", mock_env):
metadata = copy.deepcopy(_METADATA)
if expected_env is not None:
metadata["env"] = expected_env
with rs_or_single_client(serverSelectionTimeoutMS=10000) as client:
client.admin.command("ping")
options = client._MongoClient__options
self.assertEqual(options.pool_options.metadata, metadata)

def test_handshake_01_aws(self):
self._test_handshake(
{
"AWS_EXECUTION_ENV": "AWS_Lambda_python3.9",
"AWS_REGION": "us-east-2",
"AWS_LAMBDA_FUNCTION_MEMORY_SIZE": "1024",
},
{"name": "aws.lambda", "region": "us-east-2", "memory_mb": 1024},
)

def test_handshake_02_azure(self):
self._test_handshake({"FUNCTIONS_WORKER_RUNTIME": "python"}, {"name": "azure.func"})

def test_handshake_03_gcp(self):
self._test_handshake(
{
"K_SERVICE": "servicename",
"FUNCTION_MEMORY_MB": "1024",
"FUNCTION_TIMEOUT_SEC": "60",
"FUNCTION_REGION": "us-central1",
},
{"name": "gcp.func", "region": "us-central1", "memory_mb": 1024, "timeout_sec": 60},
)

def test_handshake_04_vercel(self):
self._test_handshake(
{"VERCEL": "1", "VERCEL_REGION": "cdg1"}, {"name": "vercel", "region": "cdg1"}
)

def test_handshake_05_multiple(self):
self._test_handshake(
{"AWS_EXECUTION_ENV": "AWS_Lambda_python3.9", "FUNCTIONS_WORKER_RUNTIME": "python"},
None,
)

def test_handshake_06_region_too_long(self):
self._test_handshake(
{"AWS_EXECUTION_ENV": "AWS_Lambda_python3.9", "AWS_REGION": "a" * 512},
{"name": "aws.lambda"},
)

def test_handshake_07_memory_invalid_int(self):
self._test_handshake(
{"AWS_EXECUTION_ENV": "AWS_Lambda_python3.9", "AWS_LAMBDA_FUNCTION_MEMORY_SIZE": "big"},
{"name": "aws.lambda"},
)


class TestClientLazyConnect(IntegrationTest):
"""Test concurrent operations on a lazily-connecting MongoClient."""
Expand Down