Skip to content

Commit 51c9ec0

Browse files
authored
Implement AwsSpanMetricsProcessor and MetricAttributeGenerator (aws-observability#8)
In this commit, we are implementing AwsSpanMetricsProcessor and AwsSpanMetricsProcessorBuilder. We also needed to implement the MetricAttributeGenerator interface, since it is a dependency of AwsSpanMetricsProcessor, and we needed to implement a stub for AwsMetricAttributeGenerator, since it is a dependency of AwsSpanMetricsProcessorBuilder. As much as possible, we are attempting to mirror the implementation of these classes found in https://github.com/aws-observability/aws-otel-java-instrumentation. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
2 parents dc54290 + 29b75ad commit 51c9ec0

File tree

4 files changed

+242
-0
lines changed

4 files changed

+242
-0
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
from metric_attribute_generator import MetricAttributeGenerator
4+
5+
from opentelemetry.sdk.resources import Resource
6+
from opentelemetry.sdk.trace import BoundedAttributes, ReadableSpan
7+
8+
9+
class AwsMetricAttributeGenerator(MetricAttributeGenerator):
    """AwsMetricAttributeGenerator generates specific metric attributes for incoming and outgoing traffic.

    AwsMetricAttributeGenerator generates very specific metric attributes based on low-cardinality span and resource
    attributes. If such attributes are not present, we fall back to default values.

    The goal of these particular metric attributes is to get metrics for incoming and outgoing traffic for a service.
    Namely, SpanKind#SERVER and SpanKind#CONSUMER spans represent "incoming" traffic, SpanKind#CLIENT and
    SpanKind#PRODUCER spans represent "outgoing" traffic, and SpanKind#INTERNAL spans are ignored.
    """

    @staticmethod
    def generate_metric_attributes_dict_from_span(
        span: ReadableSpan, resource: Resource
    ) -> "dict[str, BoundedAttributes]":
        """This method is used by the AwsSpanMetricsProcessor to generate service and dependency metrics.

        Args:
            span - ReadableSpan to be used to generate metric attributes (currently unused by this stub).
            resource - Resource associated with the span (currently unused by this stub).
        Returns:
            A dictionary of metric attributes. This stub always returns an empty dictionary.
        """
        # The return annotation is a string so that it is never evaluated at definition time; the previous
        # annotation, ``[str, BoundedAttributes]``, was a list literal rather than a type.
        # TODO: implement the actual attribute generation.
        return {}
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
from typing import Optional
4+
5+
from metric_attribute_generator import MetricAttributeGenerator
6+
from typing_extensions import override
7+
8+
from opentelemetry.context import Context
9+
from opentelemetry.metrics import Histogram
10+
from opentelemetry.sdk.resources import Resource
11+
from opentelemetry.sdk.trace import BoundedAttributes, ReadableSpan, Span, SpanProcessor, StatusCode
12+
from opentelemetry.semconv.trace import SpanAttributes
13+
14+
# Attribute key under which the HTTP status code is looked up when deriving error and fault metrics.
_HTTP_STATUS_CODE = SpanAttributes.HTTP_STATUS_CODE
15+
# Divisor used to convert a span duration from nanoseconds to milliseconds.
_NANOS_TO_MILLIS: float = 1_000_000.0
16+
17+
# Constants for deriving error and fault metrics
# Both ranges are inclusive: 400-499 is counted as an error, 500-599 as a fault.
18+
_ERROR_CODE_LOWER_BOUND: int = 400
19+
_ERROR_CODE_UPPER_BOUND: int = 499
20+
_FAULT_CODE_LOWER_BOUND: int = 500
21+
_FAULT_CODE_UPPER_BOUND: int = 599
22+
23+
24+
class AwsSpanMetricsProcessor(SpanProcessor):
    """AwsSpanMetricsProcessor is a SpanProcessor that generates metrics from spans

    This processor will generate metrics based on span data. It depends on a MetricAttributeGenerator being provided on
    instantiation, which will provide a means to determine attributes which should be used to create metrics. A Resource
    must also be provided, which is used to generate metrics. Finally, three Histograms must be provided, which will be
    used to actually create desired metrics (see below)

    AwsSpanMetricsProcessor produces metrics for errors (e.g. HTTP 4XX status codes), faults (e.g. HTTP 5XX status
    codes), and latency (in Milliseconds). Errors and faults are counted, while latency is measured with a histogram.
    Metrics are emitted with attributes derived from span attributes.

    For highest fidelity metrics, this processor should be coupled with the AlwaysRecordSampler, which will result in
    100% of spans being sent to the processor.
    """

    # Metric instruments
    _error_histogram: Histogram
    _fault_histogram: Histogram
    _latency_histogram: Histogram

    _generator: MetricAttributeGenerator
    _resource: Resource

    def __init__(
        self,
        error_histogram: Histogram,
        fault_histogram: Histogram,
        latency_histogram: Histogram,
        generator: MetricAttributeGenerator,
        resource: Resource,
    ):
        """Store the metric instruments, the attribute generator and the resource used when spans end."""
        self._error_histogram = error_histogram
        self._fault_histogram = fault_histogram
        self._latency_histogram = latency_histogram
        self._generator = generator
        self._resource = resource

    # pylint: disable=no-self-use
    @override
    def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
        """Do nothing; metrics are only derived from ended spans."""
        return

    @override
    def on_end(self, span: ReadableSpan) -> None:
        """Record error, fault and latency metrics once for every attribute set generated for the span."""
        attribute_dict: dict[str, BoundedAttributes] = self._generator.generate_metric_attributes_dict_from_span(
            span, self._resource
        )
        # Iterate explicitly: map() is lazy in Python 3, so a discarded map() call would never record anything.
        for attributes in attribute_dict.values():
            self._record_metrics(span, attributes)

    @override
    def shutdown(self) -> None:
        """Shut down the processor by delegating to force_flush."""
        self.force_flush()

    # pylint: disable=no-self-use
    @override
    def force_flush(self, timeout_millis: Optional[int] = None) -> bool:
        """Return True unconditionally; this method performs no work and ignores timeout_millis."""
        return True

    def _record_metrics(self, span: ReadableSpan, attributes: BoundedAttributes) -> None:
        """Record error/fault and latency metrics for one attribute set, skipping empty attribute sets."""
        # Only record metrics if non-empty attributes are returned.
        if len(attributes) > 0:
            self._record_error_or_fault(span, attributes)
            self._record_latency(span, attributes)

    def _record_error_or_fault(self, span: ReadableSpan, attributes: BoundedAttributes) -> None:
        """Record a 0/1 value on both the error and the fault histogram for the span.

        The HTTP status code is read from the span attributes, falling back to the metric attributes when the span
        does not carry one. A 4XX code records an error, a 5XX code records a fault. For any other (or missing)
        code, a fault is recorded only if the span status is StatusCode.ERROR.
        """
        # The logic to record error and fault should be kept in sync with the aws-xray exporter whenever possible except
        # for the throttle.
        # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/exporter/awsxrayexporter/internal/translator/cause.go#L121-L160
        http_status_code: Optional[int] = span.attributes.get(_HTTP_STATUS_CODE)
        status_code: StatusCode = span.status.status_code

        if http_status_code is None:
            http_status_code = attributes.get(_HTTP_STATUS_CODE)

        if _is_not_error_or_fault(http_status_code):
            if StatusCode.ERROR == status_code:
                self._error_histogram.record(0, attributes)
                self._fault_histogram.record(1, attributes)
            else:
                self._error_histogram.record(0, attributes)
                self._fault_histogram.record(0, attributes)
        elif _ERROR_CODE_LOWER_BOUND <= http_status_code <= _ERROR_CODE_UPPER_BOUND:
            self._error_histogram.record(1, attributes)
            self._fault_histogram.record(0, attributes)
        elif _FAULT_CODE_LOWER_BOUND <= http_status_code <= _FAULT_CODE_UPPER_BOUND:
            self._error_histogram.record(0, attributes)
            self._fault_histogram.record(1, attributes)

    def _record_latency(self, span: ReadableSpan, attributes: BoundedAttributes) -> None:
        """Record the span duration (end_time - start_time), converted from nanoseconds to milliseconds."""
        nanos: int = span.end_time - span.start_time
        millis: float = nanos / _NANOS_TO_MILLIS
        self._latency_histogram.record(millis, attributes)
117+
118+
119+
def _is_not_error_or_fault(http_status_code: int) -> bool:
    """Tell whether the given HTTP status code is neither an error nor a fault.

    Returns True when the code is None, lies below the error lower bound, or lies above the fault upper bound;
    returns False otherwise.
    """
    # A missing status code can never be classified as an error or a fault.
    if http_status_code is None:
        return True

    below_error_range = http_status_code < _ERROR_CODE_LOWER_BOUND
    return below_error_range or http_status_code > _FAULT_CODE_UPPER_BOUND
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
from aws_metric_attribute_generator import AwsMetricAttributeGenerator
4+
from aws_span_metrics_processor import AwsSpanMetricsProcessor
5+
from metric_attribute_generator import MetricAttributeGenerator
6+
7+
from opentelemetry.sdk.metrics import Histogram, Meter, MeterProvider
8+
from opentelemetry.sdk.resources import Resource
9+
10+
# Metric instrument configuration constants
11+
# Names of the error, fault and latency histograms created by the builder.
_ERROR: str = "Error"
12+
_FAULT: str = "Fault"
13+
_LATENCY: str = "Latency"
14+
# Unit passed to the meter when the latency histogram is created.
_LATENCY_UNITS: str = "Milliseconds"
15+
16+
# Defaults
# Used by the builder when set_generator / set_scope_name are never called.
17+
_DEFAULT_GENERATOR: MetricAttributeGenerator = AwsMetricAttributeGenerator()
18+
_DEFAULT_SCOPE_NAME: str = "AwsSpanMetricsProcessor"
19+
20+
21+
class AwsSpanMetricsProcessorBuilder:
    """A builder for AwsSpanMetricsProcessor"""

    # Required builder elements
    _meter_provider: MeterProvider
    _resource: Resource

    # Optional builder elements
    _generator: MetricAttributeGenerator = _DEFAULT_GENERATOR
    _scope_name: str = _DEFAULT_SCOPE_NAME

    def __init__(self, meter_provider: MeterProvider, resource: Resource):
        """Create a builder from the required elements.

        Args:
            meter_provider - MeterProvider used by build() to obtain the meter that creates the histograms.
            resource - Resource handed to the AwsSpanMetricsProcessor created by build().
        """
        # Store under the underscore-prefixed names that build() reads; assigning to differently named
        # attributes would leave these unset and make build() fail with AttributeError.
        self._meter_provider = meter_provider
        self._resource = resource

    def set_generator(self, generator: MetricAttributeGenerator) -> "AwsSpanMetricsProcessorBuilder":
        """
        Sets the generator used to generate attributes used in metrics produced by span metrics processor. If unset,
        defaults to _DEFAULT_GENERATOR. Must not be None.

        Returns this builder so calls can be chained. Raises ValueError if generator is None.
        """
        if generator is None:
            raise ValueError("generator must not be None")
        self._generator = generator
        return self

    def set_scope_name(self, scope_name: str) -> "AwsSpanMetricsProcessorBuilder":
        """
        Sets the scope name used in the creation of metrics by the span metrics processor. If unset, defaults to
        _DEFAULT_SCOPE_NAME. Must not be None.

        Returns this builder so calls can be chained. Raises ValueError if scope_name is None.
        """
        if scope_name is None:
            raise ValueError("scope_name must not be None")
        self._scope_name = scope_name
        return self

    def build(self) -> AwsSpanMetricsProcessor:
        """Create the error, fault and latency histograms and return an AwsSpanMetricsProcessor that uses them."""
        meter: Meter = self._meter_provider.get_meter(self._scope_name)
        error_histogram: Histogram = meter.create_histogram(_ERROR)
        fault_histogram: Histogram = meter.create_histogram(_FAULT)
        latency_histogram: Histogram = meter.create_histogram(_LATENCY, unit=_LATENCY_UNITS)

        return AwsSpanMetricsProcessor(
            error_histogram, fault_histogram, latency_histogram, self._generator, self._resource
        )
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
from opentelemetry.sdk.resources import Resource
4+
from opentelemetry.sdk.trace import BoundedAttributes, ReadableSpan
5+
6+
7+
class MetricAttributeGenerator:
    """MetricAttributeGenerator is an interface for generating metric attributes from a span.

    Metric attribute generator defines an interface for classes that can generate specific attributes to be used by an
    AwsSpanMetricsProcessor to produce metrics and by AwsMetricAttributesSpanExporter to wrap the original span.
    """

    # Dictionary keys named in the Returns section of generate_metric_attributes_dict_from_span.
    SERVICE_METRIC: str = "Service"
    DEPENDENCY_METRIC: str = "Dependency"

    @staticmethod
    def generate_metric_attributes_dict_from_span(
        span: ReadableSpan, resource: Resource
    ) -> "dict[str, BoundedAttributes]":
        """Generate metric attributes from a span.

        Given a span and associated resource, produce meaningful metric attributes for metrics produced from the span.
        If no metrics should be generated from this span, return empty attributes.

        This base method has no body beyond this docstring; implementations are expected to override it.

        Args:
            span - ReadableSpan to be used to generate metric attributes.
            resource - Resource associated with Span to be used to generate metric attributes.
        Returns:
            A dictionary of Attributes objects with values assigned to key "Service" or "Dependency". It will contain
            either 0, 1, or 2 items.
        """

0 commit comments

Comments
 (0)