Skip to content

breaking: Use sha256 to hash StepFunctions trace id and manually set _dd.p.tid #490

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions datadog_lambda/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,14 +354,18 @@ def extract_context_from_kinesis_event(event, lambda_context):
return extract_context_from_lambda_context(lambda_context)


def _deterministic_md5_hash(s: str) -> int:
def _deterministic_md5_hash(s: str, bits: int) -> (int, int):
"""MD5 here is to generate trace_id, not for any encryption."""
hex_number = hashlib.md5(s.encode("ascii")).hexdigest()
binary = bin(int(hex_number, 16))
binary_str = str(binary)
binary_str_remove_0b = binary_str[2:].rjust(128, "0")
most_significant_64_bits_without_leading_1 = "0" + binary_str_remove_0b[1:-64]
result = int(most_significant_64_bits_without_leading_1, 2)
sha256_hash = hashlib.sha256(s.encode()).hexdigest()

# bin() output starts with '0b'; strip it, then zfill to a full 256 bits
binary_hash = bin(int(sha256_hash, 16))[2:].zfill(256)
if bits == 64:
updated_binary_hash = '0' + binary_hash[1: 64]
else: # bits == 128
# set 1st and 65th bit to '0'
updated_binary_hash = '0' + binary_hash[1: 64] + '0' + binary_hash[65: 128]
result = int(updated_binary_hash, 2)
if result == 0:
return 1
return result
Expand All @@ -376,9 +380,13 @@ def extract_context_from_step_functions(event, lambda_context):
execution_id = event.get("Execution").get("Id")
state_name = event.get("State").get("Name")
state_entered_time = event.get("State").get("EnteredTime")
trace_id = _deterministic_md5_hash(execution_id)
# returning 128 bits since the 128-bit traceId will be split into the
# traditional traceId and the _dd.p.tid tag
# https://github.com/DataDog/dd-trace-py/blob/3e34d21cb9b5e1916e549047158cb119317b96ab/ddtrace/propagation/http.py#L232-L240
trace_id = _deterministic_md5_hash(execution_id, 128)

parent_id = _deterministic_md5_hash(
f"{execution_id}#{state_name}#{state_entered_time}"
f"{execution_id}#{state_name}#{state_entered_time}", 64
)
sampling_priority = SamplingPriority.AUTO_KEEP
return Context(
Expand Down
21 changes: 14 additions & 7 deletions tests/test_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1992,19 +1992,26 @@ def test_mark_trace_as_error_for_5xx_responses_sends_error_metric_and_set_error_

class TestStepFunctionsTraceContext(unittest.TestCase):
def test_deterministic_m5_hash(self):
result = _deterministic_md5_hash("some_testing_random_string")
self.assertEqual(2251275791555400689, result)
result = _deterministic_md5_hash("some_testing_random_string", 128)
self.assertEqual(80506605202309154694697844088692857990, result)

def test_deterministic_m5_hash__result_the_same_as_backend(self):
def test_deterministic_m5_hash__result_the_same_as_backend_1(self):
result = _deterministic_md5_hash(
"arn:aws:states:sa-east-1:601427271234:express:DatadogStateMachine:acaf1a67-336a-e854-1599-2a627eb2dd8a"
":c8baf081-31f1-464d-971f-70cb17d01111#step-one#2022-12-08T21:08:19.224Z"
"arn:aws:states:sa-east-1:425362996713:stateMachine:MyStateMachine-b276uka1j"
"#lambda#1", 64
)
self.assertEqual(8034507082463708833, result)
self.assertEqual(3711631873188331089, result)

def test_deterministic_m5_hash__result_the_same_as_backend_2(self):
result = _deterministic_md5_hash(
"arn:aws:states:sa-east-1:425362996713:stateMachine:MyStateMachine-b276uka1j"
"#lambda#2", 64
)
self.assertEqual(5759173372325510050, result)

def test_deterministic_m5_hash__always_leading_with_zero(self):
for i in range(100):
result = _deterministic_md5_hash(str(i))
result = _deterministic_md5_hash(str(i), 64)
result_in_binary = bin(int(result))
# Leading zeros will be omitted, so only test for full 64 bits present
if len(result_in_binary) == 66: # "0b" + 64 bits.
Expand Down
Loading