Skip to content

Commit 4c71260

Browse files
authored
ref(backup): Do deepcopy before validation (#53346)
Previously, all validation mutated the inputs in-place. This was performant (no need to make deep copies of two potentially large JSON blobs), but potentially bug prone if not held carefully, since the validation itself mutated the inputs via JSONScrubbingComparators. This means that the post-comparison input objects MUST NOT be re-used, since they are meaningfully different from the inputs that were declared valid. Instead, we now do deepcopies under the hood, leaving the originally passed-in inputs untouched. Issue: getsentry/team-ospo#156
1 parent e190c22 commit 4c71260

File tree

2 files changed

+15
-8
lines changed

2 files changed

+15
-8
lines changed

src/sentry/runner/commands/backup.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from abc import ABC, abstractmethod
4+
from copy import deepcopy
45
from datetime import datetime, timedelta, timezone
56
from difflib import unified_diff
67
from io import StringIO
@@ -159,10 +160,13 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> Comparator
159160

160161

161162
def validate(
162-
expect: JSONData, actual: JSONData, comparators: ComparatorMap = DEFAULT_COMPARATORS
163+
expect: JSONData,
164+
actual: JSONData,
165+
comparators: ComparatorMap = DEFAULT_COMPARATORS,
163166
) -> ComparatorFindings:
164167
"""Ensures that originally imported data correctly matches actual outputted data, and produces a
165-
list of reasons why not when it doesn't"""
168+
list of reasons why not when it doesn't.
169+
"""
166170

167171
def json_lines(obj: JSONData) -> list[str]:
168172
"""Take a JSONData object and pretty-print it as JSON."""
@@ -176,6 +180,11 @@ def json_lines(obj: JSONData) -> list[str]:
176180
id = InstanceID(model["model"], model["pk"])
177181
exp_models[id] = model
178182

183+
# Because we may be scrubbing data from the objects as we compare them, we may (optionally) make
184+
# deep copies to start to avoid potentially mangling the input data.
185+
expect = deepcopy(expect)
186+
actual = deepcopy(actual)
187+
179188
# Ensure that the actual JSON contains no duplicates - we assume that the expected JSON did not.
180189
for model in actual:
181190
id = InstanceID(model["model"], model["pk"])

tests/sentry/backup/test_correctness.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020

2121

2222
def import_export_then_validate(
23-
tmp_path: Path, fixture_file_name: str, map: ComparatorMap = EMPTY_COMPARATORS_FOR_TESTING
23+
tmp_path: Path,
24+
fixture_file_name: str,
25+
map: ComparatorMap = EMPTY_COMPARATORS_FOR_TESTING,
2426
) -> None:
2527
"""Test helper that validates that data imported from a fixture `.json` file correctly matches
2628
the actual outputted export data."""
@@ -34,11 +36,7 @@ def import_export_then_validate(
3436
rv = CliRunner().invoke(import_, [str(fixture_file_path)])
3537
assert rv.exit_code == 0, rv.output
3638

37-
res = validate(
38-
expect,
39-
tmp_export_to_file(tmp_path.joinpath("tmp_test_file.json")),
40-
map,
41-
)
39+
res = validate(expect, tmp_export_to_file(tmp_path.joinpath("tmp_test_file.json")), map)
4240
if res.findings:
4341
raise ValidationError(res)
4442

0 commit comments

Comments
 (0)