Commit 9e36c51
fix(grouping): Only run grouping calculation once (#78630)
During grouping, the slowest part of the process is calculating the variants. Before Seer and before grouphash metadata, we only needed them once, but now we use them in two places in the Seer flow and are about to use them in another place for grouphash metadata. To avoid calculating them potentially up to four times, this PR refactors things so that they're passed from the place where they're initially calculated (in `event.get_hashes`) through the various intermediate functions to the spots in the Seer flow where they're currently used. This path also runs through the place where we'll need them for grouphash metadata.

Notes:

- To avoid having to change unrelated uses of `get_hashes`, instead of changing its return value I extracted most of its inner logic into a separate `get_hashes_and_variants` method. Now `get_hashes` calls `get_hashes_and_variants` (and just ignores the variants), and in the spot in ingest where we used to call `get_hashes`, we now call `get_hashes_and_variants`. (A toy sketch of this pattern follows the commit metadata below.)

- We have a pair of helpers, `run_primary_grouping` and `maybe_run_secondary_grouping`, for calculating primary and secondary hashes, respectively, which need matching signatures because they're both passed to `get_hashes_and_grouphashes` as the `hash_calculation_function` parameter. We never need (or want) variants from the secondary hash calculation, so in place of real variants data `maybe_run_secondary_grouping` just passes an empty dictionary.
1 parent 7ef3587 commit 9e36c51
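A minimal, self-contained sketch of the extract-and-delegate pattern described in the first note. Everything other than the two method names is simplified toy logic; the real implementation is in the `src/sentry/eventstore/models.py` diff below.

class Event:
    def __init__(self) -> None:
        self.data: dict = {}

    def get_grouping_variants(self) -> dict[str, str]:
        # Stand-in for the expensive variant calculation
        return {"app": "hash-a", "system": "hash-b"}

    def get_hashes_and_variants(self) -> tuple[list[str], dict[str, str]]:
        # The former inner logic of `get_hashes` lives here now, so callers
        # that need the variants can get them without a second calculation
        variants = self.get_grouping_variants()
        hashes = sorted(set(variants.values()))
        self.data["hashes"] = hashes
        return (hashes, variants)

    def get_hashes(self) -> list[str]:
        # Unrelated call sites keep working unchanged: same return value as
        # before, with the variants simply discarded
        return self.get_hashes_and_variants()[0]

hashes, variants = Event().get_hashes_and_variants()  # ingest path: keeps both
hashes_only = Event().get_hashes()                    # legacy path: hashes only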

File tree

9 files changed: +93 −72 lines changed

src/sentry/event_manager.py

Lines changed: 7 additions & 4 deletions
@@ -68,6 +68,7 @@
     check_for_group_creation_load_shed,
     is_non_error_type_group,
 )
+from sentry.grouping.variants import BaseVariant
 from sentry.ingest.inbound_filters import FilterStatKeys
 from sentry.integrations.tasks.kick_off_status_syncs import kick_off_status_syncs
 from sentry.issues.grouptype import ErrorGroupType

@@ -1318,7 +1319,9 @@ def assign_event_to_group(
         result = "found_secondary"
     # If we still haven't found a group, ask Seer for a match (if enabled for the project)
     else:
-        seer_matched_grouphash = maybe_check_seer_for_matching_grouphash(event, all_grouphashes)
+        seer_matched_grouphash = maybe_check_seer_for_matching_grouphash(
+            event, primary.variants, all_grouphashes
+        )

     if seer_matched_grouphash:
         group_info = handle_existing_grouphash(job, seer_matched_grouphash, all_grouphashes)

@@ -1358,7 +1361,7 @@ def get_hashes_and_grouphashes(
     job: Job,
     hash_calculation_function: Callable[
         [Project, Job, MutableTags],
-        tuple[GroupingConfig, list[str]],
+        tuple[GroupingConfig, list[str], dict[str, BaseVariant]],
     ],
     metric_tags: MutableTags,
 ) -> GroupHashInfo:

@@ -1373,14 +1376,14 @@ def get_hashes_and_grouphashes(
     project = job["event"].project

     # These will come back as Nones if the calculation decides it doesn't need to run
-    grouping_config, hashes = hash_calculation_function(project, job, metric_tags)
+    grouping_config, hashes, variants = hash_calculation_function(project, job, metric_tags)

     if hashes:
         grouphashes = get_or_create_grouphashes(project, hashes, grouping_config["id"])

         existing_grouphash = find_grouphash_with_group(grouphashes)

-        return GroupHashInfo(grouping_config, hashes, grouphashes, existing_grouphash)
+        return GroupHashInfo(grouping_config, variants, hashes, grouphashes, existing_grouphash)
     else:
         return NULL_GROUPHASH_INFO

src/sentry/eventstore/models.py

Lines changed: 24 additions & 14 deletions
@@ -332,6 +332,29 @@ def get_grouping_config(self) -> GroupingConfig:

         return get_grouping_config_dict_for_event_data(self.data, self.project)

+    def get_hashes_and_variants(
+        self, config: StrategyConfiguration | None = None
+    ) -> tuple[list[str], dict[str, BaseVariant]]:
+        """
+        Return the event's hash values, calculated using the given config, along with the
+        `variants` data used in grouping.
+        """
+
+        variants = self.get_grouping_variants(config)
+        # Sort the variants so that the system variant (if any) is always last, in order to resolve
+        # ambiguities when choosing primary_hash for Snuba
+        sorted_variants = sorted(
+            variants.items(),
+            key=lambda name_and_variant: 1 if name_and_variant[0] == "system" else 0,
+        )
+        # Get each variant's hash value, filtering out Nones
+        hashes = list({variant.get_hash() for _, variant in sorted_variants} - {None})
+
+        # Write to event before returning
+        self.data["hashes"] = hashes
+
+        return (hashes, variants)
+
     def get_hashes(self, force_config: StrategyConfiguration | None = None) -> list[str]:
         """
         Returns the calculated hashes for the event. This uses the stored

@@ -353,20 +376,7 @@ def get_hashes(self, force_config: StrategyConfiguration | None = None) -> list[
             return hashes

         # Create fresh hashes
-
-        variants = self.get_grouping_variants(force_config)
-        # Sort the variants so that the system variant (if any) is always last, in order to resolve
-        # ambiguities when choosing primary_hash for Snuba
-        sorted_variants = sorted(
-            variants.items(),
-            key=lambda name_and_variant: 1 if name_and_variant[0] == "system" else 0,
-        )
-        # Get each variant's hash value, filtering out Nones
-        hashes = list({variant.get_hash() for _, variant in sorted_variants} - {None})
-
-        # Write to event before returning
-        self.data["hashes"] = hashes
-        return hashes
+        return self.get_hashes_and_variants(force_config)[0]

     def normalize_stacktraces_for_grouping(self, grouping_config: StrategyConfiguration) -> None:
         """Normalize stacktraces and clear memoized interfaces

src/sentry/grouping/api.py

Lines changed: 2 additions & 1 deletion
@@ -42,13 +42,14 @@
 @dataclass
 class GroupHashInfo:
     config: GroupingConfig
+    variants: dict[str, BaseVariant]
     hashes: list[str]
     grouphashes: list[GroupHash]
     existing_grouphash: GroupHash | None


 NULL_GROUPING_CONFIG: GroupingConfig = {"id": "", "enhancements": ""}
-NULL_GROUPHASH_INFO = GroupHashInfo(NULL_GROUPING_CONFIG, [], [], None)
+NULL_GROUPHASH_INFO = GroupHashInfo(NULL_GROUPING_CONFIG, {}, [], [], None)


 class GroupingConfigNotFound(LookupError):
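Because dataclass fields double as positional constructor arguments, inserting `variants` second shifts every later argument, which is why `NULL_GROUPHASH_INFO` gains the `{}` placeholder. A toy illustration (field types simplified from the diff above):

# Toy illustration of why NULL_GROUPHASH_INFO had to change: dataclass fields
# are positional constructor arguments, so a field inserted before `hashes`
# shifts every later positional argument by one.
from dataclasses import dataclass

@dataclass
class GroupHashInfo:
    config: dict
    variants: dict  # new field, inserted before `hashes`
    hashes: list
    grouphashes: list
    existing_grouphash: object | None

# The empty-variants placeholder now occupies the second position
NULL_GROUPHASH_INFO = GroupHashInfo({"id": "", "enhancements": ""}, {}, [], [], None)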

src/sentry/grouping/ingest/hashing.py

Lines changed: 25 additions & 22 deletions
@@ -21,6 +21,7 @@
     load_grouping_config,
 )
 from sentry.grouping.ingest.config import is_in_transition
+from sentry.grouping.variants import BaseVariant
 from sentry.models.grouphash import GroupHash
 from sentry.models.grouphashmetadata import GroupHashMetadata
 from sentry.models.project import Project

@@ -37,10 +38,10 @@

 def _calculate_event_grouping(
     project: Project, event: Event, grouping_config: GroupingConfig
-) -> list[str]:
+) -> tuple[list[str], dict[str, BaseVariant]]:
     """
-    Calculate hashes for the event using the given grouping config, and add them into the event
-    data.
+    Calculate hashes for the event using the given grouping config, add them to the event data, and
+    return them, along with the variants data upon which they're based.
     """
     metric_tags: MutableTags = {
         "grouping_config": grouping_config["id"],

@@ -59,7 +60,7 @@ def _calculate_event_grouping(
         # The active grouping config was put into the event in the
         # normalize step before. We now also make sure that the
         # fingerprint was set to `'{{ default }}' just in case someone
-        # removed it from the payload. The call to get_hashes will then
+        # removed it from the payload. The call to `get_hashes_and_variants` will then
        # look at `grouping_config` to pick the right parameters.
         event.data["fingerprint"] = event.data.data.get("fingerprint") or ["{{ default }}"]
         apply_server_fingerprinting(

@@ -69,9 +70,9 @@ def _calculate_event_grouping(
         )

     with metrics.timer("event_manager.event.get_hashes", tags=metric_tags):
-        hashes = event.get_hashes(loaded_grouping_config)
+        hashes, variants = event.get_hashes_and_variants(loaded_grouping_config)

-    return hashes
+    return (hashes, variants)


 def maybe_run_background_grouping(project: Project, job: Job) -> None:

@@ -100,12 +101,12 @@ def _calculate_background_grouping(
         "sdk": normalized_sdk_tag_from_event(event.data),
     }
     with metrics.timer("event_manager.background_grouping", tags=metric_tags):
-        return _calculate_event_grouping(project, event, config)
+        return _calculate_event_grouping(project, event, config)[0]


 def maybe_run_secondary_grouping(
     project: Project, job: Job, metric_tags: MutableTags
-) -> tuple[GroupingConfig, list[str]]:
+) -> tuple[GroupingConfig, list[str], dict[str, BaseVariant]]:
     """
     If the projct is in a grouping config transition phase, calculate a set of secondary hashes for
     the job's event.

@@ -119,18 +120,20 @@ def maybe_run_secondary_grouping(
     secondary_grouping_config = SecondaryGroupingConfigLoader().get_config_dict(project)
     secondary_hashes = _calculate_secondary_hashes(project, job, secondary_grouping_config)

-    return (secondary_grouping_config, secondary_hashes)
+    # Return an empty variants dictionary because we need the signature of this function to match
+    # that of `run_primary_grouping` (so we have to return something), but we don't ever actually
+    # need the variant information
+    return (secondary_grouping_config, secondary_hashes, {})


 def _calculate_secondary_hashes(
     project: Project, job: Job, secondary_grouping_config: GroupingConfig
 ) -> list[str]:
-    """Calculate secondary hash for event using a fallback grouping config for a period of time.
-    This happens when we upgrade all projects that have not opted-out to automatic upgrades plus
-    when the customer changes the grouping config.
-    This causes extra load in save_event processing.
     """
-    secondary_hashes = []
+    Calculate hashes based on an older grouping config, so that unknown hashes calculated by the
+    current config can be matched to an existing group if there is one.
+    """
+    secondary_hashes: list[str] = []
     try:
         with sentry_sdk.start_span(
             op="event_manager",

@@ -139,7 +142,7 @@ def _calculate_secondary_hashes(
             # create a copy since `_calculate_event_grouping` modifies the event to add all sorts
             # of grouping info and we don't want the secondary grouping data in there
             event_copy = copy.deepcopy(job["event"])
-            secondary_hashes = _calculate_event_grouping(
+            secondary_hashes, _ = _calculate_event_grouping(
                 project, event_copy, secondary_grouping_config
             )
     except Exception as err:

@@ -150,9 +153,9 @@

 def run_primary_grouping(
     project: Project, job: Job, metric_tags: MutableTags
-) -> tuple[GroupingConfig, list[str]]:
+) -> tuple[GroupingConfig, list[str], dict[str, BaseVariant]]:
     """
-    Get the primary grouping config and primary hashes for the event.
+    Get the primary grouping config, primary hashes, and variants for the event.
     """
     with metrics.timer("event_manager.load_grouping_config"):
         grouping_config = get_grouping_config_dict_for_project(project)

@@ -165,16 +168,16 @@
         ),
         metrics.timer("event_manager.calculate_event_grouping", tags=metric_tags),
     ):
-        hashes = _calculate_primary_hashes(project, job, grouping_config)
+        hashes, variants = _calculate_primary_hashes_and_variants(project, job, grouping_config)

-    return (grouping_config, hashes)
+    return (grouping_config, hashes, variants)


-def _calculate_primary_hashes(
+def _calculate_primary_hashes_and_variants(
     project: Project, job: Job, grouping_config: GroupingConfig
-) -> list[str]:
+) -> tuple[list[str], dict[str, BaseVariant]]:
     """
-    Get the primary hash for the event.
+    Get the primary hash and variants for the event.

     This is pulled out into a separate function mostly in order to make testing easier.
     """

src/sentry/grouping/ingest/seer.py

Lines changed: 9 additions & 10 deletions
@@ -10,6 +10,7 @@
 from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
 from sentry.eventstore.models import Event
 from sentry.grouping.grouping_info import get_grouping_info_from_variants
+from sentry.grouping.variants import BaseVariant
 from sentry.models.grouphash import GroupHash
 from sentry.models.project import Project
 from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer

@@ -27,7 +28,7 @@
 logger = logging.getLogger("sentry.events.grouping")


-def should_call_seer_for_grouping(event: Event) -> bool:
+def should_call_seer_for_grouping(event: Event, variants: dict[str, BaseVariant]) -> bool:
     """
     Use event content, feature flags, rate limits, killswitches, seer health, etc. to determine
     whether a call to Seer should be made.

@@ -42,7 +43,7 @@ def should_call_seer_for_grouping(event: Event) -> bool:
         return False

     if (
-        _has_customized_fingerprint(event)
+        _has_customized_fingerprint(event, variants)
         or killswitch_enabled(project.id, event)
         or _circuit_breaker_broken(event, project)
         # **Do not add any new checks after this.** The rate limit check MUST remain the last of all

@@ -79,7 +80,7 @@ def _project_has_similarity_grouping_enabled(project: Project) -> bool:
 # combined with some other value). To the extent to which we're then using this function to decide
 # whether or not to call Seer, this means that the calculations giving rise to the default part of
 # the value never involve Seer input. In the long run, we probably want to change that.
-def _has_customized_fingerprint(event: Event) -> bool:
+def _has_customized_fingerprint(event: Event, variants: dict[str, BaseVariant]) -> bool:
     fingerprint = event.data.get("fingerprint", [])

     if "{{ default }}" in fingerprint:

@@ -97,7 +98,6 @@ def _has_customized_fingerprint(event: Event) -> bool:
         return True

     # Fully customized fingerprint (from either us or the user)
-    variants = event.get_grouping_variants()
     fingerprint_variant = variants.get("custom-fingerprint") or variants.get("built-in-fingerprint")

     if fingerprint_variant:

@@ -178,6 +178,7 @@ def _circuit_breaker_broken(event: Event, project: Project) -> bool:

 def get_seer_similar_issues(
     event: Event,
+    variants: dict[str, BaseVariant],
     num_neighbors: int = 1,
 ) -> tuple[dict[str, Any], GroupHash | None]:
     """

@@ -186,9 +187,7 @@ def get_seer_similar_issues(
     should go in (if any), or None if no neighbor was near enough.
     """
     event_hash = event.get_primary_hash()
-    stacktrace_string = get_stacktrace_string(
-        get_grouping_info_from_variants(event.get_grouping_variants())
-    )
+    stacktrace_string = get_stacktrace_string(get_grouping_info_from_variants(variants))
     exception_type = get_path(event.data, "exception", "values", -1, "type")

     request_data: SimilarIssuesEmbeddingsRequest = {

@@ -232,11 +231,11 @@


 def maybe_check_seer_for_matching_grouphash(
-    event: Event, all_grouphashes: list[GroupHash]
+    event: Event, variants: dict[str, BaseVariant], all_grouphashes: list[GroupHash]
 ) -> GroupHash | None:
     seer_matched_grouphash = None

-    if should_call_seer_for_grouping(event):
+    if should_call_seer_for_grouping(event, variants):
         metrics.incr(
             "grouping.similarity.did_call_seer",
             sample_rate=options.get("seer.similarity.metrics_sample_rate"),

@@ -246,7 +245,7 @@ def maybe_check_seer_for_matching_grouphash(
         try:
             # If no matching group is found in Seer, we'll still get back result
             # metadata, but `seer_matched_grouphash` will be None
-            seer_response_data, seer_matched_grouphash = get_seer_similar_issues(event)
+            seer_response_data, seer_matched_grouphash = get_seer_similar_issues(event, variants)
         except Exception as e:  # Insurance - in theory we shouldn't ever land here
             sentry_sdk.capture_exception(
                 e, tags={"event": event.event_id, "project": event.project.id}
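Putting this file's signature changes together with the `event_manager.py` change above, the `variants` dict now travels down the whole Seer path. A sketch of the call chain as it reads after this commit, reconstructed from the diffs:

# Call chain after this commit (reconstructed from the diffs above):
#
#   assign_event_to_group
#     -> maybe_check_seer_for_matching_grouphash(event, primary.variants, all_grouphashes)
#          -> should_call_seer_for_grouping(event, variants)
#               -> _has_customized_fingerprint(event, variants)
#          -> get_seer_similar_issues(event, variants)
#
# Nothing on this path calls `event.get_grouping_variants()` anymore; the
# variants are computed once, upstream, and handed down.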

tests/sentry/event_manager/grouping/test_assign_to_group.py

Lines changed: 8 additions & 6 deletions
@@ -10,7 +10,7 @@
 from sentry.event_manager import _create_group
 from sentry.eventstore.models import Event
 from sentry.grouping.ingest.hashing import (
-    _calculate_primary_hashes,
+    _calculate_primary_hashes_and_variants,
     _calculate_secondary_hashes,
     find_grouphash_with_group,
 )

@@ -29,7 +29,9 @@
 @contextmanager
 def patch_grouping_helpers(return_values: dict[str, Any]):
     wrapped_find_grouphash_with_group = capture_results(find_grouphash_with_group, return_values)
-    wrapped_calculate_primary_hashes = capture_results(_calculate_primary_hashes, return_values)
+    wrapped_calculate_primary_hashes = capture_results(
+        _calculate_primary_hashes_and_variants, return_values
+    )
     wrapped_calculate_secondary_hashes = capture_results(_calculate_secondary_hashes, return_values)

     with (

@@ -38,7 +40,7 @@ def patch_grouping_helpers(return_values: dict[str, Any]):
             wraps=wrapped_find_grouphash_with_group,
         ) as find_grouphash_with_group_spy,
         mock.patch(
-            "sentry.grouping.ingest.hashing._calculate_primary_hashes",
+            "sentry.grouping.ingest.hashing._calculate_primary_hashes_and_variants",
             wraps=wrapped_calculate_primary_hashes,
         ) as calculate_primary_hashes_spy,
         mock.patch(

@@ -59,7 +61,7 @@ def patch_grouping_helpers(return_values: dict[str, Any]):
     ):
         yield {
             "find_grouphash_with_group": find_grouphash_with_group_spy,
-            "_calculate_primary_hashes": calculate_primary_hashes_spy,
+            "_calculate_primary_hashes_and_variants": calculate_primary_hashes_spy,
             "_calculate_secondary_hashes": calculate_secondary_hashes_spy,
             "_create_group": create_group_spy,
             "record_calculation_metrics": record_calculation_metrics_spy,

@@ -145,7 +147,7 @@ def get_results_from_saving_event(
     with patch_grouping_helpers(return_values) as spies:
         calculate_secondary_hash_spy = spies["_calculate_secondary_hashes"]
         create_group_spy = spies["_create_group"]
-        calculate_primary_hash_spy = spies["_calculate_primary_hashes"]
+        calculate_primary_hash_spy = spies["_calculate_primary_hashes_and_variants"]
         record_calculation_metrics_spy = spies["record_calculation_metrics"]

         set_grouping_configs(

@@ -173,7 +175,7 @@ def get_results_from_saving_event(
         primary_hash_calculated = calculate_primary_hash_spy.call_count == 1
         secondary_hash_calculated = calculate_secondary_hash_spy.call_count == 1

-        primary_hash = return_values["_calculate_primary_hashes"][0][0]
+        primary_hash = return_values["_calculate_primary_hashes_and_variants"][0][0][0]
         primary_hash_found = bool(hash_search_result) and hash_search_result.hash == primary_hash

         new_group_created = create_group_spy.call_count == 1
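The extra trailing `[0]` in the last hunk is easy to misread; unpacked below with illustrative toy data (the dict key and the tuple shape come from the test above, the values do not):

# Unpacking the extra [0]: `capture_results` records each call's return value,
# and the helper now returns a (hashes, variants) tuple instead of a bare list.
return_values = {"_calculate_primary_hashes_and_variants": [(["abc123"], {"app": "..."})]}

first_call_return = return_values["_calculate_primary_hashes_and_variants"][0]
hashes = first_call_return[0]  # the hashes half of the (hashes, variants) tuple
primary_hash = hashes[0]       # the first hash -- hence [0][0][0] in the test
assert primary_hash == "abc123"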

tests/sentry/event_manager/grouping/test_group_creation_lock.py

Lines changed: 1 addition & 1 deletion
@@ -67,7 +67,7 @@ def test_group_creation_race(monkeypatch, default_project, lock_disabled):
     with (
         patch(
             "sentry.grouping.ingest.hashing._calculate_event_grouping",
-            return_value=["pound sign", "octothorpe"],
+            return_value=(["pound sign", "octothorpe"], {}),
         ),
         patch(
             "sentry.event_manager._get_group_processing_kwargs",
