Skip to content

Commit e3f4723

Browse files
author
NisanthanNanthakumar
authored
feat(eventuser): Migrate IssuesByTagProcessor away from EventUser (#59672)
## Objective: Create the `for_tags` method in the EventUser dataclass and migrate `get_eventuser_callback` to use the dataclass.
1 parent 7a9cb56 commit e3f4723

File tree

7 files changed

+374
-136
lines changed

7 files changed

+374
-136
lines changed

src/sentry/analytics/events/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from .cron_monitor_created import * # noqa: F401,F403
1313
from .eventuser_endpoint_request import * # noqa: F401,F403
1414
from .eventuser_equality_check import * # noqa: F401,F403
15+
from .eventuser_snuba_query import * # noqa: F401,F403
1516
from .first_cron_checkin_sent import * # noqa: F401,F403
1617
from .first_event_sent import * # noqa: F401,F403
1718
from .first_feedback_sent import * # noqa: F401,F403
src/sentry/analytics/events/eventuser_snuba_query.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from sentry import analytics
2+
3+
4+
class EventUserSnubaQuery(analytics.Event):
5+
type = "eventuser_snuba.query"
6+
7+
attributes = (
8+
analytics.Attribute("project_ids", type=list),
9+
analytics.Attribute("query"),
10+
analytics.Attribute("count_rows_returned", required=True, type=int),
11+
analytics.Attribute("count_rows_filtered", required=True, type=int),
12+
analytics.Attribute("query_time_ms", type=int),
13+
)
14+
15+
16+
analytics.register(EventUserSnubaQuery)

src/sentry/data_export/processors/issues_by_tag.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from __future__ import annotations
22

33
from sentry import tagstore
4-
from sentry.models.eventuser import EventUser
4+
from sentry.models.eventuser import EventUser as EventUser_model
55
from sentry.models.group import Group, get_group_with_redirect
66
from sentry.models.project import Project
7+
from sentry.utils.eventuser import EventUser
78

89
from ..base import ExportError
910

@@ -97,7 +98,13 @@ def serialize_row(item, key):
9798
}
9899
if key == "user":
99100
euser = item._eventuser
100-
result["id"] = euser.ident if euser else ""
101+
result["id"] = (
102+
euser.user_ident
103+
if euser and isinstance(euser, EventUser)
104+
else euser.ident
105+
if euser and isinstance(euser, EventUser_model)
106+
else ""
107+
)
101108
result["email"] = euser.email if euser else ""
102109
result["username"] = euser.username if euser else ""
103110
result["ip_address"] = euser.ip_address if euser else ""

src/sentry/search/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def get_user_tag(projects: Sequence[Project], key: str, value: str) -> str:
5151
# TODO(dcramer): do something with case of multiple matches
5252
try:
5353
if features.has("organizations:eventuser-from-snuba", projects[0].organization):
54-
euser = EventUser.for_projects(projects, {key: value})[0]
54+
euser = EventUser.for_projects(projects, {key: [value]})[0]
5555
else:
5656
lookup = EventUser_model.attr_from_keyword(key)
5757
euser = EventUser_model.objects.filter(

src/sentry/utils/eventuser.py

Lines changed: 128 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from __future__ import annotations
22

33
import logging
4+
import time
45
from dataclasses import dataclass
5-
from datetime import datetime, timedelta
6-
from typing import Any, List, Mapping, Optional, Tuple
6+
from datetime import datetime
7+
from typing import Any, Dict, List, Mapping, Optional
78

89
from snuba_sdk import (
910
BooleanCondition,
@@ -13,14 +14,15 @@
1314
Direction,
1415
Entity,
1516
Function,
16-
Limit,
1717
Op,
1818
OrderBy,
1919
Query,
2020
Request,
2121
)
2222

23+
from sentry import analytics, features
2324
from sentry.eventstore.models import Event
25+
from sentry.models.eventuser import EventUser as EventUser_model
2426
from sentry.models.project import Project
2527
from sentry.snuba.dataset import Dataset, EntityKey
2628
from sentry.utils.avatar import get_gravatar_url
@@ -35,7 +37,7 @@
3537
{
3638
("user_id"): "id",
3739
("user_name"): "username",
38-
("email"): "email",
40+
("user_email"): "email",
3941
("ip_address_v4", "ip_address_v6"): "ip",
4042
}
4143
)
@@ -82,12 +84,18 @@ def get_display_name(self):
8284

8385
@classmethod
8486
def for_projects(
85-
self, projects: List[Project], keyword_filters: Mapping[str, Any]
87+
self,
88+
projects: List[Project],
89+
keyword_filters: Mapping[str, List[Any]],
90+
filter_boolean=BooleanOp.AND,
91+
return_all=False,
8692
) -> List[EventUser]:
8793
"""
8894
Fetch the EventUser with a Snuba query that exists within a list of projects
8995
and valid `keyword_filters`. The `keyword_filters` keys are in `KEYWORD_MAP`.
9096
"""
97+
start_time = time.time()
98+
9199
oldest_project = min(projects, key=lambda item: item.date_added)
92100

93101
where_conditions = [
@@ -96,59 +104,108 @@ def for_projects(
96104
Condition(Column("timestamp"), Op.GTE, oldest_project.date_added),
97105
]
98106

107+
keyword_where_conditions = []
99108
for keyword, value in keyword_filters.items():
109+
if not isinstance(value, list):
110+
raise ValueError(f"{keyword} filter must be a list of values")
111+
100112
snuba_column = SNUBA_KEYWORD_MAP.get_key(keyword)
101113
if isinstance(snuba_column, tuple):
102-
where_conditions.append(
103-
BooleanCondition(
104-
BooleanOp.OR,
105-
[
106-
Condition(
107-
Column(column),
108-
Op.EQ,
109-
value
110-
if SNUBA_COLUMN_COALASCE.get(column, None) is None
111-
else Function(
112-
SNUBA_COLUMN_COALASCE.get(column), parameters=[value]
113-
),
114-
)
115-
for column in snuba_column
116-
],
114+
for filter_value in value:
115+
keyword_where_conditions.append(
116+
BooleanCondition(
117+
BooleanOp.OR,
118+
[
119+
Condition(
120+
Column(column),
121+
Op.IN,
122+
value
123+
if SNUBA_COLUMN_COALASCE.get(column, None) is None
124+
else Function(
125+
SNUBA_COLUMN_COALASCE.get(column), parameters=[filter_value]
126+
),
127+
)
128+
for column in snuba_column
129+
],
130+
)
117131
)
132+
else:
133+
keyword_where_conditions.append(Condition(Column(snuba_column), Op.IN, value))
134+
135+
if len(keyword_where_conditions) > 1:
136+
where_conditions.append(
137+
BooleanCondition(
138+
filter_boolean,
139+
keyword_where_conditions,
118140
)
141+
)
119142

120-
else:
121-
where_conditions.append(Condition(Column(snuba_column), Op.EQ, value))
143+
if len(keyword_where_conditions) == 1:
144+
where_conditions.extend(
145+
keyword_where_conditions,
146+
)
147+
148+
columns = [
149+
Column("project_id"),
150+
Column("ip_address_v6"),
151+
Column("ip_address_v4"),
152+
Column("user_id"),
153+
Column("user_name"),
154+
Column("user_email"),
155+
]
122156

123157
query = Query(
124158
match=Entity(EntityKey.Events.value),
125159
select=[
126-
Column("project_id"),
127-
Column("group_id"),
128-
Column("ip_address_v6"),
129-
Column("ip_address_v4"),
130-
Column("event_id"),
131-
Column("user_id"),
132-
Column("user"),
133-
Column("user_name"),
134-
Column("user_email"),
160+
*columns,
161+
Function("max", [Column("timestamp")], "latest_timestamp"),
135162
],
136163
where=where_conditions,
137-
limit=Limit(1),
138-
orderby=[OrderBy(Column("timestamp"), Direction.DESC)],
164+
groupby=[*columns],
165+
orderby=[OrderBy(Column("latest_timestamp"), Direction.DESC)],
139166
)
140167

168+
if not return_all:
169+
query.set_limit(1)
170+
141171
request = Request(
142172
dataset=Dataset.Events.value,
143173
app_id=REFERRER,
144174
query=query,
145175
tenant_ids={"referrer": REFERRER, "organization_id": projects[0].organization.id},
146176
)
147177
data_results = raw_snql_query(request, referrer=REFERRER)["data"]
148-
results = [EventUser.from_snuba(result) for result in data_results]
178+
179+
results = self._find_unique(data_results)
180+
end_time = time.time()
181+
analytics.record(
182+
"eventuser_snuba.query",
183+
project_ids=[p.id for p in projects],
184+
query=query.print(),
185+
count_rows_returned=len(data_results),
186+
count_rows_filtered=len(data_results) - len(results),
187+
query_time_ms=int((end_time - start_time) * 1000),
188+
)
149189

150190
return results
151191

192+
@staticmethod
193+
def _find_unique(data_results: List[dict[str, Any]]):
194+
"""
195+
Return a list containing the first EventUser object seen for each
196+
unique tag_value in the Snuba results, preserving result order.
197+
"""
198+
unique_tag_values = set()
199+
unique_event_users = []
200+
201+
for euser in [EventUser.from_snuba(item) for item in data_results]:
202+
tag_value = euser.tag_value
203+
if tag_value not in unique_tag_values:
204+
unique_event_users.append(euser)
205+
unique_tag_values.add(tag_value)
206+
207+
return unique_event_users
208+
152209
@staticmethod
153210
def from_snuba(result: Mapping[str, Any]) -> EventUser:
154211
"""
@@ -159,11 +216,47 @@ def from_snuba(result: Mapping[str, Any]) -> EventUser:
159216
project_id=result.get("project_id"),
160217
email=result.get("user_email"),
161218
username=result.get("user_name"),
162-
name=result.get("user_name"),
219+
name=None,
163220
ip_address=result.get("ip_address_v4") or result.get("ip_address_v6"),
164221
user_ident=result.get("user_id"),
165222
)
166223

224+
@classmethod
225+
def for_tags(cls, project_id: int, values):
226+
"""
227+
Finds matching EventUser objects from a list of tag values.
228+
229+
Return a dictionary of {tag_value: event_user}.
230+
"""
231+
projects = Project.objects.filter(id=project_id)
232+
233+
if not features.has("organizations:eventuser-from-snuba", projects[0].organization):
234+
return EventUser_model.for_tags(project_id, values)
235+
236+
result = {}
237+
keyword_filters: Dict[str, Any] = {}
238+
for value in values:
239+
key, value = value.split(":", 1)[0], value.split(":", 1)[-1]
240+
if keyword_filters.get(key):
241+
keyword_filters[key].append(value)
242+
else:
243+
keyword_filters[key] = [value]
244+
245+
eventusers = EventUser.for_projects(
246+
projects, keyword_filters, filter_boolean=BooleanOp.OR, return_all=True
247+
)
248+
249+
for keyword, values in keyword_filters.items():
250+
column = KEYWORD_MAP.get_key(keyword)
251+
for value in values:
252+
matching_euser = next(
253+
(euser for euser in eventusers if getattr(euser, column, None) == value), None
254+
)
255+
if matching_euser:
256+
result[f"{keyword}:{value}"] = matching_euser
257+
258+
return result
259+
167260
@property
168261
def tag_value(self):
169262
"""
@@ -189,75 +282,3 @@ def serialize(self):
189282
"ipAddress": self.ip_address,
190283
"avatarUrl": get_gravatar_url(self.email, size=32),
191284
}
192-
193-
194-
def find_eventuser_with_snuba(event: Event):
195-
"""
196-
Query Snuba to get the EventUser information for an Event.
197-
"""
198-
start_date, end_date = _start_and_end_dates(event.datetime)
199-
200-
query = _generate_entity_dataset_query(
201-
event.project_id, event.group_id, event.event_id, start_date, end_date
202-
)
203-
request = Request(
204-
dataset=Dataset.Events.value,
205-
app_id=REFERRER,
206-
query=query,
207-
tenant_ids={"referrer": REFERRER, "organization_id": event.project.organization.id},
208-
)
209-
data_results = raw_snql_query(request, referrer=REFERRER)["data"]
210-
211-
if len(data_results) == 0:
212-
logger.info(
213-
"Errors dataset query to find EventUser did not return any results.",
214-
extra={
215-
"event_id": event.event_id,
216-
"project_id": event.project_id,
217-
"group_id": event.group_id,
218-
},
219-
)
220-
return {}
221-
222-
return data_results[0]
223-
224-
225-
def _generate_entity_dataset_query(
226-
project_id: Optional[int],
227-
group_id: Optional[int],
228-
event_id: str,
229-
start_date: datetime,
230-
end_date: datetime,
231-
) -> Query:
232-
"""This simply generates a query based on the passed parameters"""
233-
where_conditions = [
234-
Condition(Column("event_id"), Op.EQ, event_id),
235-
Condition(Column("timestamp"), Op.GTE, start_date),
236-
Condition(Column("timestamp"), Op.LT, end_date),
237-
]
238-
if project_id:
239-
where_conditions.append(Condition(Column("project_id"), Op.EQ, project_id))
240-
241-
if group_id:
242-
where_conditions.append(Condition(Column("group_id"), Op.EQ, group_id))
243-
244-
return Query(
245-
match=Entity(EntityKey.Events.value),
246-
select=[
247-
Column("project_id"),
248-
Column("group_id"),
249-
Column("ip_address_v6"),
250-
Column("ip_address_v4"),
251-
Column("event_id"),
252-
Column("user_id"),
253-
Column("user"),
254-
Column("user_name"),
255-
Column("user_email"),
256-
],
257-
where=where_conditions,
258-
)
259-
260-
261-
def _start_and_end_dates(time: datetime) -> Tuple[datetime, datetime]:
262-
"""Return the 10 min range start and end time range ."""
263-
return time - timedelta(minutes=5), time + timedelta(minutes=5)

0 commit comments

Comments
 (0)