1
1
from __future__ import annotations
2
2
3
3
import logging
4
+ import time
4
5
from dataclasses import dataclass
5
- from datetime import datetime , timedelta
6
- from typing import Any , List , Mapping , Optional , Tuple
6
+ from datetime import datetime
7
+ from typing import Any , Dict , List , Mapping , Optional
7
8
8
9
from snuba_sdk import (
9
10
BooleanCondition ,
13
14
Direction ,
14
15
Entity ,
15
16
Function ,
16
- Limit ,
17
17
Op ,
18
18
OrderBy ,
19
19
Query ,
20
20
Request ,
21
21
)
22
22
23
+ from sentry import analytics , features
23
24
from sentry .eventstore .models import Event
25
+ from sentry .models .eventuser import EventUser as EventUser_model
24
26
from sentry .models .project import Project
25
27
from sentry .snuba .dataset import Dataset , EntityKey
26
28
from sentry .utils .avatar import get_gravatar_url
35
37
{
36
38
("user_id" ): "id" ,
37
39
("user_name" ): "username" ,
38
- ("email " ): "email" ,
40
+ ("user_email " ): "email" ,
39
41
("ip_address_v4" , "ip_address_v6" ): "ip" ,
40
42
}
41
43
)
@@ -82,12 +84,18 @@ def get_display_name(self):
82
84
83
85
@classmethod
84
86
def for_projects (
85
- self , projects : List [Project ], keyword_filters : Mapping [str , Any ]
87
+ self ,
88
+ projects : List [Project ],
89
+ keyword_filters : Mapping [str , List [Any ]],
90
+ filter_boolean = BooleanOp .AND ,
91
+ return_all = False ,
86
92
) -> List [EventUser ]:
87
93
"""
88
94
Fetch the EventUser with a Snuba query that exists within a list of projects
89
95
and valid `keyword_filters`. The `keyword_filter` keys are in `KEYWORD_MAP`.
90
96
"""
97
+ start_time = time .time ()
98
+
91
99
oldest_project = min (projects , key = lambda item : item .date_added )
92
100
93
101
where_conditions = [
@@ -96,59 +104,108 @@ def for_projects(
96
104
Condition (Column ("timestamp" ), Op .GTE , oldest_project .date_added ),
97
105
]
98
106
107
+ keyword_where_conditions = []
99
108
for keyword , value in keyword_filters .items ():
109
+ if not isinstance (value , list ):
110
+ raise ValueError (f"{ keyword } filter must be a list of values" )
111
+
100
112
snuba_column = SNUBA_KEYWORD_MAP .get_key (keyword )
101
113
if isinstance (snuba_column , tuple ):
102
- where_conditions .append (
103
- BooleanCondition (
104
- BooleanOp .OR ,
105
- [
106
- Condition (
107
- Column (column ),
108
- Op .EQ ,
109
- value
110
- if SNUBA_COLUMN_COALASCE .get (column , None ) is None
111
- else Function (
112
- SNUBA_COLUMN_COALASCE .get (column ), parameters = [value ]
113
- ),
114
- )
115
- for column in snuba_column
116
- ],
114
+ for filter_value in value :
115
+ keyword_where_conditions .append (
116
+ BooleanCondition (
117
+ BooleanOp .OR ,
118
+ [
119
+ Condition (
120
+ Column (column ),
121
+ Op .IN ,
122
+ value
123
+ if SNUBA_COLUMN_COALASCE .get (column , None ) is None
124
+ else Function (
125
+ SNUBA_COLUMN_COALASCE .get (column ), parameters = [filter_value ]
126
+ ),
127
+ )
128
+ for column in snuba_column
129
+ ],
130
+ )
117
131
)
132
+ else :
133
+ keyword_where_conditions .append (Condition (Column (snuba_column ), Op .IN , value ))
134
+
135
+ if len (keyword_where_conditions ) > 1 :
136
+ where_conditions .append (
137
+ BooleanCondition (
138
+ filter_boolean ,
139
+ keyword_where_conditions ,
118
140
)
141
+ )
119
142
120
- else :
121
- where_conditions .append (Condition (Column (snuba_column ), Op .EQ , value ))
143
+ if len (keyword_where_conditions ) == 1 :
144
+ where_conditions .extend (
145
+ keyword_where_conditions ,
146
+ )
147
+
148
+ columns = [
149
+ Column ("project_id" ),
150
+ Column ("ip_address_v6" ),
151
+ Column ("ip_address_v4" ),
152
+ Column ("user_id" ),
153
+ Column ("user_name" ),
154
+ Column ("user_email" ),
155
+ ]
122
156
123
157
query = Query (
124
158
match = Entity (EntityKey .Events .value ),
125
159
select = [
126
- Column ("project_id" ),
127
- Column ("group_id" ),
128
- Column ("ip_address_v6" ),
129
- Column ("ip_address_v4" ),
130
- Column ("event_id" ),
131
- Column ("user_id" ),
132
- Column ("user" ),
133
- Column ("user_name" ),
134
- Column ("user_email" ),
160
+ * columns ,
161
+ Function ("max" , [Column ("timestamp" )], "latest_timestamp" ),
135
162
],
136
163
where = where_conditions ,
137
- limit = Limit ( 1 ) ,
138
- orderby = [OrderBy (Column ("timestamp " ), Direction .DESC )],
164
+ groupby = [ * columns ] ,
165
+ orderby = [OrderBy (Column ("latest_timestamp " ), Direction .DESC )],
139
166
)
140
167
168
+ if not return_all :
169
+ query .set_limit (1 )
170
+
141
171
request = Request (
142
172
dataset = Dataset .Events .value ,
143
173
app_id = REFERRER ,
144
174
query = query ,
145
175
tenant_ids = {"referrer" : REFERRER , "organization_id" : projects [0 ].organization .id },
146
176
)
147
177
data_results = raw_snql_query (request , referrer = REFERRER )["data" ]
148
- results = [EventUser .from_snuba (result ) for result in data_results ]
178
+
179
+ results = self ._find_unique (data_results )
180
+ end_time = time .time ()
181
+ analytics .record (
182
+ "eventuser_snuba.query" ,
183
+ project_ids = [p .id for p in projects ],
184
+ query = query .print (),
185
+ count_rows_returned = len (data_results ),
186
+ count_rows_filtered = len (data_results ) - len (results ),
187
+ query_time_ms = int ((end_time - start_time ) * 1000 ),
188
+ )
149
189
150
190
return results
151
191
192
@staticmethod
def _find_unique(data_results: List[dict[str, Any]]):
    """
    Collapse Snuba result rows into EventUser objects with distinct tag values.

    Every row is converted with `EventUser.from_snuba`. When several rows
    produce the same `tag_value`, only the first one (in result order) is
    kept, and the returned list preserves that first-seen order.
    """
    candidates = [EventUser.from_snuba(row) for row in data_results]

    # dicts keep insertion order and `setdefault` never overwrites an
    # existing key, so this retains the first EventUser seen per tag value.
    first_by_tag_value = {}
    for candidate in candidates:
        first_by_tag_value.setdefault(candidate.tag_value, candidate)

    return list(first_by_tag_value.values())
208
+
152
209
@staticmethod
153
210
def from_snuba (result : Mapping [str , Any ]) -> EventUser :
154
211
"""
@@ -159,11 +216,47 @@ def from_snuba(result: Mapping[str, Any]) -> EventUser:
159
216
project_id = result .get ("project_id" ),
160
217
email = result .get ("user_email" ),
161
218
username = result .get ("user_name" ),
162
- name = result . get ( "user_name" ) ,
219
+ name = None ,
163
220
ip_address = result .get ("ip_address_v4" ) or result .get ("ip_address_v6" ),
164
221
user_ident = result .get ("user_id" ),
165
222
)
166
223
224
@classmethod
def for_tags(cls, project_id: int, values):
    """
    Finds matching EventUser objects from a list of tag values.

    Each entry of `values` is a string of the form "<keyword>:<value>". It is
    split on the first colon only, so the value part may itself contain
    colons. The keyword is resolved to an EventUser attribute name through
    `KEYWORD_MAP`.

    When the "organizations:eventuser-from-snuba" feature is not enabled for
    the project's organization, the lookup is delegated unchanged to
    `EventUser_model.for_tags`.

    Return a dictionary of {tag_value: event_user}. Tag values without a
    matching user are omitted. An unknown `project_id`, or an empty `values`
    on the Snuba path, yields an empty dictionary.
    """
    # Materialize once: `for_projects` expects a list, and indexing an empty
    # queryset would raise IndexError instead of reporting "no matches".
    projects = list(Project.objects.filter(id=project_id))
    if not projects:
        return {}

    if not features.has("organizations:eventuser-from-snuba", projects[0].organization):
        return EventUser_model.for_tags(project_id, values)

    # Group the requested values by keyword: {"email": ["a@x", "b@x"], ...}
    keyword_filters: Dict[str, List[Any]] = {}
    for tag in values:
        parts = tag.split(":", 1)
        # parts[-1] rather than parts[1]: a tag without a colon keeps using
        # the whole string as both keyword and value, as before.
        keyword_filters.setdefault(parts[0], []).append(parts[-1])

    # With no filters the query would scan every user of the project only to
    # produce an empty mapping below, so skip it.
    if not keyword_filters:
        return {}

    eventusers = cls.for_projects(
        projects, keyword_filters, filter_boolean=BooleanOp.OR, return_all=True
    )

    result = {}
    for keyword, filter_values in keyword_filters.items():
        column = KEYWORD_MAP.get_key(keyword)
        for filter_value in filter_values:
            # First match wins; `eventusers` keeps the order returned by
            # `for_projects`.
            matching_euser = next(
                (
                    euser
                    for euser in eventusers
                    if getattr(euser, column, None) == filter_value
                ),
                None,
            )
            if matching_euser is not None:
                # The key reproduces the caller's tag value exactly.
                result[f"{keyword}:{filter_value}"] = matching_euser

    return result
259
+
167
260
@property
168
261
def tag_value (self ):
169
262
"""
@@ -189,75 +282,3 @@ def serialize(self):
189
282
"ipAddress" : self .ip_address ,
190
283
"avatarUrl" : get_gravatar_url (self .email , size = 32 ),
191
284
}
192
-
193
-
194
- def find_eventuser_with_snuba (event : Event ):
195
- """
196
- Query Snuba to get the EventUser information for an Event.
197
- """
198
- start_date , end_date = _start_and_end_dates (event .datetime )
199
-
200
- query = _generate_entity_dataset_query (
201
- event .project_id , event .group_id , event .event_id , start_date , end_date
202
- )
203
- request = Request (
204
- dataset = Dataset .Events .value ,
205
- app_id = REFERRER ,
206
- query = query ,
207
- tenant_ids = {"referrer" : REFERRER , "organization_id" : event .project .organization .id },
208
- )
209
- data_results = raw_snql_query (request , referrer = REFERRER )["data" ]
210
-
211
- if len (data_results ) == 0 :
212
- logger .info (
213
- "Errors dataset query to find EventUser did not return any results." ,
214
- extra = {
215
- "event_id" : event .event_id ,
216
- "project_id" : event .project_id ,
217
- "group_id" : event .group_id ,
218
- },
219
- )
220
- return {}
221
-
222
- return data_results [0 ]
223
-
224
-
225
- def _generate_entity_dataset_query (
226
- project_id : Optional [int ],
227
- group_id : Optional [int ],
228
- event_id : str ,
229
- start_date : datetime ,
230
- end_date : datetime ,
231
- ) -> Query :
232
- """This simply generates a query based on the passed parameters"""
233
- where_conditions = [
234
- Condition (Column ("event_id" ), Op .EQ , event_id ),
235
- Condition (Column ("timestamp" ), Op .GTE , start_date ),
236
- Condition (Column ("timestamp" ), Op .LT , end_date ),
237
- ]
238
- if project_id :
239
- where_conditions .append (Condition (Column ("project_id" ), Op .EQ , project_id ))
240
-
241
- if group_id :
242
- where_conditions .append (Condition (Column ("group_id" ), Op .EQ , group_id ))
243
-
244
- return Query (
245
- match = Entity (EntityKey .Events .value ),
246
- select = [
247
- Column ("project_id" ),
248
- Column ("group_id" ),
249
- Column ("ip_address_v6" ),
250
- Column ("ip_address_v4" ),
251
- Column ("event_id" ),
252
- Column ("user_id" ),
253
- Column ("user" ),
254
- Column ("user_name" ),
255
- Column ("user_email" ),
256
- ],
257
- where = where_conditions ,
258
- )
259
-
260
-
261
- def _start_and_end_dates (time : datetime ) -> Tuple [datetime , datetime ]:
262
- """Return the 10 min range start and end time range ."""
263
- return time - timedelta (minutes = 5 ), time + timedelta (minutes = 5 )
0 commit comments