1
1
from __future__ import annotations
2
2
3
3
import logging
4
- import time
5
4
from dataclasses import dataclass
6
- from datetime import datetime
7
- from typing import Any , Dict , List , Mapping , Optional
5
+ from datetime import datetime , timedelta
6
+ from typing import Any , List , Mapping , Optional , Tuple
8
7
9
8
from snuba_sdk import (
10
9
BooleanCondition ,
14
13
Direction ,
15
14
Entity ,
16
15
Function ,
16
+ Limit ,
17
17
Op ,
18
18
OrderBy ,
19
19
Query ,
20
20
Request ,
21
21
)
22
22
23
- from sentry import analytics , features
24
23
from sentry .eventstore .models import Event
25
- from sentry .models .eventuser import EventUser as EventUser_model
26
24
from sentry .models .project import Project
27
25
from sentry .snuba .dataset import Dataset , EntityKey
28
26
from sentry .utils .avatar import get_gravatar_url
37
35
{
38
36
("user_id" ): "id" ,
39
37
("user_name" ): "username" ,
40
- ("user_email " ): "email" ,
38
+ ("email " ): "email" ,
41
39
("ip_address_v4" , "ip_address_v6" ): "ip" ,
42
40
}
43
41
)
@@ -84,18 +82,12 @@ def get_display_name(self):
84
82
85
83
@classmethod
86
84
def for_projects (
87
- self ,
88
- projects : List [Project ],
89
- keyword_filters : Mapping [str , List [Any ]],
90
- filter_boolean = BooleanOp .AND ,
91
- return_all = False ,
85
+ self , projects : List [Project ], keyword_filters : Mapping [str , Any ]
92
86
) -> List [EventUser ]:
93
87
"""
94
88
Fetch the EventUser with a Snuba query that exists within a list of projects
95
89
and valid `keyword_filters`. The `keyword_filter` keys are in `KEYWORD_MAP`.
96
90
"""
97
- start_time = time .time ()
98
-
99
91
oldest_project = min (projects , key = lambda item : item .date_added )
100
92
101
93
where_conditions = [
@@ -104,108 +96,59 @@ def for_projects(
104
96
Condition (Column ("timestamp" ), Op .GTE , oldest_project .date_added ),
105
97
]
106
98
107
- keyword_where_conditions = []
108
99
for keyword , value in keyword_filters .items ():
109
- if not isinstance (value , list ):
110
- raise ValueError (f"{ keyword } filter must be a list of values" )
111
-
112
100
snuba_column = SNUBA_KEYWORD_MAP .get_key (keyword )
113
101
if isinstance (snuba_column , tuple ):
114
- for filter_value in value :
115
- keyword_where_conditions .append (
116
- BooleanCondition (
117
- BooleanOp .OR ,
118
- [
119
- Condition (
120
- Column (column ),
121
- Op .IN ,
122
- value
123
- if SNUBA_COLUMN_COALASCE .get (column , None ) is None
124
- else Function (
125
- SNUBA_COLUMN_COALASCE .get (column ), parameters = [filter_value ]
126
- ),
127
- )
128
- for column in snuba_column
129
- ],
130
- )
102
+ where_conditions .append (
103
+ BooleanCondition (
104
+ BooleanOp .OR ,
105
+ [
106
+ Condition (
107
+ Column (column ),
108
+ Op .EQ ,
109
+ value
110
+ if SNUBA_COLUMN_COALASCE .get (column , None ) is None
111
+ else Function (
112
+ SNUBA_COLUMN_COALASCE .get (column ), parameters = [value ]
113
+ ),
114
+ )
115
+ for column in snuba_column
116
+ ],
131
117
)
132
- else :
133
- keyword_where_conditions .append (Condition (Column (snuba_column ), Op .IN , value ))
134
-
135
- if len (keyword_where_conditions ) > 1 :
136
- where_conditions .append (
137
- BooleanCondition (
138
- filter_boolean ,
139
- keyword_where_conditions ,
140
118
)
141
- )
142
-
143
- if len (keyword_where_conditions ) == 1 :
144
- where_conditions .extend (
145
- keyword_where_conditions ,
146
- )
147
119
148
- columns = [
149
- Column ("project_id" ),
150
- Column ("ip_address_v6" ),
151
- Column ("ip_address_v4" ),
152
- Column ("user_id" ),
153
- Column ("user_name" ),
154
- Column ("user_email" ),
155
- ]
120
+ else :
121
+ where_conditions .append (Condition (Column (snuba_column ), Op .EQ , value ))
156
122
157
123
query = Query (
158
124
match = Entity (EntityKey .Events .value ),
159
125
select = [
160
- * columns ,
161
- Function ("max" , [Column ("timestamp" )], "latest_timestamp" ),
126
+ Column ("project_id" ),
127
+ Column ("group_id" ),
128
+ Column ("ip_address_v6" ),
129
+ Column ("ip_address_v4" ),
130
+ Column ("event_id" ),
131
+ Column ("user_id" ),
132
+ Column ("user" ),
133
+ Column ("user_name" ),
134
+ Column ("user_email" ),
162
135
],
163
136
where = where_conditions ,
164
- groupby = [ * columns ] ,
165
- orderby = [OrderBy (Column ("latest_timestamp " ), Direction .DESC )],
137
+ limit = Limit ( 1 ) ,
138
+ orderby = [OrderBy (Column ("timestamp " ), Direction .DESC )],
166
139
)
167
140
168
- if not return_all :
169
- query .set_limit (1 )
170
-
171
141
request = Request (
172
142
dataset = Dataset .Events .value ,
173
143
app_id = REFERRER ,
174
144
query = query ,
175
145
tenant_ids = {"referrer" : REFERRER , "organization_id" : projects [0 ].organization .id },
176
146
)
177
147
data_results = raw_snql_query (request , referrer = REFERRER )["data" ]
178
-
179
- results = self ._find_unique (data_results )
180
- end_time = time .time ()
181
- analytics .record (
182
- "eventuser_snuba.query" ,
183
- project_ids = [p .id for p in projects ],
184
- query = query .print (),
185
- count_rows_returned = len (data_results ),
186
- count_rows_filtered = len (data_results ) - len (results ),
187
- query_time_ms = int ((end_time - start_time ) * 1000 ),
188
- )
148
+ results = [EventUser .from_snuba (result ) for result in data_results ]
189
149
190
150
return results
191
151
192
- @staticmethod
193
- def _find_unique (data_results : List [dict [str , Any ]]):
194
- """
195
- Return the first instance of an EventUser object
196
- with a unique tag_value from the Snuba results.
197
- """
198
- unique_tag_values = set ()
199
- unique_event_users = []
200
-
201
- for euser in [EventUser .from_snuba (item ) for item in data_results ]:
202
- tag_value = euser .tag_value
203
- if tag_value not in unique_tag_values :
204
- unique_event_users .append (euser )
205
- unique_tag_values .add (tag_value )
206
-
207
- return unique_event_users
208
-
209
152
@staticmethod
210
153
def from_snuba (result : Mapping [str , Any ]) -> EventUser :
211
154
"""
@@ -216,47 +159,11 @@ def from_snuba(result: Mapping[str, Any]) -> EventUser:
216
159
project_id = result .get ("project_id" ),
217
160
email = result .get ("user_email" ),
218
161
username = result .get ("user_name" ),
219
- name = None ,
162
+ name = result . get ( "user_name" ) ,
220
163
ip_address = result .get ("ip_address_v4" ) or result .get ("ip_address_v6" ),
221
164
user_ident = result .get ("user_id" ),
222
165
)
223
166
224
- @classmethod
225
- def for_tags (cls , project_id : int , values ):
226
- """
227
- Finds matching EventUser objects from a list of tag values.
228
-
229
- Return a dictionary of {tag_value: event_user}.
230
- """
231
- projects = Project .objects .filter (id = project_id )
232
-
233
- if not features .has ("organizations:eventuser-from-snuba" , projects [0 ].organization ):
234
- return EventUser_model .for_tags (project_id , values )
235
-
236
- result = {}
237
- keyword_filters : Dict [str , Any ] = {}
238
- for value in values :
239
- key , value = value .split (":" , 1 )[0 ], value .split (":" , 1 )[- 1 ]
240
- if keyword_filters .get (key ):
241
- keyword_filters [key ].append (value )
242
- else :
243
- keyword_filters [key ] = [value ]
244
-
245
- eventusers = EventUser .for_projects (
246
- projects , keyword_filters , filter_boolean = BooleanOp .OR , return_all = True
247
- )
248
-
249
- for keyword , values in keyword_filters .items ():
250
- column = KEYWORD_MAP .get_key (keyword )
251
- for value in values :
252
- matching_euser = next (
253
- (euser for euser in eventusers if getattr (euser , column , None ) == value ), None
254
- )
255
- if matching_euser :
256
- result [f"{ keyword } :{ value } " ] = matching_euser
257
-
258
- return result
259
-
260
167
@property
261
168
def tag_value (self ):
262
169
"""
@@ -282,3 +189,75 @@ def serialize(self):
282
189
"ipAddress" : self .ip_address ,
283
190
"avatarUrl" : get_gravatar_url (self .email , size = 32 ),
284
191
}
192
+
193
+
194
def find_eventuser_with_snuba(event: Event):
    """
    Fetch from Snuba the row recorded for ``event``.

    The lookup is keyed on the event's project id, group id and event id, and
    is bounded to the time window that ``_start_and_end_dates`` derives from
    ``event.datetime``.

    Returns the first row of the response's ``"data"`` list. When that list is
    empty, an info-level message is logged and an empty dict is returned.
    """
    window_start, window_end = _start_and_end_dates(event.datetime)

    snuba_request = Request(
        dataset=Dataset.Events.value,
        app_id=REFERRER,
        query=_generate_entity_dataset_query(
            event.project_id, event.group_id, event.event_id, window_start, window_end
        ),
        tenant_ids={"referrer": REFERRER, "organization_id": event.project.organization.id},
    )
    rows = raw_snql_query(snuba_request, referrer=REFERRER)["data"]

    # Happy path first: hand back the first matching row.
    if rows:
        return rows[0]

    # Nothing matched; leave a trace with the identifiers that were queried.
    logger.info(
        "Errors dataset query to find EventUser did not return any results.",
        extra={
            "event_id": event.event_id,
            "project_id": event.project_id,
            "group_id": event.group_id,
        },
    )
    return {}
223
+
224
+
225
def _generate_entity_dataset_query(
    project_id: Optional[int],
    group_id: Optional[int],
    event_id: str,
    start_date: datetime,
    end_date: datetime,
) -> Query:
    """
    Build the Events-entity query for a single event id.

    The query always filters on ``event_id`` and on
    ``start_date <= timestamp < end_date``. A ``project_id`` and/or
    ``group_id`` equality filter is added only when the corresponding argument
    is truthy. No request is sent here; the ``Query`` object is only
    constructed and returned.
    """
    filters = [
        Condition(Column("event_id"), Op.EQ, event_id),
        Condition(Column("timestamp"), Op.GTE, start_date),
        Condition(Column("timestamp"), Op.LT, end_date),
    ]

    # Optional scoping filters, appended in a fixed order; falsy ids
    # (e.g. None) contribute no condition.
    optional_filters = (("project_id", project_id), ("group_id", group_id))
    filters.extend(
        Condition(Column(column_name), Op.EQ, wanted)
        for column_name, wanted in optional_filters
        if wanted
    )

    selected_names = (
        "project_id",
        "group_id",
        "ip_address_v6",
        "ip_address_v4",
        "event_id",
        "user_id",
        "user",
        "user_name",
        "user_email",
    )

    return Query(
        match=Entity(EntityKey.Events.value),
        select=[Column(column_name) for column_name in selected_names],
        where=filters,
    )
259
+
260
+
261
def _start_and_end_dates(
    time: datetime, *, half_window: timedelta = timedelta(minutes=5)
) -> Tuple[datetime, datetime]:
    """
    Return the ``(start, end)`` bounds of a time window centred on ``time``.

    The window extends ``half_window`` before and after ``time``. With the
    default of 5 minutes the full window spans 10 minutes.

    :param time: the instant the window is centred on.
    :param half_window: distance from ``time`` to each bound (keyword-only).
    :return: ``(time - half_window, time + half_window)``.
    """
    return time - half_window, time + half_window
0 commit comments