Skip to content

Commit aa52a5c

Browse files
authored
fix(perf-issues): Accept sanitized span descriptions (#46187)
As of getsentry/sentry-javascript#7206 the Node.js SDK will not append search parameters to the span's description or the URL. Instead, the search parameters will be in the `http.query` key.
1 parent 9f466c3 commit aa52a5c

File tree

3 files changed

+57
-17
lines changed

3 files changed

+57
-17
lines changed

src/sentry/utils/performance_issues/base.py

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -171,19 +171,46 @@ def get_duration_between_spans(first_span: Span, second_span: Span):
171171

172172

173173
def get_url_from_span(span: Span) -> str:
174+
"""
175+
Parses the span data and pulls out the URL. Accounts for different SDKs and
176+
different versions of SDKs formatting and parsing the URL contents
177+
differently.
178+
"""
179+
174180
data = span.get("data") or {}
175-
url = data.get("url") or ""
176-
if not url:
177-
# If data is missing, fall back to description
178-
description = span.get("description") or ""
179-
parts = description.split(" ", 1)
180-
if len(parts) == 2:
181-
url = parts[1]
182-
183-
if type(url) is dict:
184-
url = url.get("pathname") or ""
185-
186-
return url
181+
182+
# The most modern version is to provide URL information in the span
183+
# data
184+
url_data = data.get("url")
185+
186+
if type(url_data) is dict:
187+
# Some transactions mysteriously provide the URL as a dict that looks
188+
# like JavaScript's URL object
189+
url = url_data.get("pathname") or ""
190+
url += url_data.get("search") or ""
191+
return url
192+
193+
if type(url_data) is str:
194+
# Usually the URL is a regular string, and so is the query. This
195+
# is the standardized format for all SDKs, and is the preferred
196+
# format going forward. Otherwise, if `http.query` is absent, `url`
197+
# contains the query.
198+
url = url_data
199+
query_data = data.get("http.query")
200+
if type(query_data) is str and len(query_data) > 0:
201+
url += f"?{query_data}"
202+
203+
return url
204+
205+
# Attempt to parse the full URL from the span description, in case
206+
# the previous approaches did not yield a good result
207+
description = span.get("description") or ""
208+
parts = description.split(" ", 1)
209+
if len(parts) == 2:
210+
url = parts[1]
211+
return url
212+
213+
return ""
187214

188215

189216
def fingerprint_spans(spans: List[Span], unique_only: bool = False):

src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -157,29 +157,32 @@ def is_span_eligible(cls, span: Span) -> bool:
157157
if description.strip()[:3].upper() != "GET":
158158
return False
159159

160+
url = get_url_from_span(span)
161+
160162
# GraphQL URLs have complicated queries in them. Until we parse those
161163
# queries to check for what's duplicated, we can't tell what is being
162164
# duplicated. Ignore them for now
163-
if "graphql" in description:
165+
if "graphql" in url:
164166
return False
165167

166168
# Next.js infixes its data URLs with a build ID. (e.g.,
167169
# /_next/data/<uuid>/some-endpoint) This causes a fingerprinting
168170
# explosion, since every deploy would change this ID and create new
169171
# fingerprints. Since we're not parameterizing URLs yet, we need to
170172
# exclude them
171-
if "_next/data" in description:
173+
if "_next/data" in url:
172174
return False
173175

174-
url = get_url_from_span(span)
175-
176176
# Next.js error pages cause an N+1 API Call that isn't useful to anyone
177177
if "__nextjs_original-stack-frame" in url:
178178
return False
179179

180180
if not url:
181181
return False
182182

183+
# Once most users update their SDKs to use the latest standard, we
184+
# won't have to do this, since the URLs will be sent in as `span.data`
185+
# in a parsed format
183186
parsed_url = urlparse(str(url))
184187

185188
if parsed_url.netloc in cls.HOST_DENYLIST:

tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ def test_allows_eligible_spans(span):
343343
{
344344
"span_id": "a",
345345
"op": "http.client",
346-
"description": "GET http://service.io/resource",
346+
"description": "GET /resource.js",
347347
"hash": "a",
348348
"data": {"url": "/resource.js"},
349349
},
@@ -353,6 +353,16 @@ def test_allows_eligible_spans(span):
353353
"description": "GET http://service.io/resource?graphql=somequery",
354354
"hash": "a",
355355
},
356+
{
357+
"span_id": "a",
358+
"op": "http.client",
359+
"description": "GET http://service.io/resource", # New JS SDK removes query string from description
360+
"hash": "a",
361+
"data": {
362+
"http.query": "graphql=somequery",
363+
"url": "http://service.io/resource",
364+
},
365+
},
356366
{
357367
"span_id": "a",
358368
"op": "http.client",

0 commit comments

Comments
 (0)