Skip to content

Commit f08bdf6

Browse files
committed
ref(core): Improve URL parsing utilities
1 parent 791d9e3 commit f08bdf6

File tree

4 files changed

+209
-43
lines changed

4 files changed

+209
-43
lines changed

packages/core/src/fetch.ts

Lines changed: 39 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
1-
/* eslint-disable complexity */
21
import { getClient } from './currentScopes';
32
import { SEMANTIC_ATTRIBUTE_SENTRY_OP, SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN } from './semanticAttributes';
43
import { SPAN_STATUS_ERROR, setHttpStatus, startInactiveSpan } from './tracing';
54
import { SentryNonRecordingSpan } from './tracing/sentryNonRecordingSpan';
6-
import type { FetchBreadcrumbHint, HandlerDataFetch, Span, SpanOrigin } from './types-hoist';
5+
import type { FetchBreadcrumbHint, HandlerDataFetch, Span, SpanAttributes, SpanOrigin } from './types-hoist';
76
import { SENTRY_BAGGAGE_KEY_PREFIX } from './utils-hoist/baggage';
87
import { isInstanceOf } from './utils-hoist/is';
9-
import { parseStringToURL, stripUrlQueryAndFragment } from './utils-hoist/url';
8+
import { isURLObjectRelative, parseStringToURLObject } from './utils-hoist/url';
109
import { hasSpansEnabled } from './utils/hasSpansEnabled';
1110
import { getActiveSpan } from './utils/spanUtils';
1211
import { getTraceData } from './utils/traceData';
@@ -54,40 +53,11 @@ export function instrumentFetchRequest(
5453
return undefined;
5554
}
5655

57-
// Curious about `thismessage:/`? See: https://www.rfc-editor.org/rfc/rfc2557.html
58-
// > When the methods above do not yield an absolute URI, a base URL
59-
// > of "thismessage:/" MUST be employed. This base URL has been
60-
// > defined for the sole purpose of resolving relative references
61-
// > within a multipart/related structure when no other base URI is
62-
// > specified.
63-
//
64-
// We need to provide a base URL to `parseStringToURL` because the fetch API gives us a
65-
// relative URL sometimes.
66-
//
67-
// This is the only case where we need to provide a base URL to `parseStringToURL`
68-
// because the relative URL is not valid on its own.
69-
const parsedUrl = url.startsWith('/') ? parseStringToURL(url, 'thismessage:/') : parseStringToURL(url);
70-
const fullUrl = url.startsWith('/') ? undefined : parsedUrl?.href;
71-
7256
const hasParent = !!getActiveSpan();
7357

7458
const span =
7559
shouldCreateSpanResult && hasParent
76-
? startInactiveSpan({
77-
name: `${method} ${stripUrlQueryAndFragment(url)}`,
78-
attributes: {
79-
url,
80-
type: 'fetch',
81-
'http.method': method,
82-
'http.url': parsedUrl?.href,
83-
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: spanOrigin,
84-
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'http.client',
85-
...(fullUrl && { 'http.url': fullUrl }),
86-
...(fullUrl && parsedUrl?.host && { 'server.address': parsedUrl.host }),
87-
...(parsedUrl?.search && { 'http.query': parsedUrl.search }),
88-
...(parsedUrl?.hash && { 'http.fragment': parsedUrl.hash }),
89-
},
90-
})
60+
? startInactiveSpan(getSpanStartOptions(url, method, spanOrigin))
9161
: new SentryNonRecordingSpan();
9262

9363
handlerData.fetchData.__span = span.spanContext().spanId;
@@ -264,3 +234,39 @@ function isRequest(request: unknown): request is Request {
264234
function isHeaders(headers: unknown): headers is Headers {
265235
return typeof Headers !== 'undefined' && isInstanceOf(headers, Headers);
266236
}
237+
238+
function getSpanStartOptions(
239+
url: string,
240+
method: string,
241+
spanOrigin: SpanOrigin,
242+
): Parameters<typeof startInactiveSpan>[0] {
243+
const parsedUrl = parseStringToURLObject(url);
244+
return {
245+
name: parsedUrl ? `${method} ${parsedUrl.pathname}` : method,
246+
attributes: getFetchSpanAttributes(url, parsedUrl, method, spanOrigin),
247+
};
248+
}
249+
250+
function getFetchSpanAttributes(
251+
url: string,
252+
parsedUrl: ReturnType<typeof parseStringToURLObject>,
253+
method: string,
254+
spanOrigin: SpanOrigin,
255+
): SpanAttributes {
256+
const attributes: SpanAttributes = {
257+
url,
258+
type: 'fetch',
259+
'http.method': method,
260+
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: spanOrigin,
261+
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: 'http.client',
262+
};
263+
if (parsedUrl) {
264+
if (!isURLObjectRelative(parsedUrl)) {
265+
attributes['http.url'] = parsedUrl.href;
266+
attributes['server.address'] = parsedUrl.host;
267+
}
268+
attributes['http.query'] = parsedUrl.search;
269+
attributes['http.fragment'] = parsedUrl.hash;
270+
}
271+
return attributes;
272+
}

packages/core/src/utils-hoist/index.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,14 @@ export {
122122
objectToBaggageHeader,
123123
} from './baggage';
124124

125-
export { getSanitizedUrlString, parseUrl, stripUrlQueryAndFragment } from './url';
125+
export {
126+
getSanitizedUrlString,
127+
parseUrl,
128+
stripUrlQueryAndFragment,
129+
parseStringToURLObject,
130+
isURLObjectRelative,
131+
getSanitizedUrlStringFromUrlObject,
132+
} from './url';
126133
export { eventFromMessage, eventFromUnknownInput, exceptionFromError, parseStackFrames } from './eventbuilder';
127134
export { callFrameToStackFrame, watchdogTimer } from './anr';
128135
export { LRUMap } from './lru';

packages/core/src/utils-hoist/url.ts

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,50 @@ interface URLwithCanParse extends URL {
1111
canParse: (url: string, base?: string | URL | undefined) => boolean;
1212
}
1313

14+
// A subset of the URL object that is valid for relative URLs
15+
// The URL object cannot handle relative URLs, so we need to handle them separately
16+
type RelativeURL = {
17+
isRelative: true;
18+
pathname: URL['pathname'];
19+
search: URL['search'];
20+
hash: URL['hash'];
21+
};
22+
23+
type URLObject = RelativeURL | URL;
24+
25+
// Curious about `thismessage:/`? See: https://www.rfc-editor.org/rfc/rfc2557.html
26+
// > When the methods above do not yield an absolute URI, a base URL
27+
// > of "thismessage:/" MUST be employed. This base URL has been
28+
// > defined for the sole purpose of resolving relative references
29+
// > within a multipart/related structure when no other base URI is
30+
// > specified.
31+
//
32+
// We need to provide a base URL to `parseStringToURLObject` because the fetch API gives us a
33+
// relative URL sometimes.
34+
//
35+
// This is the only case where we need to provide a base URL to `parseStringToURLObject`
36+
// because the relative URL is not valid on its own.
37+
const DEFAULT_BASE_URL = 'thismessage:/';
38+
39+
/**
40+
* Checks if the URL object is relative
41+
*
42+
* @param url - The URL object to check
43+
* @returns True if the URL object is relative, false otherwise
44+
*/
45+
export function isURLObjectRelative(url: URLObject): url is RelativeURL {
46+
return 'isRelative' in url;
47+
}
48+
1449
/**
1550
* Parses string to a URL object
1651
*
1752
* @param url - The URL to parse
1853
* @returns The parsed URL object or undefined if the URL is invalid
1954
*/
20-
export function parseStringToURL(url: string, base?: string | URL | undefined): URL | undefined {
55+
export function parseStringToURLObject(url: string, urlBase?: string | URL | undefined): URLObject | undefined {
56+
const isRelative = url.startsWith('/');
57+
const base = urlBase ?? (isRelative ? DEFAULT_BASE_URL : undefined);
2158
try {
2259
// Use `canParse` to short-circuit the URL constructor if it's not a valid URL
2360
// This is faster than trying to construct the URL and catching the error
@@ -26,14 +63,49 @@ export function parseStringToURL(url: string, base?: string | URL | undefined):
2663
return undefined;
2764
}
2865

29-
return new URL(url, base);
66+
const fullUrlObject = new URL(url, base);
67+
if (isRelative) {
68+
// Because we used a fake base URL, we need to return a relative URL object
69+
return {
70+
isRelative,
71+
pathname: fullUrlObject.pathname,
72+
search: fullUrlObject.search,
73+
hash: fullUrlObject.hash,
74+
};
75+
}
76+
return fullUrlObject;
3077
} catch {
3178
// empty body
3279
}
3380

3481
return undefined;
3582
}
3683

84+
/**
85+
* Takes a URL object and returns a sanitized string which is safe to use as span name
86+
* see: https://develop.sentry.dev/sdk/data-handling/#structuring-data
87+
*/
88+
export function getSanitizedUrlStringFromUrlObject(url: URLObject): string {
89+
if (isURLObjectRelative(url)) {
90+
return url.pathname;
91+
}
92+
93+
const newUrl = new URL(url);
94+
newUrl.search = '';
95+
newUrl.hash = '';
96+
if (['80', '443'].includes(newUrl.port)) {
97+
newUrl.port = '';
98+
}
99+
if (newUrl.password) {
100+
newUrl.password = '%filtered%';
101+
}
102+
if (newUrl.username) {
103+
newUrl.username = '%filtered%';
104+
}
105+
106+
return newUrl.toString();
107+
}
108+
37109
/**
38110
* Parses string form of URL into an object
39111
* // borrowed from https://tools.ietf.org/html/rfc3986#appendix-B

packages/core/test/utils-hoist/url.test.ts

Lines changed: 88 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
import { describe, expect, it } from 'vitest';
2-
import { getSanitizedUrlString, parseStringToURL, parseUrl, stripUrlQueryAndFragment } from '../../src/utils-hoist/url';
2+
import {
3+
getSanitizedUrlString,
4+
parseUrl,
5+
stripUrlQueryAndFragment,
6+
parseStringToURLObject,
7+
isURLObjectRelative,
8+
getSanitizedUrlStringFromUrlObject,
9+
} from '../../src/utils-hoist/url';
310

411
describe('stripQueryStringAndFragment', () => {
512
const urlString = 'http://dogs.are.great:1231/yay/';
@@ -62,8 +69,6 @@ describe('getSanitizedUrlString', () => {
6269
'https://[filtered]:[filtered]@somedomain.com',
6370
],
6471
['same-origin url', '/api/v4/users?id=123', '/api/v4/users'],
65-
['url without a protocol', 'example.com', 'example.com'],
66-
['url without a protocol with a path', 'example.com/sub/path?id=123', 'example.com/sub/path'],
6772
['url with port 8080', 'http://172.31.12.144:8080/test', 'http://172.31.12.144:8080/test'],
6873
['url with port 4433', 'http://172.31.12.144:4433/test', 'http://172.31.12.144:4433/test'],
6974
['url with port 443', 'http://172.31.12.144:443/test', 'http://172.31.12.144/test'],
@@ -197,19 +202,95 @@ describe('parseUrl', () => {
197202
});
198203
});
199204

200-
describe('parseStringToURL', () => {
205+
describe('parseStringToURLObject', () => {
201206
it('returns undefined for invalid URLs', () => {
202-
expect(parseStringToURL('invalid-url')).toBeUndefined();
207+
expect(parseStringToURLObject('invalid-url')).toBeUndefined();
203208
});
204209

205210
it('returns a URL object for valid URLs', () => {
206-
expect(parseStringToURL('https://somedomain.com')).toBeInstanceOf(URL);
211+
expect(parseStringToURLObject('https://somedomain.com')).toBeInstanceOf(URL);
212+
});
213+
214+
it('returns a URL object for valid URLs with a base URL', () => {
215+
expect(parseStringToURLObject('https://somedomain.com', 'https://base.com')).toBeInstanceOf(URL);
216+
});
217+
218+
it('returns a relative URL object for relative URLs', () => {
219+
expect(parseStringToURLObject('/path/to/happiness')).toEqual({
220+
isRelative: true,
221+
pathname: '/path/to/happiness',
222+
search: '',
223+
hash: '',
224+
});
207225
});
208226

209227
it('does not throw an error if URl.canParse is not defined', () => {
210228
const canParse = (URL as any).canParse;
211229
delete (URL as any).canParse;
212-
expect(parseStringToURL('https://somedomain.com')).toBeInstanceOf(URL);
230+
expect(parseStringToURLObject('https://somedomain.com')).toBeInstanceOf(URL);
213231
(URL as any).canParse = canParse;
214232
});
215233
});
234+
235+
describe('isURLObjectRelative', () => {
236+
it('returns true for relative URLs', () => {
237+
expect(isURLObjectRelative(parseStringToURLObject('/path/to/happiness')!)).toBe(true);
238+
});
239+
240+
it('returns false for absolute URLs', () => {
241+
expect(isURLObjectRelative(parseStringToURLObject('https://somedomain.com')!)).toBe(false);
242+
});
243+
});
244+
245+
describe('getSanitizedUrlStringFromUrlObject', () => {
246+
it.each([
247+
['regular url', 'https://somedomain.com', 'https://somedomain.com/'],
248+
['regular url with a path', 'https://somedomain.com/path/to/happiness', 'https://somedomain.com/path/to/happiness'],
249+
[
250+
'url with standard http port 80',
251+
'http://somedomain.com:80/path/to/happiness',
252+
'http://somedomain.com/path/to/happiness',
253+
],
254+
[
255+
'url with standard https port 443',
256+
'https://somedomain.com:443/path/to/happiness',
257+
'https://somedomain.com/path/to/happiness',
258+
],
259+
[
260+
'url with non-standard port',
261+
'https://somedomain.com:4200/path/to/happiness',
262+
'https://somedomain.com:4200/path/to/happiness',
263+
],
264+
[
265+
'url with query params',
266+
'https://somedomain.com:4200/path/to/happiness?auhtToken=abc123&param2=bar',
267+
'https://somedomain.com:4200/path/to/happiness',
268+
],
269+
[
270+
'url with a fragment',
271+
'https://somedomain.com/path/to/happiness#somewildfragment123',
272+
'https://somedomain.com/path/to/happiness',
273+
],
274+
[
275+
'url with a fragment and query params',
276+
'https://somedomain.com/path/to/happiness#somewildfragment123?auhtToken=abc123&param2=bar',
277+
'https://somedomain.com/path/to/happiness',
278+
],
279+
[
280+
'url with authorization',
281+
'https://username:[email protected]',
282+
'https://%filtered%:%filtered%@somedomain.com/',
283+
],
284+
['same-origin url', '/api/v4/users?id=123', '/api/v4/users'],
285+
['url with port 8080', 'http://172.31.12.144:8080/test', 'http://172.31.12.144:8080/test'],
286+
['url with port 4433', 'http://172.31.12.144:4433/test', 'http://172.31.12.144:4433/test'],
287+
['url with port 443', 'http://172.31.12.144:443/test', 'http://172.31.12.144/test'],
288+
['url with IP and port 80', 'http://172.31.12.144:80/test', 'http://172.31.12.144/test'],
289+
])('returns a sanitized URL for a %s', (_, rawUrl: string, sanitizedURL: string) => {
290+
const urlObject = parseStringToURLObject(rawUrl);
291+
if (!urlObject) {
292+
throw new Error('Invalid URL');
293+
}
294+
expect(getSanitizedUrlStringFromUrlObject(urlObject)).toEqual(sanitizedURL);
295+
});
296+
});

0 commit comments

Comments
 (0)