Skip to content

Commit 0925301

Browse files
author
ladeak
committed
PathNormalizer in Microsoft.AspNetCore.HttpSys.Internal
1 parent 4f78231 commit 0925301

File tree

1 file changed

+115
-159
lines changed

1 file changed

+115
-159
lines changed

src/Shared/HttpSys/RequestProcessing/PathNormalizer.cs

Lines changed: 115 additions & 159 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System.Diagnostics;
5+
using System.Runtime.InteropServices;
56

67
namespace Microsoft.AspNetCore.HttpSys.Internal;
78

@@ -11,195 +12,150 @@ internal static class PathNormalizer
1112
private const byte ByteDot = (byte)'.';
1213

1314
// In-place implementation of the algorithm from https://tools.ietf.org/html/rfc3986#section-5.2.4
14-
public static unsafe int RemoveDotSegments(Span<byte> input)
15+
public static int RemoveDotSegments(Span<byte> src)
1516
{
16-
fixed (byte* start = input)
17-
{
18-
var end = start + input.Length;
19-
return RemoveDotSegments(start, end);
20-
}
21-
}
22-
23-
public static unsafe int RemoveDotSegments(byte* start, byte* end)
24-
{
25-
if (!ContainsDotSegments(start, end))
26-
{
27-
return (int)(end - start);
28-
}
17+
Debug.Assert(src[0] == '/', "Path segment must always start with a '/'");
18+
ReadOnlySpan<byte> dotSlash = "./"u8;
19+
ReadOnlySpan<byte> slashDot = "/."u8;
2920

30-
var src = start;
31-
var dst = start;
21+
var writtenLength = 0;
22+
var readPointer = 0;
3223

33-
while (src < end)
24+
while (src.Length > readPointer)
3425
{
35-
var ch1 = *src;
36-
Debug.Assert(ch1 == '/', "Path segment must always start with a '/'");
37-
38-
byte ch2, ch3, ch4;
39-
40-
switch (end - src)
26+
var currentSrc = src[readPointer..];
27+
var nextDotSegmentIndex = currentSrc.IndexOf(slashDot);
28+
if (nextDotSegmentIndex < 0 && readPointer == 0)
4129
{
42-
case 1:
43-
break;
44-
case 2:
45-
ch2 = *(src + 1);
46-
47-
if (ch2 == ByteDot)
48-
{
49-
// B. if the input buffer begins with a prefix of "/./" or "/.",
50-
// where "." is a complete path segment, then replace that
51-
// prefix with "/" in the input buffer; otherwise,
52-
src += 1;
53-
*src = ByteSlash;
54-
continue;
55-
}
56-
57-
break;
58-
case 3:
59-
ch2 = *(src + 1);
60-
ch3 = *(src + 2);
30+
return src.Length;
31+
}
32+
if (nextDotSegmentIndex < 0)
33+
{
34+
// Copy the remaining src to dst, and return.
35+
currentSrc.CopyTo(src[writtenLength..]);
36+
writtenLength += src.Length - readPointer;
37+
return writtenLength;
38+
}
39+
else if (nextDotSegmentIndex > 0)
40+
{
41+
// Copy until the next segment excluding the trailer.
42+
currentSrc[..nextDotSegmentIndex].CopyTo(src[writtenLength..]);
43+
writtenLength += nextDotSegmentIndex;
44+
readPointer += nextDotSegmentIndex;
45+
}
6146

62-
if (ch2 == ByteDot && ch3 == ByteDot)
63-
{
64-
// C. if the input buffer begins with a prefix of "/../" or "/..",
65-
// where ".." is a complete path segment, then replace that
66-
// prefix with "/" in the input buffer and remove the last
67-
// segment and its preceding "/" (if any) from the output
68-
// buffer; otherwise,
69-
src += 2;
70-
*src = ByteSlash;
47+
var remainingLength = src.Length - readPointer;
7148

72-
if (dst > start)
73-
{
74-
do
75-
{
76-
dst--;
77-
} while (dst > start && *dst != ByteSlash);
78-
}
49+
// Case of /../ or /./ or non-dot segments.
50+
if (remainingLength > 3)
51+
{
52+
var nextIndex = readPointer + 2;
53+
54+
if (src[nextIndex] == ByteSlash)
55+
{
56+
// Case: /./
57+
readPointer = nextIndex;
58+
}
59+
else if (MemoryMarshal.CreateSpan(ref src[nextIndex], 2).StartsWith(dotSlash))
60+
{
61+
// Case: /../
62+
// Remove the last segment and replace the path with /
63+
var lastIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash);
64+
65+
// Move the write pointer back to the last written '/' (dropping the last segment and that '/'), or to the start position if there is none
66+
writtenLength = int.Max(0, lastIndex);
67+
68+
// Move the read pointer to the beginning of the next segment, including its leading /
69+
readPointer += 3;
70+
}
71+
else
72+
{
73+
// Not a dot segment e.g. /.a, copy the matched /. and bump the read pointer
74+
slashDot.CopyTo(src[writtenLength..]);
75+
writtenLength += 2;
76+
readPointer = nextIndex;
77+
}
78+
}
7979

80-
continue;
81-
}
82-
else if (ch2 == ByteDot && ch3 == ByteSlash)
80+
// Ending with /.. or /./ or non-dot segments.
81+
else if (remainingLength == 3)
82+
{
83+
var nextIndex = readPointer + 2;
84+
if (src[nextIndex] == ByteSlash)
85+
{
86+
// Case: /./ Replace the /./ segment with a closing /
87+
src[writtenLength++] = ByteSlash;
88+
return writtenLength;
89+
}
90+
else if (src[nextIndex] == ByteDot)
91+
{
92+
// Case: /.. Remove the last segment and replace the path with /
93+
var lastSlashIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash);
94+
95+
// If this was the beginning of the string, then return /
96+
if (lastSlashIndex < 0)
8397
{
84-
// B. if the input buffer begins with a prefix of "/./" or "/.",
85-
// where "." is a complete path segment, then replace that
86-
// prefix with "/" in the input buffer; otherwise,
87-
src += 2;
88-
continue;
98+
Debug.Assert(src[0] == '/');
99+
return 1;
89100
}
90-
91-
break;
92-
default:
93-
ch2 = *(src + 1);
94-
ch3 = *(src + 2);
95-
ch4 = *(src + 3);
96-
97-
if (ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash)
101+
else
98102
{
99-
// C. if the input buffer begins with a prefix of "/../" or "/..",
100-
// where ".." is a complete path segment, then replace that
101-
// prefix with "/" in the input buffer and remove the last
102-
// segment and its preceding "/" (if any) from the output
103-
// buffer; otherwise,
104-
src += 3;
105-
106-
if (dst > start)
107-
{
108-
do
109-
{
110-
dst--;
111-
} while (dst > start && *dst != ByteSlash);
112-
}
113-
114-
continue;
103+
writtenLength = lastSlashIndex + 1;
115104
}
116-
else if (ch2 == ByteDot && ch3 == ByteSlash)
117-
{
118-
// B. if the input buffer begins with a prefix of "/./" or "/.",
119-
// where "." is a complete path segment, then replace that
120-
// prefix with "/" in the input buffer; otherwise,
121-
src += 2;
122-
continue;
123-
}
124-
125-
break;
105+
return writtenLength;
106+
}
107+
else
108+
{
109+
// Not a dot segment e.g. /.a, copy the /. and bump the read pointer.
110+
slashDot.CopyTo(src[writtenLength..]);
111+
writtenLength += 2;
112+
readPointer = nextIndex;
113+
}
126114
}
127-
128-
// E. move the first path segment in the input buffer to the end of
129-
// the output buffer, including the initial "/" character (if
130-
// any) and any subsequent characters up to, but not including,
131-
// the next "/" character or the end of the input buffer.
132-
do
115+
// Ending with /.
116+
else if (remainingLength == 2)
133117
{
134-
*dst++ = ch1;
135-
ch1 = *++src;
136-
} while (src < end && ch1 != ByteSlash);
137-
}
138-
139-
if (dst == start)
140-
{
141-
*dst++ = ByteSlash;
118+
src[writtenLength++] = ByteSlash;
119+
return writtenLength;
120+
}
142121
}
143-
144-
return (int)(dst - start);
122+
return writtenLength;
145123
}
146124

147-
public static unsafe bool ContainsDotSegments(byte* start, byte* end)
125+
public static bool ContainsDotSegments(Span<byte> src)
148126
{
149-
var src = start;
150-
var dst = start;
151-
152-
while (src < end)
127+
Debug.Assert(src[0] == '/', "Path segment must always start with a '/'");
128+
ReadOnlySpan<byte> slashDot = "/."u8;
129+
ReadOnlySpan<byte> dotSlash = "./"u8;
130+
while (src.Length > 0)
153131
{
154-
var ch1 = *src;
155-
Debug.Assert(ch1 == '/', "Path segment must always start with a '/'");
156-
157-
byte ch2, ch3, ch4;
158-
159-
switch (end - src)
132+
var nextSlashDotIndex = src.IndexOf(slashDot);
133+
if (nextSlashDotIndex < 0)
160134
{
161-
case 1:
162-
break;
163-
case 2:
164-
ch2 = *(src + 1);
165-
166-
if (ch2 == ByteDot)
167-
{
168-
return true;
169-
}
170-
171-
break;
172-
case 3:
173-
ch2 = *(src + 1);
174-
ch3 = *(src + 2);
175-
176-
if ((ch2 == ByteDot && ch3 == ByteDot) ||
177-
(ch2 == ByteDot && ch3 == ByteSlash))
135+
return false;
136+
}
137+
else
138+
{
139+
src = src[(nextSlashDotIndex + 2)..];
140+
}
141+
switch (src.Length)
142+
{
143+
case 0: // Case of /.
144+
return true;
145+
case 1: // Case of /.. or /./
146+
if (src[0] == ByteDot || src[0] == ByteSlash)
178147
{
179148
return true;
180149
}
181-
182150
break;
183-
default:
184-
ch2 = *(src + 1);
185-
ch3 = *(src + 2);
186-
ch4 = *(src + 3);
187-
188-
if ((ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) ||
189-
(ch2 == ByteDot && ch3 == ByteSlash))
151+
default: // Case of /../ or /./
152+
if (dotSlash.SequenceEqual(src[..2]) || src[0] == ByteSlash)
190153
{
191154
return true;
192155
}
193-
194156
break;
195157
}
196-
197-
do
198-
{
199-
ch1 = *++src;
200-
} while (src < end && ch1 != ByteSlash);
201158
}
202-
203159
return false;
204160
}
205161
}

0 commit comments

Comments
 (0)