Skip to content

Commit ece10b1

Browse files
authored
backport 8.0 - http.sys accept loop - mitigate against break due to possible conflicting IO callbacks (#54437)
* investigate #54251 (more details will be in PR) 1. handle MRTVS cascading fault breaking the accept loop 2. log any expectation failures # Conflicts: # src/Servers/HttpSys/src/AsyncAcceptContext.cs * make log text consistent * upgrade log to "critical" on the actual state error * - capture whether it is the managed vs unmanaged code-path that is glitching - lock new critical log messages behind app-context switch
1 parent 6be61f7 commit ece10b1

File tree

5 files changed

+142
-40
lines changed

5 files changed

+142
-40
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using Microsoft.Extensions.Logging;
5+
6+
namespace Microsoft.AspNetCore.Server.HttpSys;
7+
8+
internal partial class AsyncAcceptContext
9+
{
10+
private static partial class Log
11+
{
12+
[LoggerMessage(LoggerEventIds.AcceptSetResultFailed, LogLevel.Error, "Error attempting to set 'accept' outcome", EventName = "AcceptSetResultFailed")]
13+
public static partial void AcceptSetResultFailed(ILogger logger, Exception exception);
14+
15+
// note on "critical": these represent an unexpected IO callback state that needs investigation; see https://github.com/dotnet/aspnetcore/pull/54368/
16+
17+
[LoggerMessage(LoggerEventIds.AcceptSetExpectationMismatch, LogLevel.Critical, "Mismatch setting callback expectation - {Value}", EventName = "AcceptSetExpectationMismatch")]
18+
public static partial void AcceptSetExpectationMismatch(ILogger logger, int value);
19+
20+
[LoggerMessage(LoggerEventIds.AcceptCancelExpectationMismatch, LogLevel.Critical, "Mismatch canceling accept state - {Value}", EventName = "AcceptCancelExpectationMismatch")]
21+
public static partial void AcceptCancelExpectationMismatch(ILogger logger, int value);
22+
23+
[LoggerMessage(LoggerEventIds.AcceptObserveExpectationMismatch, LogLevel.Critical, "Mismatch observing {Kind} accept callback - {Value}", EventName = "AcceptObserveExpectationMismatch")]
24+
public static partial void AcceptObserveExpectationMismatch(ILogger logger, string kind, int value);
25+
}
26+
}

src/Servers/HttpSys/src/AsyncAcceptContext.cs

Lines changed: 110 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,23 @@
44
using System.Diagnostics;
55
using System.Threading.Tasks.Sources;
66
using Microsoft.AspNetCore.HttpSys.Internal;
7+
using Microsoft.Extensions.Logging;
78

89
namespace Microsoft.AspNetCore.Server.HttpSys;
910

10-
internal sealed unsafe class AsyncAcceptContext : IValueTaskSource<RequestContext>, IDisposable
11+
internal sealed unsafe partial class AsyncAcceptContext : IValueTaskSource<RequestContext>, IDisposable
1112
{
1213
private static readonly IOCompletionCallback IOCallback = IOWaitCallback;
1314
private readonly PreAllocatedOverlapped _preallocatedOverlapped;
1415
private readonly IRequestContextFactory _requestContextFactory;
16+
private readonly ILogger _logger;
17+
private int _expectedCompletionCount;
1518

1619
private NativeOverlapped* _overlapped;
1720

21+
private readonly bool _logExpectationFailures = AppContext.TryGetSwitch(
22+
"Microsoft.AspNetCore.Server.HttpSys.LogAcceptExpectationFailure", out var enabled) && enabled;
23+
1824
// mutable struct; do not make this readonly
1925
private ManualResetValueTaskSourceCore<RequestContext> _mrvts = new()
2026
{
@@ -24,11 +30,12 @@ internal sealed unsafe class AsyncAcceptContext : IValueTaskSource<RequestContex
2430

2531
private RequestContext? _requestContext;
2632

27-
internal AsyncAcceptContext(HttpSysListener server, IRequestContextFactory requestContextFactory)
33+
internal AsyncAcceptContext(HttpSysListener server, IRequestContextFactory requestContextFactory, ILogger logger)
2834
{
2935
Server = server;
3036
_requestContextFactory = requestContextFactory;
3137
_preallocatedOverlapped = new(IOCallback, state: this, pinData: null);
38+
_logger = logger;
3239
}
3340

3441
internal HttpSysListener Server { get; }
@@ -51,15 +58,16 @@ internal ValueTask<RequestContext> AcceptAsync()
5158
return new ValueTask<RequestContext>(this, _mrvts.Version);
5259
}
5360

54-
private void IOCompleted(uint errorCode, uint numBytes)
61+
private void IOCompleted(uint errorCode, uint numBytes, bool managed)
5562
{
5663
try
5764
{
65+
ObserveCompletion(managed); // expectation tracking
5866
if (errorCode != UnsafeNclNativeMethods.ErrorCodes.ERROR_SUCCESS &&
5967
errorCode != UnsafeNclNativeMethods.ErrorCodes.ERROR_MORE_DATA)
6068
{
61-
_mrvts.SetException(new HttpSysException((int)errorCode));
62-
return;
69+
// (keep all the error handling in one place)
70+
throw new HttpSysException((int)errorCode);
6371
}
6472

6573
Debug.Assert(_requestContext != null);
@@ -71,7 +79,14 @@ private void IOCompleted(uint errorCode, uint numBytes)
7179
// we want to reuse the acceptContext object for future accepts.
7280
_requestContext = null;
7381

74-
_mrvts.SetResult(requestContext);
82+
try
83+
{
84+
_mrvts.SetResult(requestContext);
85+
}
86+
catch (Exception ex)
87+
{
88+
Log.AcceptSetResultFailed(_logger, ex);
89+
}
7590
}
7691
else
7792
{
@@ -84,22 +99,69 @@ private void IOCompleted(uint errorCode, uint numBytes)
8499
if (statusCode != UnsafeNclNativeMethods.ErrorCodes.ERROR_SUCCESS &&
85100
statusCode != UnsafeNclNativeMethods.ErrorCodes.ERROR_IO_PENDING)
86101
{
87-
// someother bad error, possible(?) return values are:
102+
// some other bad error, possible(?) return values are:
88103
// ERROR_INVALID_HANDLE, ERROR_INSUFFICIENT_BUFFER, ERROR_OPERATION_ABORTED
89-
_mrvts.SetException(new HttpSysException((int)statusCode));
104+
// (keep all the error handling in one place)
105+
throw new HttpSysException((int)statusCode);
90106
}
91107
}
92108
}
93109
catch (Exception exception)
94110
{
95-
_mrvts.SetException(exception);
111+
try
112+
{
113+
_mrvts.SetException(exception);
114+
}
115+
catch (Exception ex)
116+
{
117+
Log.AcceptSetResultFailed(_logger, ex);
118+
}
96119
}
97120
}
98121

99122
private static unsafe void IOWaitCallback(uint errorCode, uint numBytes, NativeOverlapped* nativeOverlapped)
100123
{
101124
var acceptContext = (AsyncAcceptContext)ThreadPoolBoundHandle.GetNativeOverlappedState(nativeOverlapped)!;
102-
acceptContext.IOCompleted(errorCode, numBytes);
125+
acceptContext.IOCompleted(errorCode, numBytes, false);
126+
}
127+
128+
private void SetExpectCompletion() // we anticipate a completion *might* occur
129+
{
130+
// note this is intentionally a "reset and check" rather than Increment, so that we don't spam
131+
// the logs forever if a glitch occurs
132+
var value = Interlocked.Exchange(ref _expectedCompletionCount, 1); // should have been 0
133+
if (value != 0)
134+
{
135+
if (_logExpectationFailures)
136+
{
137+
Log.AcceptSetExpectationMismatch(_logger, value);
138+
}
139+
Debug.Assert(false, nameof(SetExpectCompletion)); // fail hard in debug
140+
}
141+
}
142+
private void CancelExpectCompletion() // due to error-code etc, we no longer anticipate a completion
143+
{
144+
var value = Interlocked.Decrement(ref _expectedCompletionCount); // should have been 1, so now 0
145+
if (value != 0)
146+
{
147+
if (_logExpectationFailures)
148+
{
149+
Log.AcceptCancelExpectationMismatch(_logger, value);
150+
}
151+
Debug.Assert(false, nameof(CancelExpectCompletion)); // fail hard in debug
152+
}
153+
}
154+
private void ObserveCompletion(bool managed) // a completion was invoked
155+
{
156+
var value = Interlocked.Decrement(ref _expectedCompletionCount); // should have been 1, so now 0
157+
if (value != 0)
158+
{
159+
if (_logExpectationFailures)
160+
{
161+
Log.AcceptObserveExpectationMismatch(_logger, managed ? "managed" : "unmanaged", value);
162+
}
163+
Debug.Assert(false, nameof(ObserveCompletion)); // fail hard in debug
164+
}
103165
}
104166

105167
private uint QueueBeginGetContext()
@@ -112,6 +174,7 @@ private uint QueueBeginGetContext()
112174

113175
retry = false;
114176
uint bytesTransferred = 0;
177+
SetExpectCompletion(); // track this *before*, because of timing vs IOCP (could even be effectively synchronous)
115178
statusCode = HttpApi.HttpReceiveHttpRequest(
116179
Server.RequestQueue.Handle,
117180
_requestContext.RequestId,
@@ -123,35 +186,44 @@ private uint QueueBeginGetContext()
123186
&bytesTransferred,
124187
_overlapped);
125188

126-
if ((statusCode == UnsafeNclNativeMethods.ErrorCodes.ERROR_CONNECTION_INVALID
127-
|| statusCode == UnsafeNclNativeMethods.ErrorCodes.ERROR_INVALID_PARAMETER)
128-
&& _requestContext.RequestId != 0)
129-
{
130-
// ERROR_CONNECTION_INVALID:
131-
// The client reset the connection between the time we got the MORE_DATA error and when we called HttpReceiveHttpRequest
132-
// with the new buffer. We can clear the request id and move on to the next request.
133-
//
134-
// ERROR_INVALID_PARAMETER: Historical check from HttpListener.
135-
// https://referencesource.microsoft.com/#System/net/System/Net/_ListenerAsyncResult.cs,137
136-
// we might get this if somebody stole our RequestId,
137-
// set RequestId to 0 and start all over again with the buffer we just allocated
138-
// BUGBUG: how can someone steal our request ID? seems really bad and in need of fix.
139-
_requestContext.RequestId = 0;
140-
retry = true;
141-
}
142-
else if (statusCode == UnsafeNclNativeMethods.ErrorCodes.ERROR_MORE_DATA)
143-
{
144-
// the buffer was not big enough to fit the headers, we need
145-
// to read the RequestId returned, allocate a new buffer of the required size
146-
// (uint)backingBuffer.Length - AlignmentPadding
147-
AllocateNativeRequest(bytesTransferred);
148-
retry = true;
149-
}
150-
else if (statusCode == UnsafeNclNativeMethods.ErrorCodes.ERROR_SUCCESS
151-
&& HttpSysListener.SkipIOCPCallbackOnSuccess)
189+
switch (statusCode)
152190
{
153-
// IO operation completed synchronously - callback won't be called to signal completion.
154-
IOCompleted(statusCode, bytesTransferred);
191+
case (UnsafeNclNativeMethods.ErrorCodes.ERROR_CONNECTION_INVALID or UnsafeNclNativeMethods.ErrorCodes.ERROR_INVALID_PARAMETER) when _requestContext.RequestId != 0:
192+
// ERROR_CONNECTION_INVALID:
193+
// The client reset the connection between the time we got the MORE_DATA error and when we called HttpReceiveHttpRequest
194+
// with the new buffer. We can clear the request id and move on to the next request.
195+
//
196+
// ERROR_INVALID_PARAMETER: Historical check from HttpListener.
197+
// https://referencesource.microsoft.com/#System/net/System/Net/_ListenerAsyncResult.cs,137
198+
// we might get this if somebody stole our RequestId,
199+
// set RequestId to 0 and start all over again with the buffer we just allocated
200+
// BUGBUG: how can someone steal our request ID? seems really bad and in need of fix.
201+
CancelExpectCompletion();
202+
_requestContext.RequestId = 0;
203+
retry = true;
204+
break;
205+
case UnsafeNclNativeMethods.ErrorCodes.ERROR_MORE_DATA:
206+
// the buffer was not big enough to fit the headers, we need
207+
// to read the RequestId returned, allocate a new buffer of the required size
208+
// (uint)backingBuffer.Length - AlignmentPadding
209+
CancelExpectCompletion(); // we'll "expect" again when we retry
210+
AllocateNativeRequest(bytesTransferred);
211+
retry = true;
212+
break;
213+
case UnsafeNclNativeMethods.ErrorCodes.ERROR_SUCCESS:
214+
if (HttpSysListener.SkipIOCPCallbackOnSuccess)
215+
{
216+
// IO operation completed synchronously - callback won't be called to signal completion.
217+
IOCompleted(statusCode, bytesTransferred, true); // marks completion
218+
}
219+
// else: callback fired by IOCP (at some point), which marks completion
220+
break;
221+
case UnsafeNclNativeMethods.ErrorCodes.ERROR_IO_PENDING:
222+
break; // no change to state - callback will occur at some point
223+
default:
224+
// fault code, not expecting an IOCP callback
225+
CancelExpectCompletion();
226+
break;
155227
}
156228
}
157229
while (retry);

src/Servers/HttpSys/src/LoggerEventIds.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,8 @@ internal static class LoggerEventIds
5454
public const int RequestValidationFailed = 47;
5555
public const int CreateDisconnectTokenError = 48;
5656
public const int RequestAborted = 49;
57+
public const int AcceptSetResultFailed = 50;
58+
public const int AcceptSetExpectationMismatch = 51;
59+
public const int AcceptCancelExpectationMismatch = 52;
60+
public const int AcceptObserveExpectationMismatch = 53;
5761
}

src/Servers/HttpSys/src/MessagePump.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ private void ProcessRequestsWorker()
162162
Debug.Assert(RequestContextFactory != null);
163163

164164
// Allocate and accept context per loop and reuse it for all accepts
165-
var acceptContext = new AsyncAcceptContext(Listener, RequestContextFactory);
165+
var acceptContext = new AsyncAcceptContext(Listener, RequestContextFactory, _logger);
166166

167167
var loop = new AcceptLoop(acceptContext, this);
168168

src/Servers/HttpSys/test/FunctionalTests/Listener/Utilities.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ internal static HttpSysListener CreateServerOnExistingQueue(string requestQueueN
107107
internal static async Task<RequestContext> AcceptAsync(this HttpSysListener server, TimeSpan timeout)
108108
{
109109
var factory = new TestRequestContextFactory(server);
110-
using var acceptContext = new AsyncAcceptContext(server, factory);
110+
using var acceptContext = new AsyncAcceptContext(server, factory, server.Logger);
111111

112112
async Task<RequestContext> AcceptAsync()
113113
{

0 commit comments

Comments
 (0)