Skip to content

Commit a3c1e58

Browse files
authored
fix: guard against overflow in sse backoff calculation (#455)
In some platform configurations, the SSE backoff calculation overflows which results in failing to schedule another backoff attempt via `asio::deadline_timer::expires_from_now`. This PR refactors the backoff algorithm to use free functions which can be independently tested. It fixes the bug by constraining the maximum backoff exponent based on the configurable max backoff parameter. Finally, I've added some test vectors for large attempt values. In the bug report, the overflow happens at about 34 minutes which corresponded to 55 attempts.
1 parent 1ca48d5 commit a3c1e58

File tree

6 files changed

+212
-41
lines changed

6 files changed

+212
-41
lines changed

libs/server-sent-events/src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ add_library(${LIBNAME} OBJECT
1111
parser.cpp
1212
event.cpp
1313
error.cpp
14+
backoff_detail.cpp
1415
backoff.cpp)
1516

1617
target_link_libraries(${LIBNAME}

libs/server-sent-events/src/backoff.cpp

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,14 @@
11
#include "backoff.hpp"
2+
#include "backoff_detail.hpp"
23

3-
static double const kDefaultJitterRatio = 0.5;
4-
static const std::chrono::milliseconds kDefaultResetInterval =
5-
std::chrono::milliseconds{60'000};
4+
static constexpr double kDefaultJitterRatio = 0.5;
5+
static constexpr auto kDefaultResetInterval = std::chrono::milliseconds{60'000};
66

77
namespace launchdarkly::sse {
88

9-
std::chrono::milliseconds Backoff::calculate_backoff() {
10-
return initial_ * static_cast<uint64_t>(std::pow(2, attempt_ - 1));
11-
}
12-
13-
std::chrono::milliseconds Backoff::jitter(std::chrono::milliseconds base) {
14-
return std::chrono::milliseconds(static_cast<uint64_t>(
15-
base.count() - (random_(jitter_ratio_) * base.count())));
16-
}
17-
18-
std::chrono::milliseconds Backoff::delay() {
19-
return jitter(std::min(calculate_backoff(), max_));
9+
std::chrono::milliseconds Backoff::delay() const {
10+
return detail::delay(initial_, max_, attempt_, max_exponent_, jitter_ratio_,
11+
random_);
2012
}
2113

2214
void Backoff::fail() {
@@ -50,12 +42,20 @@ Backoff::Backoff(std::chrono::milliseconds initial,
5042
double jitter_ratio,
5143
std::chrono::milliseconds reset_interval,
5244
std::function<double(double ratio)> random)
53-
: attempt_(1),
54-
initial_(initial),
45+
: initial_(initial),
5546
max_(max),
47+
// This is necessary to constrain the exponent that is passed eventually
48+
// into std::pow. Otherwise, we'll be operating with a number that is too
49+
// large to be represented as a chrono::milliseconds and (depending on the
50+
// platform) may result in a value of 0, negative, or some other
51+
// unexpected value.
52+
max_exponent_(std::ceil(
53+
std::log2(max.count() / std::max(std::chrono::milliseconds{1}.count(),
54+
initial.count())))),
55+
attempt_(1),
5656
jitter_ratio_(jitter_ratio),
5757
reset_interval_(reset_interval),
58-
random_(random) {}
58+
random_(std::move(random)) {}
5959

6060
Backoff::Backoff(std::chrono::milliseconds initial,
6161
std::chrono::milliseconds max)
@@ -68,5 +68,4 @@ Backoff::Backoff(std::chrono::milliseconds initial,
6868
0.0, kDefaultJitterRatio);
6969
return distribution(this->random_gen_);
7070
}) {}
71-
7271
} // namespace launchdarkly::sse

libs/server-sent-events/src/backoff.hpp

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
#include <random>
77

88
namespace launchdarkly::sse {
9-
109
/**
1110
* Implements an algorithm for calculating the delay between connection
1211
* attempts.
@@ -52,35 +51,18 @@ class Backoff {
5251
/**
5352
* Get the current reconnect delay.
5453
*/
55-
std::chrono::milliseconds delay();
54+
[[nodiscard]] std::chrono::milliseconds delay() const;
5655

5756
private:
58-
/**
59-
* Calculate an exponential backoff based on the initial retry time and
60-
* the current attempt.
61-
*
62-
* @param attempt The current attempt count.
63-
* @param initial The initial retry delay.
64-
* @return The current backoff based on the number of attempts.
65-
*/
66-
std::chrono::milliseconds calculate_backoff();
67-
68-
/**
69-
* Produce a jittered version of the base value. This value will be adjusted
70-
* randomly to be between 50% and 100% of the input duration.
71-
*
72-
* @param base The base duration to jitter.
73-
* @return The jittered duration.
74-
*/
75-
std::chrono::milliseconds jitter(std::chrono::milliseconds base);
76-
7757
std::chrono::milliseconds initial_;
7858
std::chrono::milliseconds max_;
79-
uint64_t attempt_;
59+
60+
std::uint64_t max_exponent_;
61+
std::uint64_t attempt_;
8062
std::optional<std::chrono::time_point<std::chrono::system_clock>>
8163
active_since_;
8264
double const jitter_ratio_;
83-
const std::chrono::milliseconds reset_interval_;
65+
std::chrono::milliseconds const reset_interval_;
8466
// Default random generator. Used when random method not specified.
8567
std::function<double(double ratio)> random_;
8668
std::default_random_engine random_gen_;
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#include "backoff_detail.hpp"
2+
3+
#include <algorithm>
4+
#include <cmath>
5+
6+
namespace launchdarkly::sse::detail {
7+
std::chrono::milliseconds calculate_backoff(
8+
std::chrono::milliseconds const initial,
9+
std::uint64_t const attempt,
10+
std::uint64_t const max_exponent) {
11+
std::uint64_t const exponent = std::min(attempt - 1, max_exponent);
12+
double const exponentiated = std::pow(2, exponent);
13+
return initial * static_cast<uint64_t>(exponentiated);
14+
}
15+
16+
std::chrono::milliseconds jitter(std::chrono::milliseconds const base,
17+
double const jitter_ratio,
18+
std::function<double(double)> const& random) {
19+
double const jitter = random(jitter_ratio);
20+
auto const base_as_double = static_cast<double>(base.count());
21+
double const jittered_base = base_as_double - jitter * base_as_double;
22+
return std::chrono::milliseconds(static_cast<uint64_t>(jittered_base));
23+
}
24+
25+
std::chrono::milliseconds delay(std::chrono::milliseconds const initial,
26+
std::chrono::milliseconds const max,
27+
std::uint64_t const attempt,
28+
std::uint64_t const max_exponent,
29+
double const jitter_ratio,
30+
std::function<double(double)> const& random) {
31+
auto const backoff = calculate_backoff(initial, attempt, max_exponent);
32+
auto const constrained_backoff = std::min(backoff, max);
33+
return jitter(constrained_backoff, jitter_ratio, random);
34+
}
35+
} // namespace launchdarkly::sse::detail
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#pragma once
2+
3+
#include <chrono>
4+
#include <functional>
5+
6+
namespace launchdarkly::sse::detail {
7+
/**
8+
* Calculate an exponential backoff based on the initial retry time and
9+
* the current attempt.
10+
*
11+
* @param initial The initial retry delay.
12+
* @param attempt The current attempt count.
13+
* @param max_exponent The maximum exponent to use in the calculation.
14+
*
15+
* @return The current backoff based on the number of attempts.
16+
*/
17+
18+
std::chrono::milliseconds calculate_backoff(std::chrono::milliseconds initial,
19+
std::uint64_t attempt,
20+
std::uint64_t max_exponent);
21+
/**
22+
* Produce a jittered version of the base value. This value will be adjusted
23+
* randomly to be between 50% and 100% of the input duration.
24+
*
25+
* @param base The base duration to jitter.
26+
* @param jitter_ratio The ratio to use when jittering.
27+
* @param random A random method which produces a value between 0 and
28+
* jitter_ratio.
29+
* @return The jittered duration.
30+
*/
31+
std::chrono::milliseconds jitter(std::chrono::milliseconds base,
32+
double jitter_ratio,
33+
std::function<double(double)> const& random);
34+
35+
/**
36+
*
37+
* @param initial The initial retry delay.
38+
* @param max The maximum delay between retries.
39+
* @param attempt The current attempt.
40+
* @param max_exponent The maximum exponent to use in the calculation.
41+
* @param jitter_ratio The ratio to use when jittering.
42+
* @param random A random method which produces a value between 0 and
43+
* jitter_ratio.
44+
* @return The delay between connection attempts.
45+
*/
46+
std::chrono::milliseconds delay(std::chrono::milliseconds initial,
47+
std::chrono::milliseconds max,
48+
std::uint64_t attempt,
49+
std::uint64_t max_exponent,
50+
double jitter_ratio,
51+
std::function<double(double)> const& random);
52+
} // namespace launchdarkly::sse::detail

libs/server-sent-events/tests/backoff_test.cpp

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
#include <gtest/gtest.h>
22

3+
#include <numeric>
34
#include <thread>
45

56
#include "backoff.hpp"
7+
#include "backoff_detail.hpp"
68

79
using launchdarkly::sse::Backoff;
810

@@ -94,3 +96,103 @@ TEST(BackoffTests, BackoffDoesNotResetActiveConnectionWasTooShort) {
9496

9597
EXPECT_EQ(8000, backoff.delay().count());
9698
}
99+
100+
// This test specifically attempts to reproduce an overflow bug reported by a
101+
// customer that occurred using default backoff parameters after 55 attempts.
102+
TEST(BackoffTests, BackoffDoesNotOverflowAt55Attempts) {
103+
Backoff backoff(
104+
std::chrono::milliseconds{1000}, std::chrono::milliseconds{30000}, 0,
105+
std::chrono::milliseconds{6000}, [](auto ratio) { return 0; });
106+
107+
constexpr std::size_t kAttemptsToReachMax = 5;
108+
constexpr std::size_t kAttemptWhereOverflowOccurs = 54;
109+
110+
for (int i = 0; i < kAttemptsToReachMax; i++) {
111+
backoff.fail();
112+
}
113+
// At 5 attempts, we've already reached maximum backoff, given the initial
114+
// parameters.
115+
EXPECT_EQ(std::chrono::milliseconds{30000}, backoff.delay());
116+
117+
// This should succeed even if the code was unprotected.
118+
for (int i = 0; i < kAttemptWhereOverflowOccurs - kAttemptsToReachMax - 1;
119+
i++) {
120+
backoff.fail();
121+
EXPECT_EQ(std::chrono::milliseconds{30000}, backoff.delay());
122+
}
123+
124+
// This should trigger overflow in unprotected code.
125+
backoff.fail();
126+
EXPECT_EQ(std::chrono::milliseconds{30000}, backoff.delay());
127+
}
128+
129+
class BackoffOverflowEndToEndTest
130+
: public ::testing::TestWithParam<std::uint64_t> {};
131+
132+
// The purpose of this test is to simulate the SDK calling backoff.fail()
133+
// over and over again.
134+
//
135+
// Given a large amount of failed attempts, the calculation should remain at
136+
// the max backoff value and stay there.
137+
//
138+
// Since we can't simulate very large numbers of attempts (simply takes too
139+
// long), the free functions that Backoff delegates to are tested separately.
140+
TEST_P(BackoffOverflowEndToEndTest, BackoffDoesNotOverflowWithVariousAttempts) {
141+
Backoff backoff(
142+
std::chrono::milliseconds{1000}, std::chrono::milliseconds{30000}, 0,
143+
std::chrono::milliseconds{60000}, [](auto ratio) { return 0; });
144+
145+
std::uint64_t const attempts = GetParam();
146+
147+
for (int i = 0; i < attempts; i++) {
148+
backoff.fail();
149+
}
150+
151+
auto const val = backoff.delay();
152+
EXPECT_EQ(val, std::chrono::milliseconds{30000});
153+
}
154+
155+
INSTANTIATE_TEST_SUITE_P(
156+
VariousBackoffAttempts,
157+
BackoffOverflowEndToEndTest,
158+
::testing::Values<std::uint64_t>(10, 54, 55, 63, 64, 65, 500, 1000));
159+
160+
class BackoffOverflowUnitTest : public ::testing::TestWithParam<std::uint64_t> {
161+
};
162+
163+
// This test performs a straight calculation of the backoff given
164+
// a range of large values, including the maximum uint64_t which might be
165+
// reached at the heat death of the universe (or earlier, if there's a bug.)
166+
// It should not overflow, but remain at the max value.
167+
TEST_P(BackoffOverflowUnitTest,
168+
BackoffDoesNotOverflowWithVariousAttemptsInCalculateBackoff) {
169+
std::uint64_t const attempts = GetParam();
170+
171+
constexpr std::uint64_t max_exponent = 5;
172+
173+
// Given an exponent of 5, and initial delay of 1ms, the max should be
174+
// 2^5 = 32 for any amount of attempts > 5.
175+
176+
constexpr auto initial = std::chrono::milliseconds{1};
177+
constexpr auto max = std::chrono::milliseconds{32};
178+
179+
auto const millisecondsA = launchdarkly::sse::detail::calculate_backoff(
180+
initial, attempts, max_exponent);
181+
182+
ASSERT_EQ(millisecondsA, max);
183+
184+
// Given a random function that always returns 0, this should be identical
185+
// to calling calculate_backoff.
186+
auto const millisecondsB = launchdarkly::sse::detail::delay(
187+
initial, max, attempts, max_exponent, 0, [](auto ratio) { return 0; });
188+
189+
ASSERT_EQ(millisecondsB, max);
190+
}
191+
192+
INSTANTIATE_TEST_SUITE_P(VariousBackoffAttempts,
193+
BackoffOverflowUnitTest,
194+
::testing::Values<std::uint64_t>(
195+
1000,
196+
10000,
197+
100000,
198+
std::numeric_limits<std::uint64_t>::max()));

0 commit comments

Comments
 (0)