Commit c9e6189

ntp: Make the RTC synchronization more reliable
Miroslav reported that the periodic RTC synchronization in the NTP code fails more often than not to hit the specified update window.

The reason is that the code uses delayed_work to schedule the update, which needs to run in thread context as the underlying RTC might be connected via a slow bus, e.g. I2C. In the update function it verifies whether the current time is correct vs. the requirements of the underlying RTC.

But delayed_work uses the timer wheel for scheduling, which is inaccurate by design. Depending on the distance to the expiry, the wheel gets less granular to allow batching and to avoid the cascading which the original timer wheel implementation had to do. See 500462a ("timers: Switch to a non-cascading wheel") and the code for further details.

The code already deals with this by splitting the 660 seconds period into a long 659 seconds timer and then retrying with a smaller delta. But looking at the actual granularities of the timer wheel (which depend on the HZ configuration), the 659 seconds timer ends up in an outer wheel level and is affected by a worst case granularity of:

    HZ      Granularity
    1000    32s
     250    16s
     100    40s

So the initial timer can already be off by max 12.5%, which is not a big issue as the period of the sync is defined as ~11 minutes.

The fine grained second attempt schedules to the desired update point with a timer expiring less than a second from now. Depending on the actual delta and the HZ setting, even the second attempt can end up in outer wheel levels which have a large enough granularity to make the correctness check fail.

As this is a fundamental property of the timer wheel, there is no way to make this more accurate short of iterating in one-jiffy steps towards the update point.

Switch it to an hrtimer instead, which schedules the actual update work. The hrtimer expires precisely (max 1 jiffy delay when high resolution timers are not available). The actual scheduling delay of the work is the same as before.

The update is triggered from do_adjtimex(), which is a bit racy, but not much more racy than it was before:

    if (ntp_synced())
        queue_delayed_work(system_power_efficient_wq, &sync_work, 0);

which is racy when the work is currently executing and has not managed to reschedule itself. This now becomes:

    if (ntp_synced() && !hrtimer_is_queued(&sync_hrtimer))
        queue_work(system_power_efficient_wq, &sync_work);

which is racy when the hrtimer has expired and the work is currently executing and has not yet managed to rearm the hrtimer. Not a big problem, as it just schedules work for nothing. The new implementation has a safeguard in place to catch the case where the hrtimer is queued on entry to the work function and avoids an extra update attempt of the RTC that way.

Reported-by: Miroslav Lichvar <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Miroslav Lichvar <[email protected]>
Reviewed-by: Jason Gunthorpe <[email protected]>
Acked-by: Alexandre Belloni <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
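The worst-case numbers in the granularity table follow from the wheel's level structure: each level is 8 times coarser than the one below (LVL_CLK_SHIFT = 3 in kernel/time/timer.c) and spans 64 slots of its own granularity. A minimal user-space sketch of that calculation, assuming exactly this 8x-per-level/64-slot layout (an illustration, not part of the commit):

	#include <stdio.h>

	int main(void)
	{
		const unsigned long hz[] = { 1000, 250, 100 };

		for (int i = 0; i < 3; i++) {
			/* the long 659 second timer, expressed in jiffies */
			unsigned long delay = 659UL * hz[i];
			/* level 0 has 1 jiffy granularity; climb levels until
			 * the delay fits into the level's 64 slots */
			unsigned long gran = 1;

			while (delay >= gran * 64)
				gran <<= 3;

			printf("HZ=%4lu: %6lu jiffies granularity (~%lus)\n",
			       hz[i], gran, gran / hz[i]);
		}
		return 0;
	}

For HZ=1000 the 659s delay lands in a level with 32768-jiffy slots (~32s); for HZ=250 and HZ=100 it lands in a 4096-jiffy level (~16s and ~40s respectively), matching the table above.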
1 parent 354c796 · commit c9e6189

3 files changed: +55 −43 lines changed

include/linux/timex.h

Lines changed: 0 additions & 1 deletion
@@ -157,7 +157,6 @@ extern int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex *
 extern void hardpps(const struct timespec64 *, const struct timespec64 *);
 
 int read_current_timer(unsigned long *timer_val);
-void ntp_notify_cmos_timer(void);
 
 /* The clock frequency of the i8253/i8254 PIT */
 #define PIT_TICK_RATE 1193182ul

kernel/time/ntp.c

Lines changed: 48 additions & 42 deletions
@@ -494,65 +494,55 @@ int second_overflow(time64_t secs)
 	return leap;
 }
 
+#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
 static void sync_hw_clock(struct work_struct *work);
-static DECLARE_DELAYED_WORK(sync_work, sync_hw_clock);
-
-static void sched_sync_hw_clock(struct timespec64 now,
-				unsigned long target_nsec, bool fail)
+static DECLARE_WORK(sync_work, sync_hw_clock);
+static struct hrtimer sync_hrtimer;
+#define SYNC_PERIOD_NS (11UL * 60 * NSEC_PER_SEC)
 
+static enum hrtimer_restart sync_timer_callback(struct hrtimer *timer)
 {
-	struct timespec64 next;
+	queue_work(system_power_efficient_wq, &sync_work);
 
-	ktime_get_real_ts64(&next);
-	if (!fail)
-		next.tv_sec = 659;
-	else {
-		/*
-		 * Try again as soon as possible. Delaying long periods
-		 * decreases the accuracy of the work queue timer. Due to this
-		 * the algorithm is very likely to require a short-sleep retry
-		 * after the above long sleep to synchronize ts_nsec.
-		 */
-		next.tv_sec = 0;
-	}
+	return HRTIMER_NORESTART;
+}
 
-	/* Compute the needed delay that will get to tv_nsec == target_nsec */
-	next.tv_nsec = target_nsec - next.tv_nsec;
-	if (next.tv_nsec <= 0)
-		next.tv_nsec += NSEC_PER_SEC;
-	if (next.tv_nsec >= NSEC_PER_SEC) {
-		next.tv_sec++;
-		next.tv_nsec -= NSEC_PER_SEC;
-	}
+static void sched_sync_hw_clock(unsigned long offset_nsec, bool retry)
+{
+	ktime_t exp = ktime_set(ktime_get_real_seconds(), 0);
+
+	if (retry)
+		exp = ktime_add_ns(exp, 2 * NSEC_PER_SEC - offset_nsec);
+	else
+		exp = ktime_add_ns(exp, SYNC_PERIOD_NS - offset_nsec);
 
-	queue_delayed_work(system_power_efficient_wq, &sync_work,
-			   timespec64_to_jiffies(&next));
+	hrtimer_start(&sync_hrtimer, exp, HRTIMER_MODE_ABS);
 }
 
 static void sync_rtc_clock(void)
 {
-	unsigned long target_nsec;
-	struct timespec64 adjust, now;
+	unsigned long offset_nsec;
+	struct timespec64 adjust;
 	int rc;
 
 	if (!IS_ENABLED(CONFIG_RTC_SYSTOHC))
 		return;
 
-	ktime_get_real_ts64(&now);
+	ktime_get_real_ts64(&adjust);
 
-	adjust = now;
 	if (persistent_clock_is_local)
 		adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
 
 	/*
-	 * The current RTC in use will provide the target_nsec it wants to be
-	 * called at, and does rtc_tv_nsec_ok internally.
+	 * The current RTC in use will provide the nanoseconds offset prior
+	 * to a full second it wants to be called at, and invokes
+	 * rtc_tv_nsec_ok() internally.
	 */
-	rc = rtc_set_ntp_time(adjust, &target_nsec);
+	rc = rtc_set_ntp_time(adjust, &offset_nsec);
 	if (rc == -ENODEV)
 		return;
 
-	sched_sync_hw_clock(now, target_nsec, rc);
+	sched_sync_hw_clock(offset_nsec, rc != 0);
 }
 
 #ifdef CONFIG_GENERIC_CMOS_UPDATE
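A quick check of the new sched_sync_hw_clock() arithmetic with hypothetical inputs (user-space sketch, not part of the commit): ktime_set(ktime_get_real_seconds(), 0) truncates to the last full second, so subtracting offset_nsec from a whole number of seconds places the expiry exactly offset_nsec before a future full second.

	#include <stdio.h>

	#define NSEC_PER_SEC	1000000000ULL
	#define SYNC_PERIOD_NS	(11ULL * 60 * NSEC_PER_SEC)

	int main(void)
	{
		/* hypothetical: last full second was 1000, the RTC asked to
		 * be written 0.5s before a full second boundary */
		unsigned long long now_sec = 1000;
		unsigned long long offset_nsec = 500000000;

		/* periodic case: ~11 minutes out, pulled back by offset_nsec */
		unsigned long long exp = now_sec * NSEC_PER_SEC +
					 SYNC_PERIOD_NS - offset_nsec;
		/* retry case: the next-but-one second boundary, same offset */
		unsigned long long rty = now_sec * NSEC_PER_SEC +
					 2 * NSEC_PER_SEC - offset_nsec;

		printf("periodic: %llu.%09llu (0.5s before second %llu)\n",
		       exp / NSEC_PER_SEC, exp % NSEC_PER_SEC, now_sec + 660);
		printf("retry:    %llu.%09llu (0.5s before second %llu)\n",
		       rty / NSEC_PER_SEC, rty % NSEC_PER_SEC, now_sec + 2);
		return 0;
	}

The retry adds two seconds rather than one so the expiry stays in the future even when offset_nsec plus the already-elapsed fraction of the current second exceeds one second.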
@@ -599,7 +589,7 @@ static bool sync_cmos_clock(void)
 		}
 	}
 
-	sched_sync_hw_clock(now, target_nsec, rc);
+	sched_sync_hw_clock(target_nsec, rc != 0);
 	return true;
 }
 
@@ -613,7 +603,12 @@ static bool sync_cmos_clock(void)
  */
 static void sync_hw_clock(struct work_struct *work)
 {
-	if (!ntp_synced())
+	/*
+	 * Don't update if STA_UNSYNC is set and if ntp_notify_cmos_timer()
+	 * managed to schedule the work between the timer firing and the
+	 * work being able to rearm the timer. Wait for the timer to expire.
+	 */
+	if (!ntp_synced() || hrtimer_is_queued(&sync_hrtimer))
 		return;
 
 	if (sync_cmos_clock())
@@ -624,13 +619,23 @@ static void sync_hw_clock(struct work_struct *work)
 
 void ntp_notify_cmos_timer(void)
 {
-	if (!ntp_synced())
-		return;
+	/*
+	 * When the work is currently executed but has not yet the timer
+	 * rearmed this queues the work immediately again. No big issue,
+	 * just a pointless work scheduled.
+	 */
+	if (ntp_synced() && !hrtimer_is_queued(&sync_hrtimer))
+		queue_work(system_power_efficient_wq, &sync_work);
+}
 
-	if (IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE) ||
-	    IS_ENABLED(CONFIG_RTC_SYSTOHC))
-		queue_delayed_work(system_power_efficient_wq, &sync_work, 0);
+static void __init ntp_init_cmos_sync(void)
+{
+	hrtimer_init(&sync_hrtimer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+	sync_hrtimer.function = sync_timer_callback;
 }
+#else /* CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */
+static inline void __init ntp_init_cmos_sync(void) { }
+#endif /* !CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */
 
 /*
  * Propagate a new txc->status value into the NTP state:
@@ -1044,4 +1049,5 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
 void __init ntp_init(void)
 {
 	ntp_clear();
+	ntp_init_cmos_sync();
 }

kernel/time/ntp_internal.h

Lines changed: 7 additions & 0 deletions
@@ -12,4 +12,11 @@ extern int __do_adjtimex(struct __kernel_timex *txc,
 			 const struct timespec64 *ts,
 			 s32 *time_tai, struct audit_ntp_data *ad);
 extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts);
+
+#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
+extern void ntp_notify_cmos_timer(void);
+#else
+static inline void ntp_notify_cmos_timer(void) { }
+#endif
+
 #endif /* _LINUX_NTP_INTERNAL_H */
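The #if/#else pair above is the usual config-stub pattern: call sites invoke ntp_notify_cmos_timer() unconditionally and the empty inline vanishes when neither option is enabled. A self-contained sketch of the pattern, with the hypothetical CONFIG_X standing in for the CONFIG_GENERIC_CMOS_UPDATE/CONFIG_RTC_SYSTOHC test (not part of the commit):

	#include <stdio.h>

	/* stand-in for: defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */
	#ifdef CONFIG_X
	extern void ntp_notify_cmos_timer(void);	/* real definition linked in */
	#else
	static inline void ntp_notify_cmos_timer(void) { }	/* compiles to nothing */
	#endif

	int main(void)
	{
		/* the call site needs no #ifdef of its own */
		ntp_notify_cmos_timer();
		printf("builds with or without CONFIG_X\n");
		return 0;
	}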
