@@ -114,7 +114,11 @@ type controllerManager struct {
114
114
started bool
115
115
startedLeader bool
116
116
healthzStarted bool
117
- errChan chan error
117
+
118
+ // NB(directxman12): we don't just use an error channel here to avoid the situation where the
119
+ // error channel is too small and we end up blocking some goroutines waiting to report their errors.
120
+ // errSignal lets us track when we should stop because an error occurred
121
+ errSignal * errSignaler
118
122
119
123
// internalStop is the stop channel *actually* used by everything involved
120
124
// with the manager as a stop channel, so that we can pass a stop channel
@@ -150,6 +154,45 @@ type controllerManager struct {
150
154
retryPeriod time.Duration
151
155
}
152
156
157
// errSignaler records the first error reported to it and announces that an
// error occurred by closing a channel. Because reporting never sends on a
// channel, any number of goroutines can call SignalError without blocking.
//
// The errSignal channel must be initialized (e.g. with make) before
// SignalError is called; closing a nil channel panics.
type errSignaler struct {
	// errSignal indicates that an error occurred, when closed. It shouldn't
	// be written to.
	errSignal chan struct{}

	// err is the received error
	err error

	// mu guards err and the closing of errSignal.
	mu sync.Mutex
}

// SignalError stores err and closes the signal channel if no error has been
// stored yet. Later calls are no-ops, so only the first error is kept.
//
// A nil err is ignored: closing the channel without recording an error would
// wake waiters with nothing to report, and because err would still be nil the
// next call would close the already-closed channel and panic.
func (r *errSignaler) SignalError(err error) {
	if err == nil {
		// non-error, nothing to signal
		return
	}

	r.mu.Lock()
	defer r.mu.Unlock()

	if r.err != nil {
		// we already have an error, don't try again
		return
	}

	// save the error and report it
	r.err = err
	close(r.errSignal)
}

// Error returns the error stored by SignalError, or nil if none has been
// stored yet.
func (r *errSignaler) Error() error {
	r.mu.Lock()
	defer r.mu.Unlock()

	return r.err
}

// GotError returns the channel that is closed once an error has been
// signaled. Callers should only receive from it.
func (r *errSignaler) GotError() chan struct{} {
	r.mu.Lock()
	defer r.mu.Unlock()

	return r.errSignal
}
195
+
153
196
// Add sets dependencies on i, and adds it to the list of Runnables to start.
154
197
func (cm * controllerManager ) Add (r Runnable ) error {
155
198
cm .mu .Lock ()
@@ -174,7 +217,9 @@ func (cm *controllerManager) Add(r Runnable) error {
174
217
if shouldStart {
175
218
// If already started, start the controller
176
219
go func () {
177
- cm .errChan <- r .Start (cm .internalStop )
220
+ if err := r .Start (cm .internalStop ); err != nil {
221
+ cm .errSignal .SignalError (err )
222
+ }
178
223
}()
179
224
}
180
225
@@ -304,15 +349,15 @@ func (cm *controllerManager) serveMetrics(stop <-chan struct{}) {
304
349
go func () {
305
350
log .Info ("starting metrics server" , "path" , metricsPath )
306
351
if err := server .Serve (cm .metricsListener ); err != nil && err != http .ErrServerClosed {
307
- cm .errChan <- err
352
+ cm .errSignal . SignalError ( err )
308
353
}
309
354
}()
310
355
311
356
// Shutdown the server when stop is closed
312
357
select {
313
358
case <- stop :
314
359
if err := server .Shutdown (context .Background ()); err != nil {
315
- cm .errChan <- err
360
+ cm .errSignal . SignalError ( err )
316
361
}
317
362
}
318
363
}
@@ -334,7 +379,7 @@ func (cm *controllerManager) serveHealthProbes(stop <-chan struct{}) {
334
379
// Run server
335
380
go func () {
336
381
if err := server .Serve (cm .healthProbeListener ); err != nil && err != http .ErrServerClosed {
337
- cm .errChan <- err
382
+ cm .errSignal . SignalError ( err )
338
383
}
339
384
}()
340
385
cm .healthzStarted = true
@@ -344,7 +389,7 @@ func (cm *controllerManager) serveHealthProbes(stop <-chan struct{}) {
344
389
select {
345
390
case <- stop :
346
391
if err := server .Shutdown (context .Background ()); err != nil {
347
- cm .errChan <- err
392
+ cm .errSignal . SignalError ( err )
348
393
}
349
394
}
350
395
}
@@ -353,6 +398,9 @@ func (cm *controllerManager) Start(stop <-chan struct{}) error {
353
398
// join the passed-in stop channel as an upstream feeding into cm.internalStopper
354
399
defer close (cm .internalStopper )
355
400
401
+ // initialize this here so that we reset the signal channel state on every start
402
+ cm .errSignal = & errSignaler {errSignal : make (chan struct {})}
403
+
356
404
// Metrics should be served whether the controller is leader or not.
357
405
// (If we don't serve metrics for non-leaders, prometheus will still scrape
358
406
// the pod but will get a connection refused)
@@ -380,9 +428,9 @@ func (cm *controllerManager) Start(stop <-chan struct{}) error {
380
428
case <- stop :
381
429
// We are done
382
430
return nil
383
- case err := <- cm .errChan :
431
+ case <- cm .errSignal . GotError () :
384
432
// Error starting a controller
385
- return err
433
+ return cm . errSignal . Error ()
386
434
}
387
435
}
388
436
@@ -398,7 +446,9 @@ func (cm *controllerManager) startNonLeaderElectionRunnables() {
398
446
// Write any Start errors to a channel so we can return them
399
447
ctrl := c
400
448
go func () {
401
- cm .errChan <- ctrl .Start (cm .internalStop )
449
+ if err := ctrl .Start (cm .internalStop ); err != nil {
450
+ cm .errSignal .SignalError (err )
451
+ }
402
452
}()
403
453
}
404
454
}
@@ -415,7 +465,9 @@ func (cm *controllerManager) startLeaderElectionRunnables() {
415
465
// Write any Start errors to a channel so we can return them
416
466
ctrl := c
417
467
go func () {
418
- cm .errChan <- ctrl .Start (cm .internalStop )
468
+ if err := ctrl .Start (cm .internalStop ); err != nil {
469
+ cm .errSignal .SignalError (err )
470
+ }
419
471
}()
420
472
}
421
473
@@ -433,7 +485,7 @@ func (cm *controllerManager) waitForCache() {
433
485
}
434
486
go func () {
435
487
if err := cm .startCache (cm .internalStop ); err != nil {
436
- cm .errChan <- err
488
+ cm .errSignal . SignalError ( err )
437
489
}
438
490
}()
439
491
@@ -457,7 +509,7 @@ func (cm *controllerManager) startLeaderElection() (err error) {
457
509
// Most implementations of leader election log.Fatal() here.
458
510
// Since Start is wrapped in log.Fatal when called, we can just return
459
511
// an error here which will cause the program to exit.
460
- cm .errChan <- fmt .Errorf ("leader election lost" )
512
+ cm .errSignal . SignalError ( fmt .Errorf ("leader election lost" ) )
461
513
},
462
514
},
463
515
})
0 commit comments