@@ -25,6 +25,7 @@ import (
25
25
"sync"
26
26
"time"
27
27
28
+ "github.com/davecgh/go-spew/spew"
28
29
"github.com/go-logr/logr"
29
30
"github.com/prometheus/client_golang/prometheus/promhttp"
30
31
"k8s.io/apimachinery/pkg/api/meta"
@@ -105,10 +106,9 @@ type controllerManager struct {
105
106
healthzHandler * healthz.Handler
106
107
107
108
mu sync.Mutex
108
- started bool
109
- startedLeader bool
110
109
healthzStarted bool
111
110
errChan chan error
111
+ runnables * runnables
112
112
113
113
// controllerOptions are the global controller options.
114
114
controllerOptions v1alpha1.ControllerConfigurationSpec
@@ -134,8 +134,6 @@ type controllerManager struct {
134
134
// election was configured.
135
135
elected chan struct {}
136
136
137
- caches []hasCache
138
-
139
137
// port is the port that the webhook server serves at.
140
138
port int
141
139
// host is the hostname that the webhook server binds to.
@@ -160,10 +158,6 @@ type controllerManager struct {
160
158
// between tries of actions.
161
159
retryPeriod time.Duration
162
160
163
- // waitForRunnable is holding the number of runnables currently running so that
164
- // we can wait for them to exit before quitting the manager
165
- waitForRunnable sync.WaitGroup
166
-
167
161
// gracefulShutdownTimeout is the duration given to runnables to stop
168
162
// before the manager actually returns on stop.
169
163
gracefulShutdownTimeout time.Duration
@@ -194,6 +188,7 @@ type hasCache interface {
194
188
func (cm * controllerManager ) Add (r Runnable ) error {
195
189
cm .mu .Lock ()
196
190
defer cm .mu .Unlock ()
191
+
197
192
if cm .stopProcedureEngaged {
198
193
return errors .New ("can't accept new runnable as stop procedure is already engaged" )
199
194
}
@@ -203,31 +198,14 @@ func (cm *controllerManager) Add(r Runnable) error {
203
198
return err
204
199
}
205
200
206
- var shouldStart bool
207
-
208
- // Add the runnable to the leader election or the non-leaderelection list
209
- if leRunnable , ok := r .(LeaderElectionRunnable ); ok && ! leRunnable .NeedLeaderElection () {
210
- shouldStart = cm .started
211
- cm .nonLeaderElectionRunnables = append (cm .nonLeaderElectionRunnables , r )
212
- } else if hasCache , ok := r .(hasCache ); ok {
213
- cm .caches = append (cm .caches , hasCache )
214
- if cm .started {
215
- cm .startRunnable (hasCache )
216
- if ! hasCache .GetCache ().WaitForCacheSync (cm .internalCtx ) {
217
- return fmt .Errorf ("could not sync cache" )
201
+ return cm .runnables .Add (r , func (ctx context.Context ) bool {
202
+ if cache , ok := r .(hasCache ); ok {
203
+ if ! cache .GetCache ().WaitForCacheSync (cm .internalCtx ) {
204
+ return false
218
205
}
219
206
}
220
- } else {
221
- shouldStart = cm .startedLeader
222
- cm .leaderElectionRunnables = append (cm .leaderElectionRunnables , r )
223
- }
224
-
225
- if shouldStart {
226
- // If already started, start the controller
227
- cm .startRunnable (r )
228
- }
229
-
230
- return nil
207
+ return true
208
+ })
231
209
}
232
210
233
211
// Deprecated: use the equivalent Options field to set a field. This method will be removed in v0.10.
@@ -385,13 +363,13 @@ func (cm *controllerManager) serveMetrics() {
385
363
Handler : mux ,
386
364
}
387
365
// Run the server
388
- cm .startRunnable (RunnableFunc (func (_ context.Context ) error {
366
+ cm .runnables . Add (RunnableFunc (func (_ context.Context ) error {
389
367
cm .logger .Info ("starting metrics server" , "path" , defaultMetricsEndpoint )
390
368
if err := server .Serve (cm .metricsListener ); err != nil && err != http .ErrServerClosed {
391
369
return err
392
370
}
393
371
return nil
394
- }))
372
+ }), nil )
395
373
396
374
// Shutdown the server when stop is closed
397
375
<- cm .internalProceduresStop
@@ -422,12 +400,12 @@ func (cm *controllerManager) serveHealthProbes() {
422
400
}
423
401
424
402
// Run server
425
- cm .startRunnable (RunnableFunc (func (_ context.Context ) error {
403
+ cm .runnables . Add (RunnableFunc (func (_ context.Context ) error {
426
404
if err := server .Serve (cm .healthProbeListener ); err != nil && err != http .ErrServerClosed {
427
405
return err
428
406
}
429
407
return nil
430
- }))
408
+ }), nil )
431
409
cm .healthzStarted = true
432
410
}()
433
411
@@ -438,11 +416,30 @@ func (cm *controllerManager) serveHealthProbes() {
438
416
}
439
417
}
440
418
419
+ // Start starts the manager and blocks indefinitely.
420
+ // There are only two ways to have Start return:
421
+ // an error has occurred in one of the internal operations,
422
+ // such as leader election, cache start, webhooks, and so on;
423
+ // or the context is cancelled.
441
424
func (cm * controllerManager ) Start (ctx context.Context ) (err error ) {
425
+ cm .mu .Lock ()
426
+ {
427
+ // Initialize the internal context.
428
+ cm .internalCtx , cm .internalCancel = context .WithCancel (ctx )
429
+
430
+ // Initialize this here so that we reset the signal channel state on every start.
431
+ // Everything that might write into this channel must be started in a new goroutine,
432
+ // because otherwise we might block this routine trying to write into the unbuffered channel
433
+ // and will not be able to enter the deferred cm.engageStopProcedure() which drains
434
+ // it.
435
+ cm .errChan = make (chan error )
436
+ }
437
+ cm .mu .Unlock ()
438
+
439
+ // Add the cluster runnable.
442
440
if err := cm .Add (cm .cluster ); err != nil {
443
441
return fmt .Errorf ("failed to add cluster to runnables: %w" , err )
444
442
}
445
- cm .internalCtx , cm .internalCancel = context .WithCancel (ctx )
446
443
447
444
// This chan indicates that stop is complete, in other words all runnables have returned or timed out on the stop request
448
445
stopComplete := make (chan struct {})
@@ -463,13 +460,6 @@ func (cm *controllerManager) Start(ctx context.Context) (err error) {
463
460
}
464
461
}()
465
462
466
- // initialize this here so that we reset the signal channel state on every start
467
- // Everything that might write into this channel must be started in a new goroutine,
468
- // because otherwise we might block this routine trying to write into the full channel
469
- // and will not be able to enter the deferred cm.engageStopProcedure() which drains
470
- // it.
471
- cm .errChan = make (chan error )
472
-
473
463
// Metrics should be served whether the controller is leader or not.
474
464
// (If we don't serve metrics for non-leaders, prometheus will still scrape
475
465
// the pod but will get a connection refused)
@@ -568,7 +558,10 @@ func (cm *controllerManager) waitForRunnableToEnd(shutdownCancel context.CancelF
568
558
}()
569
559
570
560
go func () {
571
- cm .waitForRunnable .Wait ()
561
+ cm .runnables .others .StopAndWait ()
562
+ cm .runnables .caches .StopAndWait ()
563
+ cm .runnables .leaderElection .StopAndWait ()
564
+ cm .runnables .webhooks .StopAndWait ()
572
565
shutdownCancel ()
573
566
}()
574
567
@@ -580,71 +573,29 @@ func (cm *controllerManager) waitForRunnableToEnd(shutdownCancel context.CancelF
580
573
}
581
574
582
575
// startNonLeaderElectionRunnables brings up the runnable groups that do not
// depend on leader election, in a fixed order: it starts the webhooks group
// and calls WaitReady on it, calls WaitReady on the caches group, and finally
// starts the "others" group. Every call uses cm.internalCtx; the Start calls
// are also handed cm.errChan (presumably so runnable errors can be reported
// back to the manager; confirm against the runnables implementation).
// Lines prefixed with "-" below are the previous implementation removed by
// this change.
func (cm * controllerManager ) startNonLeaderElectionRunnables () {
583
- cm .mu .Lock ()
584
- defer cm .mu .Unlock ()
585
-
586
576
// First start any webhook servers, which includes conversion, validation, and defaulting
587
577
// webhooks that are registered.
588
578
//
589
579
// WARNING: Webhooks MUST start before any cache is populated, otherwise there is a race condition
590
580
// between conversion webhooks and the cache sync (usually initial list) which causes the webhooks
591
581
// to never start because no cache can be populated.
592
- for _ , c := range cm .nonLeaderElectionRunnables {
593
- if _ , ok := c .(* webhook.Server ); ok {
594
- cm .startRunnable (c )
595
- }
596
- }
582
+ cm .runnables .webhooks .Start (cm .internalCtx , cm .errChan )
583
+ cm .runnables .webhooks .WaitReady (cm .internalCtx )
597
584
598
585
// Start and wait for caches.
599
- cm .waitForCache (cm .internalCtx )
586
// NOTE(review): only WaitReady is called on the caches group here; no
// caches.Start call is visible in this function (the removed waitForCache
// both started and waited). Confirm the group is started elsewhere before
// this point, otherwise this wait may never complete.
+ cm .runnables . caches . WaitReady (cm .internalCtx )
600
587
601
588
// Start the non-leaderelection Runnables after the cache has synced
602
- for _ , c := range cm .nonLeaderElectionRunnables {
603
- if _ , ok := c .(* webhook.Server ); ok {
604
- continue
605
- }
606
-
607
- // Controllers block, but we want to return an error if any have an error starting.
608
- // Write any Start errors to a channel so we can return them
609
- cm .startRunnable (c )
610
- }
589
+ cm .runnables .others .Start (cm .internalCtx , cm .errChan )
611
590
}
612
591
613
592
// startLeaderElectionRunnables starts the caches group and calls WaitReady on
// it, then starts the leaderElection group and calls WaitReady on it. Every
// call uses cm.internalCtx; the Start calls are also handed cm.errChan.
// Lines prefixed with "-" below are the previous implementation (including the
// removed waitForCache helper) deleted by this change.
func (cm * controllerManager ) startLeaderElectionRunnables () {
614
- cm .mu .Lock ()
615
- defer cm .mu .Unlock ()
616
-
617
- cm .waitForCache (cm .internalCtx )
618
-
619
- // Start the leader election Runnables after the cache has synced
620
- for _ , c := range cm .leaderElectionRunnables {
621
- // Controllers block, but we want to return an error if any have an error starting.
622
- // Write any Start errors to a channel so we can return them
623
- cm .startRunnable (c )
624
- }
625
-
626
- cm .startedLeader = true
627
- }
593
// NOTE(review): this spew.Dump looks like leftover debug output; it bypasses
// cm.logger. Consider dropping it together with the go-spew import that this
// change adds, or replacing it with a cm.logger call.
+ spew .Dump ("STARTING THE CACHES!!!" )
594
+ cm .runnables .caches .Start (cm .internalCtx , cm .errChan )
595
+ cm .runnables .caches .WaitReady (cm .internalCtx )
628
596
629
- func (cm * controllerManager ) waitForCache (ctx context.Context ) {
630
- if cm .started {
631
- return
632
- }
633
-
634
- for _ , cache := range cm .caches {
635
- cm .startRunnable (cache )
636
- }
637
-
638
- // Wait for the caches to sync.
639
- // TODO(community): Check the return value and write a test
640
- for _ , cache := range cm .caches {
641
- cache .GetCache ().WaitForCacheSync (ctx )
642
- }
643
- // TODO: This should be the return value of cm.cache.WaitForCacheSync but we abuse
644
- // cm.started as check if we already started the cache so it must always become true.
645
- // Making sure that the cache doesn't get started twice is needed to not get a "close
646
- // of closed channel" panic
647
- cm .started = true
597
+ cm .runnables .leaderElection .Start (cm .internalCtx , cm .errChan )
598
+ cm .runnables .leaderElection .WaitReady (cm .internalCtx )
648
599
}
649
600
650
601
func (cm * controllerManager ) startLeaderElection () (err error ) {
@@ -694,13 +645,3 @@ func (cm *controllerManager) startLeaderElection() (err error) {
694
645
// Elected returns the manager's elected channel (cm.elected) as a
// receive-only channel, so callers can wait on it but cannot send on or
// close it.
func (cm * controllerManager ) Elected () <- chan struct {} {
695
646
return cm .elected
696
647
}
697
-
698
- func (cm * controllerManager ) startRunnable (r Runnable ) {
699
- cm .waitForRunnable .Add (1 )
700
- go func () {
701
- defer cm .waitForRunnable .Done ()
702
- if err := r .Start (cm .internalCtx ); err != nil {
703
- cm .errChan <- err
704
- }
705
- }()
706
- }
0 commit comments