@@ -18,15 +18,22 @@ package machine
18
18
19
19
import (
20
20
"context"
21
+ "fmt"
21
22
"os"
23
+ "time"
22
24
25
+ "github.com/go-log/log/info"
23
26
machinev1 "github.com/openshift/cluster-api/pkg/apis/machine/v1beta1"
24
27
controllerError "github.com/openshift/cluster-api/pkg/controller/error"
25
28
"github.com/openshift/cluster-api/pkg/util"
29
+ kubedrain "github.com/openshift/kubernetes-drain"
26
30
corev1 "k8s.io/api/core/v1"
27
31
apierrors "k8s.io/apimachinery/pkg/api/errors"
28
32
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
29
33
"k8s.io/apimachinery/pkg/runtime"
34
+ "k8s.io/client-go/kubernetes"
35
+ "k8s.io/client-go/rest"
36
+ "k8s.io/client-go/tools/record"
30
37
"k8s.io/klog"
31
38
"sigs.k8s.io/controller-runtime/pkg/client"
32
39
"sigs.k8s.io/controller-runtime/pkg/controller"
@@ -38,6 +45,9 @@ import (
38
45
39
46
const (
	// NodeNameEnvVar is the environment variable from which the controller
	// loads the name of the node it is running on (see newReconciler).
	NodeNameEnvVar = "NODE_NAME"

	// ExcludeNodeDrainingAnnotation annotation explicitly skips node draining if set
	ExcludeNodeDrainingAnnotation = "machine.openshift.io/exclude-node-draining"
)
42
52
43
53
// DefaultActuator is a package-level Actuator.
// NOTE(review): presumably used as a fallback when no actuator is supplied
// explicitly — confirm against Add/AddWithActuator callers.
var DefaultActuator Actuator
@@ -49,10 +59,12 @@ func AddWithActuator(mgr manager.Manager, actuator Actuator) error {
49
59
// newReconciler returns a new reconcile.Reconciler
50
60
func newReconciler (mgr manager.Manager , actuator Actuator ) reconcile.Reconciler {
51
61
r := & ReconcileMachine {
52
- Client : mgr .GetClient (),
53
- scheme : mgr .GetScheme (),
54
- nodeName : os .Getenv (NodeNameEnvVar ),
55
- actuator : actuator ,
62
+ Client : mgr .GetClient (),
63
+ eventRecorder : mgr .GetRecorder ("machine-controller" ),
64
+ config : mgr .GetConfig (),
65
+ scheme : mgr .GetScheme (),
66
+ nodeName : os .Getenv (NodeNameEnvVar ),
67
+ actuator : actuator ,
56
68
}
57
69
58
70
if r .nodeName == "" {
@@ -80,8 +92,11 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
80
92
// ReconcileMachine reconciles a Machine object
81
93
type ReconcileMachine struct {
82
94
client.Client
95
+ config * rest.Config
83
96
scheme * runtime.Scheme
84
97
98
+ eventRecorder record.EventRecorder
99
+
85
100
actuator Actuator
86
101
87
102
// nodeName is the name of the node on which the machine controller is running, if not present, it is loaded from NODE_NAME.
@@ -168,6 +183,18 @@ func (r *ReconcileMachine) Reconcile(request reconcile.Request) (reconcile.Resul
168
183
}
169
184
170
185
klog .Infof ("Reconciling machine %q triggers delete" , name )
186
+
187
+ // Drain node before deletion
188
+ // If a machine is not linked to a node, just delete the machine. Since a node
189
+ // can be unlinked from a machine when the node goes NotReady and is removed
190
+ // by cloud controller manager. In that case some machines would never get
191
+ // deleted without a manual intervention.
192
+ if _ , exists := m .ObjectMeta .Annotations [ExcludeNodeDrainingAnnotation ]; ! exists && m .Status .NodeRef != nil {
193
+ if err := r .drainNode (m ); err != nil {
194
+ return reconcile.Result {}, err
195
+ }
196
+ }
197
+
171
198
if err := r .actuator .Delete (ctx , cluster , m ); err != nil {
172
199
if requeueErr , ok := err .(* controllerError.RequeueAfterError ); ok {
173
200
klog .Infof ("Actuator returned requeue-after error: %v" , requeueErr )
@@ -233,6 +260,41 @@ func (r *ReconcileMachine) Reconcile(request reconcile.Request) (reconcile.Resul
233
260
return reconcile.Result {}, nil
234
261
}
235
262
263
+ func (r * ReconcileMachine ) drainNode (machine * machinev1.Machine ) error {
264
+ kubeClient , err := kubernetes .NewForConfig (r .config )
265
+ if err != nil {
266
+ return fmt .Errorf ("unable to build kube client: %v" , err )
267
+ }
268
+ node , err := kubeClient .CoreV1 ().Nodes ().Get (machine .Status .NodeRef .Name , metav1.GetOptions {})
269
+ if err != nil {
270
+ return fmt .Errorf ("unable to get node %q: %v" , machine .Status .NodeRef .Name , err )
271
+ }
272
+
273
+ if err := kubedrain .Drain (
274
+ kubeClient ,
275
+ []* corev1.Node {node },
276
+ & kubedrain.DrainOptions {
277
+ Force : true ,
278
+ IgnoreDaemonsets : true ,
279
+ DeleteLocalData : true ,
280
+ GracePeriodSeconds : - 1 ,
281
+ Logger : info .New (klog .V (0 )),
282
+ // If a pod is not evicted in 20 second, retry the eviction next time the
283
+ // machine gets reconciled again (to allow other machines to be reconciled)
284
+ Timeout : 20 * time .Second ,
285
+ },
286
+ ); err != nil {
287
+ // Machine still tries to terminate after drain failure
288
+ klog .Warningf ("drain failed for machine %q: %v" , machine .Name , err )
289
+ return & controllerError.RequeueAfterError {RequeueAfter : 20 * time .Second }
290
+ }
291
+
292
+ klog .Infof ("drain successful for machine %q" , machine .Name )
293
+ r .eventRecorder .Eventf (machine , corev1 .EventTypeNormal , "Deleted" , "Node %q drained" , node .Name )
294
+
295
+ return nil
296
+ }
297
+
236
298
func (r * ReconcileMachine ) getCluster (ctx context.Context , machine * machinev1.Machine ) (* machinev1.Cluster , error ) {
237
299
if machine .Labels [machinev1 .MachineClusterLabelName ] == "" {
238
300
klog .Infof ("Machine %q in namespace %q doesn't specify %q label, assuming nil cluster" , machine .Name , machinev1 .MachineClusterLabelName , machine .Namespace )
0 commit comments