@@ -19,15 +19,22 @@ package machine
19
19
import (
20
20
"context"
21
21
"errors"
22
+ "fmt"
22
23
"os"
24
+ "time"
23
25
26
+ "github.com/go-log/log/info"
24
27
machinev1 "github.com/openshift/cluster-api/pkg/apis/machine/v1beta1"
25
28
controllerError "github.com/openshift/cluster-api/pkg/controller/error"
26
29
"github.com/openshift/cluster-api/pkg/util"
30
+ kubedrain "github.com/openshift/kubernetes-drain"
27
31
corev1 "k8s.io/api/core/v1"
28
32
apierrors "k8s.io/apimachinery/pkg/api/errors"
29
33
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
30
34
"k8s.io/apimachinery/pkg/runtime"
35
+ "k8s.io/client-go/kubernetes"
36
+ "k8s.io/client-go/rest"
37
+ "k8s.io/client-go/tools/record"
31
38
"k8s.io/klog"
32
39
"sigs.k8s.io/controller-runtime/pkg/client"
33
40
"sigs.k8s.io/controller-runtime/pkg/controller"
@@ -37,7 +44,12 @@ import (
37
44
"sigs.k8s.io/controller-runtime/pkg/source"
38
45
)
39
46
40
- const NodeNameEnvVar = "NODE_NAME"
47
+ const (
48
+ NodeNameEnvVar = "NODE_NAME"
49
+
50
+ // ExcludeNodeDrainingAnnotation is the machine annotation whose presence (with any value) makes the controller skip node draining before deletion
51
+ ExcludeNodeDrainingAnnotation = "machine.openshift.io/exclude-node-draining"
52
+ )
41
53
42
54
var DefaultActuator Actuator
43
55
@@ -48,10 +60,12 @@ func AddWithActuator(mgr manager.Manager, actuator Actuator) error {
48
60
// newReconciler returns a new reconcile.Reconciler
49
61
func newReconciler (mgr manager.Manager , actuator Actuator ) reconcile.Reconciler {
50
62
r := & ReconcileMachine {
51
- Client : mgr .GetClient (),
52
- scheme : mgr .GetScheme (),
53
- nodeName : os .Getenv (NodeNameEnvVar ),
54
- actuator : actuator ,
63
+ Client : mgr .GetClient (),
64
+ eventRecorder : mgr .GetRecorder ("machine-controller" ),
65
+ config : mgr .GetConfig (),
66
+ scheme : mgr .GetScheme (),
67
+ nodeName : os .Getenv (NodeNameEnvVar ),
68
+ actuator : actuator ,
55
69
}
56
70
57
71
if r .nodeName == "" {
@@ -83,8 +97,11 @@ var _ reconcile.Reconciler = &ReconcileMachine{}
83
97
// ReconcileMachine reconciles a Machine object
84
98
type ReconcileMachine struct {
85
99
client.Client
100
+ config * rest.Config
86
101
scheme * runtime.Scheme
87
102
103
+ eventRecorder record.EventRecorder
104
+
88
105
actuator Actuator
89
106
90
107
// nodeName is the name of the node on which the machine controller is running; if not set explicitly, it is loaded from the NODE_NAME environment variable.
@@ -145,6 +162,51 @@ func (r *ReconcileMachine) Reconcile(request reconcile.Request) (reconcile.Resul
145
162
return reconcile.Result {}, nil
146
163
}
147
164
klog .Infof ("reconciling machine object %v triggers delete." , name )
165
+
166
+ // Drain node before deletion
167
+ // If a machine is not linked to a node, just delete the machine: a node
168
+ // can be unlinked from a machine when the node goes NotReady and is removed
169
+ // by the cloud controller manager, and in that case some machines would
170
+ // never get deleted without manual intervention.
171
+ if _ , exists := m .ObjectMeta .Annotations [ExcludeNodeDrainingAnnotation ]; ! exists && m .Status .NodeRef != nil {
172
+ if err := func () error {
173
+ kubeClient , err := kubernetes .NewForConfig (r .config )
174
+ if err != nil {
175
+ return fmt .Errorf ("unable to build kube client: %v" , err )
176
+ }
177
+ node , err := kubeClient .CoreV1 ().Nodes ().Get (m .Status .NodeRef .Name , metav1.GetOptions {})
178
+ if err != nil {
179
+ return fmt .Errorf ("unable to get node %q: %v" , m .Status .NodeRef .Name , err )
180
+ }
181
+
182
+ if err := kubedrain .Drain (
183
+ kubeClient ,
184
+ []* corev1.Node {node },
185
+ & kubedrain.DrainOptions {
186
+ Force : true ,
187
+ IgnoreDaemonsets : true ,
188
+ DeleteLocalData : true ,
189
+ GracePeriodSeconds : - 1 ,
190
+ Logger : info .New (klog .V (0 )),
191
+ // If a pod is not evicted within 20 seconds, retry the eviction the next time the
192
+ // machine gets reconciled again (to allow other machines to be reconciled)
193
+ Timeout : 20 * time .Second ,
194
+ },
195
+ ); err != nil {
196
+ // Drain failed: return a requeue error so the drain is retried on a later reconcile; the machine is not deleted in this pass
197
+ klog .Warningf ("drain failed for machine %q: %v" , m .Name , err )
198
+ return & controllerError.RequeueAfterError {RequeueAfter : 20 * time .Second }
199
+ }
200
+
201
+ klog .Infof ("drain successful for machine %q" , m .Name )
202
+ r .eventRecorder .Eventf (m , corev1 .EventTypeNormal , "Deleted" , "Node %q drained" , node .Name )
203
+
204
+ return nil
205
+ }(); err != nil {
206
+ return reconcile.Result {}, err
207
+ }
208
+ }
209
+
148
210
if err := r .actuator .Delete (ctx , cluster , m ); err != nil {
149
211
klog .Errorf ("Error deleting machine object %v; %v" , name , err )
150
212
if requeueErr , ok := err .(* controllerError.RequeueAfterError ); ok {
0 commit comments