@@ -19,10 +19,12 @@ package batchapi
19
19
import (
20
20
"fmt"
21
21
"strings"
22
+ "time"
22
23
23
24
"github.com/aws/aws-sdk-go/aws"
24
25
"github.com/aws/aws-sdk-go/service/sqs"
25
26
awslib "github.com/cortexlabs/cortex/pkg/lib/aws"
27
+ "github.com/cortexlabs/cortex/pkg/lib/cron"
26
28
"github.com/cortexlabs/cortex/pkg/lib/errors"
27
29
libjson "github.com/cortexlabs/cortex/pkg/lib/json"
28
30
s "github.com/cortexlabs/cortex/pkg/lib/strings"
@@ -31,6 +33,11 @@ import (
31
33
"github.com/cortexlabs/cortex/pkg/types/spec"
32
34
)
33
35
36
// _markForDeletion is the SQS queue tag key under which markForDeletion stores
// the time (formatted as RFC3339Nano) at which a queue was scheduled for
// deletion.
const _markForDeletion = "cortex.dev/to-be-deleted"

// _queueGraceKillTimePeriod is how long a queue that has been marked for
// deletion is kept around before deleteQueueWithDelay actually deletes it.
const _queueGraceKillTimePeriod = 5 * time.Minute
40
+
34
41
func apiQueueNamePrefix (apiName string ) string {
35
42
return config .CoreConfig .SQSNamePrefix () + apiName + "-"
36
43
}
@@ -121,21 +128,75 @@ func listQueueURLsForAllAPIs() ([]string, error) {
121
128
return queueURLs , nil
122
129
}
123
130
124
- func deleteQueueByJobKey (jobKey spec.JobKey ) error {
131
+ func markForDeletion (queueURL string ) error {
132
+ _ , err := config .AWS .SQS ().TagQueue (& sqs.TagQueueInput {
133
+ QueueUrl : aws .String (queueURL ),
134
+ Tags : aws .StringMap (map [string ]string {
135
+ _markForDeletion : time .Now ().Format (time .RFC3339Nano ),
136
+ }),
137
+ })
138
+ if err != nil {
139
+ return errors .WithStack (err )
140
+ }
141
+ return nil
142
+ }
143
+
144
+ func deleteQueueWithDelay (jobKey spec.JobKey ) error {
125
145
queueURL , err := getJobQueueURL (jobKey )
126
146
if err != nil {
127
147
return err
128
148
}
129
149
130
- return deleteQueueByURL (queueURL )
131
- }
132
-
133
- func deleteQueueByJobKeyIfExists (jobKey spec.JobKey ) error {
134
- err := deleteQueueByJobKey (jobKey )
135
- if err != nil && awslib .IsNonExistentQueueErr (errors .CauseOrSelf (err )) {
150
+ output , err := config .AWS .SQS ().ListQueueTags (& sqs.ListQueueTagsInput {
151
+ QueueUrl : aws .String (queueURL ),
152
+ })
153
+ if err != nil {
154
+ if ! awslib .IsNonExistentQueueErr (errors .CauseOrSelf (err )) {
155
+ operatorLogger .Error (err )
156
+ }
136
157
return nil
137
158
}
138
- return err
159
+
160
+ if value , exists := output .Tags [_markForDeletion ]; exists {
161
+ markedTime , err := time .Parse (time .RFC3339Nano , * value )
162
+ if err != nil {
163
+ err = deleteQueueByURL (queueURL )
164
+ if err != nil {
165
+ if ! awslib .IsNonExistentQueueErr (errors .CauseOrSelf (err )) {
166
+ operatorLogger .Error (err )
167
+ }
168
+ return nil
169
+ }
170
+ }
171
+
172
+ if time .Since (markedTime ) > _queueGraceKillTimePeriod {
173
+ err := deleteQueueByURL (queueURL )
174
+ if err != nil {
175
+ if ! awslib .IsNonExistentQueueErr (errors .CauseOrSelf (err )) {
176
+ operatorLogger .Error (err )
177
+ }
178
+ return nil
179
+ }
180
+ }
181
+ } else {
182
+ operatorLogger .Info ("scheduling deleting queue " + jobKey .UserString ())
183
+ err = markForDeletion (queueURL )
184
+ if err != nil && awslib .IsNonExistentQueueErr (errors .CauseOrSelf (err )) {
185
+ return nil
186
+ }
187
+
188
+ time .AfterFunc (_queueGraceKillTimePeriod , func () {
189
+ defer cron .Recoverer (nil )
190
+ operatorLogger .Info ("deleting queue " + jobKey .UserString ())
191
+ err := deleteQueueByURL (queueURL )
192
+ // ignore non existent queue errors
193
+ if err != nil && ! awslib .IsNonExistentQueueErr (errors .CauseOrSelf (err )) {
194
+ operatorLogger .Error (err )
195
+ }
196
+ })
197
+ }
198
+
199
+ return nil
139
200
}
140
201
141
202
func deleteQueueByURL (queueURL string ) error {
0 commit comments