@@ -9,13 +9,14 @@ import (
9
9
"os"
10
10
"os/exec"
11
11
"syscall"
12
+ "time"
12
13
13
14
"github.com/opencontainers/runtime-spec/specs-go"
14
15
"github.com/sirupsen/logrus"
15
16
"golang.org/x/xerrors"
16
17
)
17
18
18
- const RETRY = 3
19
+ const RETRY = 10
19
20
20
21
var (
21
22
defaultOOMScoreAdj = 1000
@@ -81,22 +82,28 @@ func createAndRunc(runcPath string, log *logrus.Logger) error {
81
82
if err != nil {
82
83
return xerrors .Errorf ("cannot encode config.json: %w" , err )
83
84
}
84
- for _ , fn := range []string {"config.json" , "/tmp/debug.json" } {
85
- err = os .WriteFile (fn , fc , 0644 )
86
- if err != nil {
87
- return xerrors .Errorf ("cannot encode config.json: %w" , err )
88
- }
85
+ err = os .WriteFile ("config.json" , fc , 0644 )
86
+ if err != nil {
87
+ return xerrors .Errorf ("cannot encode config.json: %w" , err )
89
88
}
90
89
91
90
// See here for more details on why retries are necessary.
92
91
// https://github.com/gitpod-io/gitpod/issues/12365
93
92
for i := 0 ; i <= RETRY ; i ++ {
94
- err = syscall .Exec (runcPath , os .Args , os .Environ ())
95
- if err == nil {
96
- return err
97
- } else {
93
+ err = exec .Command (runcPath , os .Args [1 :]... ).Run ()
94
+
95
+ if err != nil {
98
96
log .WithError (err ).Warn ("runc failed" )
97
+
98
+ // runc creation failures can be caused by timing issues with workspacekit/seccomp notify under load.
99
+ // Easing off on the pressure here lowers the likelihood of that error.
100
+ // NOTE(cw): glossing over races with delays is bad style, but also pragmatic.
101
+ //
102
+ // Context: https://linear.app/gitpod/issue/ENG-797/docker-containers-sometimes-fail-to-start
103
+ time .Sleep (100 * time .Millisecond )
104
+ continue
99
105
}
106
+ return nil
100
107
}
101
108
return xerrors .Errorf ("exec %s: %w" , runcPath , err )
102
109
}
0 commit comments