|
| 1 | +// Copyright The libcontainer authors |
| 2 | + |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | + |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | + |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +// Implements creation of eBPF device filter program. |
| 16 | +// |
| 17 | +// Based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c |
| 18 | +// |
| 19 | +// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano) |
| 20 | +// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397 |
| 21 | +package devicefilter |
| 22 | + |
| 23 | +import ( |
| 24 | + "errors" |
| 25 | + "fmt" |
| 26 | + "math" |
| 27 | + "strconv" |
| 28 | + |
| 29 | + "github.com/cilium/ebpf/asm" |
| 30 | + "golang.org/x/sys/unix" |
| 31 | + |
| 32 | + devices "github.com/gitpod-io/gitpod/ws-daemon/pkg/libcontainer/devices" |
| 33 | +) |
| 34 | + |
// license is the license string returned alongside the generated program.
// Its format is the same as the kernel MODULE_LICENSE macro.
const license = "Apache"
| 39 | + |
| 40 | +// DeviceFilter returns eBPF device filter program and its license string. |
| 41 | +func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) { |
| 42 | + // Generate the minimum ruleset for the device rules we are given. While we |
| 43 | + // don't care about minimum transitions in cgroupv2, using the emulator |
| 44 | + // gives us a guarantee that the behaviour of devices filtering is the same |
| 45 | + // as cgroupv1, including security hardenings to avoid misconfiguration |
| 46 | + // (such as punching holes in wildcard rules). |
| 47 | + emu := new(emulator) |
| 48 | + for _, rule := range rules { |
| 49 | + if err := emu.Apply(*rule); err != nil { |
| 50 | + return nil, "", err |
| 51 | + } |
| 52 | + } |
| 53 | + cleanRules, err := emu.Rules() |
| 54 | + if err != nil { |
| 55 | + return nil, "", err |
| 56 | + } |
| 57 | + |
| 58 | + p := &program{ |
| 59 | + defaultAllow: emu.IsBlacklist(), |
| 60 | + } |
| 61 | + p.init() |
| 62 | + |
| 63 | + for idx, rule := range cleanRules { |
| 64 | + if rule.Type == devices.WildcardDevice { |
| 65 | + // We can safely skip over wildcard entries because there should |
| 66 | + // only be one (at most) at the very start to instruct cgroupv1 to |
| 67 | + // go into allow-list mode. However we do double-check this here. |
| 68 | + if idx != 0 || rule.Allow != emu.IsBlacklist() { |
| 69 | + return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString()) |
| 70 | + } |
| 71 | + continue |
| 72 | + } |
| 73 | + if rule.Allow == p.defaultAllow { |
| 74 | + // There should be no rules which have an action equal to the |
| 75 | + // default action, the emulator removes those. |
| 76 | + return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString()) |
| 77 | + } |
| 78 | + if err := p.appendRule(rule); err != nil { |
| 79 | + return nil, "", err |
| 80 | + } |
| 81 | + } |
| 82 | + return p.finalize(), license, nil |
| 83 | +} |
| 84 | + |
| 85 | +type program struct { |
| 86 | + insts asm.Instructions |
| 87 | + defaultAllow bool |
| 88 | + blockID int |
| 89 | +} |
| 90 | + |
| 91 | +func (p *program) init() { |
| 92 | + // struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423 |
| 93 | + /* |
| 94 | + u32 access_type |
| 95 | + u32 major |
| 96 | + u32 minor |
| 97 | + */ |
| 98 | + // R2 <- type (lower 16 bit of u32 access_type at R1[0]) |
| 99 | + p.insts = append(p.insts, |
| 100 | + asm.LoadMem(asm.R2, asm.R1, 0, asm.Word), |
| 101 | + asm.And.Imm32(asm.R2, 0xFFFF)) |
| 102 | + |
| 103 | + // R3 <- access (upper 16 bit of u32 access_type at R1[0]) |
| 104 | + p.insts = append(p.insts, |
| 105 | + asm.LoadMem(asm.R3, asm.R1, 0, asm.Word), |
| 106 | + // RSh: bitwise shift right |
| 107 | + asm.RSh.Imm32(asm.R3, 16)) |
| 108 | + |
| 109 | + // R4 <- major (u32 major at R1[4]) |
| 110 | + p.insts = append(p.insts, |
| 111 | + asm.LoadMem(asm.R4, asm.R1, 4, asm.Word)) |
| 112 | + |
| 113 | + // R5 <- minor (u32 minor at R1[8]) |
| 114 | + p.insts = append(p.insts, |
| 115 | + asm.LoadMem(asm.R5, asm.R1, 8, asm.Word)) |
| 116 | +} |
| 117 | + |
| 118 | +// appendRule rule converts an OCI rule to the relevant eBPF block and adds it |
| 119 | +// to the in-progress filter program. In order to operate properly, it must be |
| 120 | +// called with a "clean" rule list (generated by devices.Emulator.Rules() -- |
| 121 | +// with any "a" rules removed). |
| 122 | +func (p *program) appendRule(rule *devices.Rule) error { |
| 123 | + if p.blockID < 0 { |
| 124 | + return errors.New("the program is finalized") |
| 125 | + } |
| 126 | + |
| 127 | + var bpfType int32 |
| 128 | + switch rule.Type { |
| 129 | + case devices.CharDevice: |
| 130 | + bpfType = int32(unix.BPF_DEVCG_DEV_CHAR) |
| 131 | + case devices.BlockDevice: |
| 132 | + bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK) |
| 133 | + default: |
| 134 | + // We do not permit 'a', nor any other types we don't know about. |
| 135 | + return fmt.Errorf("invalid type %q", string(rule.Type)) |
| 136 | + } |
| 137 | + if rule.Major > math.MaxUint32 { |
| 138 | + return fmt.Errorf("invalid major %d", rule.Major) |
| 139 | + } |
| 140 | + if rule.Minor > math.MaxUint32 { |
| 141 | + return fmt.Errorf("invalid minor %d", rule.Major) |
| 142 | + } |
| 143 | + hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1 |
| 144 | + hasMinor := rule.Minor >= 0 |
| 145 | + bpfAccess := int32(0) |
| 146 | + for _, r := range rule.Permissions { |
| 147 | + switch r { |
| 148 | + case 'r': |
| 149 | + bpfAccess |= unix.BPF_DEVCG_ACC_READ |
| 150 | + case 'w': |
| 151 | + bpfAccess |= unix.BPF_DEVCG_ACC_WRITE |
| 152 | + case 'm': |
| 153 | + bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD |
| 154 | + default: |
| 155 | + return fmt.Errorf("unknown device access %v", r) |
| 156 | + } |
| 157 | + } |
| 158 | + // If the access is rwm, skip the check. |
| 159 | + hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD) |
| 160 | + |
| 161 | + var ( |
| 162 | + blockSym = "block-" + strconv.Itoa(p.blockID) |
| 163 | + nextBlockSym = "block-" + strconv.Itoa(p.blockID+1) |
| 164 | + prevBlockLastIdx = len(p.insts) - 1 |
| 165 | + ) |
| 166 | + p.insts = append(p.insts, |
| 167 | + // if (R2 != bpfType) goto next |
| 168 | + asm.JNE.Imm(asm.R2, bpfType, nextBlockSym), |
| 169 | + ) |
| 170 | + if hasAccess { |
| 171 | + p.insts = append(p.insts, |
| 172 | + // if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next |
| 173 | + asm.Mov.Reg32(asm.R1, asm.R3), |
| 174 | + asm.And.Imm32(asm.R1, bpfAccess), |
| 175 | + asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym), |
| 176 | + ) |
| 177 | + } |
| 178 | + if hasMajor { |
| 179 | + p.insts = append(p.insts, |
| 180 | + // if (R4 != major) goto next |
| 181 | + asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym), |
| 182 | + ) |
| 183 | + } |
| 184 | + if hasMinor { |
| 185 | + p.insts = append(p.insts, |
| 186 | + // if (R5 != minor) goto next |
| 187 | + asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym), |
| 188 | + ) |
| 189 | + } |
| 190 | + p.insts = append(p.insts, acceptBlock(rule.Allow)...) |
| 191 | + // set blockSym to the first instruction we added in this iteration |
| 192 | + p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].WithSymbol(blockSym) |
| 193 | + p.blockID++ |
| 194 | + return nil |
| 195 | +} |
| 196 | + |
| 197 | +func (p *program) finalize() asm.Instructions { |
| 198 | + var v int32 |
| 199 | + if p.defaultAllow { |
| 200 | + v = 1 |
| 201 | + } |
| 202 | + blockSym := "block-" + strconv.Itoa(p.blockID) |
| 203 | + p.insts = append(p.insts, |
| 204 | + // R0 <- v |
| 205 | + asm.Mov.Imm32(asm.R0, v).WithSymbol(blockSym), |
| 206 | + asm.Return(), |
| 207 | + ) |
| 208 | + p.blockID = -1 |
| 209 | + return p.insts |
| 210 | +} |
| 211 | + |
| 212 | +func acceptBlock(accept bool) asm.Instructions { |
| 213 | + var v int32 |
| 214 | + if accept { |
| 215 | + v = 1 |
| 216 | + } |
| 217 | + return []asm.Instruction{ |
| 218 | + // R0 <- v |
| 219 | + asm.Mov.Imm32(asm.R0, v), |
| 220 | + asm.Return(), |
| 221 | + } |
| 222 | +} |
0 commit comments