Upgrade libcontainer dependency
Upgrade libcontainer dependency to pick up fix for CVE-2019-19921 .
This commit is contained in:
parent
3b84c2d464
commit
7423f38771
4
go.mod
4
go.mod
|
@ -27,6 +27,7 @@ require (
|
|||
github.com/aws/aws-sdk-go v1.25.41
|
||||
github.com/boltdb/bolt v1.3.1
|
||||
github.com/checkpoint-restore/go-criu v0.0.0-20190109184317-bdb7599cd87b // indirect
|
||||
github.com/cilium/ebpf v0.0.0-20200617135954-7acf5cc039f4 // indirect
|
||||
github.com/container-storage-interface/spec v1.2.0-rc1.0.20191021210849-a33ece0a8a9f
|
||||
github.com/containerd/console v1.0.0 // indirect
|
||||
github.com/containerd/go-cni v0.0.0-20190904155053-d20b7eebc7ee
|
||||
|
@ -34,7 +35,6 @@ require (
|
|||
github.com/containernetworking/plugins v0.7.3-0.20190501191748-2d6d46d308b2
|
||||
github.com/coreos/go-iptables v0.4.3-0.20190724151750-969b135e941d
|
||||
github.com/coreos/go-semver v0.3.0
|
||||
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect
|
||||
github.com/cyphar/filepath-securejoin v0.2.3-0.20190205144030-7efe413b52e1 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1
|
||||
github.com/docker/cli v0.0.0-20200303215952-eb310fca4956
|
||||
|
@ -107,7 +107,7 @@ require (
|
|||
github.com/oklog/run v1.0.1-0.20180308005104-6934b124db28 // indirect
|
||||
github.com/onsi/ginkgo v1.12.0
|
||||
github.com/onsi/gomega v1.9.0
|
||||
github.com/opencontainers/runc v1.0.0-rc7
|
||||
github.com/opencontainers/runc v1.0.0-rc90
|
||||
github.com/opencontainers/runtime-spec v1.0.2-0.20200307132014-f49fed0d6290 // indirect
|
||||
github.com/opencontainers/selinux v1.4.1-0.20200311111634-a2f0d9c2aafc // indirect
|
||||
github.com/pkg/errors v0.9.1
|
||||
|
|
8
go.sum
8
go.sum
|
@ -112,6 +112,8 @@ github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL
|
|||
github.com/checkpoint-restore/go-criu v0.0.0-20190109184317-bdb7599cd87b h1:T4nWG1TXIxeor8mAu5bFguPJgSIGhZqv/f0z55KCrJM=
|
||||
github.com/checkpoint-restore/go-criu v0.0.0-20190109184317-bdb7599cd87b/go.mod h1:TrMrLQfeENAPYPRsJuq3jsqdlRh3lvi6trTZJG8+tho=
|
||||
github.com/cheggaaa/pb v1.0.27/go.mod h1:pQciLPpbU0oxA0h+VJYYLxO+XeDQb5pZijXscXHm81s=
|
||||
github.com/cilium/ebpf v0.0.0-20200617135954-7acf5cc039f4 h1:ePYSGbwH5CQieB9tn+YAPIqbo+e2iZi1ykHJUJpVzyU=
|
||||
github.com/cilium/ebpf v0.0.0-20200617135954-7acf5cc039f4/go.mod h1:XT+cAw5wfvsodedcijoh1l9cf7v1x9FlFB/3VmF/O8s=
|
||||
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible h1:C29Ae4G5GtYyYMm1aztcyj/J5ckgJm2zwdDajFbx1NY=
|
||||
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
|
||||
github.com/circonus-labs/circonusllhist v0.1.3 h1:TJH+oke8D16535+jHExHj4nQvzlZrj7ug5D7I/orNUA=
|
||||
|
@ -150,8 +152,6 @@ github.com/coreos/go-systemd v0.0.0-20181012123002-c6f51f82210d/go.mod h1:F5haX7
|
|||
github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
|
||||
github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f h1:JOrtw2xFKzlg+cbHpyrpLDmnN1HqhBfnX7WDiW7eG2c=
|
||||
github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
|
||||
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg=
|
||||
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/cyphar/filepath-securejoin v0.2.3-0.20190205144030-7efe413b52e1 h1:dCqRswe3ZAwkQWdvFLwRqmJCpGP3DWb7bFogdqY3+QU=
|
||||
github.com/cyphar/filepath-securejoin v0.2.3-0.20190205144030-7efe413b52e1/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4=
|
||||
|
@ -580,8 +580,8 @@ github.com/opencontainers/image-spec v1.0.1 h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVo
|
|||
github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
|
||||
github.com/opencontainers/runc v0.0.0-20190115041553-12f6a991201f/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
|
||||
github.com/opencontainers/runc v0.1.1/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
|
||||
github.com/opencontainers/runc v1.0.0-rc7 h1:1eqSWkWzuGk04OAMJPPjan7yGMcI1SUjpr/sx3+q1+M=
|
||||
github.com/opencontainers/runc v1.0.0-rc7/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
|
||||
github.com/opencontainers/runc v1.0.0-rc90 h1:4+xo8mtWixbHoEm451+WJNUrq12o2/tDsyK9Vgc/NcA=
|
||||
github.com/opencontainers/runc v1.0.0-rc90/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
|
||||
github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opencontainers/runtime-spec v1.0.2-0.20200307132014-f49fed0d6290 h1:IV446MVY9HToNlBK/Hv7RLg5m0lu8F/dSv5r679ZbWM=
|
||||
github.com/opencontainers/runtime-spec v1.0.2-0.20200307132014-f49fed0d6290/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
# Binaries for programs and plugins
|
||||
*.exe
|
||||
*.exe~
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
|
||||
# Test binary, build with `go test -c`
|
||||
*.test
|
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
||||
*.out
|
|
@ -0,0 +1,46 @@
|
|||
# Contributor Covenant Code of Conduct
|
||||
|
||||
## Our Pledge
|
||||
|
||||
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
|
||||
|
||||
## Our Standards
|
||||
|
||||
Examples of behavior that contributes to creating a positive environment include:
|
||||
|
||||
* Using welcoming and inclusive language
|
||||
* Being respectful of differing viewpoints and experiences
|
||||
* Gracefully accepting constructive criticism
|
||||
* Focusing on what is best for the community
|
||||
* Showing empathy towards other community members
|
||||
|
||||
Examples of unacceptable behavior by participants include:
|
||||
|
||||
* The use of sexualized language or imagery and unwelcome sexual attention or advances
|
||||
* Trolling, insulting/derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or electronic address, without explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a professional setting
|
||||
|
||||
## Our Responsibilities
|
||||
|
||||
Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
|
||||
|
||||
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
|
||||
|
||||
## Scope
|
||||
|
||||
This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at nathanjsweet at gmail dot com or i at lmb dot io. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
|
||||
|
||||
Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
|
||||
|
||||
[homepage]: http://contributor-covenant.org
|
||||
[version]: http://contributor-covenant.org/version/1/4/
|
|
@ -0,0 +1,23 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2017 Nathan Sweet
|
||||
Copyright (c) 2018, 2019 Cloudflare
|
||||
Copyright (c) 2019 Authors of Cilium
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,207 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"syscall"
|
||||
|
||||
"github.com/cilium/ebpf/internal"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// MapABI are the attributes of a Map which are available across all supported kernels.
|
||||
type MapABI struct {
|
||||
Type MapType
|
||||
KeySize uint32
|
||||
ValueSize uint32
|
||||
MaxEntries uint32
|
||||
Flags uint32
|
||||
}
|
||||
|
||||
func newMapABIFromSpec(spec *MapSpec) *MapABI {
|
||||
return &MapABI{
|
||||
spec.Type,
|
||||
spec.KeySize,
|
||||
spec.ValueSize,
|
||||
spec.MaxEntries,
|
||||
spec.Flags,
|
||||
}
|
||||
}
|
||||
|
||||
func newMapABIFromFd(fd *internal.FD) (string, *MapABI, error) {
|
||||
info, err := bpfGetMapInfoByFD(fd)
|
||||
if err != nil {
|
||||
if xerrors.Is(err, syscall.EINVAL) {
|
||||
abi, err := newMapABIFromProc(fd)
|
||||
return "", abi, err
|
||||
}
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
return "", &MapABI{
|
||||
MapType(info.mapType),
|
||||
info.keySize,
|
||||
info.valueSize,
|
||||
info.maxEntries,
|
||||
info.flags,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func newMapABIFromProc(fd *internal.FD) (*MapABI, error) {
|
||||
var abi MapABI
|
||||
err := scanFdInfo(fd, map[string]interface{}{
|
||||
"map_type": &abi.Type,
|
||||
"key_size": &abi.KeySize,
|
||||
"value_size": &abi.ValueSize,
|
||||
"max_entries": &abi.MaxEntries,
|
||||
"map_flags": &abi.Flags,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &abi, nil
|
||||
}
|
||||
|
||||
// Equal returns true if two ABIs have the same values.
|
||||
func (abi *MapABI) Equal(other *MapABI) bool {
|
||||
switch {
|
||||
case abi.Type != other.Type:
|
||||
return false
|
||||
case abi.KeySize != other.KeySize:
|
||||
return false
|
||||
case abi.ValueSize != other.ValueSize:
|
||||
return false
|
||||
case abi.MaxEntries != other.MaxEntries:
|
||||
return false
|
||||
case abi.Flags != other.Flags:
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// ProgramABI are the attributes of a Program which are available across all supported kernels.
|
||||
type ProgramABI struct {
|
||||
Type ProgramType
|
||||
}
|
||||
|
||||
func newProgramABIFromSpec(spec *ProgramSpec) *ProgramABI {
|
||||
return &ProgramABI{
|
||||
spec.Type,
|
||||
}
|
||||
}
|
||||
|
||||
func newProgramABIFromFd(fd *internal.FD) (string, *ProgramABI, error) {
|
||||
info, err := bpfGetProgInfoByFD(fd)
|
||||
if err != nil {
|
||||
if xerrors.Is(err, syscall.EINVAL) {
|
||||
return newProgramABIFromProc(fd)
|
||||
}
|
||||
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
var name string
|
||||
if bpfName := internal.CString(info.name[:]); bpfName != "" {
|
||||
name = bpfName
|
||||
} else {
|
||||
name = internal.CString(info.tag[:])
|
||||
}
|
||||
|
||||
return name, &ProgramABI{
|
||||
Type: ProgramType(info.progType),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func newProgramABIFromProc(fd *internal.FD) (string, *ProgramABI, error) {
|
||||
var (
|
||||
abi ProgramABI
|
||||
name string
|
||||
)
|
||||
|
||||
err := scanFdInfo(fd, map[string]interface{}{
|
||||
"prog_type": &abi.Type,
|
||||
"prog_tag": &name,
|
||||
})
|
||||
if xerrors.Is(err, errMissingFields) {
|
||||
return "", nil, &internal.UnsupportedFeatureError{
|
||||
Name: "reading ABI from /proc/self/fdinfo",
|
||||
MinimumVersion: internal.Version{4, 11, 0},
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
return name, &abi, nil
|
||||
}
|
||||
|
||||
func scanFdInfo(fd *internal.FD, fields map[string]interface{}) error {
|
||||
raw, err := fd.Value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fh, err := os.Open(fmt.Sprintf("/proc/self/fdinfo/%d", raw))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
if err := scanFdInfoReader(fh, fields); err != nil {
|
||||
return xerrors.Errorf("%s: %w", fh.Name(), err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var errMissingFields = xerrors.New("missing fields")
|
||||
|
||||
func scanFdInfoReader(r io.Reader, fields map[string]interface{}) error {
|
||||
var (
|
||||
scanner = bufio.NewScanner(r)
|
||||
scanned int
|
||||
)
|
||||
|
||||
for scanner.Scan() {
|
||||
parts := bytes.SplitN(scanner.Bytes(), []byte("\t"), 2)
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
name := bytes.TrimSuffix(parts[0], []byte(":"))
|
||||
field, ok := fields[string(name)]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if n, err := fmt.Fscanln(bytes.NewReader(parts[1]), field); err != nil || n != 1 {
|
||||
return xerrors.Errorf("can't parse field %s: %v", name, err)
|
||||
}
|
||||
|
||||
scanned++
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if scanned != len(fields) {
|
||||
return errMissingFields
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Equal returns true if two ABIs have the same values.
|
||||
func (abi *ProgramABI) Equal(other *ProgramABI) bool {
|
||||
switch {
|
||||
case abi.Type != other.Type:
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,149 @@
|
|||
package asm
|
||||
|
||||
//go:generate stringer -output alu_string.go -type=Source,Endianness,ALUOp
|
||||
|
||||
// Source of ALU / ALU64 / Branch operations
|
||||
//
|
||||
// msb lsb
|
||||
// +----+-+---+
|
||||
// |op |S|cls|
|
||||
// +----+-+---+
|
||||
type Source uint8
|
||||
|
||||
const sourceMask OpCode = 0x08
|
||||
|
||||
// Source bitmask
|
||||
const (
|
||||
// InvalidSource is returned by getters when invoked
|
||||
// on non ALU / branch OpCodes.
|
||||
InvalidSource Source = 0xff
|
||||
// ImmSource src is from constant
|
||||
ImmSource Source = 0x00
|
||||
// RegSource src is from register
|
||||
RegSource Source = 0x08
|
||||
)
|
||||
|
||||
// The Endianness of a byte swap instruction.
|
||||
type Endianness uint8
|
||||
|
||||
const endianMask = sourceMask
|
||||
|
||||
// Endian flags
|
||||
const (
|
||||
InvalidEndian Endianness = 0xff
|
||||
// Convert to little endian
|
||||
LE Endianness = 0x00
|
||||
// Convert to big endian
|
||||
BE Endianness = 0x08
|
||||
)
|
||||
|
||||
// ALUOp are ALU / ALU64 operations
|
||||
//
|
||||
// msb lsb
|
||||
// +----+-+---+
|
||||
// |OP |s|cls|
|
||||
// +----+-+---+
|
||||
type ALUOp uint8
|
||||
|
||||
const aluMask OpCode = 0xf0
|
||||
|
||||
const (
|
||||
// InvalidALUOp is returned by getters when invoked
|
||||
// on non ALU OpCodes
|
||||
InvalidALUOp ALUOp = 0xff
|
||||
// Add - addition
|
||||
Add ALUOp = 0x00
|
||||
// Sub - subtraction
|
||||
Sub ALUOp = 0x10
|
||||
// Mul - multiplication
|
||||
Mul ALUOp = 0x20
|
||||
// Div - division
|
||||
Div ALUOp = 0x30
|
||||
// Or - bitwise or
|
||||
Or ALUOp = 0x40
|
||||
// And - bitwise and
|
||||
And ALUOp = 0x50
|
||||
// LSh - bitwise shift left
|
||||
LSh ALUOp = 0x60
|
||||
// RSh - bitwise shift right
|
||||
RSh ALUOp = 0x70
|
||||
// Neg - sign/unsign signing bit
|
||||
Neg ALUOp = 0x80
|
||||
// Mod - modulo
|
||||
Mod ALUOp = 0x90
|
||||
// Xor - bitwise xor
|
||||
Xor ALUOp = 0xa0
|
||||
// Mov - move value from one place to another
|
||||
Mov ALUOp = 0xb0
|
||||
// ArSh - arithmatic shift
|
||||
ArSh ALUOp = 0xc0
|
||||
// Swap - endian conversions
|
||||
Swap ALUOp = 0xd0
|
||||
)
|
||||
|
||||
// HostTo converts from host to another endianness.
|
||||
func HostTo(endian Endianness, dst Register, size Size) Instruction {
|
||||
var imm int64
|
||||
switch size {
|
||||
case Half:
|
||||
imm = 16
|
||||
case Word:
|
||||
imm = 32
|
||||
case DWord:
|
||||
imm = 64
|
||||
default:
|
||||
return Instruction{OpCode: InvalidOpCode}
|
||||
}
|
||||
|
||||
return Instruction{
|
||||
OpCode: OpCode(ALUClass).SetALUOp(Swap).SetSource(Source(endian)),
|
||||
Dst: dst,
|
||||
Constant: imm,
|
||||
}
|
||||
}
|
||||
|
||||
// Op returns the OpCode for an ALU operation with a given source.
|
||||
func (op ALUOp) Op(source Source) OpCode {
|
||||
return OpCode(ALU64Class).SetALUOp(op).SetSource(source)
|
||||
}
|
||||
|
||||
// Reg emits `dst (op) src`.
|
||||
func (op ALUOp) Reg(dst, src Register) Instruction {
|
||||
return Instruction{
|
||||
OpCode: op.Op(RegSource),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
}
|
||||
}
|
||||
|
||||
// Imm emits `dst (op) value`.
|
||||
func (op ALUOp) Imm(dst Register, value int32) Instruction {
|
||||
return Instruction{
|
||||
OpCode: op.Op(ImmSource),
|
||||
Dst: dst,
|
||||
Constant: int64(value),
|
||||
}
|
||||
}
|
||||
|
||||
// Op32 returns the OpCode for a 32-bit ALU operation with a given source.
|
||||
func (op ALUOp) Op32(source Source) OpCode {
|
||||
return OpCode(ALUClass).SetALUOp(op).SetSource(source)
|
||||
}
|
||||
|
||||
// Reg32 emits `dst (op) src`, zeroing the upper 32 bit of dst.
|
||||
func (op ALUOp) Reg32(dst, src Register) Instruction {
|
||||
return Instruction{
|
||||
OpCode: op.Op32(RegSource),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
}
|
||||
}
|
||||
|
||||
// Imm32 emits `dst (op) value`, zeroing the upper 32 bit of dst.
|
||||
func (op ALUOp) Imm32(dst Register, value int32) Instruction {
|
||||
return Instruction{
|
||||
OpCode: op.Op32(ImmSource),
|
||||
Dst: dst,
|
||||
Constant: int64(value),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
// Code generated by "stringer -output alu_string.go -type=Source,Endianness,ALUOp"; DO NOT EDIT.
|
||||
|
||||
package asm
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[InvalidSource-255]
|
||||
_ = x[ImmSource-0]
|
||||
_ = x[RegSource-8]
|
||||
}
|
||||
|
||||
const (
|
||||
_Source_name_0 = "ImmSource"
|
||||
_Source_name_1 = "RegSource"
|
||||
_Source_name_2 = "InvalidSource"
|
||||
)
|
||||
|
||||
func (i Source) String() string {
|
||||
switch {
|
||||
case i == 0:
|
||||
return _Source_name_0
|
||||
case i == 8:
|
||||
return _Source_name_1
|
||||
case i == 255:
|
||||
return _Source_name_2
|
||||
default:
|
||||
return "Source(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[InvalidEndian-255]
|
||||
_ = x[LE-0]
|
||||
_ = x[BE-8]
|
||||
}
|
||||
|
||||
const (
|
||||
_Endianness_name_0 = "LE"
|
||||
_Endianness_name_1 = "BE"
|
||||
_Endianness_name_2 = "InvalidEndian"
|
||||
)
|
||||
|
||||
func (i Endianness) String() string {
|
||||
switch {
|
||||
case i == 0:
|
||||
return _Endianness_name_0
|
||||
case i == 8:
|
||||
return _Endianness_name_1
|
||||
case i == 255:
|
||||
return _Endianness_name_2
|
||||
default:
|
||||
return "Endianness(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[InvalidALUOp-255]
|
||||
_ = x[Add-0]
|
||||
_ = x[Sub-16]
|
||||
_ = x[Mul-32]
|
||||
_ = x[Div-48]
|
||||
_ = x[Or-64]
|
||||
_ = x[And-80]
|
||||
_ = x[LSh-96]
|
||||
_ = x[RSh-112]
|
||||
_ = x[Neg-128]
|
||||
_ = x[Mod-144]
|
||||
_ = x[Xor-160]
|
||||
_ = x[Mov-176]
|
||||
_ = x[ArSh-192]
|
||||
_ = x[Swap-208]
|
||||
}
|
||||
|
||||
const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOp"
|
||||
|
||||
var _ALUOp_map = map[ALUOp]string{
|
||||
0: _ALUOp_name[0:3],
|
||||
16: _ALUOp_name[3:6],
|
||||
32: _ALUOp_name[6:9],
|
||||
48: _ALUOp_name[9:12],
|
||||
64: _ALUOp_name[12:14],
|
||||
80: _ALUOp_name[14:17],
|
||||
96: _ALUOp_name[17:20],
|
||||
112: _ALUOp_name[20:23],
|
||||
128: _ALUOp_name[23:26],
|
||||
144: _ALUOp_name[26:29],
|
||||
160: _ALUOp_name[29:32],
|
||||
176: _ALUOp_name[32:35],
|
||||
192: _ALUOp_name[35:39],
|
||||
208: _ALUOp_name[39:43],
|
||||
255: _ALUOp_name[43:55],
|
||||
}
|
||||
|
||||
func (i ALUOp) String() string {
|
||||
if str, ok := _ALUOp_map[i]; ok {
|
||||
return str
|
||||
}
|
||||
return "ALUOp(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
|
@ -0,0 +1,2 @@
|
|||
// Package asm is an assembler for eBPF bytecode.
|
||||
package asm
|
|
@ -0,0 +1,143 @@
|
|||
package asm
|
||||
|
||||
//go:generate stringer -output func_string.go -type=BuiltinFunc
|
||||
|
||||
// BuiltinFunc is a built-in eBPF function.
|
||||
type BuiltinFunc int32
|
||||
|
||||
// eBPF built-in functions
|
||||
//
|
||||
// You can renegerate this list using the following gawk script:
|
||||
//
|
||||
// /FN\(.+\),/ {
|
||||
// match($1, /\((.+)\)/, r)
|
||||
// split(r[1], p, "_")
|
||||
// printf "Fn"
|
||||
// for (i in p) {
|
||||
// printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2)
|
||||
// }
|
||||
// print ""
|
||||
// }
|
||||
//
|
||||
// The script expects include/uapi/linux/bpf.h as it's input.
|
||||
const (
|
||||
FnUnspec BuiltinFunc = iota
|
||||
FnMapLookupElem
|
||||
FnMapUpdateElem
|
||||
FnMapDeleteElem
|
||||
FnProbeRead
|
||||
FnKtimeGetNs
|
||||
FnTracePrintk
|
||||
FnGetPrandomU32
|
||||
FnGetSmpProcessorId
|
||||
FnSkbStoreBytes
|
||||
FnL3CsumReplace
|
||||
FnL4CsumReplace
|
||||
FnTailCall
|
||||
FnCloneRedirect
|
||||
FnGetCurrentPidTgid
|
||||
FnGetCurrentUidGid
|
||||
FnGetCurrentComm
|
||||
FnGetCgroupClassid
|
||||
FnSkbVlanPush
|
||||
FnSkbVlanPop
|
||||
FnSkbGetTunnelKey
|
||||
FnSkbSetTunnelKey
|
||||
FnPerfEventRead
|
||||
FnRedirect
|
||||
FnGetRouteRealm
|
||||
FnPerfEventOutput
|
||||
FnSkbLoadBytes
|
||||
FnGetStackid
|
||||
FnCsumDiff
|
||||
FnSkbGetTunnelOpt
|
||||
FnSkbSetTunnelOpt
|
||||
FnSkbChangeProto
|
||||
FnSkbChangeType
|
||||
FnSkbUnderCgroup
|
||||
FnGetHashRecalc
|
||||
FnGetCurrentTask
|
||||
FnProbeWriteUser
|
||||
FnCurrentTaskUnderCgroup
|
||||
FnSkbChangeTail
|
||||
FnSkbPullData
|
||||
FnCsumUpdate
|
||||
FnSetHashInvalid
|
||||
FnGetNumaNodeId
|
||||
FnSkbChangeHead
|
||||
FnXdpAdjustHead
|
||||
FnProbeReadStr
|
||||
FnGetSocketCookie
|
||||
FnGetSocketUid
|
||||
FnSetHash
|
||||
FnSetsockopt
|
||||
FnSkbAdjustRoom
|
||||
FnRedirectMap
|
||||
FnSkRedirectMap
|
||||
FnSockMapUpdate
|
||||
FnXdpAdjustMeta
|
||||
FnPerfEventReadValue
|
||||
FnPerfProgReadValue
|
||||
FnGetsockopt
|
||||
FnOverrideReturn
|
||||
FnSockOpsCbFlagsSet
|
||||
FnMsgRedirectMap
|
||||
FnMsgApplyBytes
|
||||
FnMsgCorkBytes
|
||||
FnMsgPullData
|
||||
FnBind
|
||||
FnXdpAdjustTail
|
||||
FnSkbGetXfrmState
|
||||
FnGetStack
|
||||
FnSkbLoadBytesRelative
|
||||
FnFibLookup
|
||||
FnSockHashUpdate
|
||||
FnMsgRedirectHash
|
||||
FnSkRedirectHash
|
||||
FnLwtPushEncap
|
||||
FnLwtSeg6StoreBytes
|
||||
FnLwtSeg6AdjustSrh
|
||||
FnLwtSeg6Action
|
||||
FnRcRepeat
|
||||
FnRcKeydown
|
||||
FnSkbCgroupId
|
||||
FnGetCurrentCgroupId
|
||||
FnGetLocalStorage
|
||||
FnSkSelectReuseport
|
||||
FnSkbAncestorCgroupId
|
||||
FnSkLookupTcp
|
||||
FnSkLookupUdp
|
||||
FnSkRelease
|
||||
FnMapPushElem
|
||||
FnMapPopElem
|
||||
FnMapPeekElem
|
||||
FnMsgPushData
|
||||
FnMsgPopData
|
||||
FnRcPointerRel
|
||||
FnSpinLock
|
||||
FnSpinUnlock
|
||||
FnSkFullsock
|
||||
FnTcpSock
|
||||
FnSkbEcnSetCe
|
||||
FnGetListenerSock
|
||||
FnSkcLookupTcp
|
||||
FnTcpCheckSyncookie
|
||||
FnSysctlGetName
|
||||
FnSysctlGetCurrentValue
|
||||
FnSysctlGetNewValue
|
||||
FnSysctlSetNewValue
|
||||
FnStrtol
|
||||
FnStrtoul
|
||||
FnSkStorageGet
|
||||
FnSkStorageDelete
|
||||
FnSendSignal
|
||||
FnTcpGenSyncookie
|
||||
)
|
||||
|
||||
// Call emits a function call.
|
||||
func (fn BuiltinFunc) Call() Instruction {
|
||||
return Instruction{
|
||||
OpCode: OpCode(JumpClass).SetJumpOp(Call),
|
||||
Constant: int64(fn),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,133 @@
|
|||
// Code generated by "stringer -output func_string.go -type=BuiltinFunc"; DO NOT EDIT.
|
||||
|
||||
package asm
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[FnUnspec-0]
|
||||
_ = x[FnMapLookupElem-1]
|
||||
_ = x[FnMapUpdateElem-2]
|
||||
_ = x[FnMapDeleteElem-3]
|
||||
_ = x[FnProbeRead-4]
|
||||
_ = x[FnKtimeGetNs-5]
|
||||
_ = x[FnTracePrintk-6]
|
||||
_ = x[FnGetPrandomU32-7]
|
||||
_ = x[FnGetSmpProcessorId-8]
|
||||
_ = x[FnSkbStoreBytes-9]
|
||||
_ = x[FnL3CsumReplace-10]
|
||||
_ = x[FnL4CsumReplace-11]
|
||||
_ = x[FnTailCall-12]
|
||||
_ = x[FnCloneRedirect-13]
|
||||
_ = x[FnGetCurrentPidTgid-14]
|
||||
_ = x[FnGetCurrentUidGid-15]
|
||||
_ = x[FnGetCurrentComm-16]
|
||||
_ = x[FnGetCgroupClassid-17]
|
||||
_ = x[FnSkbVlanPush-18]
|
||||
_ = x[FnSkbVlanPop-19]
|
||||
_ = x[FnSkbGetTunnelKey-20]
|
||||
_ = x[FnSkbSetTunnelKey-21]
|
||||
_ = x[FnPerfEventRead-22]
|
||||
_ = x[FnRedirect-23]
|
||||
_ = x[FnGetRouteRealm-24]
|
||||
_ = x[FnPerfEventOutput-25]
|
||||
_ = x[FnSkbLoadBytes-26]
|
||||
_ = x[FnGetStackid-27]
|
||||
_ = x[FnCsumDiff-28]
|
||||
_ = x[FnSkbGetTunnelOpt-29]
|
||||
_ = x[FnSkbSetTunnelOpt-30]
|
||||
_ = x[FnSkbChangeProto-31]
|
||||
_ = x[FnSkbChangeType-32]
|
||||
_ = x[FnSkbUnderCgroup-33]
|
||||
_ = x[FnGetHashRecalc-34]
|
||||
_ = x[FnGetCurrentTask-35]
|
||||
_ = x[FnProbeWriteUser-36]
|
||||
_ = x[FnCurrentTaskUnderCgroup-37]
|
||||
_ = x[FnSkbChangeTail-38]
|
||||
_ = x[FnSkbPullData-39]
|
||||
_ = x[FnCsumUpdate-40]
|
||||
_ = x[FnSetHashInvalid-41]
|
||||
_ = x[FnGetNumaNodeId-42]
|
||||
_ = x[FnSkbChangeHead-43]
|
||||
_ = x[FnXdpAdjustHead-44]
|
||||
_ = x[FnProbeReadStr-45]
|
||||
_ = x[FnGetSocketCookie-46]
|
||||
_ = x[FnGetSocketUid-47]
|
||||
_ = x[FnSetHash-48]
|
||||
_ = x[FnSetsockopt-49]
|
||||
_ = x[FnSkbAdjustRoom-50]
|
||||
_ = x[FnRedirectMap-51]
|
||||
_ = x[FnSkRedirectMap-52]
|
||||
_ = x[FnSockMapUpdate-53]
|
||||
_ = x[FnXdpAdjustMeta-54]
|
||||
_ = x[FnPerfEventReadValue-55]
|
||||
_ = x[FnPerfProgReadValue-56]
|
||||
_ = x[FnGetsockopt-57]
|
||||
_ = x[FnOverrideReturn-58]
|
||||
_ = x[FnSockOpsCbFlagsSet-59]
|
||||
_ = x[FnMsgRedirectMap-60]
|
||||
_ = x[FnMsgApplyBytes-61]
|
||||
_ = x[FnMsgCorkBytes-62]
|
||||
_ = x[FnMsgPullData-63]
|
||||
_ = x[FnBind-64]
|
||||
_ = x[FnXdpAdjustTail-65]
|
||||
_ = x[FnSkbGetXfrmState-66]
|
||||
_ = x[FnGetStack-67]
|
||||
_ = x[FnSkbLoadBytesRelative-68]
|
||||
_ = x[FnFibLookup-69]
|
||||
_ = x[FnSockHashUpdate-70]
|
||||
_ = x[FnMsgRedirectHash-71]
|
||||
_ = x[FnSkRedirectHash-72]
|
||||
_ = x[FnLwtPushEncap-73]
|
||||
_ = x[FnLwtSeg6StoreBytes-74]
|
||||
_ = x[FnLwtSeg6AdjustSrh-75]
|
||||
_ = x[FnLwtSeg6Action-76]
|
||||
_ = x[FnRcRepeat-77]
|
||||
_ = x[FnRcKeydown-78]
|
||||
_ = x[FnSkbCgroupId-79]
|
||||
_ = x[FnGetCurrentCgroupId-80]
|
||||
_ = x[FnGetLocalStorage-81]
|
||||
_ = x[FnSkSelectReuseport-82]
|
||||
_ = x[FnSkbAncestorCgroupId-83]
|
||||
_ = x[FnSkLookupTcp-84]
|
||||
_ = x[FnSkLookupUdp-85]
|
||||
_ = x[FnSkRelease-86]
|
||||
_ = x[FnMapPushElem-87]
|
||||
_ = x[FnMapPopElem-88]
|
||||
_ = x[FnMapPeekElem-89]
|
||||
_ = x[FnMsgPushData-90]
|
||||
_ = x[FnMsgPopData-91]
|
||||
_ = x[FnRcPointerRel-92]
|
||||
_ = x[FnSpinLock-93]
|
||||
_ = x[FnSpinUnlock-94]
|
||||
_ = x[FnSkFullsock-95]
|
||||
_ = x[FnTcpSock-96]
|
||||
_ = x[FnSkbEcnSetCe-97]
|
||||
_ = x[FnGetListenerSock-98]
|
||||
_ = x[FnSkcLookupTcp-99]
|
||||
_ = x[FnTcpCheckSyncookie-100]
|
||||
_ = x[FnSysctlGetName-101]
|
||||
_ = x[FnSysctlGetCurrentValue-102]
|
||||
_ = x[FnSysctlGetNewValue-103]
|
||||
_ = x[FnSysctlSetNewValue-104]
|
||||
_ = x[FnStrtol-105]
|
||||
_ = x[FnStrtoul-106]
|
||||
_ = x[FnSkStorageGet-107]
|
||||
_ = x[FnSkStorageDelete-108]
|
||||
_ = x[FnSendSignal-109]
|
||||
_ = x[FnTcpGenSyncookie-110]
|
||||
}
|
||||
|
||||
const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookie"
|
||||
|
||||
var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632}
|
||||
|
||||
func (i BuiltinFunc) String() string {
|
||||
if i < 0 || i >= BuiltinFunc(len(_BuiltinFunc_index)-1) {
|
||||
return "BuiltinFunc(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _BuiltinFunc_name[_BuiltinFunc_index[i]:_BuiltinFunc_index[i+1]]
|
||||
}
|
|
@ -0,0 +1,477 @@
|
|||
package asm
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// InstructionSize is the size of a BPF instruction in bytes
|
||||
const InstructionSize = 8
|
||||
|
||||
// Instruction is a single eBPF instruction.
|
||||
type Instruction struct {
|
||||
OpCode OpCode
|
||||
Dst Register
|
||||
Src Register
|
||||
Offset int16
|
||||
Constant int64
|
||||
Reference string
|
||||
Symbol string
|
||||
}
|
||||
|
||||
// Sym creates a symbol.
|
||||
func (ins Instruction) Sym(name string) Instruction {
|
||||
ins.Symbol = name
|
||||
return ins
|
||||
}
|
||||
|
||||
// Unmarshal decodes a BPF instruction.
|
||||
func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, error) {
|
||||
var bi bpfInstruction
|
||||
err := binary.Read(r, bo, &bi)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
ins.OpCode = bi.OpCode
|
||||
ins.Offset = bi.Offset
|
||||
ins.Constant = int64(bi.Constant)
|
||||
ins.Dst, ins.Src, err = bi.Registers.Unmarshal(bo)
|
||||
if err != nil {
|
||||
return 0, xerrors.Errorf("can't unmarshal registers: %s", err)
|
||||
}
|
||||
|
||||
if !bi.OpCode.isDWordLoad() {
|
||||
return InstructionSize, nil
|
||||
}
|
||||
|
||||
var bi2 bpfInstruction
|
||||
if err := binary.Read(r, bo, &bi2); err != nil {
|
||||
// No Wrap, to avoid io.EOF clash
|
||||
return 0, xerrors.New("64bit immediate is missing second half")
|
||||
}
|
||||
if bi2.OpCode != 0 || bi2.Offset != 0 || bi2.Registers != 0 {
|
||||
return 0, xerrors.New("64bit immediate has non-zero fields")
|
||||
}
|
||||
ins.Constant = int64(uint64(uint32(bi2.Constant))<<32 | uint64(uint32(bi.Constant)))
|
||||
|
||||
return 2 * InstructionSize, nil
|
||||
}
|
||||
|
||||
// Marshal encodes a BPF instruction.
|
||||
func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) {
|
||||
if ins.OpCode == InvalidOpCode {
|
||||
return 0, xerrors.New("invalid opcode")
|
||||
}
|
||||
|
||||
isDWordLoad := ins.OpCode.isDWordLoad()
|
||||
|
||||
cons := int32(ins.Constant)
|
||||
if isDWordLoad {
|
||||
// Encode least significant 32bit first for 64bit operations.
|
||||
cons = int32(uint32(ins.Constant))
|
||||
}
|
||||
|
||||
regs, err := newBPFRegisters(ins.Dst, ins.Src, bo)
|
||||
if err != nil {
|
||||
return 0, xerrors.Errorf("can't marshal registers: %s", err)
|
||||
}
|
||||
|
||||
bpfi := bpfInstruction{
|
||||
ins.OpCode,
|
||||
regs,
|
||||
ins.Offset,
|
||||
cons,
|
||||
}
|
||||
|
||||
if err := binary.Write(w, bo, &bpfi); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if !isDWordLoad {
|
||||
return InstructionSize, nil
|
||||
}
|
||||
|
||||
bpfi = bpfInstruction{
|
||||
Constant: int32(ins.Constant >> 32),
|
||||
}
|
||||
|
||||
if err := binary.Write(w, bo, &bpfi); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return 2 * InstructionSize, nil
|
||||
}
|
||||
|
||||
// RewriteMapPtr changes an instruction to use a new map fd.
|
||||
//
|
||||
// Returns an error if the instruction doesn't load a map.
|
||||
func (ins *Instruction) RewriteMapPtr(fd int) error {
|
||||
if !ins.OpCode.isDWordLoad() {
|
||||
return xerrors.Errorf("%s is not a 64 bit load", ins.OpCode)
|
||||
}
|
||||
|
||||
if ins.Src != PseudoMapFD && ins.Src != PseudoMapValue {
|
||||
return xerrors.New("not a load from a map")
|
||||
}
|
||||
|
||||
// Preserve the offset value for direct map loads.
|
||||
offset := uint64(ins.Constant) & (math.MaxUint32 << 32)
|
||||
rawFd := uint64(uint32(fd))
|
||||
ins.Constant = int64(offset | rawFd)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ins *Instruction) mapPtr() uint32 {
|
||||
return uint32(uint64(ins.Constant) & math.MaxUint32)
|
||||
}
|
||||
|
||||
// RewriteMapOffset changes the offset of a direct load from a map.
|
||||
//
|
||||
// Returns an error if the instruction is not a direct load.
|
||||
func (ins *Instruction) RewriteMapOffset(offset uint32) error {
|
||||
if !ins.OpCode.isDWordLoad() {
|
||||
return xerrors.Errorf("%s is not a 64 bit load", ins.OpCode)
|
||||
}
|
||||
|
||||
if ins.Src != PseudoMapValue {
|
||||
return xerrors.New("not a direct load from a map")
|
||||
}
|
||||
|
||||
fd := uint64(ins.Constant) & math.MaxUint32
|
||||
ins.Constant = int64(uint64(offset)<<32 | fd)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ins *Instruction) mapOffset() uint32 {
|
||||
return uint32(uint64(ins.Constant) >> 32)
|
||||
}
|
||||
|
||||
func (ins *Instruction) isLoadFromMap() bool {
|
||||
return ins.OpCode == LoadImmOp(DWord) && (ins.Src == PseudoMapFD || ins.Src == PseudoMapValue)
|
||||
}
|
||||
|
||||
// Format implements fmt.Formatter.
|
||||
func (ins Instruction) Format(f fmt.State, c rune) {
|
||||
if c != 'v' {
|
||||
fmt.Fprintf(f, "{UNRECOGNIZED: %c}", c)
|
||||
return
|
||||
}
|
||||
|
||||
op := ins.OpCode
|
||||
|
||||
if op == InvalidOpCode {
|
||||
fmt.Fprint(f, "INVALID")
|
||||
return
|
||||
}
|
||||
|
||||
// Omit trailing space for Exit
|
||||
if op.JumpOp() == Exit {
|
||||
fmt.Fprint(f, op)
|
||||
return
|
||||
}
|
||||
|
||||
if ins.isLoadFromMap() {
|
||||
fd := int32(ins.mapPtr())
|
||||
switch ins.Src {
|
||||
case PseudoMapFD:
|
||||
fmt.Fprintf(f, "LoadMapPtr dst: %s fd: %d", ins.Dst, fd)
|
||||
|
||||
case PseudoMapValue:
|
||||
fmt.Fprintf(f, "LoadMapValue dst: %s, fd: %d off: %d", ins.Dst, fd, ins.mapOffset())
|
||||
}
|
||||
|
||||
goto ref
|
||||
}
|
||||
|
||||
fmt.Fprintf(f, "%v ", op)
|
||||
switch cls := op.Class(); cls {
|
||||
case LdClass, LdXClass, StClass, StXClass:
|
||||
switch op.Mode() {
|
||||
case ImmMode:
|
||||
fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant)
|
||||
case AbsMode:
|
||||
fmt.Fprintf(f, "imm: %d", ins.Constant)
|
||||
case IndMode:
|
||||
fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant)
|
||||
case MemMode:
|
||||
fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant)
|
||||
case XAddMode:
|
||||
fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src)
|
||||
}
|
||||
|
||||
case ALU64Class, ALUClass:
|
||||
fmt.Fprintf(f, "dst: %s ", ins.Dst)
|
||||
if op.ALUOp() == Swap || op.Source() == ImmSource {
|
||||
fmt.Fprintf(f, "imm: %d", ins.Constant)
|
||||
} else {
|
||||
fmt.Fprintf(f, "src: %s", ins.Src)
|
||||
}
|
||||
|
||||
case JumpClass:
|
||||
switch jop := op.JumpOp(); jop {
|
||||
case Call:
|
||||
if ins.Src == PseudoCall {
|
||||
// bpf-to-bpf call
|
||||
fmt.Fprint(f, ins.Constant)
|
||||
} else {
|
||||
fmt.Fprint(f, BuiltinFunc(ins.Constant))
|
||||
}
|
||||
|
||||
default:
|
||||
fmt.Fprintf(f, "dst: %s off: %d ", ins.Dst, ins.Offset)
|
||||
if op.Source() == ImmSource {
|
||||
fmt.Fprintf(f, "imm: %d", ins.Constant)
|
||||
} else {
|
||||
fmt.Fprintf(f, "src: %s", ins.Src)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ref:
|
||||
if ins.Reference != "" {
|
||||
fmt.Fprintf(f, " <%s>", ins.Reference)
|
||||
}
|
||||
}
|
||||
|
||||
// Instructions is an eBPF program.
|
||||
type Instructions []Instruction
|
||||
|
||||
func (insns Instructions) String() string {
|
||||
return fmt.Sprint(insns)
|
||||
}
|
||||
|
||||
// RewriteMapPtr rewrites all loads of a specific map pointer to a new fd.
|
||||
//
|
||||
// Returns an error if the symbol isn't used, see IsUnreferencedSymbol.
|
||||
func (insns Instructions) RewriteMapPtr(symbol string, fd int) error {
|
||||
if symbol == "" {
|
||||
return xerrors.New("empty symbol")
|
||||
}
|
||||
|
||||
found := false
|
||||
for i := range insns {
|
||||
ins := &insns[i]
|
||||
if ins.Reference != symbol {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := ins.RewriteMapPtr(fd); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
found = true
|
||||
}
|
||||
|
||||
if !found {
|
||||
return &unreferencedSymbolError{symbol}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SymbolOffsets returns the set of symbols and their offset in
|
||||
// the instructions.
|
||||
func (insns Instructions) SymbolOffsets() (map[string]int, error) {
|
||||
offsets := make(map[string]int)
|
||||
|
||||
for i, ins := range insns {
|
||||
if ins.Symbol == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := offsets[ins.Symbol]; ok {
|
||||
return nil, xerrors.Errorf("duplicate symbol %s", ins.Symbol)
|
||||
}
|
||||
|
||||
offsets[ins.Symbol] = i
|
||||
}
|
||||
|
||||
return offsets, nil
|
||||
}
|
||||
|
||||
// ReferenceOffsets returns the set of references and their offset in
|
||||
// the instructions.
|
||||
func (insns Instructions) ReferenceOffsets() map[string][]int {
|
||||
offsets := make(map[string][]int)
|
||||
|
||||
for i, ins := range insns {
|
||||
if ins.Reference == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
offsets[ins.Reference] = append(offsets[ins.Reference], i)
|
||||
}
|
||||
|
||||
return offsets
|
||||
}
|
||||
|
||||
func (insns Instructions) marshalledOffsets() (map[string]int, error) {
|
||||
symbols := make(map[string]int)
|
||||
|
||||
marshalledPos := 0
|
||||
for _, ins := range insns {
|
||||
currentPos := marshalledPos
|
||||
marshalledPos += ins.OpCode.marshalledInstructions()
|
||||
|
||||
if ins.Symbol == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := symbols[ins.Symbol]; ok {
|
||||
return nil, xerrors.Errorf("duplicate symbol %s", ins.Symbol)
|
||||
}
|
||||
|
||||
symbols[ins.Symbol] = currentPos
|
||||
}
|
||||
|
||||
return symbols, nil
|
||||
}
|
||||
|
||||
// Format implements fmt.Formatter.
|
||||
//
|
||||
// You can control indentation of symbols by
|
||||
// specifying a width. Setting a precision controls the indentation of
|
||||
// instructions.
|
||||
// The default character is a tab, which can be overriden by specifying
|
||||
// the ' ' space flag.
|
||||
func (insns Instructions) Format(f fmt.State, c rune) {
|
||||
if c != 's' && c != 'v' {
|
||||
fmt.Fprintf(f, "{UNKNOWN FORMAT '%c'}", c)
|
||||
return
|
||||
}
|
||||
|
||||
// Precision is better in this case, because it allows
|
||||
// specifying 0 padding easily.
|
||||
padding, ok := f.Precision()
|
||||
if !ok {
|
||||
padding = 1
|
||||
}
|
||||
|
||||
indent := strings.Repeat("\t", padding)
|
||||
if f.Flag(' ') {
|
||||
indent = strings.Repeat(" ", padding)
|
||||
}
|
||||
|
||||
symPadding, ok := f.Width()
|
||||
if !ok {
|
||||
symPadding = padding - 1
|
||||
}
|
||||
if symPadding < 0 {
|
||||
symPadding = 0
|
||||
}
|
||||
|
||||
symIndent := strings.Repeat("\t", symPadding)
|
||||
if f.Flag(' ') {
|
||||
symIndent = strings.Repeat(" ", symPadding)
|
||||
}
|
||||
|
||||
// Figure out how many digits we need to represent the highest
|
||||
// offset.
|
||||
highestOffset := 0
|
||||
for _, ins := range insns {
|
||||
highestOffset += ins.OpCode.marshalledInstructions()
|
||||
}
|
||||
offsetWidth := int(math.Ceil(math.Log10(float64(highestOffset))))
|
||||
|
||||
offset := 0
|
||||
for _, ins := range insns {
|
||||
if ins.Symbol != "" {
|
||||
fmt.Fprintf(f, "%s%s:\n", symIndent, ins.Symbol)
|
||||
}
|
||||
fmt.Fprintf(f, "%s%*d: %v\n", indent, offsetWidth, offset, ins)
|
||||
offset += ins.OpCode.marshalledInstructions()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Marshal encodes a BPF program into the kernel format.
|
||||
func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error {
|
||||
absoluteOffsets, err := insns.marshalledOffsets()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
num := 0
|
||||
for i, ins := range insns {
|
||||
switch {
|
||||
case ins.OpCode.JumpOp() == Call && ins.Src == PseudoCall && ins.Constant == -1:
|
||||
// Rewrite bpf to bpf call
|
||||
offset, ok := absoluteOffsets[ins.Reference]
|
||||
if !ok {
|
||||
return xerrors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
|
||||
}
|
||||
|
||||
ins.Constant = int64(offset - num - 1)
|
||||
|
||||
case ins.OpCode.Class() == JumpClass && ins.Offset == -1:
|
||||
// Rewrite jump to label
|
||||
offset, ok := absoluteOffsets[ins.Reference]
|
||||
if !ok {
|
||||
return xerrors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
|
||||
}
|
||||
|
||||
ins.Offset = int16(offset - num - 1)
|
||||
}
|
||||
|
||||
n, err := ins.Marshal(w, bo)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("instruction %d: %w", i, err)
|
||||
}
|
||||
|
||||
num += int(n / InstructionSize)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type bpfInstruction struct {
|
||||
OpCode OpCode
|
||||
Registers bpfRegisters
|
||||
Offset int16
|
||||
Constant int32
|
||||
}
|
||||
|
||||
type bpfRegisters uint8
|
||||
|
||||
func newBPFRegisters(dst, src Register, bo binary.ByteOrder) (bpfRegisters, error) {
|
||||
switch bo {
|
||||
case binary.LittleEndian:
|
||||
return bpfRegisters((src << 4) | (dst & 0xF)), nil
|
||||
case binary.BigEndian:
|
||||
return bpfRegisters((dst << 4) | (src & 0xF)), nil
|
||||
default:
|
||||
return 0, xerrors.Errorf("unrecognized ByteOrder %T", bo)
|
||||
}
|
||||
}
|
||||
|
||||
func (r bpfRegisters) Unmarshal(bo binary.ByteOrder) (dst, src Register, err error) {
|
||||
switch bo {
|
||||
case binary.LittleEndian:
|
||||
return Register(r & 0xF), Register(r >> 4), nil
|
||||
case binary.BigEndian:
|
||||
return Register(r >> 4), Register(r & 0xf), nil
|
||||
default:
|
||||
return 0, 0, xerrors.Errorf("unrecognized ByteOrder %T", bo)
|
||||
}
|
||||
}
|
||||
|
||||
type unreferencedSymbolError struct {
|
||||
symbol string
|
||||
}
|
||||
|
||||
func (use *unreferencedSymbolError) Error() string {
|
||||
return fmt.Sprintf("unreferenced symbol %s", use.symbol)
|
||||
}
|
||||
|
||||
// IsUnreferencedSymbol returns true if err was caused by
|
||||
// an unreferenced symbol.
|
||||
func IsUnreferencedSymbol(err error) bool {
|
||||
_, ok := err.(*unreferencedSymbolError)
|
||||
return ok
|
||||
}
|
|
@ -0,0 +1,109 @@
|
|||
package asm
|
||||
|
||||
//go:generate stringer -output jump_string.go -type=JumpOp
|
||||
|
||||
// JumpOp affect control flow.
|
||||
//
|
||||
// msb lsb
|
||||
// +----+-+---+
|
||||
// |OP |s|cls|
|
||||
// +----+-+---+
|
||||
type JumpOp uint8
|
||||
|
||||
const jumpMask OpCode = aluMask
|
||||
|
||||
const (
|
||||
// InvalidJumpOp is returned by getters when invoked
|
||||
// on non branch OpCodes
|
||||
InvalidJumpOp JumpOp = 0xff
|
||||
// Ja jumps by offset unconditionally
|
||||
Ja JumpOp = 0x00
|
||||
// JEq jumps by offset if r == imm
|
||||
JEq JumpOp = 0x10
|
||||
// JGT jumps by offset if r > imm
|
||||
JGT JumpOp = 0x20
|
||||
// JGE jumps by offset if r >= imm
|
||||
JGE JumpOp = 0x30
|
||||
// JSet jumps by offset if r & imm
|
||||
JSet JumpOp = 0x40
|
||||
// JNE jumps by offset if r != imm
|
||||
JNE JumpOp = 0x50
|
||||
// JSGT jumps by offset if signed r > signed imm
|
||||
JSGT JumpOp = 0x60
|
||||
// JSGE jumps by offset if signed r >= signed imm
|
||||
JSGE JumpOp = 0x70
|
||||
// Call builtin or user defined function from imm
|
||||
Call JumpOp = 0x80
|
||||
// Exit ends execution, with value in r0
|
||||
Exit JumpOp = 0x90
|
||||
// JLT jumps by offset if r < imm
|
||||
JLT JumpOp = 0xa0
|
||||
// JLE jumps by offset if r <= imm
|
||||
JLE JumpOp = 0xb0
|
||||
// JSLT jumps by offset if signed r < signed imm
|
||||
JSLT JumpOp = 0xc0
|
||||
// JSLE jumps by offset if signed r <= signed imm
|
||||
JSLE JumpOp = 0xd0
|
||||
)
|
||||
|
||||
// Return emits an exit instruction.
|
||||
//
|
||||
// Requires a return value in R0.
|
||||
func Return() Instruction {
|
||||
return Instruction{
|
||||
OpCode: OpCode(JumpClass).SetJumpOp(Exit),
|
||||
}
|
||||
}
|
||||
|
||||
// Op returns the OpCode for a given jump source.
|
||||
func (op JumpOp) Op(source Source) OpCode {
|
||||
return OpCode(JumpClass).SetJumpOp(op).SetSource(source)
|
||||
}
|
||||
|
||||
// Imm compares dst to value, and adjusts PC by offset if the condition is fulfilled.
|
||||
func (op JumpOp) Imm(dst Register, value int32, label string) Instruction {
|
||||
if op == Exit || op == Call || op == Ja {
|
||||
return Instruction{OpCode: InvalidOpCode}
|
||||
}
|
||||
|
||||
return Instruction{
|
||||
OpCode: OpCode(JumpClass).SetJumpOp(op).SetSource(ImmSource),
|
||||
Dst: dst,
|
||||
Offset: -1,
|
||||
Constant: int64(value),
|
||||
Reference: label,
|
||||
}
|
||||
}
|
||||
|
||||
// Reg compares dst to src, and adjusts PC by offset if the condition is fulfilled.
|
||||
func (op JumpOp) Reg(dst, src Register, label string) Instruction {
|
||||
if op == Exit || op == Call || op == Ja {
|
||||
return Instruction{OpCode: InvalidOpCode}
|
||||
}
|
||||
|
||||
return Instruction{
|
||||
OpCode: OpCode(JumpClass).SetJumpOp(op).SetSource(RegSource),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
Offset: -1,
|
||||
Reference: label,
|
||||
}
|
||||
}
|
||||
|
||||
// Label adjusts PC to the address of the label.
|
||||
func (op JumpOp) Label(label string) Instruction {
|
||||
if op == Call {
|
||||
return Instruction{
|
||||
OpCode: OpCode(JumpClass).SetJumpOp(Call),
|
||||
Src: PseudoCall,
|
||||
Constant: -1,
|
||||
Reference: label,
|
||||
}
|
||||
}
|
||||
|
||||
return Instruction{
|
||||
OpCode: OpCode(JumpClass).SetJumpOp(op),
|
||||
Offset: -1,
|
||||
Reference: label,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
// Code generated by "stringer -output jump_string.go -type=JumpOp"; DO NOT EDIT.
|
||||
|
||||
package asm
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[InvalidJumpOp-255]
|
||||
_ = x[Ja-0]
|
||||
_ = x[JEq-16]
|
||||
_ = x[JGT-32]
|
||||
_ = x[JGE-48]
|
||||
_ = x[JSet-64]
|
||||
_ = x[JNE-80]
|
||||
_ = x[JSGT-96]
|
||||
_ = x[JSGE-112]
|
||||
_ = x[Call-128]
|
||||
_ = x[Exit-144]
|
||||
_ = x[JLT-160]
|
||||
_ = x[JLE-176]
|
||||
_ = x[JSLT-192]
|
||||
_ = x[JSLE-208]
|
||||
}
|
||||
|
||||
const _JumpOp_name = "JaJEqJGTJGEJSetJNEJSGTJSGECallExitJLTJLEJSLTJSLEInvalidJumpOp"
|
||||
|
||||
var _JumpOp_map = map[JumpOp]string{
|
||||
0: _JumpOp_name[0:2],
|
||||
16: _JumpOp_name[2:5],
|
||||
32: _JumpOp_name[5:8],
|
||||
48: _JumpOp_name[8:11],
|
||||
64: _JumpOp_name[11:15],
|
||||
80: _JumpOp_name[15:18],
|
||||
96: _JumpOp_name[18:22],
|
||||
112: _JumpOp_name[22:26],
|
||||
128: _JumpOp_name[26:30],
|
||||
144: _JumpOp_name[30:34],
|
||||
160: _JumpOp_name[34:37],
|
||||
176: _JumpOp_name[37:40],
|
||||
192: _JumpOp_name[40:44],
|
||||
208: _JumpOp_name[44:48],
|
||||
255: _JumpOp_name[48:61],
|
||||
}
|
||||
|
||||
func (i JumpOp) String() string {
|
||||
if str, ok := _JumpOp_map[i]; ok {
|
||||
return str
|
||||
}
|
||||
return "JumpOp(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
|
@ -0,0 +1,204 @@
|
|||
package asm
|
||||
|
||||
//go:generate stringer -output load_store_string.go -type=Mode,Size
|
||||
|
||||
// Mode for load and store operations
|
||||
//
|
||||
// msb lsb
|
||||
// +---+--+---+
|
||||
// |MDE|sz|cls|
|
||||
// +---+--+---+
|
||||
type Mode uint8
|
||||
|
||||
const modeMask OpCode = 0xe0
|
||||
|
||||
const (
|
||||
// InvalidMode is returned by getters when invoked
|
||||
// on non load / store OpCodes
|
||||
InvalidMode Mode = 0xff
|
||||
// ImmMode - immediate value
|
||||
ImmMode Mode = 0x00
|
||||
// AbsMode - immediate value + offset
|
||||
AbsMode Mode = 0x20
|
||||
// IndMode - indirect (imm+src)
|
||||
IndMode Mode = 0x40
|
||||
// MemMode - load from memory
|
||||
MemMode Mode = 0x60
|
||||
// XAddMode - add atomically across processors.
|
||||
XAddMode Mode = 0xc0
|
||||
)
|
||||
|
||||
// Size of load and store operations
|
||||
//
|
||||
// msb lsb
|
||||
// +---+--+---+
|
||||
// |mde|SZ|cls|
|
||||
// +---+--+---+
|
||||
type Size uint8
|
||||
|
||||
const sizeMask OpCode = 0x18
|
||||
|
||||
const (
|
||||
// InvalidSize is returned by getters when invoked
|
||||
// on non load / store OpCodes
|
||||
InvalidSize Size = 0xff
|
||||
// DWord - double word; 64 bits
|
||||
DWord Size = 0x18
|
||||
// Word - word; 32 bits
|
||||
Word Size = 0x00
|
||||
// Half - half-word; 16 bits
|
||||
Half Size = 0x08
|
||||
// Byte - byte; 8 bits
|
||||
Byte Size = 0x10
|
||||
)
|
||||
|
||||
// Sizeof returns the size in bytes.
|
||||
func (s Size) Sizeof() int {
|
||||
switch s {
|
||||
case DWord:
|
||||
return 8
|
||||
case Word:
|
||||
return 4
|
||||
case Half:
|
||||
return 2
|
||||
case Byte:
|
||||
return 1
|
||||
default:
|
||||
return -1
|
||||
}
|
||||
}
|
||||
|
||||
// LoadMemOp returns the OpCode to load a value of given size from memory.
|
||||
func LoadMemOp(size Size) OpCode {
|
||||
return OpCode(LdXClass).SetMode(MemMode).SetSize(size)
|
||||
}
|
||||
|
||||
// LoadMem emits `dst = *(size *)(src + offset)`.
|
||||
func LoadMem(dst, src Register, offset int16, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: LoadMemOp(size),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
Offset: offset,
|
||||
}
|
||||
}
|
||||
|
||||
// LoadImmOp returns the OpCode to load an immediate of given size.
|
||||
//
|
||||
// As of kernel 4.20, only DWord size is accepted.
|
||||
func LoadImmOp(size Size) OpCode {
|
||||
return OpCode(LdClass).SetMode(ImmMode).SetSize(size)
|
||||
}
|
||||
|
||||
// LoadImm emits `dst = (size)value`.
|
||||
//
|
||||
// As of kernel 4.20, only DWord size is accepted.
|
||||
func LoadImm(dst Register, value int64, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: LoadImmOp(size),
|
||||
Dst: dst,
|
||||
Constant: value,
|
||||
}
|
||||
}
|
||||
|
||||
// LoadMapPtr stores a pointer to a map in dst.
|
||||
func LoadMapPtr(dst Register, fd int) Instruction {
|
||||
if fd < 0 {
|
||||
return Instruction{OpCode: InvalidOpCode}
|
||||
}
|
||||
|
||||
return Instruction{
|
||||
OpCode: LoadImmOp(DWord),
|
||||
Dst: dst,
|
||||
Src: PseudoMapFD,
|
||||
Constant: int64(fd),
|
||||
}
|
||||
}
|
||||
|
||||
// LoadMapValue stores a pointer to the value at a certain offset of a map.
|
||||
func LoadMapValue(dst Register, fd int, offset uint32) Instruction {
|
||||
if fd < 0 {
|
||||
return Instruction{OpCode: InvalidOpCode}
|
||||
}
|
||||
|
||||
fdAndOffset := (uint64(offset) << 32) | uint64(uint32(fd))
|
||||
return Instruction{
|
||||
OpCode: LoadImmOp(DWord),
|
||||
Dst: dst,
|
||||
Src: PseudoMapValue,
|
||||
Constant: int64(fdAndOffset),
|
||||
}
|
||||
}
|
||||
|
||||
// LoadIndOp returns the OpCode for loading a value of given size from an sk_buff.
|
||||
func LoadIndOp(size Size) OpCode {
|
||||
return OpCode(LdClass).SetMode(IndMode).SetSize(size)
|
||||
}
|
||||
|
||||
// LoadInd emits `dst = ntoh(*(size *)(((sk_buff *)R6)->data + src + offset))`.
|
||||
func LoadInd(dst, src Register, offset int32, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: LoadIndOp(size),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
Constant: int64(offset),
|
||||
}
|
||||
}
|
||||
|
||||
// LoadAbsOp returns the OpCode for loading a value of given size from an sk_buff.
|
||||
func LoadAbsOp(size Size) OpCode {
|
||||
return OpCode(LdClass).SetMode(AbsMode).SetSize(size)
|
||||
}
|
||||
|
||||
// LoadAbs emits `r0 = ntoh(*(size *)(((sk_buff *)R6)->data + offset))`.
|
||||
func LoadAbs(offset int32, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: LoadAbsOp(size),
|
||||
Dst: R0,
|
||||
Constant: int64(offset),
|
||||
}
|
||||
}
|
||||
|
||||
// StoreMemOp returns the OpCode for storing a register of given size in memory.
|
||||
func StoreMemOp(size Size) OpCode {
|
||||
return OpCode(StXClass).SetMode(MemMode).SetSize(size)
|
||||
}
|
||||
|
||||
// StoreMem emits `*(size *)(dst + offset) = src`
|
||||
func StoreMem(dst Register, offset int16, src Register, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: StoreMemOp(size),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
Offset: offset,
|
||||
}
|
||||
}
|
||||
|
||||
// StoreImmOp returns the OpCode for storing an immediate of given size in memory.
|
||||
func StoreImmOp(size Size) OpCode {
|
||||
return OpCode(StClass).SetMode(MemMode).SetSize(size)
|
||||
}
|
||||
|
||||
// StoreImm emits `*(size *)(dst + offset) = value`.
|
||||
func StoreImm(dst Register, offset int16, value int64, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: StoreImmOp(size),
|
||||
Dst: dst,
|
||||
Offset: offset,
|
||||
Constant: value,
|
||||
}
|
||||
}
|
||||
|
||||
// StoreXAddOp returns the OpCode to atomically add a register to a value in memory.
|
||||
func StoreXAddOp(size Size) OpCode {
|
||||
return OpCode(StXClass).SetMode(XAddMode).SetSize(size)
|
||||
}
|
||||
|
||||
// StoreXAdd atomically adds src to *dst.
|
||||
func StoreXAdd(dst, src Register, size Size) Instruction {
|
||||
return Instruction{
|
||||
OpCode: StoreXAddOp(size),
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
// Code generated by "stringer -output load_store_string.go -type=Mode,Size"; DO NOT EDIT.
|
||||
|
||||
package asm
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[InvalidMode-255]
|
||||
_ = x[ImmMode-0]
|
||||
_ = x[AbsMode-32]
|
||||
_ = x[IndMode-64]
|
||||
_ = x[MemMode-96]
|
||||
_ = x[XAddMode-192]
|
||||
}
|
||||
|
||||
const (
|
||||
_Mode_name_0 = "ImmMode"
|
||||
_Mode_name_1 = "AbsMode"
|
||||
_Mode_name_2 = "IndMode"
|
||||
_Mode_name_3 = "MemMode"
|
||||
_Mode_name_4 = "XAddMode"
|
||||
_Mode_name_5 = "InvalidMode"
|
||||
)
|
||||
|
||||
func (i Mode) String() string {
|
||||
switch {
|
||||
case i == 0:
|
||||
return _Mode_name_0
|
||||
case i == 32:
|
||||
return _Mode_name_1
|
||||
case i == 64:
|
||||
return _Mode_name_2
|
||||
case i == 96:
|
||||
return _Mode_name_3
|
||||
case i == 192:
|
||||
return _Mode_name_4
|
||||
case i == 255:
|
||||
return _Mode_name_5
|
||||
default:
|
||||
return "Mode(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[InvalidSize-255]
|
||||
_ = x[DWord-24]
|
||||
_ = x[Word-0]
|
||||
_ = x[Half-8]
|
||||
_ = x[Byte-16]
|
||||
}
|
||||
|
||||
const (
|
||||
_Size_name_0 = "Word"
|
||||
_Size_name_1 = "Half"
|
||||
_Size_name_2 = "Byte"
|
||||
_Size_name_3 = "DWord"
|
||||
_Size_name_4 = "InvalidSize"
|
||||
)
|
||||
|
||||
func (i Size) String() string {
|
||||
switch {
|
||||
case i == 0:
|
||||
return _Size_name_0
|
||||
case i == 8:
|
||||
return _Size_name_1
|
||||
case i == 16:
|
||||
return _Size_name_2
|
||||
case i == 24:
|
||||
return _Size_name_3
|
||||
case i == 255:
|
||||
return _Size_name_4
|
||||
default:
|
||||
return "Size(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,237 @@
|
|||
package asm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
//go:generate stringer -output opcode_string.go -type=Class
|
||||
|
||||
type encoding int
|
||||
|
||||
const (
|
||||
unknownEncoding encoding = iota
|
||||
loadOrStore
|
||||
jumpOrALU
|
||||
)
|
||||
|
||||
// Class of operations
|
||||
//
|
||||
// msb lsb
|
||||
// +---+--+---+
|
||||
// | ?? |CLS|
|
||||
// +---+--+---+
|
||||
type Class uint8
|
||||
|
||||
const classMask OpCode = 0x07
|
||||
|
||||
const (
|
||||
// LdClass load memory
|
||||
LdClass Class = 0x00
|
||||
// LdXClass load memory from constant
|
||||
LdXClass Class = 0x01
|
||||
// StClass load register from memory
|
||||
StClass Class = 0x02
|
||||
// StXClass load register from constant
|
||||
StXClass Class = 0x03
|
||||
// ALUClass arithmetic operators
|
||||
ALUClass Class = 0x04
|
||||
// JumpClass jump operators
|
||||
JumpClass Class = 0x05
|
||||
// ALU64Class arithmetic in 64 bit mode
|
||||
ALU64Class Class = 0x07
|
||||
)
|
||||
|
||||
func (cls Class) encoding() encoding {
|
||||
switch cls {
|
||||
case LdClass, LdXClass, StClass, StXClass:
|
||||
return loadOrStore
|
||||
case ALU64Class, ALUClass, JumpClass:
|
||||
return jumpOrALU
|
||||
default:
|
||||
return unknownEncoding
|
||||
}
|
||||
}
|
||||
|
||||
// OpCode is a packed eBPF opcode.
|
||||
//
|
||||
// Its encoding is defined by a Class value:
|
||||
//
|
||||
// msb lsb
|
||||
// +----+-+---+
|
||||
// | ???? |CLS|
|
||||
// +----+-+---+
|
||||
type OpCode uint8
|
||||
|
||||
// InvalidOpCode is returned by setters on OpCode
|
||||
const InvalidOpCode OpCode = 0xff
|
||||
|
||||
// marshalledInstructions returns the number of BPF instructions required
|
||||
// to encode this opcode.
|
||||
func (op OpCode) marshalledInstructions() int {
|
||||
if op == LoadImmOp(DWord) {
|
||||
return 2
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
func (op OpCode) isDWordLoad() bool {
|
||||
return op == LoadImmOp(DWord)
|
||||
}
|
||||
|
||||
// Class returns the class of operation.
|
||||
func (op OpCode) Class() Class {
|
||||
return Class(op & classMask)
|
||||
}
|
||||
|
||||
// Mode returns the mode for load and store operations.
|
||||
func (op OpCode) Mode() Mode {
|
||||
if op.Class().encoding() != loadOrStore {
|
||||
return InvalidMode
|
||||
}
|
||||
return Mode(op & modeMask)
|
||||
}
|
||||
|
||||
// Size returns the size for load and store operations.
|
||||
func (op OpCode) Size() Size {
|
||||
if op.Class().encoding() != loadOrStore {
|
||||
return InvalidSize
|
||||
}
|
||||
return Size(op & sizeMask)
|
||||
}
|
||||
|
||||
// Source returns the source for branch and ALU operations.
|
||||
func (op OpCode) Source() Source {
|
||||
if op.Class().encoding() != jumpOrALU || op.ALUOp() == Swap {
|
||||
return InvalidSource
|
||||
}
|
||||
return Source(op & sourceMask)
|
||||
}
|
||||
|
||||
// ALUOp returns the ALUOp.
|
||||
func (op OpCode) ALUOp() ALUOp {
|
||||
if op.Class().encoding() != jumpOrALU {
|
||||
return InvalidALUOp
|
||||
}
|
||||
return ALUOp(op & aluMask)
|
||||
}
|
||||
|
||||
// Endianness returns the Endianness for a byte swap instruction.
|
||||
func (op OpCode) Endianness() Endianness {
|
||||
if op.ALUOp() != Swap {
|
||||
return InvalidEndian
|
||||
}
|
||||
return Endianness(op & endianMask)
|
||||
}
|
||||
|
||||
// JumpOp returns the JumpOp.
|
||||
func (op OpCode) JumpOp() JumpOp {
|
||||
if op.Class().encoding() != jumpOrALU {
|
||||
return InvalidJumpOp
|
||||
}
|
||||
return JumpOp(op & jumpMask)
|
||||
}
|
||||
|
||||
// SetMode sets the mode on load and store operations.
|
||||
//
|
||||
// Returns InvalidOpCode if op is of the wrong class.
|
||||
func (op OpCode) SetMode(mode Mode) OpCode {
|
||||
if op.Class().encoding() != loadOrStore || !valid(OpCode(mode), modeMask) {
|
||||
return InvalidOpCode
|
||||
}
|
||||
return (op & ^modeMask) | OpCode(mode)
|
||||
}
|
||||
|
||||
// SetSize sets the size on load and store operations.
|
||||
//
|
||||
// Returns InvalidOpCode if op is of the wrong class.
|
||||
func (op OpCode) SetSize(size Size) OpCode {
|
||||
if op.Class().encoding() != loadOrStore || !valid(OpCode(size), sizeMask) {
|
||||
return InvalidOpCode
|
||||
}
|
||||
return (op & ^sizeMask) | OpCode(size)
|
||||
}
|
||||
|
||||
// SetSource sets the source on jump and ALU operations.
|
||||
//
|
||||
// Returns InvalidOpCode if op is of the wrong class.
|
||||
func (op OpCode) SetSource(source Source) OpCode {
|
||||
if op.Class().encoding() != jumpOrALU || !valid(OpCode(source), sourceMask) {
|
||||
return InvalidOpCode
|
||||
}
|
||||
return (op & ^sourceMask) | OpCode(source)
|
||||
}
|
||||
|
||||
// SetALUOp sets the ALUOp on ALU operations.
|
||||
//
|
||||
// Returns InvalidOpCode if op is of the wrong class.
|
||||
func (op OpCode) SetALUOp(alu ALUOp) OpCode {
|
||||
class := op.Class()
|
||||
if (class != ALUClass && class != ALU64Class) || !valid(OpCode(alu), aluMask) {
|
||||
return InvalidOpCode
|
||||
}
|
||||
return (op & ^aluMask) | OpCode(alu)
|
||||
}
|
||||
|
||||
// SetJumpOp sets the JumpOp on jump operations.
|
||||
//
|
||||
// Returns InvalidOpCode if op is of the wrong class.
|
||||
func (op OpCode) SetJumpOp(jump JumpOp) OpCode {
|
||||
if op.Class() != JumpClass || !valid(OpCode(jump), jumpMask) {
|
||||
return InvalidOpCode
|
||||
}
|
||||
return (op & ^jumpMask) | OpCode(jump)
|
||||
}
|
||||
|
||||
func (op OpCode) String() string {
|
||||
var f strings.Builder
|
||||
|
||||
switch class := op.Class(); class {
|
||||
case LdClass, LdXClass, StClass, StXClass:
|
||||
f.WriteString(strings.TrimSuffix(class.String(), "Class"))
|
||||
|
||||
mode := op.Mode()
|
||||
f.WriteString(strings.TrimSuffix(mode.String(), "Mode"))
|
||||
|
||||
switch op.Size() {
|
||||
case DWord:
|
||||
f.WriteString("DW")
|
||||
case Word:
|
||||
f.WriteString("W")
|
||||
case Half:
|
||||
f.WriteString("H")
|
||||
case Byte:
|
||||
f.WriteString("B")
|
||||
}
|
||||
|
||||
case ALU64Class, ALUClass:
|
||||
f.WriteString(op.ALUOp().String())
|
||||
|
||||
if op.ALUOp() == Swap {
|
||||
// Width for Endian is controlled by Constant
|
||||
f.WriteString(op.Endianness().String())
|
||||
} else {
|
||||
if class == ALUClass {
|
||||
f.WriteString("32")
|
||||
}
|
||||
|
||||
f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
|
||||
}
|
||||
|
||||
case JumpClass:
|
||||
f.WriteString(op.JumpOp().String())
|
||||
if jop := op.JumpOp(); jop != Exit && jop != Call {
|
||||
f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
|
||||
}
|
||||
|
||||
default:
|
||||
fmt.Fprintf(&f, "OpCode(%#x)", uint8(op))
|
||||
}
|
||||
|
||||
return f.String()
|
||||
}
|
||||
|
||||
// valid returns true if all bits in value are covered by mask.
|
||||
func valid(value, mask OpCode) bool {
|
||||
return value & ^mask == 0
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
// Code generated by "stringer -output opcode_string.go -type=Class"; DO NOT EDIT.
|
||||
|
||||
package asm
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[LdClass-0]
|
||||
_ = x[LdXClass-1]
|
||||
_ = x[StClass-2]
|
||||
_ = x[StXClass-3]
|
||||
_ = x[ALUClass-4]
|
||||
_ = x[JumpClass-5]
|
||||
_ = x[ALU64Class-7]
|
||||
}
|
||||
|
||||
const (
|
||||
_Class_name_0 = "LdClassLdXClassStClassStXClassALUClassJumpClass"
|
||||
_Class_name_1 = "ALU64Class"
|
||||
)
|
||||
|
||||
var (
|
||||
_Class_index_0 = [...]uint8{0, 7, 15, 22, 30, 38, 47}
|
||||
)
|
||||
|
||||
func (i Class) String() string {
|
||||
switch {
|
||||
case 0 <= i && i <= 5:
|
||||
return _Class_name_0[_Class_index_0[i]:_Class_index_0[i+1]]
|
||||
case i == 7:
|
||||
return _Class_name_1
|
||||
default:
|
||||
return "Class(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package asm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Register is the source or destination of most operations.
|
||||
type Register uint8
|
||||
|
||||
// R0 contains return values.
|
||||
const R0 Register = 0
|
||||
|
||||
// Registers for function arguments.
|
||||
const (
|
||||
R1 Register = R0 + 1 + iota
|
||||
R2
|
||||
R3
|
||||
R4
|
||||
R5
|
||||
)
|
||||
|
||||
// Callee saved registers preserved by function calls.
|
||||
const (
|
||||
R6 Register = R5 + 1 + iota
|
||||
R7
|
||||
R8
|
||||
R9
|
||||
)
|
||||
|
||||
// Read-only frame pointer to access stack.
|
||||
const (
|
||||
R10 Register = R9 + 1
|
||||
RFP = R10
|
||||
)
|
||||
|
||||
// Pseudo registers used by 64bit loads and jumps
|
||||
const (
|
||||
PseudoMapFD = R1 // BPF_PSEUDO_MAP_FD
|
||||
PseudoMapValue = R2 // BPF_PSEUDO_MAP_VALUE
|
||||
PseudoCall = R1 // BPF_PSEUDO_CALL
|
||||
)
|
||||
|
||||
func (r Register) String() string {
|
||||
v := uint8(r)
|
||||
if v == 10 {
|
||||
return "rfp"
|
||||
}
|
||||
return fmt.Sprintf("r%d", v)
|
||||
}
|
|
@ -0,0 +1,293 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/internal"
|
||||
"github.com/cilium/ebpf/internal/btf"
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// CollectionOptions control loading a collection into the kernel.
|
||||
type CollectionOptions struct {
|
||||
Programs ProgramOptions
|
||||
}
|
||||
|
||||
// CollectionSpec describes a collection.
|
||||
type CollectionSpec struct {
|
||||
Maps map[string]*MapSpec
|
||||
Programs map[string]*ProgramSpec
|
||||
}
|
||||
|
||||
// Copy returns a recursive copy of the spec.
|
||||
func (cs *CollectionSpec) Copy() *CollectionSpec {
|
||||
if cs == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
cpy := CollectionSpec{
|
||||
Maps: make(map[string]*MapSpec, len(cs.Maps)),
|
||||
Programs: make(map[string]*ProgramSpec, len(cs.Programs)),
|
||||
}
|
||||
|
||||
for name, spec := range cs.Maps {
|
||||
cpy.Maps[name] = spec.Copy()
|
||||
}
|
||||
|
||||
for name, spec := range cs.Programs {
|
||||
cpy.Programs[name] = spec.Copy()
|
||||
}
|
||||
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// RewriteMaps replaces all references to specific maps.
|
||||
//
|
||||
// Use this function to use pre-existing maps instead of creating new ones
|
||||
// when calling NewCollection. Any named maps are removed from CollectionSpec.Maps.
|
||||
//
|
||||
// Returns an error if a named map isn't used in at least one program.
|
||||
func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error {
|
||||
for symbol, m := range maps {
|
||||
// have we seen a program that uses this symbol / map
|
||||
seen := false
|
||||
fd := m.FD()
|
||||
for progName, progSpec := range cs.Programs {
|
||||
err := progSpec.Instructions.RewriteMapPtr(symbol, fd)
|
||||
|
||||
switch {
|
||||
case err == nil:
|
||||
seen = true
|
||||
|
||||
case asm.IsUnreferencedSymbol(err):
|
||||
// Not all programs need to use the map
|
||||
|
||||
default:
|
||||
return xerrors.Errorf("program %s: %w", progName, err)
|
||||
}
|
||||
}
|
||||
|
||||
if !seen {
|
||||
return xerrors.Errorf("map %s not referenced by any programs", symbol)
|
||||
}
|
||||
|
||||
// Prevent NewCollection from creating rewritten maps
|
||||
delete(cs.Maps, symbol)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RewriteConstants replaces the value of multiple constants.
|
||||
//
|
||||
// The constant must be defined like so in the C program:
|
||||
//
|
||||
// static volatile const type foobar;
|
||||
// static volatile const type foobar = default;
|
||||
//
|
||||
// Replacement values must be of the same length as the C sizeof(type).
|
||||
// If necessary, they are marshalled according to the same rules as
|
||||
// map values.
|
||||
//
|
||||
// From Linux 5.5 the verifier will use constants to eliminate dead code.
|
||||
//
|
||||
// Returns an error if a constant doesn't exist.
|
||||
func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error {
|
||||
rodata := cs.Maps[".rodata"]
|
||||
if rodata == nil {
|
||||
return xerrors.New("missing .rodata section")
|
||||
}
|
||||
|
||||
if rodata.BTF == nil {
|
||||
return xerrors.New(".rodata section has no BTF")
|
||||
}
|
||||
|
||||
if n := len(rodata.Contents); n != 1 {
|
||||
return xerrors.Errorf("expected one key in .rodata, found %d", n)
|
||||
}
|
||||
|
||||
kv := rodata.Contents[0]
|
||||
value, ok := kv.Value.([]byte)
|
||||
if !ok {
|
||||
return xerrors.Errorf("first value in .rodata is %T not []byte", kv.Value)
|
||||
}
|
||||
|
||||
buf := make([]byte, len(value))
|
||||
copy(buf, value)
|
||||
|
||||
err := patchValue(buf, btf.MapValue(rodata.BTF), consts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rodata.Contents[0] = MapKV{kv.Key, buf}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Collection is a collection of Programs and Maps associated
|
||||
// with their symbols
|
||||
type Collection struct {
|
||||
Programs map[string]*Program
|
||||
Maps map[string]*Map
|
||||
}
|
||||
|
||||
// NewCollection creates a Collection from a specification.
|
||||
//
|
||||
// Only maps referenced by at least one of the programs are initialized.
|
||||
func NewCollection(spec *CollectionSpec) (*Collection, error) {
|
||||
return NewCollectionWithOptions(spec, CollectionOptions{})
|
||||
}
|
||||
|
||||
// NewCollectionWithOptions creates a Collection from a specification.
|
||||
//
|
||||
// Only maps referenced by at least one of the programs are initialized.
|
||||
func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (coll *Collection, err error) {
|
||||
var (
|
||||
maps = make(map[string]*Map)
|
||||
progs = make(map[string]*Program)
|
||||
btfs = make(map[*btf.Spec]*btf.Handle)
|
||||
)
|
||||
|
||||
defer func() {
|
||||
for _, btf := range btfs {
|
||||
btf.Close()
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
|
||||
for _, m := range maps {
|
||||
m.Close()
|
||||
}
|
||||
|
||||
for _, p := range progs {
|
||||
p.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
loadBTF := func(spec *btf.Spec) (*btf.Handle, error) {
|
||||
if btfs[spec] != nil {
|
||||
return btfs[spec], nil
|
||||
}
|
||||
|
||||
handle, err := btf.NewHandle(spec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
btfs[spec] = handle
|
||||
return handle, nil
|
||||
}
|
||||
|
||||
for mapName, mapSpec := range spec.Maps {
|
||||
var handle *btf.Handle
|
||||
if mapSpec.BTF != nil {
|
||||
handle, err = loadBTF(btf.MapSpec(mapSpec.BTF))
|
||||
if err != nil && !xerrors.Is(err, btf.ErrNotSupported) {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
m, err := newMapWithBTF(mapSpec, handle)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("map %s: %w", mapName, err)
|
||||
}
|
||||
maps[mapName] = m
|
||||
}
|
||||
|
||||
for progName, origProgSpec := range spec.Programs {
|
||||
progSpec := origProgSpec.Copy()
|
||||
|
||||
// Rewrite any reference to a valid map.
|
||||
for i := range progSpec.Instructions {
|
||||
ins := &progSpec.Instructions[i]
|
||||
|
||||
if ins.OpCode != asm.LoadImmOp(asm.DWord) || ins.Reference == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if uint32(ins.Constant) != math.MaxUint32 {
|
||||
// Don't overwrite maps already rewritten, users can
|
||||
// rewrite programs in the spec themselves
|
||||
continue
|
||||
}
|
||||
|
||||
m := maps[ins.Reference]
|
||||
if m == nil {
|
||||
return nil, xerrors.Errorf("program %s: missing map %s", progName, ins.Reference)
|
||||
}
|
||||
|
||||
fd := m.FD()
|
||||
if fd < 0 {
|
||||
return nil, xerrors.Errorf("map %s: %w", ins.Reference, internal.ErrClosedFd)
|
||||
}
|
||||
if err := ins.RewriteMapPtr(m.FD()); err != nil {
|
||||
return nil, xerrors.Errorf("progam %s: map %s: %w", progName, ins.Reference, err)
|
||||
}
|
||||
}
|
||||
|
||||
var handle *btf.Handle
|
||||
if progSpec.BTF != nil {
|
||||
handle, err = loadBTF(btf.ProgramSpec(progSpec.BTF))
|
||||
if err != nil && !xerrors.Is(err, btf.ErrNotSupported) {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
prog, err := newProgramWithBTF(progSpec, handle, opts.Programs)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("program %s: %w", progName, err)
|
||||
}
|
||||
progs[progName] = prog
|
||||
}
|
||||
|
||||
return &Collection{
|
||||
progs,
|
||||
maps,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// LoadCollection parses an object file and converts it to a collection.
|
||||
func LoadCollection(file string) (*Collection, error) {
|
||||
spec, err := LoadCollectionSpec(file)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewCollection(spec)
|
||||
}
|
||||
|
||||
// Close frees all maps and programs associated with the collection.
|
||||
//
|
||||
// The collection mustn't be used afterwards.
|
||||
func (coll *Collection) Close() {
|
||||
for _, prog := range coll.Programs {
|
||||
prog.Close()
|
||||
}
|
||||
for _, m := range coll.Maps {
|
||||
m.Close()
|
||||
}
|
||||
}
|
||||
|
||||
// DetachMap removes the named map from the Collection.
|
||||
//
|
||||
// This means that a later call to Close() will not affect this map.
|
||||
//
|
||||
// Returns nil if no map of that name exists.
|
||||
func (coll *Collection) DetachMap(name string) *Map {
|
||||
m := coll.Maps[name]
|
||||
delete(coll.Maps, name)
|
||||
return m
|
||||
}
|
||||
|
||||
// DetachProgram removes the named program from the Collection.
|
||||
//
|
||||
// This means that a later call to Close() will not affect this program.
|
||||
//
|
||||
// Returns nil if no program of that name exists.
|
||||
func (coll *Collection) DetachProgram(name string) *Program {
|
||||
p := coll.Programs[name]
|
||||
delete(coll.Programs, name)
|
||||
return p
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
// Package ebpf is a toolkit for working with eBPF programs.
|
||||
//
|
||||
// eBPF programs are small snippets of code which are executed directly
|
||||
// in a VM in the Linux kernel, which makes them very fast and flexible.
|
||||
// Many Linux subsystems now accept eBPF programs. This makes it possible
|
||||
// to implement highly application specific logic inside the kernel,
|
||||
// without having to modify the actual kernel itself.
|
||||
//
|
||||
// This package is designed for long-running processes which
|
||||
// want to use eBPF to implement part of their application logic. It has no
|
||||
// run-time dependencies outside of the library and the Linux kernel itself.
|
||||
// eBPF code should be compiled ahead of time using clang, and shipped with
|
||||
// your application as any other resource.
|
||||
//
|
||||
// This package doesn't include code required to attach eBPF to Linux
|
||||
// subsystems, since this varies per subsystem.
|
||||
package ebpf
|
|
@ -0,0 +1,698 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"debug/elf"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/internal"
|
||||
"github.com/cilium/ebpf/internal/btf"
|
||||
"github.com/cilium/ebpf/internal/unix"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
type elfCode struct {
|
||||
*elf.File
|
||||
symbols []elf.Symbol
|
||||
symbolsPerSection map[elf.SectionIndex]map[uint64]elf.Symbol
|
||||
license string
|
||||
version uint32
|
||||
}
|
||||
|
||||
// LoadCollectionSpec parses an ELF file into a CollectionSpec.
|
||||
func LoadCollectionSpec(file string) (*CollectionSpec, error) {
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
spec, err := LoadCollectionSpecFromReader(f)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("file %s: %w", file, err)
|
||||
}
|
||||
return spec, nil
|
||||
}
|
||||
|
||||
// LoadCollectionSpecFromReader parses an ELF file into a CollectionSpec.
|
||||
func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
|
||||
f, err := elf.NewFile(rd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
symbols, err := f.Symbols()
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("load symbols: %v", err)
|
||||
}
|
||||
|
||||
ec := &elfCode{f, symbols, symbolsPerSection(symbols), "", 0}
|
||||
|
||||
var (
|
||||
licenseSection *elf.Section
|
||||
versionSection *elf.Section
|
||||
btfMaps = make(map[elf.SectionIndex]*elf.Section)
|
||||
progSections = make(map[elf.SectionIndex]*elf.Section)
|
||||
relSections = make(map[elf.SectionIndex]*elf.Section)
|
||||
mapSections = make(map[elf.SectionIndex]*elf.Section)
|
||||
dataSections = make(map[elf.SectionIndex]*elf.Section)
|
||||
)
|
||||
|
||||
for i, sec := range ec.Sections {
|
||||
switch {
|
||||
case strings.HasPrefix(sec.Name, "license"):
|
||||
licenseSection = sec
|
||||
case strings.HasPrefix(sec.Name, "version"):
|
||||
versionSection = sec
|
||||
case strings.HasPrefix(sec.Name, "maps"):
|
||||
mapSections[elf.SectionIndex(i)] = sec
|
||||
case sec.Name == ".maps":
|
||||
btfMaps[elf.SectionIndex(i)] = sec
|
||||
case sec.Name == ".bss" || sec.Name == ".rodata" || sec.Name == ".data":
|
||||
dataSections[elf.SectionIndex(i)] = sec
|
||||
case sec.Type == elf.SHT_REL:
|
||||
if int(sec.Info) >= len(ec.Sections) {
|
||||
return nil, xerrors.Errorf("found relocation section %v for missing section %v", i, sec.Info)
|
||||
}
|
||||
|
||||
// Store relocations under the section index of the target
|
||||
idx := elf.SectionIndex(sec.Info)
|
||||
if relSections[idx] != nil {
|
||||
return nil, xerrors.Errorf("section %d has multiple relocation sections", sec.Info)
|
||||
}
|
||||
relSections[idx] = sec
|
||||
case sec.Type == elf.SHT_PROGBITS && (sec.Flags&elf.SHF_EXECINSTR) != 0 && sec.Size > 0:
|
||||
progSections[elf.SectionIndex(i)] = sec
|
||||
}
|
||||
}
|
||||
|
||||
ec.license, err = loadLicense(licenseSection)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("load license: %w", err)
|
||||
}
|
||||
|
||||
ec.version, err = loadVersion(versionSection, ec.ByteOrder)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("load version: %w", err)
|
||||
}
|
||||
|
||||
btfSpec, err := btf.LoadSpecFromReader(rd)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("load BTF: %w", err)
|
||||
}
|
||||
|
||||
maps := make(map[string]*MapSpec)
|
||||
if err := ec.loadMaps(maps, mapSections); err != nil {
|
||||
return nil, xerrors.Errorf("load maps: %w", err)
|
||||
}
|
||||
|
||||
if len(btfMaps) > 0 {
|
||||
if err := ec.loadBTFMaps(maps, btfMaps, btfSpec); err != nil {
|
||||
return nil, xerrors.Errorf("load BTF maps: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(dataSections) > 0 {
|
||||
if err := ec.loadDataSections(maps, dataSections, btfSpec); err != nil {
|
||||
return nil, xerrors.Errorf("load data sections: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
relocations, err := ec.loadRelocations(relSections)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("load relocations: %w", err)
|
||||
}
|
||||
|
||||
progs, err := ec.loadPrograms(progSections, relocations, btfSpec)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("load programs: %w", err)
|
||||
}
|
||||
|
||||
return &CollectionSpec{maps, progs}, nil
|
||||
}
|
||||
|
||||
func loadLicense(sec *elf.Section) (string, error) {
|
||||
if sec == nil {
|
||||
return "", xerrors.New("missing license section")
|
||||
}
|
||||
data, err := sec.Data()
|
||||
if err != nil {
|
||||
return "", xerrors.Errorf("section %s: %v", sec.Name, err)
|
||||
}
|
||||
return string(bytes.TrimRight(data, "\000")), nil
|
||||
}
|
||||
|
||||
func loadVersion(sec *elf.Section, bo binary.ByteOrder) (uint32, error) {
|
||||
if sec == nil {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
var version uint32
|
||||
if err := binary.Read(sec.Open(), bo, &version); err != nil {
|
||||
return 0, xerrors.Errorf("section %s: %v", sec.Name, err)
|
||||
}
|
||||
return version, nil
|
||||
}
|
||||
|
||||
func (ec *elfCode) loadPrograms(progSections map[elf.SectionIndex]*elf.Section, relocations map[elf.SectionIndex]map[uint64]elf.Symbol, btf *btf.Spec) (map[string]*ProgramSpec, error) {
|
||||
var (
|
||||
progs []*ProgramSpec
|
||||
libs []*ProgramSpec
|
||||
)
|
||||
|
||||
for idx, sec := range progSections {
|
||||
syms := ec.symbolsPerSection[idx]
|
||||
if len(syms) == 0 {
|
||||
return nil, xerrors.Errorf("section %v: missing symbols", sec.Name)
|
||||
}
|
||||
|
||||
funcSym, ok := syms[0]
|
||||
if !ok {
|
||||
return nil, xerrors.Errorf("section %v: no label at start", sec.Name)
|
||||
}
|
||||
|
||||
insns, length, err := ec.loadInstructions(sec, syms, relocations[idx])
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("program %s: can't unmarshal instructions: %w", funcSym.Name, err)
|
||||
}
|
||||
|
||||
progType, attachType, attachTo := getProgType(sec.Name)
|
||||
|
||||
spec := &ProgramSpec{
|
||||
Name: funcSym.Name,
|
||||
Type: progType,
|
||||
AttachType: attachType,
|
||||
AttachTo: attachTo,
|
||||
License: ec.license,
|
||||
KernelVersion: ec.version,
|
||||
Instructions: insns,
|
||||
ByteOrder: ec.ByteOrder,
|
||||
}
|
||||
|
||||
if btf != nil {
|
||||
spec.BTF, err = btf.Program(sec.Name, length)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("BTF for section %s (program %s): %w", sec.Name, funcSym.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
if spec.Type == UnspecifiedProgram {
|
||||
// There is no single name we can use for "library" sections,
|
||||
// since they may contain multiple functions. We'll decode the
|
||||
// labels they contain later on, and then link sections that way.
|
||||
libs = append(libs, spec)
|
||||
} else {
|
||||
progs = append(progs, spec)
|
||||
}
|
||||
}
|
||||
|
||||
res := make(map[string]*ProgramSpec, len(progs))
|
||||
for _, prog := range progs {
|
||||
err := link(prog, libs)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("program %s: %w", prog.Name, err)
|
||||
}
|
||||
res[prog.Name] = prog
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (ec *elfCode) loadInstructions(section *elf.Section, symbols, relocations map[uint64]elf.Symbol) (asm.Instructions, uint64, error) {
|
||||
var (
|
||||
r = section.Open()
|
||||
insns asm.Instructions
|
||||
offset uint64
|
||||
)
|
||||
for {
|
||||
var ins asm.Instruction
|
||||
n, err := ins.Unmarshal(r, ec.ByteOrder)
|
||||
if err == io.EOF {
|
||||
return insns, offset, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, 0, xerrors.Errorf("offset %d: %w", offset, err)
|
||||
}
|
||||
|
||||
ins.Symbol = symbols[offset].Name
|
||||
|
||||
if rel, ok := relocations[offset]; ok {
|
||||
if err = ec.relocateInstruction(&ins, rel); err != nil {
|
||||
return nil, 0, xerrors.Errorf("offset %d: can't relocate instruction: %w", offset, err)
|
||||
}
|
||||
}
|
||||
|
||||
insns = append(insns, ins)
|
||||
offset += n
|
||||
}
|
||||
}
|
||||
|
||||
func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) error {
|
||||
var (
|
||||
typ = elf.ST_TYPE(rel.Info)
|
||||
bind = elf.ST_BIND(rel.Info)
|
||||
name = rel.Name
|
||||
)
|
||||
|
||||
if typ == elf.STT_SECTION {
|
||||
// Symbols with section type do not have a name set. Get it
|
||||
// from the section itself.
|
||||
idx := int(rel.Section)
|
||||
if idx > len(ec.Sections) {
|
||||
return xerrors.New("out-of-bounds section index")
|
||||
}
|
||||
|
||||
name = ec.Sections[idx].Name
|
||||
}
|
||||
|
||||
outer:
|
||||
switch {
|
||||
case ins.OpCode == asm.LoadImmOp(asm.DWord):
|
||||
// There are two distinct types of a load from a map:
|
||||
// a direct one, where the value is extracted without
|
||||
// a call to map_lookup_elem in eBPF, and an indirect one
|
||||
// that goes via the helper. They are distinguished by
|
||||
// different relocations.
|
||||
switch typ {
|
||||
case elf.STT_SECTION:
|
||||
// This is a direct load since the referenced symbol is a
|
||||
// section. Weirdly, the offset of the real symbol in the
|
||||
// section is encoded in the instruction stream.
|
||||
if bind != elf.STB_LOCAL {
|
||||
return xerrors.Errorf("direct load: %s: unsupported relocation %s", name, bind)
|
||||
}
|
||||
|
||||
// For some reason, clang encodes the offset of the symbol its
|
||||
// section in the first basic BPF instruction, while the kernel
|
||||
// expects it in the second one.
|
||||
ins.Constant <<= 32
|
||||
ins.Src = asm.PseudoMapValue
|
||||
|
||||
case elf.STT_NOTYPE:
|
||||
if bind == elf.STB_GLOBAL && rel.Section == elf.SHN_UNDEF {
|
||||
// This is a relocation generated by inline assembly.
|
||||
// We can't do more than assigning ins.Reference.
|
||||
break outer
|
||||
}
|
||||
|
||||
// This is an ELF generated on clang < 8, which doesn't tag
|
||||
// relocations appropriately.
|
||||
fallthrough
|
||||
|
||||
case elf.STT_OBJECT:
|
||||
if bind != elf.STB_GLOBAL {
|
||||
return xerrors.Errorf("load: %s: unsupported binding: %s", name, bind)
|
||||
}
|
||||
|
||||
ins.Src = asm.PseudoMapFD
|
||||
|
||||
default:
|
||||
return xerrors.Errorf("load: %s: unsupported relocation: %s", name, typ)
|
||||
}
|
||||
|
||||
// Mark the instruction as needing an update when creating the
|
||||
// collection.
|
||||
if err := ins.RewriteMapPtr(-1); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
case ins.OpCode.JumpOp() == asm.Call:
|
||||
if ins.Src != asm.PseudoCall {
|
||||
return xerrors.Errorf("call: %s: incorrect source register", name)
|
||||
}
|
||||
|
||||
switch typ {
|
||||
case elf.STT_NOTYPE, elf.STT_FUNC:
|
||||
if bind != elf.STB_GLOBAL {
|
||||
return xerrors.Errorf("call: %s: unsupported binding: %s", name, bind)
|
||||
}
|
||||
|
||||
case elf.STT_SECTION:
|
||||
if bind != elf.STB_LOCAL {
|
||||
return xerrors.Errorf("call: %s: unsupported binding: %s", name, bind)
|
||||
}
|
||||
|
||||
// The function we want to call is in the indicated section,
|
||||
// at the offset encoded in the instruction itself. Reverse
|
||||
// the calculation to find the real function we're looking for.
|
||||
// A value of -1 references the first instruction in the section.
|
||||
offset := int64(int32(ins.Constant)+1) * asm.InstructionSize
|
||||
if offset < 0 {
|
||||
return xerrors.Errorf("call: %s: invalid offset %d", name, offset)
|
||||
}
|
||||
|
||||
sym, ok := ec.symbolsPerSection[rel.Section][uint64(offset)]
|
||||
if !ok {
|
||||
return xerrors.Errorf("call: %s: no symbol at offset %d", name, offset)
|
||||
}
|
||||
|
||||
ins.Constant = -1
|
||||
name = sym.Name
|
||||
|
||||
default:
|
||||
return xerrors.Errorf("call: %s: invalid symbol type %s", name, typ)
|
||||
}
|
||||
|
||||
default:
|
||||
return xerrors.Errorf("relocation for unsupported instruction: %s", ins.OpCode)
|
||||
}
|
||||
|
||||
ins.Reference = name
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ec *elfCode) loadMaps(maps map[string]*MapSpec, mapSections map[elf.SectionIndex]*elf.Section) error {
|
||||
for idx, sec := range mapSections {
|
||||
syms := ec.symbolsPerSection[idx]
|
||||
if len(syms) == 0 {
|
||||
return xerrors.Errorf("section %v: no symbols", sec.Name)
|
||||
}
|
||||
|
||||
if sec.Size%uint64(len(syms)) != 0 {
|
||||
return xerrors.Errorf("section %v: map descriptors are not of equal size", sec.Name)
|
||||
}
|
||||
|
||||
var (
|
||||
r = sec.Open()
|
||||
size = sec.Size / uint64(len(syms))
|
||||
)
|
||||
for i, offset := 0, uint64(0); i < len(syms); i, offset = i+1, offset+size {
|
||||
mapSym, ok := syms[offset]
|
||||
if !ok {
|
||||
return xerrors.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset)
|
||||
}
|
||||
|
||||
if maps[mapSym.Name] != nil {
|
||||
return xerrors.Errorf("section %v: map %v already exists", sec.Name, mapSym)
|
||||
}
|
||||
|
||||
lr := io.LimitReader(r, int64(size))
|
||||
|
||||
spec := MapSpec{
|
||||
Name: SanitizeName(mapSym.Name, -1),
|
||||
}
|
||||
switch {
|
||||
case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil:
|
||||
return xerrors.Errorf("map %v: missing type", mapSym)
|
||||
case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil:
|
||||
return xerrors.Errorf("map %v: missing key size", mapSym)
|
||||
case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil:
|
||||
return xerrors.Errorf("map %v: missing value size", mapSym)
|
||||
case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil:
|
||||
return xerrors.Errorf("map %v: missing max entries", mapSym)
|
||||
case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil:
|
||||
return xerrors.Errorf("map %v: missing flags", mapSym)
|
||||
}
|
||||
|
||||
if _, err := io.Copy(internal.DiscardZeroes{}, lr); err != nil {
|
||||
return xerrors.Errorf("map %v: unknown and non-zero fields in definition", mapSym)
|
||||
}
|
||||
|
||||
maps[mapSym.Name] = &spec
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec, mapSections map[elf.SectionIndex]*elf.Section, spec *btf.Spec) error {
|
||||
if spec == nil {
|
||||
return xerrors.Errorf("missing BTF")
|
||||
}
|
||||
|
||||
for idx, sec := range mapSections {
|
||||
syms := ec.symbolsPerSection[idx]
|
||||
if len(syms) == 0 {
|
||||
return xerrors.Errorf("section %v: no symbols", sec.Name)
|
||||
}
|
||||
|
||||
for _, sym := range syms {
|
||||
name := sym.Name
|
||||
if maps[name] != nil {
|
||||
return xerrors.Errorf("section %v: map %v already exists", sec.Name, sym)
|
||||
}
|
||||
|
||||
btfMap, btfMapMembers, err := spec.Map(name)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("map %v: can't get BTF: %w", name, err)
|
||||
}
|
||||
|
||||
spec, err := mapSpecFromBTF(btfMap, btfMapMembers)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("map %v: %w", name, err)
|
||||
}
|
||||
|
||||
maps[name] = spec
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func mapSpecFromBTF(btfMap *btf.Map, btfMapMembers []btf.Member) (*MapSpec, error) {
|
||||
var (
|
||||
mapType, flags, maxEntries uint32
|
||||
err error
|
||||
)
|
||||
for _, member := range btfMapMembers {
|
||||
switch member.Name {
|
||||
case "type":
|
||||
mapType, err = uintFromBTF(member.Type)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't get type: %w", err)
|
||||
}
|
||||
|
||||
case "map_flags":
|
||||
flags, err = uintFromBTF(member.Type)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't get BTF map flags: %w", err)
|
||||
}
|
||||
|
||||
case "max_entries":
|
||||
maxEntries, err = uintFromBTF(member.Type)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't get BTF map max entries: %w", err)
|
||||
}
|
||||
|
||||
case "key":
|
||||
case "value":
|
||||
default:
|
||||
return nil, xerrors.Errorf("unrecognized field %s in BTF map definition", member.Name)
|
||||
}
|
||||
}
|
||||
|
||||
keySize, err := btf.Sizeof(btf.MapKey(btfMap))
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't get size of BTF key: %w", err)
|
||||
}
|
||||
|
||||
valueSize, err := btf.Sizeof(btf.MapValue(btfMap))
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't get size of BTF value: %w", err)
|
||||
}
|
||||
|
||||
return &MapSpec{
|
||||
Type: MapType(mapType),
|
||||
KeySize: uint32(keySize),
|
||||
ValueSize: uint32(valueSize),
|
||||
MaxEntries: maxEntries,
|
||||
Flags: flags,
|
||||
BTF: btfMap,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// uintFromBTF resolves the __uint macro, which is a pointer to a sized
|
||||
// array, e.g. for int (*foo)[10], this function will return 10.
|
||||
func uintFromBTF(typ btf.Type) (uint32, error) {
|
||||
ptr, ok := typ.(*btf.Pointer)
|
||||
if !ok {
|
||||
return 0, xerrors.Errorf("not a pointer: %v", typ)
|
||||
}
|
||||
|
||||
arr, ok := ptr.Target.(*btf.Array)
|
||||
if !ok {
|
||||
return 0, xerrors.Errorf("not a pointer to array: %v", typ)
|
||||
}
|
||||
|
||||
return arr.Nelems, nil
|
||||
}
|
||||
|
||||
func (ec *elfCode) loadDataSections(maps map[string]*MapSpec, dataSections map[elf.SectionIndex]*elf.Section, spec *btf.Spec) error {
|
||||
if spec == nil {
|
||||
return xerrors.New("data sections require BTF, make sure all consts are marked as static")
|
||||
}
|
||||
|
||||
for _, sec := range dataSections {
|
||||
btfMap, err := spec.Datasec(sec.Name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
data, err := sec.Data()
|
||||
if err != nil {
|
||||
return xerrors.Errorf("data section %s: can't get contents: %w", sec.Name, err)
|
||||
}
|
||||
|
||||
if uint64(len(data)) > math.MaxUint32 {
|
||||
return xerrors.Errorf("data section %s: contents exceed maximum size", sec.Name)
|
||||
}
|
||||
|
||||
mapSpec := &MapSpec{
|
||||
Name: SanitizeName(sec.Name, -1),
|
||||
Type: Array,
|
||||
KeySize: 4,
|
||||
ValueSize: uint32(len(data)),
|
||||
MaxEntries: 1,
|
||||
Contents: []MapKV{{uint32(0), data}},
|
||||
BTF: btfMap,
|
||||
}
|
||||
|
||||
switch sec.Name {
|
||||
case ".rodata":
|
||||
mapSpec.Flags = unix.BPF_F_RDONLY_PROG
|
||||
mapSpec.Freeze = true
|
||||
case ".bss":
|
||||
// The kernel already zero-initializes the map
|
||||
mapSpec.Contents = nil
|
||||
}
|
||||
|
||||
maps[sec.Name] = mapSpec
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getProgType(sectionName string) (ProgramType, AttachType, string) {
|
||||
types := map[string]struct {
|
||||
progType ProgramType
|
||||
attachType AttachType
|
||||
}{
|
||||
// From https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c
|
||||
"socket": {SocketFilter, AttachNone},
|
||||
"seccomp": {SocketFilter, AttachNone},
|
||||
"kprobe/": {Kprobe, AttachNone},
|
||||
"uprobe/": {Kprobe, AttachNone},
|
||||
"kretprobe/": {Kprobe, AttachNone},
|
||||
"uretprobe/": {Kprobe, AttachNone},
|
||||
"tracepoint/": {TracePoint, AttachNone},
|
||||
"raw_tracepoint/": {RawTracepoint, AttachNone},
|
||||
"xdp": {XDP, AttachNone},
|
||||
"perf_event": {PerfEvent, AttachNone},
|
||||
"lwt_in": {LWTIn, AttachNone},
|
||||
"lwt_out": {LWTOut, AttachNone},
|
||||
"lwt_xmit": {LWTXmit, AttachNone},
|
||||
"lwt_seg6local": {LWTSeg6Local, AttachNone},
|
||||
"sockops": {SockOps, AttachCGroupSockOps},
|
||||
"sk_skb/stream_parser": {SkSKB, AttachSkSKBStreamParser},
|
||||
"sk_skb/stream_verdict": {SkSKB, AttachSkSKBStreamParser},
|
||||
"sk_msg": {SkMsg, AttachSkSKBStreamVerdict},
|
||||
"lirc_mode2": {LircMode2, AttachLircMode2},
|
||||
"flow_dissector": {FlowDissector, AttachFlowDissector},
|
||||
"iter/": {Tracing, AttachTraceIter},
|
||||
|
||||
"cgroup_skb/ingress": {CGroupSKB, AttachCGroupInetIngress},
|
||||
"cgroup_skb/egress": {CGroupSKB, AttachCGroupInetEgress},
|
||||
"cgroup/dev": {CGroupDevice, AttachCGroupDevice},
|
||||
"cgroup/skb": {CGroupSKB, AttachNone},
|
||||
"cgroup/sock": {CGroupSock, AttachCGroupInetSockCreate},
|
||||
"cgroup/post_bind4": {CGroupSock, AttachCGroupInet4PostBind},
|
||||
"cgroup/post_bind6": {CGroupSock, AttachCGroupInet6PostBind},
|
||||
"cgroup/bind4": {CGroupSockAddr, AttachCGroupInet4Bind},
|
||||
"cgroup/bind6": {CGroupSockAddr, AttachCGroupInet6Bind},
|
||||
"cgroup/connect4": {CGroupSockAddr, AttachCGroupInet4Connect},
|
||||
"cgroup/connect6": {CGroupSockAddr, AttachCGroupInet6Connect},
|
||||
"cgroup/sendmsg4": {CGroupSockAddr, AttachCGroupUDP4Sendmsg},
|
||||
"cgroup/sendmsg6": {CGroupSockAddr, AttachCGroupUDP6Sendmsg},
|
||||
"cgroup/recvmsg4": {CGroupSockAddr, AttachCGroupUDP4Recvmsg},
|
||||
"cgroup/recvmsg6": {CGroupSockAddr, AttachCGroupUDP6Recvmsg},
|
||||
"cgroup/sysctl": {CGroupSysctl, AttachCGroupSysctl},
|
||||
"cgroup/getsockopt": {CGroupSockopt, AttachCGroupGetsockopt},
|
||||
"cgroup/setsockopt": {CGroupSockopt, AttachCGroupSetsockopt},
|
||||
"classifier": {SchedCLS, AttachNone},
|
||||
"action": {SchedACT, AttachNone},
|
||||
}
|
||||
|
||||
for prefix, t := range types {
|
||||
if !strings.HasPrefix(sectionName, prefix) {
|
||||
continue
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(prefix, "/") {
|
||||
return t.progType, t.attachType, ""
|
||||
}
|
||||
|
||||
return t.progType, t.attachType, sectionName[len(prefix):]
|
||||
}
|
||||
|
||||
return UnspecifiedProgram, AttachNone, ""
|
||||
}
|
||||
|
||||
func (ec *elfCode) loadRelocations(sections map[elf.SectionIndex]*elf.Section) (map[elf.SectionIndex]map[uint64]elf.Symbol, error) {
|
||||
result := make(map[elf.SectionIndex]map[uint64]elf.Symbol)
|
||||
for idx, sec := range sections {
|
||||
rels := make(map[uint64]elf.Symbol)
|
||||
|
||||
if sec.Entsize < 16 {
|
||||
return nil, xerrors.Errorf("section %s: relocations are less than 16 bytes", sec.Name)
|
||||
}
|
||||
|
||||
r := sec.Open()
|
||||
for off := uint64(0); off < sec.Size; off += sec.Entsize {
|
||||
ent := io.LimitReader(r, int64(sec.Entsize))
|
||||
|
||||
var rel elf.Rel64
|
||||
if binary.Read(ent, ec.ByteOrder, &rel) != nil {
|
||||
return nil, xerrors.Errorf("can't parse relocation at offset %v", off)
|
||||
}
|
||||
|
||||
symNo := int(elf.R_SYM64(rel.Info) - 1)
|
||||
if symNo >= len(ec.symbols) {
|
||||
return nil, xerrors.Errorf("relocation at offset %d: symbol %v doesnt exist", off, symNo)
|
||||
}
|
||||
|
||||
rels[rel.Off] = ec.symbols[symNo]
|
||||
}
|
||||
|
||||
result[idx] = rels
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func symbolsPerSection(symbols []elf.Symbol) map[elf.SectionIndex]map[uint64]elf.Symbol {
|
||||
result := make(map[elf.SectionIndex]map[uint64]elf.Symbol)
|
||||
for _, sym := range symbols {
|
||||
switch elf.ST_TYPE(sym.Info) {
|
||||
case elf.STT_NOTYPE:
|
||||
// Older versions of LLVM doesn't tag
|
||||
// symbols correctly.
|
||||
break
|
||||
case elf.STT_OBJECT:
|
||||
break
|
||||
case elf.STT_FUNC:
|
||||
break
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
if sym.Section == elf.SHN_UNDEF || sym.Section >= elf.SHN_LORESERVE {
|
||||
continue
|
||||
}
|
||||
|
||||
if sym.Name == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
idx := sym.Section
|
||||
if _, ok := result[idx]; !ok {
|
||||
result[idx] = make(map[uint64]elf.Symbol)
|
||||
}
|
||||
result[idx][sym.Value] = sym
|
||||
}
|
||||
return result
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
module github.com/cilium/ebpf
|
||||
|
||||
go 1.12
|
||||
|
||||
require (
|
||||
golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543
|
||||
)
|
|
@ -0,0 +1,6 @@
|
|||
golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7 h1:HmbHVPwrPEKPGLAcHSrMe6+hqSUlvZU0rab6x5EXfGU=
|
||||
golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9 h1:1/DFK4b7JH8DmkqhUk48onnSfrPzImPoVxuomtbT2nk=
|
||||
golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
|
@ -0,0 +1,661 @@
|
|||
package btf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"debug/elf"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"reflect"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cilium/ebpf/internal"
|
||||
"github.com/cilium/ebpf/internal/unix"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
const btfMagic = 0xeB9F
|
||||
|
||||
// Errors returned by BTF functions.
|
||||
var (
|
||||
ErrNotSupported = internal.ErrNotSupported
|
||||
ErrNotFound = xerrors.New("not found")
|
||||
)
|
||||
|
||||
// Spec represents decoded BTF.
|
||||
type Spec struct {
|
||||
rawTypes []rawType
|
||||
strings stringTable
|
||||
types map[string][]Type
|
||||
funcInfos map[string]extInfo
|
||||
lineInfos map[string]extInfo
|
||||
byteOrder binary.ByteOrder
|
||||
}
|
||||
|
||||
type btfHeader struct {
|
||||
Magic uint16
|
||||
Version uint8
|
||||
Flags uint8
|
||||
HdrLen uint32
|
||||
|
||||
TypeOff uint32
|
||||
TypeLen uint32
|
||||
StringOff uint32
|
||||
StringLen uint32
|
||||
}
|
||||
|
||||
// LoadSpecFromReader reads BTF sections from an ELF.
|
||||
//
|
||||
// Returns a nil Spec and no error if no BTF was present.
|
||||
func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) {
|
||||
file, err := elf.NewFile(rd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
var (
|
||||
btfSection *elf.Section
|
||||
btfExtSection *elf.Section
|
||||
sectionSizes = make(map[string]uint32)
|
||||
)
|
||||
|
||||
for _, sec := range file.Sections {
|
||||
switch sec.Name {
|
||||
case ".BTF":
|
||||
btfSection = sec
|
||||
case ".BTF.ext":
|
||||
btfExtSection = sec
|
||||
default:
|
||||
if sec.Type != elf.SHT_PROGBITS && sec.Type != elf.SHT_NOBITS {
|
||||
break
|
||||
}
|
||||
|
||||
if sec.Size > math.MaxUint32 {
|
||||
return nil, xerrors.Errorf("section %s exceeds maximum size", sec.Name)
|
||||
}
|
||||
|
||||
sectionSizes[sec.Name] = uint32(sec.Size)
|
||||
}
|
||||
}
|
||||
|
||||
if btfSection == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
symbols, err := file.Symbols()
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't read symbols: %v", err)
|
||||
}
|
||||
|
||||
variableOffsets := make(map[variable]uint32)
|
||||
for _, symbol := range symbols {
|
||||
if idx := symbol.Section; idx >= elf.SHN_LORESERVE && idx <= elf.SHN_HIRESERVE {
|
||||
// Ignore things like SHN_ABS
|
||||
continue
|
||||
}
|
||||
|
||||
secName := file.Sections[symbol.Section].Name
|
||||
if _, ok := sectionSizes[secName]; !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if symbol.Value > math.MaxUint32 {
|
||||
return nil, xerrors.Errorf("section %s: symbol %s: size exceeds maximum", secName, symbol.Name)
|
||||
}
|
||||
|
||||
variableOffsets[variable{secName, symbol.Name}] = uint32(symbol.Value)
|
||||
}
|
||||
|
||||
spec, err := loadNakedSpec(btfSection.Open(), file.ByteOrder, sectionSizes, variableOffsets)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if btfExtSection == nil {
|
||||
return spec, nil
|
||||
}
|
||||
|
||||
spec.funcInfos, spec.lineInfos, err = parseExtInfos(btfExtSection.Open(), file.ByteOrder, spec.strings)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't read ext info: %w", err)
|
||||
}
|
||||
|
||||
return spec, nil
|
||||
}
|
||||
|
||||
func loadNakedSpec(btf io.ReadSeeker, bo binary.ByteOrder, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) (*Spec, error) {
|
||||
rawTypes, rawStrings, err := parseBTF(btf, bo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = fixupDatasec(rawTypes, rawStrings, sectionSizes, variableOffsets)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
types, err := inflateRawTypes(rawTypes, rawStrings)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Spec{
|
||||
rawTypes: rawTypes,
|
||||
types: types,
|
||||
strings: rawStrings,
|
||||
byteOrder: bo,
|
||||
}, nil
|
||||
}
|
||||
|
||||
var kernelBTF struct {
|
||||
sync.Mutex
|
||||
*Spec
|
||||
}
|
||||
|
||||
// LoadKernelSpec returns the current kernel's BTF information.
|
||||
//
|
||||
// Requires a >= 5.5 kernel with CONFIG_DEBUG_INFO_BTF enabled. Returns
|
||||
// ErrNotSupported if BTF is not enabled.
|
||||
func LoadKernelSpec() (*Spec, error) {
|
||||
kernelBTF.Lock()
|
||||
defer kernelBTF.Unlock()
|
||||
|
||||
if kernelBTF.Spec != nil {
|
||||
return kernelBTF.Spec, nil
|
||||
}
|
||||
|
||||
var err error
|
||||
kernelBTF.Spec, err = loadKernelSpec()
|
||||
return kernelBTF.Spec, err
|
||||
}
|
||||
|
||||
func loadKernelSpec() (*Spec, error) {
|
||||
fh, err := os.Open("/sys/kernel/btf/vmlinux")
|
||||
if os.IsNotExist(err) {
|
||||
return nil, xerrors.Errorf("can't open kernel BTF at /sys/kernel/btf/vmlinux: %w", ErrNotFound)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't read kernel BTF: %s", err)
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
return loadNakedSpec(fh, internal.NativeEndian, nil, nil)
|
||||
}
|
||||
|
||||
func parseBTF(btf io.ReadSeeker, bo binary.ByteOrder) ([]rawType, stringTable, error) {
|
||||
rawBTF, err := ioutil.ReadAll(btf)
|
||||
if err != nil {
|
||||
return nil, nil, xerrors.Errorf("can't read BTF: %v", err)
|
||||
}
|
||||
|
||||
rd := bytes.NewReader(rawBTF)
|
||||
|
||||
var header btfHeader
|
||||
if err := binary.Read(rd, bo, &header); err != nil {
|
||||
return nil, nil, xerrors.Errorf("can't read header: %v", err)
|
||||
}
|
||||
|
||||
if header.Magic != btfMagic {
|
||||
return nil, nil, xerrors.Errorf("incorrect magic value %v", header.Magic)
|
||||
}
|
||||
|
||||
if header.Version != 1 {
|
||||
return nil, nil, xerrors.Errorf("unexpected version %v", header.Version)
|
||||
}
|
||||
|
||||
if header.Flags != 0 {
|
||||
return nil, nil, xerrors.Errorf("unsupported flags %v", header.Flags)
|
||||
}
|
||||
|
||||
remainder := int64(header.HdrLen) - int64(binary.Size(&header))
|
||||
if remainder < 0 {
|
||||
return nil, nil, xerrors.New("header is too short")
|
||||
}
|
||||
|
||||
if _, err := io.CopyN(internal.DiscardZeroes{}, rd, remainder); err != nil {
|
||||
return nil, nil, xerrors.Errorf("header padding: %v", err)
|
||||
}
|
||||
|
||||
if _, err := rd.Seek(int64(header.HdrLen+header.StringOff), io.SeekStart); err != nil {
|
||||
return nil, nil, xerrors.Errorf("can't seek to start of string section: %v", err)
|
||||
}
|
||||
|
||||
rawStrings, err := readStringTable(io.LimitReader(rd, int64(header.StringLen)))
|
||||
if err != nil {
|
||||
return nil, nil, xerrors.Errorf("can't read type names: %w", err)
|
||||
}
|
||||
|
||||
if _, err := rd.Seek(int64(header.HdrLen+header.TypeOff), io.SeekStart); err != nil {
|
||||
return nil, nil, xerrors.Errorf("can't seek to start of type section: %v", err)
|
||||
}
|
||||
|
||||
rawTypes, err := readTypes(io.LimitReader(rd, int64(header.TypeLen)), bo)
|
||||
if err != nil {
|
||||
return nil, nil, xerrors.Errorf("can't read types: %w", err)
|
||||
}
|
||||
|
||||
return rawTypes, rawStrings, nil
|
||||
}
|
||||
|
||||
type variable struct {
|
||||
section string
|
||||
name string
|
||||
}
|
||||
|
||||
func fixupDatasec(rawTypes []rawType, rawStrings stringTable, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) error {
|
||||
for i, rawType := range rawTypes {
|
||||
if rawType.Kind() != kindDatasec {
|
||||
continue
|
||||
}
|
||||
|
||||
name, err := rawStrings.Lookup(rawType.NameOff)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
size, ok := sectionSizes[name]
|
||||
if !ok {
|
||||
return xerrors.Errorf("data section %s: missing size", name)
|
||||
}
|
||||
|
||||
rawTypes[i].SizeType = size
|
||||
|
||||
secinfos := rawType.data.([]btfVarSecinfo)
|
||||
for j, secInfo := range secinfos {
|
||||
id := int(secInfo.Type - 1)
|
||||
if id >= len(rawTypes) {
|
||||
return xerrors.Errorf("data section %s: invalid type id %d for variable %d", name, id, j)
|
||||
}
|
||||
|
||||
varName, err := rawStrings.Lookup(rawTypes[id].NameOff)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("data section %s: can't get name for type %d: %w", name, id, err)
|
||||
}
|
||||
|
||||
offset, ok := variableOffsets[variable{name, varName}]
|
||||
if !ok {
|
||||
return xerrors.Errorf("data section %s: missing offset for variable %s", name, varName)
|
||||
}
|
||||
|
||||
secinfos[j].Offset = offset
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Spec) marshal(bo binary.ByteOrder) ([]byte, error) {
|
||||
var (
|
||||
buf bytes.Buffer
|
||||
header = new(btfHeader)
|
||||
headerLen = binary.Size(header)
|
||||
)
|
||||
|
||||
// Reserve space for the header. We have to write it last since
|
||||
// we don't know the size of the type section yet.
|
||||
_, _ = buf.Write(make([]byte, headerLen))
|
||||
|
||||
// Write type section, just after the header.
|
||||
for _, typ := range s.rawTypes {
|
||||
if err := typ.Marshal(&buf, bo); err != nil {
|
||||
return nil, xerrors.Errorf("can't marshal BTF: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
typeLen := uint32(buf.Len() - headerLen)
|
||||
|
||||
// Write string section after type section.
|
||||
_, _ = buf.Write(s.strings)
|
||||
|
||||
// Fill out the header, and write it out.
|
||||
header = &btfHeader{
|
||||
Magic: btfMagic,
|
||||
Version: 1,
|
||||
Flags: 0,
|
||||
HdrLen: uint32(headerLen),
|
||||
TypeOff: 0,
|
||||
TypeLen: typeLen,
|
||||
StringOff: typeLen,
|
||||
StringLen: uint32(len(s.strings)),
|
||||
}
|
||||
|
||||
raw := buf.Bytes()
|
||||
err := binary.Write(sliceWriter(raw[:headerLen]), bo, header)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't write header: %v", err)
|
||||
}
|
||||
|
||||
return raw, nil
|
||||
}
|
||||
|
||||
type sliceWriter []byte
|
||||
|
||||
func (sw sliceWriter) Write(p []byte) (int, error) {
|
||||
if len(p) != len(sw) {
|
||||
return 0, xerrors.New("size doesn't match")
|
||||
}
|
||||
|
||||
return copy(sw, p), nil
|
||||
}
|
||||
|
||||
// Program finds the BTF for a specific section.
|
||||
//
|
||||
// Length is the number of bytes in the raw BPF instruction stream.
|
||||
//
|
||||
// Returns an error if there is no BTF.
|
||||
func (s *Spec) Program(name string, length uint64) (*Program, error) {
|
||||
if length == 0 {
|
||||
return nil, xerrors.New("length musn't be zero")
|
||||
}
|
||||
|
||||
funcInfos, funcOK := s.funcInfos[name]
|
||||
lineInfos, lineOK := s.lineInfos[name]
|
||||
|
||||
if !funcOK && !lineOK {
|
||||
return nil, xerrors.Errorf("no BTF for program %s", name)
|
||||
}
|
||||
|
||||
return &Program{s, length, funcInfos, lineInfos}, nil
|
||||
}
|
||||
|
||||
// Map finds the BTF for a map.
|
||||
//
|
||||
// Returns an error if there is no BTF for the given name.
|
||||
func (s *Spec) Map(name string) (*Map, []Member, error) {
|
||||
var mapVar Var
|
||||
if err := s.FindType(name, &mapVar); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
mapStruct, ok := mapVar.Type.(*Struct)
|
||||
if !ok {
|
||||
return nil, nil, xerrors.Errorf("expected struct, have %s", mapVar.Type)
|
||||
}
|
||||
|
||||
var key, value Type
|
||||
for _, member := range mapStruct.Members {
|
||||
switch member.Name {
|
||||
case "key":
|
||||
key = member.Type
|
||||
|
||||
case "value":
|
||||
value = member.Type
|
||||
}
|
||||
}
|
||||
|
||||
if key == nil {
|
||||
return nil, nil, xerrors.Errorf("map %s: missing 'key' in type", name)
|
||||
}
|
||||
|
||||
if value == nil {
|
||||
return nil, nil, xerrors.Errorf("map %s: missing 'value' in type", name)
|
||||
}
|
||||
|
||||
return &Map{s, key, value}, mapStruct.Members, nil
|
||||
}
|
||||
|
||||
// Datasec returns the BTF required to create maps which represent data sections.
|
||||
func (s *Spec) Datasec(name string) (*Map, error) {
|
||||
var datasec Datasec
|
||||
if err := s.FindType(name, &datasec); err != nil {
|
||||
return nil, xerrors.Errorf("data section %s: can't get BTF: %w", name, err)
|
||||
}
|
||||
|
||||
return &Map{s, &Void{}, &datasec}, nil
|
||||
}
|
||||
|
||||
// FindType searches for a type with a specific name.
|
||||
//
|
||||
// hint determines the type of the returned Type.
|
||||
//
|
||||
// Returns an error wrapping ErrNotFound if no matching
|
||||
// type exists in spec.
|
||||
func (s *Spec) FindType(name string, typ Type) error {
|
||||
var (
|
||||
wanted = reflect.TypeOf(typ)
|
||||
candidate Type
|
||||
)
|
||||
|
||||
for _, typ := range s.types[name] {
|
||||
if reflect.TypeOf(typ) != wanted {
|
||||
continue
|
||||
}
|
||||
|
||||
if candidate != nil {
|
||||
return xerrors.Errorf("type %s: multiple candidates for %T", name, typ)
|
||||
}
|
||||
|
||||
candidate = typ
|
||||
}
|
||||
|
||||
if candidate == nil {
|
||||
return xerrors.Errorf("type %s: %w", name, ErrNotFound)
|
||||
}
|
||||
|
||||
value := reflect.Indirect(reflect.ValueOf(copyType(candidate)))
|
||||
reflect.Indirect(reflect.ValueOf(typ)).Set(value)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Handle is a reference to BTF loaded into the kernel.
|
||||
type Handle struct {
|
||||
fd *internal.FD
|
||||
}
|
||||
|
||||
// NewHandle loads BTF into the kernel.
|
||||
//
|
||||
// Returns ErrNotSupported if BTF is not supported.
|
||||
func NewHandle(spec *Spec) (*Handle, error) {
|
||||
if err := haveBTF(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if spec.byteOrder != internal.NativeEndian {
|
||||
return nil, xerrors.Errorf("can't load %s BTF on %s", spec.byteOrder, internal.NativeEndian)
|
||||
}
|
||||
|
||||
btf, err := spec.marshal(internal.NativeEndian)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't marshal BTF: %w", err)
|
||||
}
|
||||
|
||||
if uint64(len(btf)) > math.MaxUint32 {
|
||||
return nil, xerrors.New("BTF exceeds the maximum size")
|
||||
}
|
||||
|
||||
attr := &bpfLoadBTFAttr{
|
||||
btf: internal.NewSlicePointer(btf),
|
||||
btfSize: uint32(len(btf)),
|
||||
}
|
||||
|
||||
fd, err := bpfLoadBTF(attr)
|
||||
if err != nil {
|
||||
logBuf := make([]byte, 64*1024)
|
||||
attr.logBuf = internal.NewSlicePointer(logBuf)
|
||||
attr.btfLogSize = uint32(len(logBuf))
|
||||
attr.btfLogLevel = 1
|
||||
_, logErr := bpfLoadBTF(attr)
|
||||
return nil, internal.ErrorWithLog(err, logBuf, logErr)
|
||||
}
|
||||
|
||||
return &Handle{fd}, nil
|
||||
}
|
||||
|
||||
// Close destroys the handle.
|
||||
//
|
||||
// Subsequent calls to FD will return an invalid value.
|
||||
func (h *Handle) Close() error {
|
||||
return h.fd.Close()
|
||||
}
|
||||
|
||||
// FD returns the file descriptor for the handle.
|
||||
func (h *Handle) FD() int {
|
||||
value, err := h.fd.Value()
|
||||
if err != nil {
|
||||
return -1
|
||||
}
|
||||
|
||||
return int(value)
|
||||
}
|
||||
|
||||
// Map is the BTF for a map.
|
||||
type Map struct {
|
||||
spec *Spec
|
||||
key, value Type
|
||||
}
|
||||
|
||||
// MapSpec should be a method on Map, but is a free function
|
||||
// to hide it from users of the ebpf package.
|
||||
func MapSpec(m *Map) *Spec {
|
||||
return m.spec
|
||||
}
|
||||
|
||||
// MapKey should be a method on Map, but is a free function
|
||||
// to hide it from users of the ebpf package.
|
||||
func MapKey(m *Map) Type {
|
||||
return m.key
|
||||
}
|
||||
|
||||
// MapValue should be a method on Map, but is a free function
|
||||
// to hide it from users of the ebpf package.
|
||||
func MapValue(m *Map) Type {
|
||||
return m.value
|
||||
}
|
||||
|
||||
// Program is the BTF information for a stream of instructions.
|
||||
type Program struct {
|
||||
spec *Spec
|
||||
length uint64
|
||||
funcInfos, lineInfos extInfo
|
||||
}
|
||||
|
||||
// ProgramSpec returns the Spec needed for loading function and line infos into the kernel.
|
||||
//
|
||||
// This is a free function instead of a method to hide it from users
|
||||
// of package ebpf.
|
||||
func ProgramSpec(s *Program) *Spec {
|
||||
return s.spec
|
||||
}
|
||||
|
||||
// ProgramAppend the information from other to the Program.
|
||||
//
|
||||
// This is a free function instead of a method to hide it from users
|
||||
// of package ebpf.
|
||||
func ProgramAppend(s, other *Program) error {
|
||||
funcInfos, err := s.funcInfos.append(other.funcInfos, s.length)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("func infos: %w", err)
|
||||
}
|
||||
|
||||
lineInfos, err := s.lineInfos.append(other.lineInfos, s.length)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("line infos: %w", err)
|
||||
}
|
||||
|
||||
s.length += other.length
|
||||
s.funcInfos = funcInfos
|
||||
s.lineInfos = lineInfos
|
||||
return nil
|
||||
}
|
||||
|
||||
// ProgramFuncInfos returns the binary form of BTF function infos.
|
||||
//
|
||||
// This is a free function instead of a method to hide it from users
|
||||
// of package ebpf.
|
||||
func ProgramFuncInfos(s *Program) (recordSize uint32, bytes []byte, err error) {
|
||||
bytes, err = s.funcInfos.MarshalBinary()
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
return s.funcInfos.recordSize, bytes, nil
|
||||
}
|
||||
|
||||
// ProgramLineInfos returns the binary form of BTF line infos.
|
||||
//
|
||||
// This is a free function instead of a method to hide it from users
|
||||
// of package ebpf.
|
||||
func ProgramLineInfos(s *Program) (recordSize uint32, bytes []byte, err error) {
|
||||
bytes, err = s.lineInfos.MarshalBinary()
|
||||
if err != nil {
|
||||
return 0, nil, err
|
||||
}
|
||||
|
||||
return s.lineInfos.recordSize, bytes, nil
|
||||
}
|
||||
|
||||
type bpfLoadBTFAttr struct {
|
||||
btf internal.Pointer
|
||||
logBuf internal.Pointer
|
||||
btfSize uint32
|
||||
btfLogSize uint32
|
||||
btfLogLevel uint32
|
||||
}
|
||||
|
||||
func bpfLoadBTF(attr *bpfLoadBTFAttr) (*internal.FD, error) {
|
||||
const _BTFLoad = 18
|
||||
|
||||
fd, err := internal.BPF(_BTFLoad, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return internal.NewFD(uint32(fd)), nil
|
||||
}
|
||||
|
||||
func minimalBTF(bo binary.ByteOrder) []byte {
|
||||
const minHeaderLength = 24
|
||||
|
||||
var (
|
||||
types struct {
|
||||
Integer btfType
|
||||
Var btfType
|
||||
btfVar struct{ Linkage uint32 }
|
||||
}
|
||||
typLen = uint32(binary.Size(&types))
|
||||
strings = []byte{0, 'a', 0}
|
||||
header = btfHeader{
|
||||
Magic: btfMagic,
|
||||
Version: 1,
|
||||
HdrLen: minHeaderLength,
|
||||
TypeOff: 0,
|
||||
TypeLen: typLen,
|
||||
StringOff: typLen,
|
||||
StringLen: uint32(len(strings)),
|
||||
}
|
||||
)
|
||||
|
||||
// We use a BTF_KIND_VAR here, to make sure that
|
||||
// the kernel understands BTF at least as well as we
|
||||
// do. BTF_KIND_VAR was introduced ~5.1.
|
||||
types.Integer.SetKind(kindPointer)
|
||||
types.Var.NameOff = 1
|
||||
types.Var.SetKind(kindVar)
|
||||
types.Var.SizeType = 1
|
||||
|
||||
buf := new(bytes.Buffer)
|
||||
_ = binary.Write(buf, bo, &header)
|
||||
_ = binary.Write(buf, bo, &types)
|
||||
buf.Write(strings)
|
||||
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
var haveBTF = internal.FeatureTest("BTF", "5.1", func() (bool, error) {
|
||||
btf := minimalBTF(internal.NativeEndian)
|
||||
fd, err := bpfLoadBTF(&bpfLoadBTFAttr{
|
||||
btf: internal.NewSlicePointer(btf),
|
||||
btfSize: uint32(len(btf)),
|
||||
})
|
||||
if err == nil {
|
||||
fd.Close()
|
||||
}
|
||||
// Check for EINVAL specifically, rather than err != nil since we
|
||||
// otherwise misdetect due to insufficient permissions.
|
||||
return !xerrors.Is(err, unix.EINVAL), nil
|
||||
})
|
|
@ -0,0 +1,245 @@
|
|||
package btf
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// btfKind describes a Type.
|
||||
type btfKind uint8
|
||||
|
||||
// Equivalents of the BTF_KIND_* constants.
|
||||
const (
|
||||
kindUnknown btfKind = iota
|
||||
kindInt
|
||||
kindPointer
|
||||
kindArray
|
||||
kindStruct
|
||||
kindUnion
|
||||
kindEnum
|
||||
kindForward
|
||||
kindTypedef
|
||||
kindVolatile
|
||||
kindConst
|
||||
kindRestrict
|
||||
// Added ~4.20
|
||||
kindFunc
|
||||
kindFuncProto
|
||||
// Added ~5.1
|
||||
kindVar
|
||||
kindDatasec
|
||||
)
|
||||
|
||||
const (
|
||||
btfTypeKindShift = 24
|
||||
btfTypeKindLen = 4
|
||||
btfTypeVlenShift = 0
|
||||
btfTypeVlenMask = 16
|
||||
)
|
||||
|
||||
// btfType is equivalent to struct btf_type in Documentation/bpf/btf.rst.
|
||||
type btfType struct {
|
||||
NameOff uint32
|
||||
/* "info" bits arrangement
|
||||
* bits 0-15: vlen (e.g. # of struct's members)
|
||||
* bits 16-23: unused
|
||||
* bits 24-27: kind (e.g. int, ptr, array...etc)
|
||||
* bits 28-30: unused
|
||||
* bit 31: kind_flag, currently used by
|
||||
* struct, union and fwd
|
||||
*/
|
||||
Info uint32
|
||||
/* "size" is used by INT, ENUM, STRUCT and UNION.
|
||||
* "size" tells the size of the type it is describing.
|
||||
*
|
||||
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
|
||||
* FUNC and FUNC_PROTO.
|
||||
* "type" is a type_id referring to another type.
|
||||
*/
|
||||
SizeType uint32
|
||||
}
|
||||
|
||||
func (k btfKind) String() string {
|
||||
switch k {
|
||||
case kindUnknown:
|
||||
return "Unknown"
|
||||
case kindInt:
|
||||
return "Integer"
|
||||
case kindPointer:
|
||||
return "Pointer"
|
||||
case kindArray:
|
||||
return "Array"
|
||||
case kindStruct:
|
||||
return "Struct"
|
||||
case kindUnion:
|
||||
return "Union"
|
||||
case kindEnum:
|
||||
return "Enumeration"
|
||||
case kindForward:
|
||||
return "Forward"
|
||||
case kindTypedef:
|
||||
return "Typedef"
|
||||
case kindVolatile:
|
||||
return "Volatile"
|
||||
case kindConst:
|
||||
return "Const"
|
||||
case kindRestrict:
|
||||
return "Restrict"
|
||||
case kindFunc:
|
||||
return "Function"
|
||||
case kindFuncProto:
|
||||
return "Function Proto"
|
||||
case kindVar:
|
||||
return "Variable"
|
||||
case kindDatasec:
|
||||
return "Section"
|
||||
default:
|
||||
return fmt.Sprintf("Unknown (%d)", k)
|
||||
}
|
||||
}
|
||||
|
||||
func mask(len uint32) uint32 {
|
||||
return (1 << len) - 1
|
||||
}
|
||||
|
||||
func (bt *btfType) info(len, shift uint32) uint32 {
|
||||
return (bt.Info >> shift) & mask(len)
|
||||
}
|
||||
|
||||
func (bt *btfType) setInfo(value, len, shift uint32) {
|
||||
bt.Info &^= mask(len) << shift
|
||||
bt.Info |= (value & mask(len)) << shift
|
||||
}
|
||||
|
||||
func (bt *btfType) Kind() btfKind {
|
||||
return btfKind(bt.info(btfTypeKindLen, btfTypeKindShift))
|
||||
}
|
||||
|
||||
func (bt *btfType) SetKind(kind btfKind) {
|
||||
bt.setInfo(uint32(kind), btfTypeKindLen, btfTypeKindShift)
|
||||
}
|
||||
|
||||
func (bt *btfType) Vlen() int {
|
||||
return int(bt.info(btfTypeVlenMask, btfTypeVlenShift))
|
||||
}
|
||||
|
||||
func (bt *btfType) SetVlen(vlen int) {
|
||||
bt.setInfo(uint32(vlen), btfTypeVlenMask, btfTypeVlenShift)
|
||||
}
|
||||
|
||||
func (bt *btfType) Type() TypeID {
|
||||
// TODO: Panic here if wrong kind?
|
||||
return TypeID(bt.SizeType)
|
||||
}
|
||||
|
||||
func (bt *btfType) Size() uint32 {
|
||||
// TODO: Panic here if wrong kind?
|
||||
return bt.SizeType
|
||||
}
|
||||
|
||||
type rawType struct {
|
||||
btfType
|
||||
data interface{}
|
||||
}
|
||||
|
||||
func (rt *rawType) Marshal(w io.Writer, bo binary.ByteOrder) error {
|
||||
if err := binary.Write(w, bo, &rt.btfType); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if rt.data == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return binary.Write(w, bo, rt.data)
|
||||
}
|
||||
|
||||
type btfArray struct {
|
||||
Type TypeID
|
||||
IndexType TypeID
|
||||
Nelems uint32
|
||||
}
|
||||
|
||||
type btfMember struct {
|
||||
NameOff uint32
|
||||
Type TypeID
|
||||
Offset uint32
|
||||
}
|
||||
|
||||
type btfVarSecinfo struct {
|
||||
Type TypeID
|
||||
Offset uint32
|
||||
Size uint32
|
||||
}
|
||||
|
||||
type btfVariable struct {
|
||||
Linkage uint32
|
||||
}
|
||||
|
||||
type btfEnum struct {
|
||||
NameOff uint32
|
||||
Val int32
|
||||
}
|
||||
|
||||
type btfParam struct {
|
||||
NameOff uint32
|
||||
Type TypeID
|
||||
}
|
||||
|
||||
func readTypes(r io.Reader, bo binary.ByteOrder) ([]rawType, error) {
|
||||
var (
|
||||
header btfType
|
||||
types []rawType
|
||||
)
|
||||
|
||||
for id := TypeID(1); ; id++ {
|
||||
if err := binary.Read(r, bo, &header); err == io.EOF {
|
||||
return types, nil
|
||||
} else if err != nil {
|
||||
return nil, xerrors.Errorf("can't read type info for id %v: %v", id, err)
|
||||
}
|
||||
|
||||
var data interface{}
|
||||
switch header.Kind() {
|
||||
case kindInt:
|
||||
data = new(uint32)
|
||||
case kindPointer:
|
||||
case kindArray:
|
||||
data = new(btfArray)
|
||||
case kindStruct:
|
||||
fallthrough
|
||||
case kindUnion:
|
||||
data = make([]btfMember, header.Vlen())
|
||||
case kindEnum:
|
||||
data = make([]btfEnum, header.Vlen())
|
||||
case kindForward:
|
||||
case kindTypedef:
|
||||
case kindVolatile:
|
||||
case kindConst:
|
||||
case kindRestrict:
|
||||
case kindFunc:
|
||||
case kindFuncProto:
|
||||
data = make([]btfParam, header.Vlen())
|
||||
case kindVar:
|
||||
data = new(btfVariable)
|
||||
case kindDatasec:
|
||||
data = make([]btfVarSecinfo, header.Vlen())
|
||||
default:
|
||||
return nil, xerrors.Errorf("type id %v: unknown kind: %v", id, header.Kind())
|
||||
}
|
||||
|
||||
if data == nil {
|
||||
types = append(types, rawType{header, nil})
|
||||
continue
|
||||
}
|
||||
|
||||
if err := binary.Read(r, bo, data); err != nil {
|
||||
return nil, xerrors.Errorf("type id %d: kind %v: can't read %T: %v", id, header.Kind(), data, err)
|
||||
}
|
||||
|
||||
types = append(types, rawType{header, data})
|
||||
}
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
// Package btf handles data encoded according to the BPF Type Format.
|
||||
//
|
||||
// The canonical documentation lives in the Linux kernel repository and is
|
||||
// available at https://www.kernel.org/doc/html/latest/bpf/btf.html
|
||||
//
|
||||
// The API is very much unstable. You should only use this via the main
|
||||
// ebpf library.
|
||||
package btf
|
|
@ -0,0 +1,182 @@
|
|||
package btf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/internal"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
type btfExtHeader struct {
|
||||
Magic uint16
|
||||
Version uint8
|
||||
Flags uint8
|
||||
HdrLen uint32
|
||||
|
||||
FuncInfoOff uint32
|
||||
FuncInfoLen uint32
|
||||
LineInfoOff uint32
|
||||
LineInfoLen uint32
|
||||
}
|
||||
|
||||
func parseExtInfos(r io.ReadSeeker, bo binary.ByteOrder, strings stringTable) (funcInfo, lineInfo map[string]extInfo, err error) {
|
||||
var header btfExtHeader
|
||||
if err := binary.Read(r, bo, &header); err != nil {
|
||||
return nil, nil, xerrors.Errorf("can't read header: %v", err)
|
||||
}
|
||||
|
||||
if header.Magic != btfMagic {
|
||||
return nil, nil, xerrors.Errorf("incorrect magic value %v", header.Magic)
|
||||
}
|
||||
|
||||
if header.Version != 1 {
|
||||
return nil, nil, xerrors.Errorf("unexpected version %v", header.Version)
|
||||
}
|
||||
|
||||
if header.Flags != 0 {
|
||||
return nil, nil, xerrors.Errorf("unsupported flags %v", header.Flags)
|
||||
}
|
||||
|
||||
remainder := int64(header.HdrLen) - int64(binary.Size(&header))
|
||||
if remainder < 0 {
|
||||
return nil, nil, xerrors.New("header is too short")
|
||||
}
|
||||
|
||||
// Of course, the .BTF.ext header has different semantics than the
|
||||
// .BTF ext header. We need to ignore non-null values.
|
||||
_, err = io.CopyN(ioutil.Discard, r, remainder)
|
||||
if err != nil {
|
||||
return nil, nil, xerrors.Errorf("header padding: %v", err)
|
||||
}
|
||||
|
||||
if _, err := r.Seek(int64(header.HdrLen+header.FuncInfoOff), io.SeekStart); err != nil {
|
||||
return nil, nil, xerrors.Errorf("can't seek to function info section: %v", err)
|
||||
}
|
||||
|
||||
funcInfo, err = parseExtInfo(io.LimitReader(r, int64(header.FuncInfoLen)), bo, strings)
|
||||
if err != nil {
|
||||
return nil, nil, xerrors.Errorf("function info: %w", err)
|
||||
}
|
||||
|
||||
if _, err := r.Seek(int64(header.HdrLen+header.LineInfoOff), io.SeekStart); err != nil {
|
||||
return nil, nil, xerrors.Errorf("can't seek to line info section: %v", err)
|
||||
}
|
||||
|
||||
lineInfo, err = parseExtInfo(io.LimitReader(r, int64(header.LineInfoLen)), bo, strings)
|
||||
if err != nil {
|
||||
return nil, nil, xerrors.Errorf("line info: %w", err)
|
||||
}
|
||||
|
||||
return funcInfo, lineInfo, nil
|
||||
}
|
||||
|
||||
type btfExtInfoSec struct {
|
||||
SecNameOff uint32
|
||||
NumInfo uint32
|
||||
}
|
||||
|
||||
type extInfoRecord struct {
|
||||
InsnOff uint64
|
||||
Opaque []byte
|
||||
}
|
||||
|
||||
type extInfo struct {
|
||||
recordSize uint32
|
||||
records []extInfoRecord
|
||||
}
|
||||
|
||||
func (ei extInfo) append(other extInfo, offset uint64) (extInfo, error) {
|
||||
if other.recordSize != ei.recordSize {
|
||||
return extInfo{}, xerrors.Errorf("ext_info record size mismatch, want %d (got %d)", ei.recordSize, other.recordSize)
|
||||
}
|
||||
|
||||
records := make([]extInfoRecord, 0, len(ei.records)+len(other.records))
|
||||
records = append(records, ei.records...)
|
||||
for _, info := range other.records {
|
||||
records = append(records, extInfoRecord{
|
||||
InsnOff: info.InsnOff + offset,
|
||||
Opaque: info.Opaque,
|
||||
})
|
||||
}
|
||||
return extInfo{ei.recordSize, records}, nil
|
||||
}
|
||||
|
||||
func (ei extInfo) MarshalBinary() ([]byte, error) {
|
||||
if len(ei.records) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
buf := bytes.NewBuffer(make([]byte, 0, int(ei.recordSize)*len(ei.records)))
|
||||
for _, info := range ei.records {
|
||||
// The kernel expects offsets in number of raw bpf instructions,
|
||||
// while the ELF tracks it in bytes.
|
||||
insnOff := uint32(info.InsnOff / asm.InstructionSize)
|
||||
if err := binary.Write(buf, internal.NativeEndian, insnOff); err != nil {
|
||||
return nil, xerrors.Errorf("can't write instruction offset: %v", err)
|
||||
}
|
||||
|
||||
buf.Write(info.Opaque)
|
||||
}
|
||||
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
func parseExtInfo(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string]extInfo, error) {
|
||||
var recordSize uint32
|
||||
if err := binary.Read(r, bo, &recordSize); err != nil {
|
||||
return nil, xerrors.Errorf("can't read record size: %v", err)
|
||||
}
|
||||
|
||||
if recordSize < 4 {
|
||||
// Need at least insnOff
|
||||
return nil, xerrors.New("record size too short")
|
||||
}
|
||||
|
||||
result := make(map[string]extInfo)
|
||||
for {
|
||||
var infoHeader btfExtInfoSec
|
||||
if err := binary.Read(r, bo, &infoHeader); err == io.EOF {
|
||||
return result, nil
|
||||
} else if err != nil {
|
||||
return nil, xerrors.Errorf("can't read ext info header: %v", err)
|
||||
}
|
||||
|
||||
secName, err := strings.Lookup(infoHeader.SecNameOff)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't get section name: %w", err)
|
||||
}
|
||||
|
||||
if infoHeader.NumInfo == 0 {
|
||||
return nil, xerrors.Errorf("section %s has invalid number of records", secName)
|
||||
}
|
||||
|
||||
var records []extInfoRecord
|
||||
for i := uint32(0); i < infoHeader.NumInfo; i++ {
|
||||
var byteOff uint32
|
||||
if err := binary.Read(r, bo, &byteOff); err != nil {
|
||||
return nil, xerrors.Errorf("section %v: can't read extended info offset: %v", secName, err)
|
||||
}
|
||||
|
||||
buf := make([]byte, int(recordSize-4))
|
||||
if _, err := io.ReadFull(r, buf); err != nil {
|
||||
return nil, xerrors.Errorf("section %v: can't read record: %v", secName, err)
|
||||
}
|
||||
|
||||
if byteOff%asm.InstructionSize != 0 {
|
||||
return nil, xerrors.Errorf("section %v: offset %v is not aligned with instruction size", secName, byteOff)
|
||||
}
|
||||
|
||||
records = append(records, extInfoRecord{uint64(byteOff), buf})
|
||||
}
|
||||
|
||||
result[secName] = extInfo{
|
||||
recordSize,
|
||||
records,
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
package btf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
type stringTable []byte
|
||||
|
||||
func readStringTable(r io.Reader) (stringTable, error) {
|
||||
contents, err := ioutil.ReadAll(r)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't read string table: %v", err)
|
||||
}
|
||||
|
||||
if len(contents) < 1 {
|
||||
return nil, xerrors.New("string table is empty")
|
||||
}
|
||||
|
||||
if contents[0] != '\x00' {
|
||||
return nil, xerrors.New("first item in string table is non-empty")
|
||||
}
|
||||
|
||||
if contents[len(contents)-1] != '\x00' {
|
||||
return nil, xerrors.New("string table isn't null terminated")
|
||||
}
|
||||
|
||||
return stringTable(contents), nil
|
||||
}
|
||||
|
||||
func (st stringTable) Lookup(offset uint32) (string, error) {
|
||||
if int64(offset) > int64(^uint(0)>>1) {
|
||||
return "", xerrors.Errorf("offset %d overflows int", offset)
|
||||
}
|
||||
|
||||
pos := int(offset)
|
||||
if pos >= len(st) {
|
||||
return "", xerrors.Errorf("offset %d is out of bounds", offset)
|
||||
}
|
||||
|
||||
if pos > 0 && st[pos-1] != '\x00' {
|
||||
return "", xerrors.Errorf("offset %d isn't start of a string", offset)
|
||||
}
|
||||
|
||||
str := st[pos:]
|
||||
end := bytes.IndexByte(str, '\x00')
|
||||
if end == -1 {
|
||||
return "", xerrors.Errorf("offset %d isn't null terminated", offset)
|
||||
}
|
||||
|
||||
return string(str[:end]), nil
|
||||
}
|
||||
|
||||
func (st stringTable) LookupName(offset uint32) (Name, error) {
|
||||
str, err := st.Lookup(offset)
|
||||
return Name(str), err
|
||||
}
|
|
@ -0,0 +1,586 @@
|
|||
package btf
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
const maxTypeDepth = 32
|
||||
|
||||
// TypeID identifies a type in a BTF section.
|
||||
type TypeID uint32
|
||||
|
||||
// ID implements part of the Type interface.
|
||||
func (tid TypeID) ID() TypeID {
|
||||
return tid
|
||||
}
|
||||
|
||||
// Type represents a type described by BTF.
|
||||
type Type interface {
|
||||
ID() TypeID
|
||||
|
||||
// Make a copy of the type, without copying Type members.
|
||||
copy() Type
|
||||
|
||||
walk(*copyStack)
|
||||
}
|
||||
|
||||
// Name identifies a type.
|
||||
//
|
||||
// Anonymous types have an empty name.
|
||||
type Name string
|
||||
|
||||
func (n Name) name() string {
|
||||
return string(n)
|
||||
}
|
||||
|
||||
// Void is the unit type of BTF.
|
||||
type Void struct{}
|
||||
|
||||
func (v Void) ID() TypeID { return 0 }
|
||||
func (v Void) copy() Type { return Void{} }
|
||||
func (v Void) walk(*copyStack) {}
|
||||
|
||||
// Int is an integer of a given length.
|
||||
type Int struct {
|
||||
TypeID
|
||||
Name
|
||||
|
||||
// The size of the integer in bytes.
|
||||
Size uint32
|
||||
}
|
||||
|
||||
func (i *Int) size() uint32 { return i.Size }
|
||||
func (i *Int) walk(*copyStack) {}
|
||||
func (i *Int) copy() Type {
|
||||
cpy := *i
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Pointer is a pointer to another type.
|
||||
type Pointer struct {
|
||||
TypeID
|
||||
Target Type
|
||||
}
|
||||
|
||||
func (p *Pointer) size() uint32 { return 8 }
|
||||
func (p *Pointer) walk(cs *copyStack) { cs.push(&p.Target) }
|
||||
func (p *Pointer) copy() Type {
|
||||
cpy := *p
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Array is an array with a fixed number of elements.
|
||||
type Array struct {
|
||||
TypeID
|
||||
Type Type
|
||||
Nelems uint32
|
||||
}
|
||||
|
||||
func (arr *Array) walk(cs *copyStack) { cs.push(&arr.Type) }
|
||||
func (arr *Array) copy() Type {
|
||||
cpy := *arr
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Struct is a compound type of consecutive members.
|
||||
type Struct struct {
|
||||
TypeID
|
||||
Name
|
||||
// The size of the struct including padding, in bytes
|
||||
Size uint32
|
||||
Members []Member
|
||||
}
|
||||
|
||||
func (s *Struct) size() uint32 { return s.Size }
|
||||
|
||||
func (s *Struct) walk(cs *copyStack) {
|
||||
for i := range s.Members {
|
||||
cs.push(&s.Members[i].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Struct) copy() Type {
|
||||
cpy := *s
|
||||
cpy.Members = make([]Member, len(s.Members))
|
||||
copy(cpy.Members, s.Members)
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Union is a compound type where members occupy the same memory.
|
||||
type Union struct {
|
||||
TypeID
|
||||
Name
|
||||
// The size of the union including padding, in bytes.
|
||||
Size uint32
|
||||
Members []Member
|
||||
}
|
||||
|
||||
func (u *Union) size() uint32 { return u.Size }
|
||||
|
||||
func (u *Union) walk(cs *copyStack) {
|
||||
for i := range u.Members {
|
||||
cs.push(&u.Members[i].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func (u *Union) copy() Type {
|
||||
cpy := *u
|
||||
cpy.Members = make([]Member, len(u.Members))
|
||||
copy(cpy.Members, u.Members)
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Member is part of a Struct or Union.
|
||||
//
|
||||
// It is not a valid Type.
|
||||
type Member struct {
|
||||
Name
|
||||
Type Type
|
||||
Offset uint32
|
||||
}
|
||||
|
||||
// Enum lists possible values.
|
||||
type Enum struct {
|
||||
TypeID
|
||||
Name
|
||||
}
|
||||
|
||||
func (e *Enum) size() uint32 { return 4 }
|
||||
func (e *Enum) walk(*copyStack) {}
|
||||
func (e *Enum) copy() Type {
|
||||
cpy := *e
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Fwd is a forward declaration of a Type.
|
||||
type Fwd struct {
|
||||
TypeID
|
||||
Name
|
||||
}
|
||||
|
||||
func (f *Fwd) walk(*copyStack) {}
|
||||
func (f *Fwd) copy() Type {
|
||||
cpy := *f
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Typedef is an alias of a Type.
|
||||
type Typedef struct {
|
||||
TypeID
|
||||
Name
|
||||
Type Type
|
||||
}
|
||||
|
||||
func (td *Typedef) walk(cs *copyStack) { cs.push(&td.Type) }
|
||||
func (td *Typedef) copy() Type {
|
||||
cpy := *td
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Volatile is a modifier.
|
||||
type Volatile struct {
|
||||
TypeID
|
||||
Type Type
|
||||
}
|
||||
|
||||
func (v *Volatile) walk(cs *copyStack) { cs.push(&v.Type) }
|
||||
func (v *Volatile) copy() Type {
|
||||
cpy := *v
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Const is a modifier.
|
||||
type Const struct {
|
||||
TypeID
|
||||
Type Type
|
||||
}
|
||||
|
||||
func (c *Const) walk(cs *copyStack) { cs.push(&c.Type) }
|
||||
func (c *Const) copy() Type {
|
||||
cpy := *c
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Restrict is a modifier.
|
||||
type Restrict struct {
|
||||
TypeID
|
||||
Type Type
|
||||
}
|
||||
|
||||
func (r *Restrict) walk(cs *copyStack) { cs.push(&r.Type) }
|
||||
func (r *Restrict) copy() Type {
|
||||
cpy := *r
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Func is a function definition.
|
||||
type Func struct {
|
||||
TypeID
|
||||
Name
|
||||
Type Type
|
||||
}
|
||||
|
||||
func (f *Func) walk(cs *copyStack) { cs.push(&f.Type) }
|
||||
func (f *Func) copy() Type {
|
||||
cpy := *f
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// FuncProto is a function declaration.
|
||||
type FuncProto struct {
|
||||
TypeID
|
||||
Return Type
|
||||
// Parameters not supported yet
|
||||
}
|
||||
|
||||
func (fp *FuncProto) walk(cs *copyStack) { cs.push(&fp.Return) }
|
||||
func (fp *FuncProto) copy() Type {
|
||||
cpy := *fp
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Var is a global variable.
|
||||
type Var struct {
|
||||
TypeID
|
||||
Name
|
||||
Type Type
|
||||
}
|
||||
|
||||
func (v *Var) walk(cs *copyStack) { cs.push(&v.Type) }
|
||||
func (v *Var) copy() Type {
|
||||
cpy := *v
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Datasec is a global program section containing data.
|
||||
type Datasec struct {
|
||||
TypeID
|
||||
Name
|
||||
Size uint32
|
||||
Vars []VarSecinfo
|
||||
}
|
||||
|
||||
func (ds *Datasec) size() uint32 { return ds.Size }
|
||||
|
||||
func (ds *Datasec) walk(cs *copyStack) {
|
||||
for i := range ds.Vars {
|
||||
cs.push(&ds.Vars[i].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func (ds *Datasec) copy() Type {
|
||||
cpy := *ds
|
||||
cpy.Vars = make([]VarSecinfo, len(ds.Vars))
|
||||
copy(cpy.Vars, ds.Vars)
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// VarSecinfo describes variable in a Datasec
|
||||
type VarSecinfo struct {
|
||||
Type Type
|
||||
Offset uint32
|
||||
Size uint32
|
||||
}
|
||||
|
||||
type sizer interface {
|
||||
size() uint32
|
||||
}
|
||||
|
||||
var (
|
||||
_ sizer = (*Int)(nil)
|
||||
_ sizer = (*Pointer)(nil)
|
||||
_ sizer = (*Struct)(nil)
|
||||
_ sizer = (*Union)(nil)
|
||||
_ sizer = (*Enum)(nil)
|
||||
_ sizer = (*Datasec)(nil)
|
||||
)
|
||||
|
||||
// Sizeof returns the size of a type in bytes.
|
||||
//
|
||||
// Returns an error if the size can't be computed.
|
||||
func Sizeof(typ Type) (int, error) {
|
||||
var (
|
||||
n = int64(1)
|
||||
elem int64
|
||||
)
|
||||
|
||||
for i := 0; i < maxTypeDepth; i++ {
|
||||
switch v := typ.(type) {
|
||||
case *Array:
|
||||
if n > 0 && int64(v.Nelems) > math.MaxInt64/n {
|
||||
return 0, xerrors.New("overflow")
|
||||
}
|
||||
|
||||
// Arrays may be of zero length, which allows
|
||||
// n to be zero as well.
|
||||
n *= int64(v.Nelems)
|
||||
typ = v.Type
|
||||
continue
|
||||
|
||||
case sizer:
|
||||
elem = int64(v.size())
|
||||
|
||||
case *Typedef:
|
||||
typ = v.Type
|
||||
continue
|
||||
case *Volatile:
|
||||
typ = v.Type
|
||||
continue
|
||||
case *Const:
|
||||
typ = v.Type
|
||||
continue
|
||||
case *Restrict:
|
||||
typ = v.Type
|
||||
continue
|
||||
|
||||
default:
|
||||
return 0, xerrors.Errorf("unrecognized type %T", typ)
|
||||
}
|
||||
|
||||
if n > 0 && elem > math.MaxInt64/n {
|
||||
return 0, xerrors.New("overflow")
|
||||
}
|
||||
|
||||
size := n * elem
|
||||
if int64(int(size)) != size {
|
||||
return 0, xerrors.New("overflow")
|
||||
}
|
||||
|
||||
return int(size), nil
|
||||
}
|
||||
|
||||
return 0, xerrors.New("exceeded type depth")
|
||||
}
|
||||
|
||||
// copy a Type recursively.
|
||||
//
|
||||
// typ may form a cycle.
|
||||
func copyType(typ Type) Type {
|
||||
var (
|
||||
copies = make(map[Type]Type)
|
||||
work copyStack
|
||||
)
|
||||
|
||||
for t := &typ; t != nil; t = work.pop() {
|
||||
// *t is the identity of the type.
|
||||
if cpy := copies[*t]; cpy != nil {
|
||||
*t = cpy
|
||||
continue
|
||||
}
|
||||
|
||||
cpy := (*t).copy()
|
||||
copies[*t] = cpy
|
||||
*t = cpy
|
||||
|
||||
// Mark any nested types for copying.
|
||||
cpy.walk(&work)
|
||||
}
|
||||
|
||||
return typ
|
||||
}
|
||||
|
||||
// copyStack keeps track of pointers to types which still
|
||||
// need to be copied.
|
||||
type copyStack []*Type
|
||||
|
||||
// push adds a type to the stack.
|
||||
func (cs *copyStack) push(t *Type) {
|
||||
*cs = append(*cs, t)
|
||||
}
|
||||
|
||||
// pop returns the topmost Type, or nil.
|
||||
func (cs *copyStack) pop() *Type {
|
||||
n := len(*cs)
|
||||
if n == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
t := (*cs)[n-1]
|
||||
*cs = (*cs)[:n-1]
|
||||
return t
|
||||
}
|
||||
|
||||
type namer interface {
|
||||
name() string
|
||||
}
|
||||
|
||||
var _ namer = Name("")
|
||||
|
||||
// inflateRawTypes takes a list of raw btf types linked via type IDs, and turns
|
||||
// it into a graph of Types connected via pointers.
|
||||
//
|
||||
// Returns a map of named types (so, where NameOff is non-zero). Since BTF ignores
|
||||
// compilation units, multiple types may share the same name. A Type may form a
|
||||
// cyclic graph by pointing at itself.
|
||||
func inflateRawTypes(rawTypes []rawType, rawStrings stringTable) (namedTypes map[string][]Type, err error) {
|
||||
type fixupDef struct {
|
||||
id TypeID
|
||||
expectedKind btfKind
|
||||
typ *Type
|
||||
}
|
||||
|
||||
var fixups []fixupDef
|
||||
fixup := func(id TypeID, expectedKind btfKind, typ *Type) {
|
||||
fixups = append(fixups, fixupDef{id, expectedKind, typ})
|
||||
}
|
||||
|
||||
convertMembers := func(raw []btfMember) ([]Member, error) {
|
||||
// NB: The fixup below relies on pre-allocating this array to
|
||||
// work, since otherwise append might re-allocate members.
|
||||
members := make([]Member, 0, len(raw))
|
||||
for i, btfMember := range raw {
|
||||
name, err := rawStrings.LookupName(btfMember.NameOff)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't get name for member %d: %w", i, err)
|
||||
}
|
||||
members = append(members, Member{
|
||||
Name: name,
|
||||
Offset: btfMember.Offset,
|
||||
})
|
||||
}
|
||||
for i := range members {
|
||||
fixup(raw[i].Type, kindUnknown, &members[i].Type)
|
||||
}
|
||||
return members, nil
|
||||
}
|
||||
|
||||
types := make([]Type, 0, len(rawTypes))
|
||||
types = append(types, Void{})
|
||||
namedTypes = make(map[string][]Type)
|
||||
|
||||
for i, raw := range rawTypes {
|
||||
var (
|
||||
// Void is defined to always be type ID 0, and is thus
|
||||
// omitted from BTF.
|
||||
id = TypeID(i + 1)
|
||||
typ Type
|
||||
)
|
||||
|
||||
name, err := rawStrings.LookupName(raw.NameOff)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't get name for type id %d: %w", id, err)
|
||||
}
|
||||
|
||||
switch raw.Kind() {
|
||||
case kindInt:
|
||||
typ = &Int{id, name, raw.Size()}
|
||||
|
||||
case kindPointer:
|
||||
ptr := &Pointer{id, nil}
|
||||
fixup(raw.Type(), kindUnknown, &ptr.Target)
|
||||
typ = ptr
|
||||
|
||||
case kindArray:
|
||||
btfArr := raw.data.(*btfArray)
|
||||
|
||||
// IndexType is unused according to btf.rst.
|
||||
// Don't make it available right now.
|
||||
arr := &Array{id, nil, btfArr.Nelems}
|
||||
fixup(btfArr.Type, kindUnknown, &arr.Type)
|
||||
typ = arr
|
||||
|
||||
case kindStruct:
|
||||
members, err := convertMembers(raw.data.([]btfMember))
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("struct %s (id %d): %w", name, id, err)
|
||||
}
|
||||
typ = &Struct{id, name, raw.Size(), members}
|
||||
|
||||
case kindUnion:
|
||||
members, err := convertMembers(raw.data.([]btfMember))
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("union %s (id %d): %w", name, id, err)
|
||||
}
|
||||
typ = &Union{id, name, raw.Size(), members}
|
||||
|
||||
case kindEnum:
|
||||
typ = &Enum{id, name}
|
||||
|
||||
case kindForward:
|
||||
typ = &Fwd{id, name}
|
||||
|
||||
case kindTypedef:
|
||||
typedef := &Typedef{id, name, nil}
|
||||
fixup(raw.Type(), kindUnknown, &typedef.Type)
|
||||
typ = typedef
|
||||
|
||||
case kindVolatile:
|
||||
volatile := &Volatile{id, nil}
|
||||
fixup(raw.Type(), kindUnknown, &volatile.Type)
|
||||
typ = volatile
|
||||
|
||||
case kindConst:
|
||||
cnst := &Const{id, nil}
|
||||
fixup(raw.Type(), kindUnknown, &cnst.Type)
|
||||
typ = cnst
|
||||
|
||||
case kindRestrict:
|
||||
restrict := &Restrict{id, nil}
|
||||
fixup(raw.Type(), kindUnknown, &restrict.Type)
|
||||
typ = restrict
|
||||
|
||||
case kindFunc:
|
||||
fn := &Func{id, name, nil}
|
||||
fixup(raw.Type(), kindFuncProto, &fn.Type)
|
||||
typ = fn
|
||||
|
||||
case kindFuncProto:
|
||||
fp := &FuncProto{id, nil}
|
||||
fixup(raw.Type(), kindUnknown, &fp.Return)
|
||||
typ = fp
|
||||
|
||||
case kindVar:
|
||||
v := &Var{id, name, nil}
|
||||
fixup(raw.Type(), kindUnknown, &v.Type)
|
||||
typ = v
|
||||
|
||||
case kindDatasec:
|
||||
btfVars := raw.data.([]btfVarSecinfo)
|
||||
vars := make([]VarSecinfo, 0, len(btfVars))
|
||||
for _, btfVar := range btfVars {
|
||||
vars = append(vars, VarSecinfo{
|
||||
Offset: btfVar.Offset,
|
||||
Size: btfVar.Size,
|
||||
})
|
||||
}
|
||||
for i := range vars {
|
||||
fixup(btfVars[i].Type, kindVar, &vars[i].Type)
|
||||
}
|
||||
typ = &Datasec{id, name, raw.SizeType, vars}
|
||||
|
||||
default:
|
||||
return nil, xerrors.Errorf("type id %d: unknown kind: %v", id, raw.Kind())
|
||||
}
|
||||
|
||||
types = append(types, typ)
|
||||
|
||||
if namer, ok := typ.(namer); ok {
|
||||
if name := namer.name(); name != "" {
|
||||
namedTypes[name] = append(namedTypes[name], typ)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, fixup := range fixups {
|
||||
i := int(fixup.id)
|
||||
if i >= len(types) {
|
||||
return nil, xerrors.Errorf("reference to invalid type id: %d", fixup.id)
|
||||
}
|
||||
|
||||
// Default void (id 0) to unknown
|
||||
rawKind := kindUnknown
|
||||
if i > 0 {
|
||||
rawKind = rawTypes[i-1].Kind()
|
||||
}
|
||||
|
||||
if expected := fixup.expectedKind; expected != kindUnknown && rawKind != expected {
|
||||
return nil, xerrors.Errorf("expected type id %d to have kind %s, found %s", fixup.id, expected, rawKind)
|
||||
}
|
||||
|
||||
*fixup.typ = types[i]
|
||||
}
|
||||
|
||||
return namedTypes, nil
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var sysCPU struct {
|
||||
once sync.Once
|
||||
err error
|
||||
num int
|
||||
}
|
||||
|
||||
// PossibleCPUs returns the max number of CPUs a system may possibly have
|
||||
// Logical CPU numbers must be of the form 0-n
|
||||
func PossibleCPUs() (int, error) {
|
||||
sysCPU.once.Do(func() {
|
||||
sysCPU.num, sysCPU.err = parseCPUsFromFile("/sys/devices/system/cpu/possible")
|
||||
})
|
||||
|
||||
return sysCPU.num, sysCPU.err
|
||||
}
|
||||
|
||||
func parseCPUsFromFile(path string) (int, error) {
|
||||
spec, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
n, err := parseCPUs(string(spec))
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("can't parse %s: %v", path, err)
|
||||
}
|
||||
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// parseCPUs parses the number of cpus from a string produced
|
||||
// by bitmap_list_string() in the Linux kernel.
|
||||
// Multiple ranges are rejected, since they can't be unified
|
||||
// into a single number.
|
||||
// This is the format of /sys/devices/system/cpu/possible, it
|
||||
// is not suitable for /sys/devices/system/cpu/online, etc.
|
||||
func parseCPUs(spec string) (int, error) {
|
||||
if strings.Trim(spec, "\n") == "0" {
|
||||
return 1, nil
|
||||
}
|
||||
|
||||
var low, high int
|
||||
n, err := fmt.Sscanf(spec, "%d-%d\n", &low, &high)
|
||||
if n != 2 || err != nil {
|
||||
return 0, fmt.Errorf("invalid format: %s", spec)
|
||||
}
|
||||
if low != 0 {
|
||||
return 0, fmt.Errorf("CPU spec doesn't start at zero: %s", spec)
|
||||
}
|
||||
|
||||
// cpus is 0 indexed
|
||||
return high + 1, nil
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
|
||||
// depending on the host's endianness.
|
||||
var NativeEndian binary.ByteOrder
|
||||
|
||||
func init() {
|
||||
if isBigEndian() {
|
||||
NativeEndian = binary.BigEndian
|
||||
} else {
|
||||
NativeEndian = binary.LittleEndian
|
||||
}
|
||||
}
|
||||
|
||||
func isBigEndian() (ret bool) {
|
||||
i := int(0x1)
|
||||
bs := (*[int(unsafe.Sizeof(i))]byte)(unsafe.Pointer(&i))
|
||||
return bs[0] == 0
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/cilium/ebpf/internal/unix"
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// ErrorWithLog returns an error that includes logs from the
|
||||
// kernel verifier.
|
||||
//
|
||||
// logErr should be the error returned by the syscall that generated
|
||||
// the log. It is used to check for truncation of the output.
|
||||
func ErrorWithLog(err error, log []byte, logErr error) error {
|
||||
logStr := strings.Trim(CString(log), "\t\r\n ")
|
||||
if xerrors.Is(logErr, unix.ENOSPC) {
|
||||
logStr += " (truncated...)"
|
||||
}
|
||||
|
||||
return &VerifierError{err, logStr}
|
||||
}
|
||||
|
||||
// VerifierError includes information from the eBPF verifier.
|
||||
type VerifierError struct {
|
||||
cause error
|
||||
log string
|
||||
}
|
||||
|
||||
func (le *VerifierError) Error() string {
|
||||
if le.log == "" {
|
||||
return le.cause.Error()
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s: %s", le.cause, le.log)
|
||||
}
|
||||
|
||||
// CString turns a NUL / zero terminated byte buffer into a string.
|
||||
func CString(in []byte) string {
|
||||
inLen := bytes.IndexByte(in, 0)
|
||||
if inLen == -1 {
|
||||
return ""
|
||||
}
|
||||
return string(in[:inLen])
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"os"
|
||||
"runtime"
|
||||
"strconv"
|
||||
|
||||
"github.com/cilium/ebpf/internal/unix"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
var ErrClosedFd = xerrors.New("use of closed file descriptor")
|
||||
|
||||
type FD struct {
|
||||
raw int64
|
||||
}
|
||||
|
||||
func NewFD(value uint32) *FD {
|
||||
fd := &FD{int64(value)}
|
||||
runtime.SetFinalizer(fd, (*FD).Close)
|
||||
return fd
|
||||
}
|
||||
|
||||
func (fd *FD) String() string {
|
||||
return strconv.FormatInt(fd.raw, 10)
|
||||
}
|
||||
|
||||
func (fd *FD) Value() (uint32, error) {
|
||||
if fd.raw < 0 {
|
||||
return 0, ErrClosedFd
|
||||
}
|
||||
|
||||
return uint32(fd.raw), nil
|
||||
}
|
||||
|
||||
func (fd *FD) Close() error {
|
||||
if fd.raw < 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
value := int(fd.raw)
|
||||
fd.raw = -1
|
||||
|
||||
fd.Forget()
|
||||
return unix.Close(value)
|
||||
}
|
||||
|
||||
func (fd *FD) Forget() {
|
||||
runtime.SetFinalizer(fd, nil)
|
||||
}
|
||||
|
||||
func (fd *FD) Dup() (*FD, error) {
|
||||
if fd.raw < 0 {
|
||||
return nil, ErrClosedFd
|
||||
}
|
||||
|
||||
dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't dup fd: %v", err)
|
||||
}
|
||||
|
||||
return NewFD(uint32(dup)), nil
|
||||
}
|
||||
|
||||
func (fd *FD) File(name string) *os.File {
|
||||
fd.Forget()
|
||||
return os.NewFile(uintptr(fd.raw), name)
|
||||
}
|
|
@ -0,0 +1,123 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// ErrNotSupported indicates that a feature is not supported by the current kernel.
|
||||
var ErrNotSupported = xerrors.New("not supported")
|
||||
|
||||
// UnsupportedFeatureError is returned by FeatureTest() functions.
|
||||
type UnsupportedFeatureError struct {
|
||||
// The minimum Linux mainline version required for this feature.
|
||||
// Used for the error string, and for sanity checking during testing.
|
||||
MinimumVersion Version
|
||||
|
||||
// The name of the feature that isn't supported.
|
||||
Name string
|
||||
}
|
||||
|
||||
func (ufe *UnsupportedFeatureError) Error() string {
|
||||
return fmt.Sprintf("%s not supported (requires >= %s)", ufe.Name, ufe.MinimumVersion)
|
||||
}
|
||||
|
||||
// Is indicates that UnsupportedFeatureError is ErrNotSupported.
|
||||
func (ufe *UnsupportedFeatureError) Is(target error) bool {
|
||||
return target == ErrNotSupported
|
||||
}
|
||||
|
||||
type featureTest struct {
|
||||
sync.Mutex
|
||||
successful bool
|
||||
result error
|
||||
}
|
||||
|
||||
// FeatureTestFn is used to determine whether the kernel supports
|
||||
// a certain feature.
|
||||
//
|
||||
// The return values have the following semantics:
|
||||
//
|
||||
// err != nil: the test couldn't be executed
|
||||
// err == nil && available: the feature is available
|
||||
// err == nil && !available: the feature isn't available
|
||||
type FeatureTestFn func() (available bool, err error)
|
||||
|
||||
// FeatureTest wraps a function so that it is run at most once.
|
||||
//
|
||||
// name should identify the tested feature, while version must be in the
|
||||
// form Major.Minor[.Patch].
|
||||
//
|
||||
// Returns an error wrapping ErrNotSupported if the feature is not supported.
|
||||
func FeatureTest(name, version string, fn FeatureTestFn) func() error {
|
||||
v, err := NewVersion(version)
|
||||
if err != nil {
|
||||
return func() error { return err }
|
||||
}
|
||||
|
||||
ft := new(featureTest)
|
||||
return func() error {
|
||||
ft.Lock()
|
||||
defer ft.Unlock()
|
||||
|
||||
if ft.successful {
|
||||
return ft.result
|
||||
}
|
||||
|
||||
available, err := fn()
|
||||
if xerrors.Is(err, ErrNotSupported) {
|
||||
// The feature test aborted because a dependent feature
|
||||
// is missing, which we should cache.
|
||||
available = false
|
||||
} else if err != nil {
|
||||
// We couldn't execute the feature test to a point
|
||||
// where it could make a determination.
|
||||
// Don't cache the result, just return it.
|
||||
return xerrors.Errorf("can't detect support for %s: %w", name, err)
|
||||
}
|
||||
|
||||
ft.successful = true
|
||||
if !available {
|
||||
ft.result = &UnsupportedFeatureError{
|
||||
MinimumVersion: v,
|
||||
Name: name,
|
||||
}
|
||||
}
|
||||
return ft.result
|
||||
}
|
||||
}
|
||||
|
||||
// A Version in the form Major.Minor.Patch.
|
||||
type Version [3]uint16
|
||||
|
||||
// NewVersion creates a version from a string like "Major.Minor.Patch".
|
||||
//
|
||||
// Patch is optional.
|
||||
func NewVersion(ver string) (Version, error) {
|
||||
var major, minor, patch uint16
|
||||
n, _ := fmt.Sscanf(ver, "%d.%d.%d", &major, &minor, &patch)
|
||||
if n < 2 {
|
||||
return Version{}, xerrors.Errorf("invalid version: %s", ver)
|
||||
}
|
||||
return Version{major, minor, patch}, nil
|
||||
}
|
||||
|
||||
func (v Version) String() string {
|
||||
if v[2] == 0 {
|
||||
return fmt.Sprintf("v%d.%d", v[0], v[1])
|
||||
}
|
||||
return fmt.Sprintf("v%d.%d.%d", v[0], v[1], v[2])
|
||||
}
|
||||
|
||||
// Less returns true if the version is less than another version.
|
||||
func (v Version) Less(other Version) bool {
|
||||
for i, a := range v {
|
||||
if a == other[i] {
|
||||
continue
|
||||
}
|
||||
return a < other[i]
|
||||
}
|
||||
return false
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
package internal
|
||||
|
||||
import "golang.org/x/xerrors"
|
||||
|
||||
// DiscardZeroes makes sure that all written bytes are zero
|
||||
// before discarding them.
|
||||
type DiscardZeroes struct{}
|
||||
|
||||
func (DiscardZeroes) Write(p []byte) (int, error) {
|
||||
for _, b := range p {
|
||||
if b != 0 {
|
||||
return 0, xerrors.New("encountered non-zero byte")
|
||||
}
|
||||
}
|
||||
return len(p), nil
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
package internal
|
||||
|
||||
import "unsafe"
|
||||
|
||||
// NewPointer creates a 64-bit pointer from an unsafe Pointer.
|
||||
func NewPointer(ptr unsafe.Pointer) Pointer {
|
||||
return Pointer{ptr: ptr}
|
||||
}
|
||||
|
||||
// NewSlicePointer creates a 64-bit pointer from a byte slice.
|
||||
func NewSlicePointer(buf []byte) Pointer {
|
||||
if len(buf) == 0 {
|
||||
return Pointer{}
|
||||
}
|
||||
|
||||
return Pointer{ptr: unsafe.Pointer(&buf[0])}
|
||||
}
|
||||
|
||||
// NewStringPointer creates a 64-bit pointer from a string.
|
||||
func NewStringPointer(str string) Pointer {
|
||||
if str == "" {
|
||||
return Pointer{}
|
||||
}
|
||||
|
||||
// The kernel expects strings to be zero terminated
|
||||
buf := make([]byte, len(str)+1)
|
||||
copy(buf, str)
|
||||
|
||||
return Pointer{ptr: unsafe.Pointer(&buf[0])}
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
// +build armbe mips mips64p32
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Pointer wraps an unsafe.Pointer to be 64bit to
|
||||
// conform to the syscall specification.
|
||||
type Pointer struct {
|
||||
pad uint32
|
||||
ptr unsafe.Pointer
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
// +build 386 amd64p32 arm mipsle mips64p32le
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Pointer wraps an unsafe.Pointer to be 64bit to
|
||||
// conform to the syscall specification.
|
||||
type Pointer struct {
|
||||
ptr unsafe.Pointer
|
||||
pad uint32
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le
|
||||
// +build !armbe,!mips,!mips64p32
|
||||
|
||||
package internal
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Pointer wraps an unsafe.Pointer to be 64bit to
|
||||
// conform to the syscall specification.
|
||||
type Pointer struct {
|
||||
ptr unsafe.Pointer
|
||||
}
|
|
@ -0,0 +1,139 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cilium/ebpf/internal/unix"
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
//go:generate stringer -output syscall_string.go -type=BPFCmd
|
||||
|
||||
// BPFCmd identifies a subcommand of the bpf syscall.
|
||||
type BPFCmd int
|
||||
|
||||
// Well known BPF commands.
|
||||
const (
|
||||
BPF_MAP_CREATE BPFCmd = iota
|
||||
BPF_MAP_LOOKUP_ELEM
|
||||
BPF_MAP_UPDATE_ELEM
|
||||
BPF_MAP_DELETE_ELEM
|
||||
BPF_MAP_GET_NEXT_KEY
|
||||
BPF_PROG_LOAD
|
||||
BPF_OBJ_PIN
|
||||
BPF_OBJ_GET
|
||||
BPF_PROG_ATTACH
|
||||
BPF_PROG_DETACH
|
||||
BPF_PROG_TEST_RUN
|
||||
BPF_PROG_GET_NEXT_ID
|
||||
BPF_MAP_GET_NEXT_ID
|
||||
BPF_PROG_GET_FD_BY_ID
|
||||
BPF_MAP_GET_FD_BY_ID
|
||||
BPF_OBJ_GET_INFO_BY_FD
|
||||
BPF_PROG_QUERY
|
||||
BPF_RAW_TRACEPOINT_OPEN
|
||||
BPF_BTF_LOAD
|
||||
BPF_BTF_GET_FD_BY_ID
|
||||
BPF_TASK_FD_QUERY
|
||||
BPF_MAP_LOOKUP_AND_DELETE_ELEM
|
||||
BPF_MAP_FREEZE
|
||||
BPF_BTF_GET_NEXT_ID
|
||||
BPF_MAP_LOOKUP_BATCH
|
||||
BPF_MAP_LOOKUP_AND_DELETE_BATCH
|
||||
BPF_MAP_UPDATE_BATCH
|
||||
BPF_MAP_DELETE_BATCH
|
||||
BPF_LINK_CREATE
|
||||
BPF_LINK_UPDATE
|
||||
BPF_LINK_GET_FD_BY_ID
|
||||
BPF_LINK_GET_NEXT_ID
|
||||
BPF_ENABLE_STATS
|
||||
BPF_ITER_CREATE
|
||||
)
|
||||
|
||||
// BPF wraps SYS_BPF.
|
||||
//
|
||||
// Any pointers contained in attr must use the Pointer type from this package.
|
||||
func BPF(cmd BPFCmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
|
||||
r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
|
||||
runtime.KeepAlive(attr)
|
||||
|
||||
var err error
|
||||
if errNo != 0 {
|
||||
err = errNo
|
||||
}
|
||||
|
||||
return r1, err
|
||||
}
|
||||
|
||||
type BPFProgAttachAttr struct {
|
||||
TargetFd uint32
|
||||
AttachBpfFd uint32
|
||||
AttachType uint32
|
||||
AttachFlags uint32
|
||||
ReplaceBpfFd uint32
|
||||
}
|
||||
|
||||
func BPFProgAttach(attr *BPFProgAttachAttr) error {
|
||||
_, err := BPF(BPF_PROG_ATTACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
|
||||
return err
|
||||
}
|
||||
|
||||
type BPFProgDetachAttr struct {
|
||||
TargetFd uint32
|
||||
AttachBpfFd uint32
|
||||
AttachType uint32
|
||||
}
|
||||
|
||||
func BPFProgDetach(attr *BPFProgDetachAttr) error {
|
||||
_, err := BPF(BPF_PROG_DETACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
|
||||
return err
|
||||
}
|
||||
|
||||
type bpfObjAttr struct {
|
||||
fileName Pointer
|
||||
fd uint32
|
||||
fileFlags uint32
|
||||
}
|
||||
|
||||
const bpfFSType = 0xcafe4a11
|
||||
|
||||
// BPFObjPin wraps BPF_OBJ_PIN.
|
||||
func BPFObjPin(fileName string, fd *FD) error {
|
||||
dirName := filepath.Dir(fileName)
|
||||
var statfs unix.Statfs_t
|
||||
if err := unix.Statfs(dirName, &statfs); err != nil {
|
||||
return err
|
||||
}
|
||||
if uint64(statfs.Type) != bpfFSType {
|
||||
return xerrors.Errorf("%s is not on a bpf filesystem", fileName)
|
||||
}
|
||||
|
||||
value, err := fd.Value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfObjAttr{
|
||||
fileName: NewStringPointer(fileName),
|
||||
fd: value,
|
||||
}
|
||||
_, err = BPF(BPF_OBJ_PIN, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
if err != nil {
|
||||
return xerrors.Errorf("pin object %s: %w", fileName, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// BPFObjGet wraps BPF_OBJ_GET.
|
||||
func BPFObjGet(fileName string) (*FD, error) {
|
||||
attr := bpfObjAttr{
|
||||
fileName: NewStringPointer(fileName),
|
||||
}
|
||||
ptr, err := BPF(BPF_OBJ_GET, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("get object %s: %w", fileName, err)
|
||||
}
|
||||
return NewFD(uint32(ptr)), nil
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
// Code generated by "stringer -output syscall_string.go -type=BPFCmd"; DO NOT EDIT.
|
||||
|
||||
package internal
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[BPF_MAP_CREATE-0]
|
||||
_ = x[BPF_MAP_LOOKUP_ELEM-1]
|
||||
_ = x[BPF_MAP_UPDATE_ELEM-2]
|
||||
_ = x[BPF_MAP_DELETE_ELEM-3]
|
||||
_ = x[BPF_MAP_GET_NEXT_KEY-4]
|
||||
_ = x[BPF_PROG_LOAD-5]
|
||||
_ = x[BPF_OBJ_PIN-6]
|
||||
_ = x[BPF_OBJ_GET-7]
|
||||
_ = x[BPF_PROG_ATTACH-8]
|
||||
_ = x[BPF_PROG_DETACH-9]
|
||||
_ = x[BPF_PROG_TEST_RUN-10]
|
||||
_ = x[BPF_PROG_GET_NEXT_ID-11]
|
||||
_ = x[BPF_MAP_GET_NEXT_ID-12]
|
||||
_ = x[BPF_PROG_GET_FD_BY_ID-13]
|
||||
_ = x[BPF_MAP_GET_FD_BY_ID-14]
|
||||
_ = x[BPF_OBJ_GET_INFO_BY_FD-15]
|
||||
_ = x[BPF_PROG_QUERY-16]
|
||||
_ = x[BPF_RAW_TRACEPOINT_OPEN-17]
|
||||
_ = x[BPF_BTF_LOAD-18]
|
||||
_ = x[BPF_BTF_GET_FD_BY_ID-19]
|
||||
_ = x[BPF_TASK_FD_QUERY-20]
|
||||
_ = x[BPF_MAP_LOOKUP_AND_DELETE_ELEM-21]
|
||||
_ = x[BPF_MAP_FREEZE-22]
|
||||
_ = x[BPF_BTF_GET_NEXT_ID-23]
|
||||
_ = x[BPF_MAP_LOOKUP_BATCH-24]
|
||||
_ = x[BPF_MAP_LOOKUP_AND_DELETE_BATCH-25]
|
||||
_ = x[BPF_MAP_UPDATE_BATCH-26]
|
||||
_ = x[BPF_MAP_DELETE_BATCH-27]
|
||||
_ = x[BPF_LINK_CREATE-28]
|
||||
_ = x[BPF_LINK_UPDATE-29]
|
||||
_ = x[BPF_LINK_GET_FD_BY_ID-30]
|
||||
_ = x[BPF_LINK_GET_NEXT_ID-31]
|
||||
_ = x[BPF_ENABLE_STATS-32]
|
||||
_ = x[BPF_ITER_CREATE-33]
|
||||
}
|
||||
|
||||
const _BPFCmd_name = "BPF_MAP_CREATEBPF_MAP_LOOKUP_ELEMBPF_MAP_UPDATE_ELEMBPF_MAP_DELETE_ELEMBPF_MAP_GET_NEXT_KEYBPF_PROG_LOADBPF_OBJ_PINBPF_OBJ_GETBPF_PROG_ATTACHBPF_PROG_DETACHBPF_PROG_TEST_RUNBPF_PROG_GET_NEXT_IDBPF_MAP_GET_NEXT_IDBPF_PROG_GET_FD_BY_IDBPF_MAP_GET_FD_BY_IDBPF_OBJ_GET_INFO_BY_FDBPF_PROG_QUERYBPF_RAW_TRACEPOINT_OPENBPF_BTF_LOADBPF_BTF_GET_FD_BY_IDBPF_TASK_FD_QUERYBPF_MAP_LOOKUP_AND_DELETE_ELEMBPF_MAP_FREEZEBPF_BTF_GET_NEXT_IDBPF_MAP_LOOKUP_BATCHBPF_MAP_LOOKUP_AND_DELETE_BATCHBPF_MAP_UPDATE_BATCHBPF_MAP_DELETE_BATCHBPF_LINK_CREATEBPF_LINK_UPDATEBPF_LINK_GET_FD_BY_IDBPF_LINK_GET_NEXT_IDBPF_ENABLE_STATSBPF_ITER_CREATE"
|
||||
|
||||
var _BPFCmd_index = [...]uint16{0, 14, 33, 52, 71, 91, 104, 115, 126, 141, 156, 173, 193, 212, 233, 253, 275, 289, 312, 324, 344, 361, 391, 405, 424, 444, 475, 495, 515, 530, 545, 566, 586, 602, 617}
|
||||
|
||||
func (i BPFCmd) String() string {
|
||||
if i < 0 || i >= BPFCmd(len(_BPFCmd_index)-1) {
|
||||
return "BPFCmd(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _BPFCmd_name[_BPFCmd_index[i]:_BPFCmd_index[i+1]]
|
||||
}
|
|
@ -0,0 +1,150 @@
|
|||
// +build linux
|
||||
|
||||
package unix
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
|
||||
linux "golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
ENOENT = linux.ENOENT
|
||||
EEXIST = linux.EEXIST
|
||||
EAGAIN = linux.EAGAIN
|
||||
ENOSPC = linux.ENOSPC
|
||||
EINVAL = linux.EINVAL
|
||||
EPOLLIN = linux.EPOLLIN
|
||||
EINTR = linux.EINTR
|
||||
EPERM = linux.EPERM
|
||||
ESRCH = linux.ESRCH
|
||||
ENODEV = linux.ENODEV
|
||||
BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG
|
||||
BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG
|
||||
BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN
|
||||
BPF_TAG_SIZE = linux.BPF_TAG_SIZE
|
||||
SYS_BPF = linux.SYS_BPF
|
||||
F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC
|
||||
EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD
|
||||
EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC
|
||||
O_CLOEXEC = linux.O_CLOEXEC
|
||||
O_NONBLOCK = linux.O_NONBLOCK
|
||||
PROT_READ = linux.PROT_READ
|
||||
PROT_WRITE = linux.PROT_WRITE
|
||||
MAP_SHARED = linux.MAP_SHARED
|
||||
PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE
|
||||
PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT
|
||||
PerfBitWatermark = linux.PerfBitWatermark
|
||||
PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW
|
||||
PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC
|
||||
RLIM_INFINITY = linux.RLIM_INFINITY
|
||||
RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK
|
||||
)
|
||||
|
||||
// Statfs_t is a wrapper
|
||||
type Statfs_t = linux.Statfs_t
|
||||
|
||||
// Rlimit is a wrapper
|
||||
type Rlimit = linux.Rlimit
|
||||
|
||||
// Setrlimit is a wrapper
|
||||
func Setrlimit(resource int, rlim *Rlimit) (err error) {
|
||||
return linux.Setrlimit(resource, rlim)
|
||||
}
|
||||
|
||||
// Syscall is a wrapper
|
||||
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
|
||||
return linux.Syscall(trap, a1, a2, a3)
|
||||
}
|
||||
|
||||
// FcntlInt is a wrapper
|
||||
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
|
||||
return linux.FcntlInt(fd, cmd, arg)
|
||||
}
|
||||
|
||||
// Statfs is a wrapper
|
||||
func Statfs(path string, buf *Statfs_t) (err error) {
|
||||
return linux.Statfs(path, buf)
|
||||
}
|
||||
|
||||
// Close is a wrapper
|
||||
func Close(fd int) (err error) {
|
||||
return linux.Close(fd)
|
||||
}
|
||||
|
||||
// EpollEvent is a wrapper
|
||||
type EpollEvent = linux.EpollEvent
|
||||
|
||||
// EpollWait is a wrapper
|
||||
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
|
||||
return linux.EpollWait(epfd, events, msec)
|
||||
}
|
||||
|
||||
// EpollCtl is a wrapper
|
||||
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
|
||||
return linux.EpollCtl(epfd, op, fd, event)
|
||||
}
|
||||
|
||||
// Eventfd is a wrapper
|
||||
func Eventfd(initval uint, flags int) (fd int, err error) {
|
||||
return linux.Eventfd(initval, flags)
|
||||
}
|
||||
|
||||
// Write is a wrapper
|
||||
func Write(fd int, p []byte) (n int, err error) {
|
||||
return linux.Write(fd, p)
|
||||
}
|
||||
|
||||
// EpollCreate1 is a wrapper
|
||||
func EpollCreate1(flag int) (fd int, err error) {
|
||||
return linux.EpollCreate1(flag)
|
||||
}
|
||||
|
||||
// PerfEventMmapPage is a wrapper
|
||||
type PerfEventMmapPage linux.PerfEventMmapPage
|
||||
|
||||
// SetNonblock is a wrapper
|
||||
func SetNonblock(fd int, nonblocking bool) (err error) {
|
||||
return linux.SetNonblock(fd, nonblocking)
|
||||
}
|
||||
|
||||
// Mmap is a wrapper
|
||||
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
|
||||
return linux.Mmap(fd, offset, length, prot, flags)
|
||||
}
|
||||
|
||||
// Munmap is a wrapper
|
||||
func Munmap(b []byte) (err error) {
|
||||
return linux.Munmap(b)
|
||||
}
|
||||
|
||||
// PerfEventAttr is a wrapper
|
||||
type PerfEventAttr = linux.PerfEventAttr
|
||||
|
||||
// PerfEventOpen is a wrapper
|
||||
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
|
||||
return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags)
|
||||
}
|
||||
|
||||
// Utsname is a wrapper
|
||||
type Utsname = linux.Utsname
|
||||
|
||||
// Uname is a wrapper
|
||||
func Uname(buf *Utsname) (err error) {
|
||||
return linux.Uname(buf)
|
||||
}
|
||||
|
||||
// Getpid is a wrapper
|
||||
func Getpid() int {
|
||||
return linux.Getpid()
|
||||
}
|
||||
|
||||
// Gettid is a wrapper
|
||||
func Gettid() int {
|
||||
return linux.Gettid()
|
||||
}
|
||||
|
||||
// Tgkill is a wrapper
|
||||
func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
|
||||
return linux.Tgkill(tgid, tid, sig)
|
||||
}
|
|
@ -0,0 +1,217 @@
|
|||
// +build !linux
|
||||
|
||||
package unix
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
|
||||
|
||||
const (
|
||||
ENOENT = syscall.ENOENT
|
||||
EEXIST = syscall.EEXIST
|
||||
EAGAIN = syscall.EAGAIN
|
||||
ENOSPC = syscall.ENOSPC
|
||||
EINVAL = syscall.EINVAL
|
||||
EINTR = syscall.EINTR
|
||||
EPERM = syscall.EPERM
|
||||
ESRCH = syscall.ESRCH
|
||||
ENODEV = syscall.ENODEV
|
||||
BPF_F_RDONLY_PROG = 0
|
||||
BPF_F_WRONLY_PROG = 0
|
||||
BPF_OBJ_NAME_LEN = 0x10
|
||||
BPF_TAG_SIZE = 0x8
|
||||
SYS_BPF = 321
|
||||
F_DUPFD_CLOEXEC = 0x406
|
||||
EPOLLIN = 0x1
|
||||
EPOLL_CTL_ADD = 0x1
|
||||
EPOLL_CLOEXEC = 0x80000
|
||||
O_CLOEXEC = 0x80000
|
||||
O_NONBLOCK = 0x800
|
||||
PROT_READ = 0x1
|
||||
PROT_WRITE = 0x2
|
||||
MAP_SHARED = 0x1
|
||||
PERF_TYPE_SOFTWARE = 0x1
|
||||
PERF_COUNT_SW_BPF_OUTPUT = 0xa
|
||||
PerfBitWatermark = 0x4000
|
||||
PERF_SAMPLE_RAW = 0x400
|
||||
PERF_FLAG_FD_CLOEXEC = 0x8
|
||||
RLIM_INFINITY = 0x7fffffffffffffff
|
||||
RLIMIT_MEMLOCK = 8
|
||||
)
|
||||
|
||||
// Statfs_t is a wrapper
|
||||
type Statfs_t struct {
|
||||
Type int64
|
||||
Bsize int64
|
||||
Blocks uint64
|
||||
Bfree uint64
|
||||
Bavail uint64
|
||||
Files uint64
|
||||
Ffree uint64
|
||||
Fsid [2]int32
|
||||
Namelen int64
|
||||
Frsize int64
|
||||
Flags int64
|
||||
Spare [4]int64
|
||||
}
|
||||
|
||||
// Rlimit is a wrapper
|
||||
type Rlimit struct {
|
||||
Cur uint64
|
||||
Max uint64
|
||||
}
|
||||
|
||||
// Setrlimit is a wrapper
|
||||
func Setrlimit(resource int, rlim *Rlimit) (err error) {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// Syscall is a wrapper
|
||||
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
|
||||
return 0, 0, syscall.Errno(1)
|
||||
}
|
||||
|
||||
// FcntlInt is a wrapper
|
||||
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
|
||||
return -1, errNonLinux
|
||||
}
|
||||
|
||||
// Statfs is a wrapper
|
||||
func Statfs(path string, buf *Statfs_t) error {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// Close is a wrapper
|
||||
func Close(fd int) (err error) {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// EpollEvent is a wrapper
|
||||
type EpollEvent struct {
|
||||
Events uint32
|
||||
Fd int32
|
||||
Pad int32
|
||||
}
|
||||
|
||||
// EpollWait is a wrapper
|
||||
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
|
||||
return 0, errNonLinux
|
||||
}
|
||||
|
||||
// EpollCtl is a wrapper
|
||||
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// Eventfd is a wrapper
|
||||
func Eventfd(initval uint, flags int) (fd int, err error) {
|
||||
return 0, errNonLinux
|
||||
}
|
||||
|
||||
// Write is a wrapper
|
||||
func Write(fd int, p []byte) (n int, err error) {
|
||||
return 0, errNonLinux
|
||||
}
|
||||
|
||||
// EpollCreate1 is a wrapper
|
||||
func EpollCreate1(flag int) (fd int, err error) {
|
||||
return 0, errNonLinux
|
||||
}
|
||||
|
||||
// PerfEventMmapPage is a wrapper
|
||||
type PerfEventMmapPage struct {
|
||||
Version uint32
|
||||
Compat_version uint32
|
||||
Lock uint32
|
||||
Index uint32
|
||||
Offset int64
|
||||
Time_enabled uint64
|
||||
Time_running uint64
|
||||
Capabilities uint64
|
||||
Pmc_width uint16
|
||||
Time_shift uint16
|
||||
Time_mult uint32
|
||||
Time_offset uint64
|
||||
Time_zero uint64
|
||||
Size uint32
|
||||
|
||||
Data_head uint64
|
||||
Data_tail uint64
|
||||
Data_offset uint64
|
||||
Data_size uint64
|
||||
Aux_head uint64
|
||||
Aux_tail uint64
|
||||
Aux_offset uint64
|
||||
Aux_size uint64
|
||||
}
|
||||
|
||||
// SetNonblock is a wrapper
|
||||
func SetNonblock(fd int, nonblocking bool) (err error) {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// Mmap is a wrapper
|
||||
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
|
||||
return []byte{}, errNonLinux
|
||||
}
|
||||
|
||||
// Munmap is a wrapper
|
||||
func Munmap(b []byte) (err error) {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// PerfEventAttr is a wrapper
|
||||
type PerfEventAttr struct {
|
||||
Type uint32
|
||||
Size uint32
|
||||
Config uint64
|
||||
Sample uint64
|
||||
Sample_type uint64
|
||||
Read_format uint64
|
||||
Bits uint64
|
||||
Wakeup uint32
|
||||
Bp_type uint32
|
||||
Ext1 uint64
|
||||
Ext2 uint64
|
||||
Branch_sample_type uint64
|
||||
Sample_regs_user uint64
|
||||
Sample_stack_user uint32
|
||||
Clockid int32
|
||||
Sample_regs_intr uint64
|
||||
Aux_watermark uint32
|
||||
Sample_max_stack uint16
|
||||
}
|
||||
|
||||
// PerfEventOpen is a wrapper
|
||||
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
|
||||
return 0, errNonLinux
|
||||
}
|
||||
|
||||
// Utsname is a wrapper
|
||||
type Utsname struct {
|
||||
Release [65]byte
|
||||
}
|
||||
|
||||
// Uname is a wrapper
|
||||
func Uname(buf *Utsname) (err error) {
|
||||
return errNonLinux
|
||||
}
|
||||
|
||||
// Getpid is a wrapper
|
||||
func Getpid() int {
|
||||
return -1
|
||||
}
|
||||
|
||||
// Gettid is a wrapper
|
||||
func Gettid() int {
|
||||
return -1
|
||||
}
|
||||
|
||||
// Tgkill is a wrapper
|
||||
func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
|
||||
return errNonLinux
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/internal/btf"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// link resolves bpf-to-bpf calls.
|
||||
//
|
||||
// Each library may contain multiple functions / labels, and is only linked
|
||||
// if prog references one of these functions.
|
||||
//
|
||||
// Libraries also linked.
|
||||
func link(prog *ProgramSpec, libs []*ProgramSpec) error {
|
||||
var (
|
||||
linked = make(map[*ProgramSpec]bool)
|
||||
pending = []asm.Instructions{prog.Instructions}
|
||||
insns asm.Instructions
|
||||
)
|
||||
for len(pending) > 0 {
|
||||
insns, pending = pending[0], pending[1:]
|
||||
for _, lib := range libs {
|
||||
if linked[lib] {
|
||||
continue
|
||||
}
|
||||
|
||||
needed, err := needSection(insns, lib.Instructions)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("linking %s: %w", lib.Name, err)
|
||||
}
|
||||
|
||||
if !needed {
|
||||
continue
|
||||
}
|
||||
|
||||
linked[lib] = true
|
||||
prog.Instructions = append(prog.Instructions, lib.Instructions...)
|
||||
pending = append(pending, lib.Instructions)
|
||||
|
||||
if prog.BTF != nil && lib.BTF != nil {
|
||||
if err := btf.ProgramAppend(prog.BTF, lib.BTF); err != nil {
|
||||
return xerrors.Errorf("linking BTF of %s: %w", lib.Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func needSection(insns, section asm.Instructions) (bool, error) {
|
||||
// A map of symbols to the libraries which contain them.
|
||||
symbols, err := section.SymbolOffsets()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
for _, ins := range insns {
|
||||
if ins.Reference == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if ins.OpCode.JumpOp() != asm.Call || ins.Src != asm.PseudoCall {
|
||||
continue
|
||||
}
|
||||
|
||||
if ins.Constant != -1 {
|
||||
// This is already a valid call, no need to link again.
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := symbols[ins.Reference]; !ok {
|
||||
// Symbol isn't available in this section
|
||||
continue
|
||||
}
|
||||
|
||||
// At this point we know that at least one function in the
|
||||
// library is called from insns, so we have to link it.
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// None of the functions in the section are called.
|
||||
return false, nil
|
||||
}
|
|
@ -0,0 +1,799 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/cilium/ebpf/internal"
|
||||
"github.com/cilium/ebpf/internal/btf"
|
||||
"github.com/cilium/ebpf/internal/unix"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// Errors returned by Map and MapIterator methods.
|
||||
var (
|
||||
ErrKeyNotExist = xerrors.New("key does not exist")
|
||||
ErrKeyExist = xerrors.New("key already exists")
|
||||
ErrIterationAborted = xerrors.New("iteration aborted")
|
||||
)
|
||||
|
||||
// MapID represents the unique ID of an eBPF map
|
||||
type MapID uint32
|
||||
|
||||
// MapSpec defines a Map.
|
||||
type MapSpec struct {
|
||||
// Name is passed to the kernel as a debug aid. Must only contain
|
||||
// alpha numeric and '_' characters.
|
||||
Name string
|
||||
Type MapType
|
||||
KeySize uint32
|
||||
ValueSize uint32
|
||||
MaxEntries uint32
|
||||
Flags uint32
|
||||
|
||||
// The initial contents of the map. May be nil.
|
||||
Contents []MapKV
|
||||
|
||||
// Whether to freeze a map after setting its initial contents.
|
||||
Freeze bool
|
||||
|
||||
// InnerMap is used as a template for ArrayOfMaps and HashOfMaps
|
||||
InnerMap *MapSpec
|
||||
|
||||
// The BTF associated with this map.
|
||||
BTF *btf.Map
|
||||
}
|
||||
|
||||
func (ms *MapSpec) String() string {
|
||||
return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags)
|
||||
}
|
||||
|
||||
// Copy returns a copy of the spec.
|
||||
//
|
||||
// MapSpec.Contents is a shallow copy.
|
||||
func (ms *MapSpec) Copy() *MapSpec {
|
||||
if ms == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
cpy := *ms
|
||||
cpy.Contents = make([]MapKV, len(ms.Contents))
|
||||
copy(cpy.Contents, ms.Contents)
|
||||
cpy.InnerMap = ms.InnerMap.Copy()
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// MapKV is used to initialize the contents of a Map.
|
||||
type MapKV struct {
|
||||
Key interface{}
|
||||
Value interface{}
|
||||
}
|
||||
|
||||
// Map represents a Map file descriptor.
|
||||
//
|
||||
// It is not safe to close a map which is used by other goroutines.
|
||||
//
|
||||
// Methods which take interface{} arguments by default encode
|
||||
// them using binary.Read/Write in the machine's native endianness.
|
||||
//
|
||||
// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler
|
||||
// if you require custom encoding.
|
||||
type Map struct {
|
||||
name string
|
||||
fd *internal.FD
|
||||
abi MapABI
|
||||
// Per CPU maps return values larger than the size in the spec
|
||||
fullValueSize int
|
||||
}
|
||||
|
||||
// NewMapFromFD creates a map from a raw fd.
|
||||
//
|
||||
// You should not use fd after calling this function.
|
||||
func NewMapFromFD(fd int) (*Map, error) {
|
||||
if fd < 0 {
|
||||
return nil, xerrors.New("invalid fd")
|
||||
}
|
||||
bpfFd := internal.NewFD(uint32(fd))
|
||||
|
||||
name, abi, err := newMapABIFromFd(bpfFd)
|
||||
if err != nil {
|
||||
bpfFd.Forget()
|
||||
return nil, err
|
||||
}
|
||||
return newMap(bpfFd, name, abi)
|
||||
}
|
||||
|
||||
// NewMap creates a new Map.
|
||||
//
|
||||
// Creating a map for the first time will perform feature detection
|
||||
// by creating small, temporary maps.
|
||||
//
|
||||
// The caller is responsible for ensuring the process' rlimit is set
|
||||
// sufficiently high for locking memory during map creation. This can be done
|
||||
// by calling unix.Setrlimit with unix.RLIMIT_MEMLOCK prior to calling NewMap.
|
||||
func NewMap(spec *MapSpec) (*Map, error) {
|
||||
if spec.BTF == nil {
|
||||
return newMapWithBTF(spec, nil)
|
||||
}
|
||||
|
||||
handle, err := btf.NewHandle(btf.MapSpec(spec.BTF))
|
||||
if err != nil && !xerrors.Is(err, btf.ErrNotSupported) {
|
||||
return nil, xerrors.Errorf("can't load BTF: %w", err)
|
||||
}
|
||||
|
||||
return newMapWithBTF(spec, handle)
|
||||
}
|
||||
|
||||
func newMapWithBTF(spec *MapSpec, handle *btf.Handle) (*Map, error) {
|
||||
if spec.Type != ArrayOfMaps && spec.Type != HashOfMaps {
|
||||
return createMap(spec, nil, handle)
|
||||
}
|
||||
|
||||
if spec.InnerMap == nil {
|
||||
return nil, xerrors.Errorf("%s requires InnerMap", spec.Type)
|
||||
}
|
||||
|
||||
template, err := createMap(spec.InnerMap, nil, handle)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer template.Close()
|
||||
|
||||
return createMap(spec, template.fd, handle)
|
||||
}
|
||||
|
||||
func createMap(spec *MapSpec, inner *internal.FD, handle *btf.Handle) (*Map, error) {
|
||||
abi := newMapABIFromSpec(spec)
|
||||
|
||||
switch spec.Type {
|
||||
case ArrayOfMaps:
|
||||
fallthrough
|
||||
case HashOfMaps:
|
||||
if err := haveNestedMaps(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if abi.ValueSize != 0 && abi.ValueSize != 4 {
|
||||
return nil, xerrors.New("ValueSize must be zero or four for map of map")
|
||||
}
|
||||
abi.ValueSize = 4
|
||||
|
||||
case PerfEventArray:
|
||||
if abi.KeySize != 0 && abi.KeySize != 4 {
|
||||
return nil, xerrors.New("KeySize must be zero or four for perf event array")
|
||||
}
|
||||
abi.KeySize = 4
|
||||
|
||||
if abi.ValueSize != 0 && abi.ValueSize != 4 {
|
||||
return nil, xerrors.New("ValueSize must be zero or four for perf event array")
|
||||
}
|
||||
abi.ValueSize = 4
|
||||
|
||||
if abi.MaxEntries == 0 {
|
||||
n, err := internal.PossibleCPUs()
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("perf event array: %w", err)
|
||||
}
|
||||
abi.MaxEntries = uint32(n)
|
||||
}
|
||||
}
|
||||
|
||||
if abi.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze {
|
||||
if err := haveMapMutabilityModifiers(); err != nil {
|
||||
return nil, xerrors.Errorf("map create: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
attr := bpfMapCreateAttr{
|
||||
mapType: abi.Type,
|
||||
keySize: abi.KeySize,
|
||||
valueSize: abi.ValueSize,
|
||||
maxEntries: abi.MaxEntries,
|
||||
flags: abi.Flags,
|
||||
}
|
||||
|
||||
if inner != nil {
|
||||
var err error
|
||||
attr.innerMapFd, err = inner.Value()
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("map create: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if handle != nil && spec.BTF != nil {
|
||||
attr.btfFd = uint32(handle.FD())
|
||||
attr.btfKeyTypeID = btf.MapKey(spec.BTF).ID()
|
||||
attr.btfValueTypeID = btf.MapValue(spec.BTF).ID()
|
||||
}
|
||||
|
||||
if haveObjName() == nil {
|
||||
attr.mapName = newBPFObjName(spec.Name)
|
||||
}
|
||||
|
||||
fd, err := bpfMapCreate(&attr)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("map create: %w", err)
|
||||
}
|
||||
|
||||
m, err := newMap(fd, spec.Name, abi)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := m.populate(spec.Contents); err != nil {
|
||||
m.Close()
|
||||
return nil, xerrors.Errorf("map create: can't set initial contents: %w", err)
|
||||
}
|
||||
|
||||
if spec.Freeze {
|
||||
if err := m.Freeze(); err != nil {
|
||||
m.Close()
|
||||
return nil, xerrors.Errorf("can't freeze map: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func newMap(fd *internal.FD, name string, abi *MapABI) (*Map, error) {
|
||||
m := &Map{
|
||||
name,
|
||||
fd,
|
||||
*abi,
|
||||
int(abi.ValueSize),
|
||||
}
|
||||
|
||||
if !abi.Type.hasPerCPUValue() {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
possibleCPUs, err := internal.PossibleCPUs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
m.fullValueSize = align(int(abi.ValueSize), 8) * possibleCPUs
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (m *Map) String() string {
|
||||
if m.name != "" {
|
||||
return fmt.Sprintf("%s(%s)#%v", m.abi.Type, m.name, m.fd)
|
||||
}
|
||||
return fmt.Sprintf("%s#%v", m.abi.Type, m.fd)
|
||||
}
|
||||
|
||||
// ABI gets the ABI of the Map
|
||||
func (m *Map) ABI() MapABI {
|
||||
return m.abi
|
||||
}
|
||||
|
||||
// Lookup retrieves a value from a Map.
|
||||
//
|
||||
// Calls Close() on valueOut if it is of type **Map or **Program,
|
||||
// and *valueOut is not nil.
|
||||
//
|
||||
// Returns an error if the key doesn't exist, see IsNotExist.
|
||||
func (m *Map) Lookup(key, valueOut interface{}) error {
|
||||
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
|
||||
|
||||
if err := m.lookup(key, valuePtr); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if valueBytes == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if m.abi.Type.hasPerCPUValue() {
|
||||
return unmarshalPerCPUValue(valueOut, int(m.abi.ValueSize), valueBytes)
|
||||
}
|
||||
|
||||
switch value := valueOut.(type) {
|
||||
case **Map:
|
||||
m, err := unmarshalMap(valueBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
(*value).Close()
|
||||
*value = m
|
||||
return nil
|
||||
case *Map:
|
||||
return xerrors.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil))
|
||||
case Map:
|
||||
return xerrors.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil))
|
||||
|
||||
case **Program:
|
||||
p, err := unmarshalProgram(valueBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
(*value).Close()
|
||||
*value = p
|
||||
return nil
|
||||
case *Program:
|
||||
return xerrors.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil))
|
||||
case Program:
|
||||
return xerrors.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil))
|
||||
|
||||
default:
|
||||
return unmarshalBytes(valueOut, valueBytes)
|
||||
}
|
||||
}
|
||||
|
||||
// LookupAndDelete retrieves and deletes a value from a Map.
|
||||
//
|
||||
// Returns ErrKeyNotExist if the key doesn't exist.
|
||||
func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
|
||||
valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
|
||||
|
||||
keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
|
||||
if err != nil {
|
||||
return xerrors.Errorf("can't marshal key: %w", err)
|
||||
}
|
||||
|
||||
if err := bpfMapLookupAndDelete(m.fd, keyPtr, valuePtr); err != nil {
|
||||
return xerrors.Errorf("lookup and delete failed: %w", err)
|
||||
}
|
||||
|
||||
return unmarshalBytes(valueOut, valueBytes)
|
||||
}
|
||||
|
||||
// LookupBytes gets a value from Map.
|
||||
//
|
||||
// Returns a nil value if a key doesn't exist.
|
||||
func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
|
||||
valueBytes := make([]byte, m.fullValueSize)
|
||||
valuePtr := internal.NewSlicePointer(valueBytes)
|
||||
|
||||
err := m.lookup(key, valuePtr)
|
||||
if xerrors.Is(err, ErrKeyNotExist) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return valueBytes, err
|
||||
}
|
||||
|
||||
func (m *Map) lookup(key interface{}, valueOut internal.Pointer) error {
|
||||
keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
|
||||
if err != nil {
|
||||
return xerrors.Errorf("can't marshal key: %w", err)
|
||||
}
|
||||
|
||||
if err = bpfMapLookupElem(m.fd, keyPtr, valueOut); err != nil {
|
||||
return xerrors.Errorf("lookup failed: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// MapUpdateFlags controls the behaviour of the Map.Update call.
|
||||
//
|
||||
// The exact semantics depend on the specific MapType.
|
||||
type MapUpdateFlags uint64
|
||||
|
||||
const (
|
||||
// UpdateAny creates a new element or update an existing one.
|
||||
UpdateAny MapUpdateFlags = iota
|
||||
// UpdateNoExist creates a new element.
|
||||
UpdateNoExist MapUpdateFlags = 1 << (iota - 1)
|
||||
// UpdateExist updates an existing element.
|
||||
UpdateExist
|
||||
)
|
||||
|
||||
// Put replaces or creates a value in map.
|
||||
//
|
||||
// It is equivalent to calling Update with UpdateAny.
|
||||
func (m *Map) Put(key, value interface{}) error {
|
||||
return m.Update(key, value, UpdateAny)
|
||||
}
|
||||
|
||||
// Update changes the value of a key.
|
||||
func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error {
|
||||
keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
|
||||
if err != nil {
|
||||
return xerrors.Errorf("can't marshal key: %w", err)
|
||||
}
|
||||
|
||||
var valuePtr internal.Pointer
|
||||
if m.abi.Type.hasPerCPUValue() {
|
||||
valuePtr, err = marshalPerCPUValue(value, int(m.abi.ValueSize))
|
||||
} else {
|
||||
valuePtr, err = marshalPtr(value, int(m.abi.ValueSize))
|
||||
}
|
||||
if err != nil {
|
||||
return xerrors.Errorf("can't marshal value: %w", err)
|
||||
}
|
||||
|
||||
if err = bpfMapUpdateElem(m.fd, keyPtr, valuePtr, uint64(flags)); err != nil {
|
||||
return xerrors.Errorf("update failed: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete removes a value.
|
||||
//
|
||||
// Returns ErrKeyNotExist if the key does not exist.
|
||||
func (m *Map) Delete(key interface{}) error {
|
||||
keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
|
||||
if err != nil {
|
||||
return xerrors.Errorf("can't marshal key: %w", err)
|
||||
}
|
||||
|
||||
if err = bpfMapDeleteElem(m.fd, keyPtr); err != nil {
|
||||
return xerrors.Errorf("delete failed: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// NextKey finds the key following an initial key.
|
||||
//
|
||||
// See NextKeyBytes for details.
|
||||
//
|
||||
// Returns ErrKeyNotExist if there is no next key.
|
||||
func (m *Map) NextKey(key, nextKeyOut interface{}) error {
|
||||
nextKeyPtr, nextKeyBytes := makeBuffer(nextKeyOut, int(m.abi.KeySize))
|
||||
|
||||
if err := m.nextKey(key, nextKeyPtr); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if nextKeyBytes == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := unmarshalBytes(nextKeyOut, nextKeyBytes); err != nil {
|
||||
return xerrors.Errorf("can't unmarshal next key: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// NextKeyBytes returns the key following an initial key as a byte slice.
|
||||
//
|
||||
// Passing nil will return the first key.
|
||||
//
|
||||
// Use Iterate if you want to traverse all entries in the map.
|
||||
//
|
||||
// Returns nil if there are no more keys.
|
||||
func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
|
||||
nextKey := make([]byte, m.abi.KeySize)
|
||||
nextKeyPtr := internal.NewSlicePointer(nextKey)
|
||||
|
||||
err := m.nextKey(key, nextKeyPtr)
|
||||
if xerrors.Is(err, ErrKeyNotExist) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return nextKey, err
|
||||
}
|
||||
|
||||
func (m *Map) nextKey(key interface{}, nextKeyOut internal.Pointer) error {
|
||||
var (
|
||||
keyPtr internal.Pointer
|
||||
err error
|
||||
)
|
||||
|
||||
if key != nil {
|
||||
keyPtr, err = marshalPtr(key, int(m.abi.KeySize))
|
||||
if err != nil {
|
||||
return xerrors.Errorf("can't marshal key: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err = bpfMapGetNextKey(m.fd, keyPtr, nextKeyOut); err != nil {
|
||||
return xerrors.Errorf("next key failed: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Iterate traverses a map.
|
||||
//
|
||||
// It's safe to create multiple iterators at the same time.
|
||||
//
|
||||
// It's not possible to guarantee that all keys in a map will be
|
||||
// returned if there are concurrent modifications to the map.
|
||||
func (m *Map) Iterate() *MapIterator {
|
||||
return newMapIterator(m)
|
||||
}
|
||||
|
||||
// Close removes a Map
|
||||
func (m *Map) Close() error {
|
||||
if m == nil {
|
||||
// This makes it easier to clean up when iterating maps
|
||||
// of maps / programs.
|
||||
return nil
|
||||
}
|
||||
|
||||
return m.fd.Close()
|
||||
}
|
||||
|
||||
// FD gets the file descriptor of the Map.
|
||||
//
|
||||
// Calling this function is invalid after Close has been called.
|
||||
func (m *Map) FD() int {
|
||||
fd, err := m.fd.Value()
|
||||
if err != nil {
|
||||
// Best effort: -1 is the number most likely to be an
|
||||
// invalid file descriptor.
|
||||
return -1
|
||||
}
|
||||
|
||||
return int(fd)
|
||||
}
|
||||
|
||||
// Clone creates a duplicate of the Map.
|
||||
//
|
||||
// Closing the duplicate does not affect the original, and vice versa.
|
||||
// Changes made to the map are reflected by both instances however.
|
||||
//
|
||||
// Cloning a nil Map returns nil.
|
||||
func (m *Map) Clone() (*Map, error) {
|
||||
if m == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
dup, err := m.fd.Dup()
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't clone map: %w", err)
|
||||
}
|
||||
|
||||
return newMap(dup, m.name, &m.abi)
|
||||
}
|
||||
|
||||
// Pin persists the map past the lifetime of the process that created it.
|
||||
//
|
||||
// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional
|
||||
func (m *Map) Pin(fileName string) error {
|
||||
return internal.BPFObjPin(fileName, m.fd)
|
||||
}
|
||||
|
||||
// Freeze prevents a map to be modified from user space.
|
||||
//
|
||||
// It makes no changes to kernel-side restrictions.
|
||||
func (m *Map) Freeze() error {
|
||||
if err := haveMapMutabilityModifiers(); err != nil {
|
||||
return xerrors.Errorf("can't freeze map: %w", err)
|
||||
}
|
||||
|
||||
if err := bpfMapFreeze(m.fd); err != nil {
|
||||
return xerrors.Errorf("can't freeze map: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Map) populate(contents []MapKV) error {
|
||||
for _, kv := range contents {
|
||||
if err := m.Put(kv.Key, kv.Value); err != nil {
|
||||
return xerrors.Errorf("key %v: %w", kv.Key, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// LoadPinnedMap load a Map from a BPF file.
|
||||
//
|
||||
// The function is not compatible with nested maps.
|
||||
// Use LoadPinnedMapExplicit in these situations.
|
||||
func LoadPinnedMap(fileName string) (*Map, error) {
|
||||
fd, err := internal.BPFObjGet(fileName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
name, abi, err := newMapABIFromFd(fd)
|
||||
if err != nil {
|
||||
_ = fd.Close()
|
||||
return nil, err
|
||||
}
|
||||
return newMap(fd, name, abi)
|
||||
}
|
||||
|
||||
// LoadPinnedMapExplicit loads a map with explicit parameters.
|
||||
func LoadPinnedMapExplicit(fileName string, abi *MapABI) (*Map, error) {
|
||||
fd, err := internal.BPFObjGet(fileName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return newMap(fd, "", abi)
|
||||
}
|
||||
|
||||
func unmarshalMap(buf []byte) (*Map, error) {
|
||||
if len(buf) != 4 {
|
||||
return nil, xerrors.New("map id requires 4 byte value")
|
||||
}
|
||||
|
||||
// Looking up an entry in a nested map or prog array returns an id,
|
||||
// not an fd.
|
||||
id := internal.NativeEndian.Uint32(buf)
|
||||
return NewMapFromID(MapID(id))
|
||||
}
|
||||
|
||||
// MarshalBinary implements BinaryMarshaler.
|
||||
func (m *Map) MarshalBinary() ([]byte, error) {
|
||||
fd, err := m.fd.Value()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
buf := make([]byte, 4)
|
||||
internal.NativeEndian.PutUint32(buf, fd)
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
func patchValue(value []byte, typ btf.Type, replacements map[string]interface{}) error {
|
||||
replaced := make(map[string]bool)
|
||||
replace := func(name string, offset, size int, replacement interface{}) error {
|
||||
if offset+size > len(value) {
|
||||
return xerrors.Errorf("%s: offset %d(+%d) is out of bounds", name, offset, size)
|
||||
}
|
||||
|
||||
buf, err := marshalBytes(replacement, size)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("marshal %s: %w", name, err)
|
||||
}
|
||||
|
||||
copy(value[offset:offset+size], buf)
|
||||
replaced[name] = true
|
||||
return nil
|
||||
}
|
||||
|
||||
switch parent := typ.(type) {
|
||||
case *btf.Datasec:
|
||||
for _, secinfo := range parent.Vars {
|
||||
name := string(secinfo.Type.(*btf.Var).Name)
|
||||
replacement, ok := replacements[name]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
err := replace(name, int(secinfo.Offset), int(secinfo.Size), replacement)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
return xerrors.Errorf("patching %T is not supported", typ)
|
||||
}
|
||||
|
||||
if len(replaced) == len(replacements) {
|
||||
return nil
|
||||
}
|
||||
|
||||
var missing []string
|
||||
for name := range replacements {
|
||||
if !replaced[name] {
|
||||
missing = append(missing, name)
|
||||
}
|
||||
}
|
||||
|
||||
if len(missing) == 1 {
|
||||
return xerrors.Errorf("unknown field: %s", missing[0])
|
||||
}
|
||||
|
||||
return xerrors.Errorf("unknown fields: %s", strings.Join(missing, ","))
|
||||
}
|
||||
|
||||
// MapIterator iterates a Map.
|
||||
//
|
||||
// See Map.Iterate.
|
||||
type MapIterator struct {
|
||||
target *Map
|
||||
prevKey interface{}
|
||||
prevBytes []byte
|
||||
count, maxEntries uint32
|
||||
done bool
|
||||
err error
|
||||
}
|
||||
|
||||
func newMapIterator(target *Map) *MapIterator {
|
||||
return &MapIterator{
|
||||
target: target,
|
||||
maxEntries: target.abi.MaxEntries,
|
||||
prevBytes: make([]byte, int(target.abi.KeySize)),
|
||||
}
|
||||
}
|
||||
|
||||
// Next decodes the next key and value.
|
||||
//
|
||||
// Iterating a hash map from which keys are being deleted is not
|
||||
// safe. You may see the same key multiple times. Iteration may
|
||||
// also abort with an error, see IsIterationAborted.
|
||||
//
|
||||
// Returns false if there are no more entries. You must check
|
||||
// the result of Err afterwards.
|
||||
//
|
||||
// See Map.Get for further caveats around valueOut.
|
||||
func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
|
||||
if mi.err != nil || mi.done {
|
||||
return false
|
||||
}
|
||||
|
||||
for ; mi.count < mi.maxEntries; mi.count++ {
|
||||
var nextBytes []byte
|
||||
nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey)
|
||||
if mi.err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if nextBytes == nil {
|
||||
mi.done = true
|
||||
return false
|
||||
}
|
||||
|
||||
// The user can get access to nextBytes since unmarshalBytes
|
||||
// does not copy when unmarshaling into a []byte.
|
||||
// Make a copy to prevent accidental corruption of
|
||||
// iterator state.
|
||||
copy(mi.prevBytes, nextBytes)
|
||||
mi.prevKey = mi.prevBytes
|
||||
|
||||
mi.err = mi.target.Lookup(nextBytes, valueOut)
|
||||
if xerrors.Is(mi.err, ErrKeyNotExist) {
|
||||
// Even though the key should be valid, we couldn't look up
|
||||
// its value. If we're iterating a hash map this is probably
|
||||
// because a concurrent delete removed the value before we
|
||||
// could get it. This means that the next call to NextKeyBytes
|
||||
// is very likely to restart iteration.
|
||||
// If we're iterating one of the fd maps like
|
||||
// ProgramArray it means that a given slot doesn't have
|
||||
// a valid fd associated. It's OK to continue to the next slot.
|
||||
continue
|
||||
}
|
||||
if mi.err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
mi.err = unmarshalBytes(keyOut, nextBytes)
|
||||
return mi.err == nil
|
||||
}
|
||||
|
||||
mi.err = xerrors.Errorf("%w", ErrIterationAborted)
|
||||
return false
|
||||
}
|
||||
|
||||
// Err returns any encountered error.
|
||||
//
|
||||
// The method must be called after Next returns nil.
|
||||
//
|
||||
// Returns ErrIterationAborted if it wasn't possible to do a full iteration.
|
||||
func (mi *MapIterator) Err() error {
|
||||
return mi.err
|
||||
}
|
||||
|
||||
// MapGetNextID returns the ID of the next eBPF map.
|
||||
//
|
||||
// Returns ErrNotExist, if there is no next eBPF map.
|
||||
func MapGetNextID(startID MapID) (MapID, error) {
|
||||
id, err := objGetNextID(internal.BPF_MAP_GET_NEXT_ID, uint32(startID))
|
||||
return MapID(id), err
|
||||
}
|
||||
|
||||
// NewMapFromID returns the map for a given id.
|
||||
//
|
||||
// Returns ErrNotExist, if there is no eBPF map with the given id.
|
||||
func NewMapFromID(id MapID) (*Map, error) {
|
||||
fd, err := bpfObjGetFDByID(internal.BPF_MAP_GET_FD_BY_ID, uint32(id))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
name, abi, err := newMapABIFromFd(fd)
|
||||
if err != nil {
|
||||
_ = fd.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newMap(fd, name, abi)
|
||||
}
|
||||
|
||||
// ID returns the systemwide unique ID of the map.
|
||||
func (m *Map) ID() (MapID, error) {
|
||||
info, err := bpfGetMapInfoByFD(m.fd)
|
||||
if err != nil {
|
||||
return MapID(0), err
|
||||
}
|
||||
return MapID(info.id), nil
|
||||
}
|
|
@ -0,0 +1,203 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding"
|
||||
"encoding/binary"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cilium/ebpf/internal"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
func marshalPtr(data interface{}, length int) (internal.Pointer, error) {
|
||||
if data == nil {
|
||||
if length == 0 {
|
||||
return internal.NewPointer(nil), nil
|
||||
}
|
||||
return internal.Pointer{}, xerrors.New("can't use nil as key of map")
|
||||
}
|
||||
|
||||
if ptr, ok := data.(unsafe.Pointer); ok {
|
||||
return internal.NewPointer(ptr), nil
|
||||
}
|
||||
|
||||
buf, err := marshalBytes(data, length)
|
||||
if err != nil {
|
||||
return internal.Pointer{}, err
|
||||
}
|
||||
|
||||
return internal.NewSlicePointer(buf), nil
|
||||
}
|
||||
|
||||
func marshalBytes(data interface{}, length int) (buf []byte, err error) {
|
||||
switch value := data.(type) {
|
||||
case encoding.BinaryMarshaler:
|
||||
buf, err = value.MarshalBinary()
|
||||
case string:
|
||||
buf = []byte(value)
|
||||
case []byte:
|
||||
buf = value
|
||||
case unsafe.Pointer:
|
||||
err = xerrors.New("can't marshal from unsafe.Pointer")
|
||||
default:
|
||||
var wr bytes.Buffer
|
||||
err = binary.Write(&wr, internal.NativeEndian, value)
|
||||
if err != nil {
|
||||
err = xerrors.Errorf("encoding %T: %v", value, err)
|
||||
}
|
||||
buf = wr.Bytes()
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(buf) != length {
|
||||
return nil, xerrors.Errorf("%T doesn't marshal to %d bytes", data, length)
|
||||
}
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
func makeBuffer(dst interface{}, length int) (internal.Pointer, []byte) {
|
||||
if ptr, ok := dst.(unsafe.Pointer); ok {
|
||||
return internal.NewPointer(ptr), nil
|
||||
}
|
||||
|
||||
buf := make([]byte, length)
|
||||
return internal.NewSlicePointer(buf), buf
|
||||
}
|
||||
|
||||
func unmarshalBytes(data interface{}, buf []byte) error {
|
||||
switch value := data.(type) {
|
||||
case unsafe.Pointer:
|
||||
sh := &reflect.SliceHeader{
|
||||
Data: uintptr(value),
|
||||
Len: len(buf),
|
||||
Cap: len(buf),
|
||||
}
|
||||
|
||||
dst := *(*[]byte)(unsafe.Pointer(sh))
|
||||
copy(dst, buf)
|
||||
runtime.KeepAlive(value)
|
||||
return nil
|
||||
case encoding.BinaryUnmarshaler:
|
||||
return value.UnmarshalBinary(buf)
|
||||
case *string:
|
||||
*value = string(buf)
|
||||
return nil
|
||||
case *[]byte:
|
||||
*value = buf
|
||||
return nil
|
||||
case string:
|
||||
return xerrors.New("require pointer to string")
|
||||
case []byte:
|
||||
return xerrors.New("require pointer to []byte")
|
||||
default:
|
||||
rd := bytes.NewReader(buf)
|
||||
if err := binary.Read(rd, internal.NativeEndian, value); err != nil {
|
||||
return xerrors.Errorf("decoding %T: %v", value, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// marshalPerCPUValue encodes a slice containing one value per
|
||||
// possible CPU into a buffer of bytes.
|
||||
//
|
||||
// Values are initialized to zero if the slice has less elements than CPUs.
|
||||
//
|
||||
// slice must have a type like []elementType.
|
||||
func marshalPerCPUValue(slice interface{}, elemLength int) (internal.Pointer, error) {
|
||||
sliceType := reflect.TypeOf(slice)
|
||||
if sliceType.Kind() != reflect.Slice {
|
||||
return internal.Pointer{}, xerrors.New("per-CPU value requires slice")
|
||||
}
|
||||
|
||||
possibleCPUs, err := internal.PossibleCPUs()
|
||||
if err != nil {
|
||||
return internal.Pointer{}, err
|
||||
}
|
||||
|
||||
sliceValue := reflect.ValueOf(slice)
|
||||
sliceLen := sliceValue.Len()
|
||||
if sliceLen > possibleCPUs {
|
||||
return internal.Pointer{}, xerrors.Errorf("per-CPU value exceeds number of CPUs")
|
||||
}
|
||||
|
||||
alignedElemLength := align(elemLength, 8)
|
||||
buf := make([]byte, alignedElemLength*possibleCPUs)
|
||||
|
||||
for i := 0; i < sliceLen; i++ {
|
||||
elem := sliceValue.Index(i).Interface()
|
||||
elemBytes, err := marshalBytes(elem, elemLength)
|
||||
if err != nil {
|
||||
return internal.Pointer{}, err
|
||||
}
|
||||
|
||||
offset := i * alignedElemLength
|
||||
copy(buf[offset:offset+elemLength], elemBytes)
|
||||
}
|
||||
|
||||
return internal.NewSlicePointer(buf), nil
|
||||
}
|
||||
|
||||
// unmarshalPerCPUValue decodes a buffer into a slice containing one value per
|
||||
// possible CPU.
|
||||
//
|
||||
// valueOut must have a type like *[]elementType
|
||||
func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) error {
|
||||
slicePtrType := reflect.TypeOf(slicePtr)
|
||||
if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
|
||||
return xerrors.Errorf("per-cpu value requires pointer to slice")
|
||||
}
|
||||
|
||||
possibleCPUs, err := internal.PossibleCPUs()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sliceType := slicePtrType.Elem()
|
||||
slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs)
|
||||
|
||||
sliceElemType := sliceType.Elem()
|
||||
sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr
|
||||
if sliceElemIsPointer {
|
||||
sliceElemType = sliceElemType.Elem()
|
||||
}
|
||||
|
||||
step := len(buf) / possibleCPUs
|
||||
if step < elemLength {
|
||||
return xerrors.Errorf("per-cpu element length is larger than available data")
|
||||
}
|
||||
for i := 0; i < possibleCPUs; i++ {
|
||||
var elem interface{}
|
||||
if sliceElemIsPointer {
|
||||
newElem := reflect.New(sliceElemType)
|
||||
slice.Index(i).Set(newElem)
|
||||
elem = newElem.Interface()
|
||||
} else {
|
||||
elem = slice.Index(i).Addr().Interface()
|
||||
}
|
||||
|
||||
// Make a copy, since unmarshal can hold on to itemBytes
|
||||
elemBytes := make([]byte, elemLength)
|
||||
copy(elemBytes, buf[:elemLength])
|
||||
|
||||
err := unmarshalBytes(elem, elemBytes)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("cpu %d: %w", i, err)
|
||||
}
|
||||
|
||||
buf = buf[step:]
|
||||
}
|
||||
|
||||
reflect.ValueOf(slicePtr).Elem().Set(slice)
|
||||
return nil
|
||||
}
|
||||
|
||||
func align(n, alignment int) int {
|
||||
return (int(n) + alignment - 1) / alignment * alignment
|
||||
}
|
|
@ -0,0 +1,623 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/internal"
|
||||
"github.com/cilium/ebpf/internal/btf"
|
||||
"github.com/cilium/ebpf/internal/unix"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// ErrNotSupported is returned whenever the kernel doesn't support a feature.
|
||||
var ErrNotSupported = internal.ErrNotSupported
|
||||
|
||||
// ProgramID represents the unique ID of an eBPF program
|
||||
type ProgramID uint32
|
||||
|
||||
const (
|
||||
// Number of bytes to pad the output buffer for BPF_PROG_TEST_RUN.
|
||||
// This is currently the maximum of spare space allocated for SKB
|
||||
// and XDP programs, and equal to XDP_PACKET_HEADROOM + NET_IP_ALIGN.
|
||||
outputPad = 256 + 2
|
||||
)
|
||||
|
||||
// DefaultVerifierLogSize is the default number of bytes allocated for the
|
||||
// verifier log.
|
||||
const DefaultVerifierLogSize = 64 * 1024
|
||||
|
||||
// ProgramOptions control loading a program into the kernel.
|
||||
type ProgramOptions struct {
|
||||
// Controls the detail emitted by the kernel verifier. Set to non-zero
|
||||
// to enable logging.
|
||||
LogLevel uint32
|
||||
// Controls the output buffer size for the verifier. Defaults to
|
||||
// DefaultVerifierLogSize.
|
||||
LogSize int
|
||||
}
|
||||
|
||||
// ProgramSpec defines a Program
|
||||
type ProgramSpec struct {
|
||||
// Name is passed to the kernel as a debug aid. Must only contain
|
||||
// alpha numeric and '_' characters.
|
||||
Name string
|
||||
// Type determines at which hook in the kernel a program will run.
|
||||
Type ProgramType
|
||||
AttachType AttachType
|
||||
// Name of a kernel data structure to attach to. It's interpretation
|
||||
// depends on Type and AttachType.
|
||||
AttachTo string
|
||||
Instructions asm.Instructions
|
||||
|
||||
// License of the program. Some helpers are only available if
|
||||
// the license is deemed compatible with the GPL.
|
||||
//
|
||||
// See https://www.kernel.org/doc/html/latest/process/license-rules.html#id1
|
||||
License string
|
||||
|
||||
// Version used by tracing programs.
|
||||
//
|
||||
// Deprecated: superseded by BTF.
|
||||
KernelVersion uint32
|
||||
|
||||
// The BTF associated with this program. Changing Instructions
|
||||
// will most likely invalidate the contained data, and may
|
||||
// result in errors when attempting to load it into the kernel.
|
||||
BTF *btf.Program
|
||||
|
||||
// The byte order this program was compiled for, may be nil.
|
||||
ByteOrder binary.ByteOrder
|
||||
}
|
||||
|
||||
// Copy returns a copy of the spec.
|
||||
func (ps *ProgramSpec) Copy() *ProgramSpec {
|
||||
if ps == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
cpy := *ps
|
||||
cpy.Instructions = make(asm.Instructions, len(ps.Instructions))
|
||||
copy(cpy.Instructions, ps.Instructions)
|
||||
return &cpy
|
||||
}
|
||||
|
||||
// Program represents BPF program loaded into the kernel.
|
||||
//
|
||||
// It is not safe to close a Program which is used by other goroutines.
|
||||
type Program struct {
|
||||
// Contains the output of the kernel verifier if enabled,
|
||||
// otherwise it is empty.
|
||||
VerifierLog string
|
||||
|
||||
fd *internal.FD
|
||||
name string
|
||||
abi ProgramABI
|
||||
attachType AttachType
|
||||
}
|
||||
|
||||
// NewProgram creates a new Program.
|
||||
//
|
||||
// Loading a program for the first time will perform
|
||||
// feature detection by loading small, temporary programs.
|
||||
func NewProgram(spec *ProgramSpec) (*Program, error) {
|
||||
return NewProgramWithOptions(spec, ProgramOptions{})
|
||||
}
|
||||
|
||||
// NewProgramWithOptions creates a new Program.
|
||||
//
|
||||
// Loading a program for the first time will perform
|
||||
// feature detection by loading small, temporary programs.
|
||||
func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
|
||||
if spec.BTF == nil {
|
||||
return newProgramWithBTF(spec, nil, opts)
|
||||
}
|
||||
|
||||
handle, err := btf.NewHandle(btf.ProgramSpec(spec.BTF))
|
||||
if err != nil && !xerrors.Is(err, btf.ErrNotSupported) {
|
||||
return nil, xerrors.Errorf("can't load BTF: %w", err)
|
||||
}
|
||||
|
||||
return newProgramWithBTF(spec, handle, opts)
|
||||
}
|
||||
|
||||
func newProgramWithBTF(spec *ProgramSpec, btf *btf.Handle, opts ProgramOptions) (*Program, error) {
|
||||
attr, err := convertProgramSpec(spec, btf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
logSize := DefaultVerifierLogSize
|
||||
if opts.LogSize > 0 {
|
||||
logSize = opts.LogSize
|
||||
}
|
||||
|
||||
var logBuf []byte
|
||||
if opts.LogLevel > 0 {
|
||||
logBuf = make([]byte, logSize)
|
||||
attr.logLevel = opts.LogLevel
|
||||
attr.logSize = uint32(len(logBuf))
|
||||
attr.logBuf = internal.NewSlicePointer(logBuf)
|
||||
}
|
||||
|
||||
fd, err := bpfProgLoad(attr)
|
||||
if err == nil {
|
||||
prog := newProgram(fd, spec.Name, &ProgramABI{spec.Type})
|
||||
prog.VerifierLog = internal.CString(logBuf)
|
||||
return prog, nil
|
||||
}
|
||||
|
||||
logErr := err
|
||||
if opts.LogLevel == 0 {
|
||||
// Re-run with the verifier enabled to get better error messages.
|
||||
logBuf = make([]byte, logSize)
|
||||
attr.logLevel = 1
|
||||
attr.logSize = uint32(len(logBuf))
|
||||
attr.logBuf = internal.NewSlicePointer(logBuf)
|
||||
|
||||
_, logErr = bpfProgLoad(attr)
|
||||
}
|
||||
|
||||
err = internal.ErrorWithLog(err, logBuf, logErr)
|
||||
return nil, xerrors.Errorf("can't load program: %w", err)
|
||||
}
|
||||
|
||||
// NewProgramFromFD creates a program from a raw fd.
|
||||
//
|
||||
// You should not use fd after calling this function.
|
||||
//
|
||||
// Requires at least Linux 4.11.
|
||||
func NewProgramFromFD(fd int) (*Program, error) {
|
||||
if fd < 0 {
|
||||
return nil, xerrors.New("invalid fd")
|
||||
}
|
||||
bpfFd := internal.NewFD(uint32(fd))
|
||||
|
||||
name, abi, err := newProgramABIFromFd(bpfFd)
|
||||
if err != nil {
|
||||
bpfFd.Forget()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newProgram(bpfFd, name, abi), nil
|
||||
}
|
||||
|
||||
func newProgram(fd *internal.FD, name string, abi *ProgramABI) *Program {
|
||||
return &Program{
|
||||
name: name,
|
||||
fd: fd,
|
||||
abi: *abi,
|
||||
}
|
||||
}
|
||||
|
||||
func convertProgramSpec(spec *ProgramSpec, handle *btf.Handle) (*bpfProgLoadAttr, error) {
|
||||
if len(spec.Instructions) == 0 {
|
||||
return nil, xerrors.New("Instructions cannot be empty")
|
||||
}
|
||||
|
||||
if len(spec.License) == 0 {
|
||||
return nil, xerrors.New("License cannot be empty")
|
||||
}
|
||||
|
||||
if spec.ByteOrder != nil && spec.ByteOrder != internal.NativeEndian {
|
||||
return nil, xerrors.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian)
|
||||
}
|
||||
|
||||
buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize))
|
||||
err := spec.Instructions.Marshal(buf, internal.NativeEndian)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
bytecode := buf.Bytes()
|
||||
insCount := uint32(len(bytecode) / asm.InstructionSize)
|
||||
attr := &bpfProgLoadAttr{
|
||||
progType: spec.Type,
|
||||
expectedAttachType: spec.AttachType,
|
||||
insCount: insCount,
|
||||
instructions: internal.NewSlicePointer(bytecode),
|
||||
license: internal.NewStringPointer(spec.License),
|
||||
kernelVersion: spec.KernelVersion,
|
||||
}
|
||||
|
||||
if haveObjName() == nil {
|
||||
attr.progName = newBPFObjName(spec.Name)
|
||||
}
|
||||
|
||||
if handle != nil && spec.BTF != nil {
|
||||
attr.progBTFFd = uint32(handle.FD())
|
||||
|
||||
recSize, bytes, err := btf.ProgramLineInfos(spec.BTF)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't get BTF line infos: %w", err)
|
||||
}
|
||||
attr.lineInfoRecSize = recSize
|
||||
attr.lineInfoCnt = uint32(uint64(len(bytes)) / uint64(recSize))
|
||||
attr.lineInfo = internal.NewSlicePointer(bytes)
|
||||
|
||||
recSize, bytes, err = btf.ProgramFuncInfos(spec.BTF)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't get BTF function infos: %w", err)
|
||||
}
|
||||
attr.funcInfoRecSize = recSize
|
||||
attr.funcInfoCnt = uint32(uint64(len(bytes)) / uint64(recSize))
|
||||
attr.funcInfo = internal.NewSlicePointer(bytes)
|
||||
}
|
||||
|
||||
if spec.AttachTo != "" {
|
||||
target, err := resolveBTFType(spec.AttachTo, spec.Type, spec.AttachType)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if target != nil {
|
||||
attr.attachBTFID = target.ID()
|
||||
}
|
||||
}
|
||||
|
||||
return attr, nil
|
||||
}
|
||||
|
||||
func (p *Program) String() string {
|
||||
if p.name != "" {
|
||||
return fmt.Sprintf("%s(%s)#%v", p.abi.Type, p.name, p.fd)
|
||||
}
|
||||
return fmt.Sprintf("%s#%v", p.abi.Type, p.fd)
|
||||
}
|
||||
|
||||
// ABI gets the ABI of the Program
|
||||
func (p *Program) ABI() ProgramABI {
|
||||
return p.abi
|
||||
}
|
||||
|
||||
// FD gets the file descriptor of the Program.
|
||||
//
|
||||
// It is invalid to call this function after Close has been called.
|
||||
func (p *Program) FD() int {
|
||||
fd, err := p.fd.Value()
|
||||
if err != nil {
|
||||
// Best effort: -1 is the number most likely to be an
|
||||
// invalid file descriptor.
|
||||
return -1
|
||||
}
|
||||
|
||||
return int(fd)
|
||||
}
|
||||
|
||||
// Clone creates a duplicate of the Program.
|
||||
//
|
||||
// Closing the duplicate does not affect the original, and vice versa.
|
||||
//
|
||||
// Cloning a nil Program returns nil.
|
||||
func (p *Program) Clone() (*Program, error) {
|
||||
if p == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
dup, err := p.fd.Dup()
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't clone program: %w", err)
|
||||
}
|
||||
|
||||
return newProgram(dup, p.name, &p.abi), nil
|
||||
}
|
||||
|
||||
// Pin persists the Program past the lifetime of the process that created it
|
||||
//
|
||||
// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional
|
||||
func (p *Program) Pin(fileName string) error {
|
||||
if err := internal.BPFObjPin(fileName, p.fd); err != nil {
|
||||
return xerrors.Errorf("can't pin program: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close unloads the program from the kernel.
|
||||
func (p *Program) Close() error {
|
||||
if p == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return p.fd.Close()
|
||||
}
|
||||
|
||||
// Test runs the Program in the kernel with the given input and returns the
|
||||
// value returned by the eBPF program. outLen may be zero.
|
||||
//
|
||||
// Note: the kernel expects at least 14 bytes input for an ethernet header for
|
||||
// XDP and SKB programs.
|
||||
//
|
||||
// This function requires at least Linux 4.12.
|
||||
func (p *Program) Test(in []byte) (uint32, []byte, error) {
|
||||
ret, out, _, err := p.testRun(in, 1, nil)
|
||||
if err != nil {
|
||||
return ret, nil, xerrors.Errorf("can't test program: %w", err)
|
||||
}
|
||||
return ret, out, nil
|
||||
}
|
||||
|
||||
// Benchmark runs the Program with the given input for a number of times
|
||||
// and returns the time taken per iteration.
|
||||
//
|
||||
// Returns the result of the last execution of the program and the time per
|
||||
// run or an error. reset is called whenever the benchmark syscall is
|
||||
// interrupted, and should be set to testing.B.ResetTimer or similar.
|
||||
//
|
||||
// Note: profiling a call to this function will skew it's results, see
|
||||
// https://github.com/cilium/ebpf/issues/24
|
||||
//
|
||||
// This function requires at least Linux 4.12.
|
||||
func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.Duration, error) {
|
||||
ret, _, total, err := p.testRun(in, repeat, reset)
|
||||
if err != nil {
|
||||
return ret, total, xerrors.Errorf("can't benchmark program: %w", err)
|
||||
}
|
||||
return ret, total, nil
|
||||
}
|
||||
|
||||
var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() (bool, error) {
|
||||
prog, err := NewProgram(&ProgramSpec{
|
||||
Type: SocketFilter,
|
||||
Instructions: asm.Instructions{
|
||||
asm.LoadImm(asm.R0, 0, asm.DWord),
|
||||
asm.Return(),
|
||||
},
|
||||
License: "MIT",
|
||||
})
|
||||
if err != nil {
|
||||
// This may be because we lack sufficient permissions, etc.
|
||||
return false, err
|
||||
}
|
||||
defer prog.Close()
|
||||
|
||||
// Programs require at least 14 bytes input
|
||||
in := make([]byte, 14)
|
||||
attr := bpfProgTestRunAttr{
|
||||
fd: uint32(prog.FD()),
|
||||
dataSizeIn: uint32(len(in)),
|
||||
dataIn: internal.NewSlicePointer(in),
|
||||
}
|
||||
|
||||
err = bpfProgTestRun(&attr)
|
||||
|
||||
// Check for EINVAL specifically, rather than err != nil since we
|
||||
// otherwise misdetect due to insufficient permissions.
|
||||
return !xerrors.Is(err, unix.EINVAL), nil
|
||||
})
|
||||
|
||||
func (p *Program) testRun(in []byte, repeat int, reset func()) (uint32, []byte, time.Duration, error) {
|
||||
if uint(repeat) > math.MaxUint32 {
|
||||
return 0, nil, 0, fmt.Errorf("repeat is too high")
|
||||
}
|
||||
|
||||
if len(in) == 0 {
|
||||
return 0, nil, 0, fmt.Errorf("missing input")
|
||||
}
|
||||
|
||||
if uint(len(in)) > math.MaxUint32 {
|
||||
return 0, nil, 0, fmt.Errorf("input is too long")
|
||||
}
|
||||
|
||||
if err := haveProgTestRun(); err != nil {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
|
||||
// Older kernels ignore the dataSizeOut argument when copying to user space.
|
||||
// Combined with things like bpf_xdp_adjust_head() we don't really know what the final
|
||||
// size will be. Hence we allocate an output buffer which we hope will always be large
|
||||
// enough, and panic if the kernel wrote past the end of the allocation.
|
||||
// See https://patchwork.ozlabs.org/cover/1006822/
|
||||
out := make([]byte, len(in)+outputPad)
|
||||
|
||||
fd, err := p.fd.Value()
|
||||
if err != nil {
|
||||
return 0, nil, 0, err
|
||||
}
|
||||
|
||||
attr := bpfProgTestRunAttr{
|
||||
fd: fd,
|
||||
dataSizeIn: uint32(len(in)),
|
||||
dataSizeOut: uint32(len(out)),
|
||||
dataIn: internal.NewSlicePointer(in),
|
||||
dataOut: internal.NewSlicePointer(out),
|
||||
repeat: uint32(repeat),
|
||||
}
|
||||
|
||||
for {
|
||||
err = bpfProgTestRun(&attr)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
|
||||
if xerrors.Is(err, unix.EINTR) {
|
||||
if reset != nil {
|
||||
reset()
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
return 0, nil, 0, xerrors.Errorf("can't run test: %w", err)
|
||||
}
|
||||
|
||||
if int(attr.dataSizeOut) > cap(out) {
|
||||
// Houston, we have a problem. The program created more data than we allocated,
|
||||
// and the kernel wrote past the end of our buffer.
|
||||
panic("kernel wrote past end of output buffer")
|
||||
}
|
||||
out = out[:int(attr.dataSizeOut)]
|
||||
|
||||
total := time.Duration(attr.duration) * time.Nanosecond
|
||||
return attr.retval, out, total, nil
|
||||
}
|
||||
|
||||
func unmarshalProgram(buf []byte) (*Program, error) {
|
||||
if len(buf) != 4 {
|
||||
return nil, xerrors.New("program id requires 4 byte value")
|
||||
}
|
||||
|
||||
// Looking up an entry in a nested map or prog array returns an id,
|
||||
// not an fd.
|
||||
id := internal.NativeEndian.Uint32(buf)
|
||||
return NewProgramFromID(ProgramID(id))
|
||||
}
|
||||
|
||||
// MarshalBinary implements BinaryMarshaler.
|
||||
func (p *Program) MarshalBinary() ([]byte, error) {
|
||||
value, err := p.fd.Value()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
buf := make([]byte, 4)
|
||||
internal.NativeEndian.PutUint32(buf, value)
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
// Attach a Program.
|
||||
//
|
||||
// Deprecated: use link.RawAttachProgram instead.
|
||||
func (p *Program) Attach(fd int, typ AttachType, flags AttachFlags) error {
|
||||
if fd < 0 {
|
||||
return xerrors.New("invalid fd")
|
||||
}
|
||||
|
||||
pfd, err := p.fd.Value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := internal.BPFProgAttachAttr{
|
||||
TargetFd: uint32(fd),
|
||||
AttachBpfFd: pfd,
|
||||
AttachType: uint32(typ),
|
||||
AttachFlags: uint32(flags),
|
||||
}
|
||||
|
||||
return internal.BPFProgAttach(&attr)
|
||||
}
|
||||
|
||||
// Detach a Program.
|
||||
//
|
||||
// Deprecated: use link.RawDetachProgram instead.
|
||||
func (p *Program) Detach(fd int, typ AttachType, flags AttachFlags) error {
|
||||
if fd < 0 {
|
||||
return xerrors.New("invalid fd")
|
||||
}
|
||||
|
||||
if flags != 0 {
|
||||
return xerrors.New("flags must be zero")
|
||||
}
|
||||
|
||||
pfd, err := p.fd.Value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := internal.BPFProgDetachAttr{
|
||||
TargetFd: uint32(fd),
|
||||
AttachBpfFd: pfd,
|
||||
AttachType: uint32(typ),
|
||||
}
|
||||
|
||||
return internal.BPFProgDetach(&attr)
|
||||
}
|
||||
|
||||
// LoadPinnedProgram loads a Program from a BPF file.
|
||||
//
|
||||
// Requires at least Linux 4.11.
|
||||
func LoadPinnedProgram(fileName string) (*Program, error) {
|
||||
fd, err := internal.BPFObjGet(fileName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
name, abi, err := newProgramABIFromFd(fd)
|
||||
if err != nil {
|
||||
_ = fd.Close()
|
||||
return nil, xerrors.Errorf("can't get ABI for %s: %w", fileName, err)
|
||||
}
|
||||
|
||||
return newProgram(fd, name, abi), nil
|
||||
}
|
||||
|
||||
// SanitizeName replaces all invalid characters in name.
|
||||
//
|
||||
// Use this to automatically generate valid names for maps and
|
||||
// programs at run time.
|
||||
//
|
||||
// Passing a negative value for replacement will delete characters
|
||||
// instead of replacing them.
|
||||
func SanitizeName(name string, replacement rune) string {
|
||||
return strings.Map(func(char rune) rune {
|
||||
if invalidBPFObjNameChar(char) {
|
||||
return replacement
|
||||
}
|
||||
return char
|
||||
}, name)
|
||||
}
|
||||
|
||||
// ProgramGetNextID returns the ID of the next eBPF program.
|
||||
//
|
||||
// Returns ErrNotExist, if there is no next eBPF program.
|
||||
func ProgramGetNextID(startID ProgramID) (ProgramID, error) {
|
||||
id, err := objGetNextID(internal.BPF_PROG_GET_NEXT_ID, uint32(startID))
|
||||
return ProgramID(id), err
|
||||
}
|
||||
|
||||
// NewProgramFromID returns the program for a given id.
|
||||
//
|
||||
// Returns ErrNotExist, if there is no eBPF program with the given id.
|
||||
func NewProgramFromID(id ProgramID) (*Program, error) {
|
||||
fd, err := bpfObjGetFDByID(internal.BPF_PROG_GET_FD_BY_ID, uint32(id))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
name, abi, err := newProgramABIFromFd(fd)
|
||||
if err != nil {
|
||||
_ = fd.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newProgram(fd, name, abi), nil
|
||||
}
|
||||
|
||||
// ID returns the systemwide unique ID of the program.
|
||||
func (p *Program) ID() (ProgramID, error) {
|
||||
info, err := bpfGetProgInfoByFD(p.fd)
|
||||
if err != nil {
|
||||
return ProgramID(0), err
|
||||
}
|
||||
return ProgramID(info.id), nil
|
||||
}
|
||||
|
||||
func resolveBTFType(name string, progType ProgramType, attachType AttachType) (btf.Type, error) {
|
||||
kernel, err := btf.LoadKernelSpec()
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't resolve BTF type %s: %w", name, err)
|
||||
}
|
||||
|
||||
type match struct {
|
||||
p ProgramType
|
||||
a AttachType
|
||||
}
|
||||
|
||||
target := match{progType, attachType}
|
||||
switch target {
|
||||
case match{Tracing, AttachTraceIter}:
|
||||
var target btf.Func
|
||||
if err := kernel.FindType("bpf_iter_"+name, &target); err != nil {
|
||||
return nil, xerrors.Errorf("can't resolve BTF for iterator %s: %w", name, err)
|
||||
}
|
||||
|
||||
return &target, nil
|
||||
|
||||
default:
|
||||
return nil, nil
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
eBPF
|
||||
-------
|
||||
[![](https://godoc.org/github.com/cilium/ebpf?status.svg)](https://godoc.org/github.com/cilium/ebpf)
|
||||
|
||||
eBPF is a pure Go library that provides utilities for loading, compiling, and debugging eBPF programs. It has minimal external dependencies and is intended to be used in long running processes.
|
||||
|
||||
[ebpf/asm](https://godoc.org/github.com/cilium/ebpf/asm) contains a basic assembler.
|
||||
|
||||
The library is maintained by [Cloudflare](https://www.cloudflare.com) and [Cilium](https://www.cilium.io). Feel free to [join](https://cilium.herokuapp.com/) the [libbpf-go](https://cilium.slack.com/messages/libbpf-go) channel on Slack.
|
||||
|
||||
## Current status
|
||||
|
||||
The package is production ready, but **the API is explicitly unstable
|
||||
right now**. Expect to update your code if you want to follow along.
|
||||
|
||||
## Requirements
|
||||
|
||||
* A version of Go that is [supported by upstream](https://golang.org/doc/devel/release.html#policy)
|
||||
* Linux 4.9, 4.19 or 5.4 (versions in-between should work, but are not tested)
|
||||
|
||||
## Useful resources
|
||||
|
||||
* [Cilium eBPF documentation](https://cilium.readthedocs.io/en/latest/bpf/#bpf-guide) (recommended)
|
||||
* [Linux documentation on BPF](http://elixir.free-electrons.com/linux/latest/source/Documentation/networking/filter.txt)
|
||||
* [eBPF features by Linux version](https://github.com/iovisor/bcc/blob/master/docs/kernel-versions.md)
|
|
@ -0,0 +1,69 @@
|
|||
#!/bin/bash
|
||||
# Test the current package under a different kernel.
|
||||
# Requires virtme and qemu to be installed.
|
||||
|
||||
set -eu
|
||||
set -o pipefail
|
||||
|
||||
if [[ "${1:-}" = "--in-vm" ]]; then
|
||||
shift
|
||||
|
||||
mount -t bpf bpf /sys/fs/bpf
|
||||
export CGO_ENABLED=0
|
||||
export GOFLAGS=-mod=readonly
|
||||
export GOPATH=/run/go-path
|
||||
export GOPROXY=file:///run/go-root/pkg/mod/cache/download
|
||||
export GOCACHE=/run/go-cache
|
||||
|
||||
echo Running tests...
|
||||
/usr/local/bin/go test -coverprofile="$1/coverage.txt" -covermode=atomic -v ./...
|
||||
touch "$1/success"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Pull all dependencies, so that we can run tests without the
|
||||
# vm having network access.
|
||||
go mod download
|
||||
|
||||
# Use sudo if /dev/kvm isn't accessible by the current user.
|
||||
sudo=""
|
||||
if [[ ! -r /dev/kvm || ! -w /dev/kvm ]]; then
|
||||
sudo="sudo"
|
||||
fi
|
||||
readonly sudo
|
||||
|
||||
readonly kernel_version="${1:-}"
|
||||
if [[ -z "${kernel_version}" ]]; then
|
||||
echo "Expecting kernel version as first argument"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
readonly kernel="linux-${kernel_version}.bz"
|
||||
readonly output="$(mktemp -d)"
|
||||
readonly tmp_dir="${TMPDIR:-$(mktemp -d)}"
|
||||
|
||||
test -e "${tmp_dir}/${kernel}" || {
|
||||
echo Fetching "${kernel}"
|
||||
curl --fail -L "https://github.com/cilium/ci-kernels/blob/master/${kernel}?raw=true" -o "${tmp_dir}/${kernel}"
|
||||
}
|
||||
|
||||
echo Testing on "${kernel_version}"
|
||||
$sudo virtme-run --kimg "${tmp_dir}/${kernel}" --memory 512M --pwd \
|
||||
--rwdir=/run/output="${output}" \
|
||||
--rodir=/run/go-path="$(go env GOPATH)" \
|
||||
--rwdir=/run/go-cache="$(go env GOCACHE)" \
|
||||
--script-sh "$(realpath "$0") --in-vm /run/output"
|
||||
|
||||
if [[ ! -e "${output}/success" ]]; then
|
||||
echo "Test failed on ${kernel_version}"
|
||||
exit 1
|
||||
else
|
||||
echo "Test successful on ${kernel_version}"
|
||||
if [[ -v CODECOV_TOKEN ]]; then
|
||||
curl --fail -s https://codecov.io/bash > "${tmp_dir}/codecov.sh"
|
||||
chmod +x "${tmp_dir}/codecov.sh"
|
||||
"${tmp_dir}/codecov.sh" -f "${output}/coverage.txt"
|
||||
fi
|
||||
fi
|
||||
|
||||
$sudo rm -r "${output}"
|
|
@ -0,0 +1,438 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"os"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cilium/ebpf/internal"
|
||||
"github.com/cilium/ebpf/internal/btf"
|
||||
"github.com/cilium/ebpf/internal/unix"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// Generic errors returned by BPF syscalls.
|
||||
var (
|
||||
ErrNotExist = xerrors.New("requested object does not exist")
|
||||
)
|
||||
|
||||
// bpfObjName is a null-terminated string made up of
|
||||
// 'A-Za-z0-9_' characters.
|
||||
type bpfObjName [unix.BPF_OBJ_NAME_LEN]byte
|
||||
|
||||
// newBPFObjName truncates the result if it is too long.
|
||||
func newBPFObjName(name string) bpfObjName {
|
||||
var result bpfObjName
|
||||
copy(result[:unix.BPF_OBJ_NAME_LEN-1], name)
|
||||
return result
|
||||
}
|
||||
|
||||
func invalidBPFObjNameChar(char rune) bool {
|
||||
dotAllowed := objNameAllowsDot() == nil
|
||||
|
||||
switch {
|
||||
case char >= 'A' && char <= 'Z':
|
||||
fallthrough
|
||||
case char >= 'a' && char <= 'z':
|
||||
fallthrough
|
||||
case char >= '0' && char <= '9':
|
||||
fallthrough
|
||||
case dotAllowed && char == '.':
|
||||
fallthrough
|
||||
case char == '_':
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
type bpfMapCreateAttr struct {
|
||||
mapType MapType
|
||||
keySize uint32
|
||||
valueSize uint32
|
||||
maxEntries uint32
|
||||
flags uint32
|
||||
innerMapFd uint32 // since 4.12 56f668dfe00d
|
||||
numaNode uint32 // since 4.14 96eabe7a40aa
|
||||
mapName bpfObjName // since 4.15 ad5b177bd73f
|
||||
mapIfIndex uint32
|
||||
btfFd uint32
|
||||
btfKeyTypeID btf.TypeID
|
||||
btfValueTypeID btf.TypeID
|
||||
}
|
||||
|
||||
type bpfMapOpAttr struct {
|
||||
mapFd uint32
|
||||
padding uint32
|
||||
key internal.Pointer
|
||||
value internal.Pointer
|
||||
flags uint64
|
||||
}
|
||||
|
||||
type bpfMapInfo struct {
|
||||
mapType uint32
|
||||
id uint32
|
||||
keySize uint32
|
||||
valueSize uint32
|
||||
maxEntries uint32
|
||||
flags uint32
|
||||
mapName bpfObjName // since 4.15 ad5b177bd73f
|
||||
}
|
||||
|
||||
type bpfProgLoadAttr struct {
|
||||
progType ProgramType
|
||||
insCount uint32
|
||||
instructions internal.Pointer
|
||||
license internal.Pointer
|
||||
logLevel uint32
|
||||
logSize uint32
|
||||
logBuf internal.Pointer
|
||||
kernelVersion uint32 // since 4.1 2541517c32be
|
||||
progFlags uint32 // since 4.11 e07b98d9bffe
|
||||
progName bpfObjName // since 4.15 067cae47771c
|
||||
progIfIndex uint32 // since 4.15 1f6f4cb7ba21
|
||||
expectedAttachType AttachType // since 4.17 5e43f899b03a
|
||||
progBTFFd uint32
|
||||
funcInfoRecSize uint32
|
||||
funcInfo internal.Pointer
|
||||
funcInfoCnt uint32
|
||||
lineInfoRecSize uint32
|
||||
lineInfo internal.Pointer
|
||||
lineInfoCnt uint32
|
||||
attachBTFID btf.TypeID
|
||||
attachProgFd uint32
|
||||
}
|
||||
|
||||
type bpfProgInfo struct {
|
||||
progType uint32
|
||||
id uint32
|
||||
tag [unix.BPF_TAG_SIZE]byte
|
||||
jitedLen uint32
|
||||
xlatedLen uint32
|
||||
jited internal.Pointer
|
||||
xlated internal.Pointer
|
||||
loadTime uint64 // since 4.15 cb4d2b3f03d8
|
||||
createdByUID uint32
|
||||
nrMapIDs uint32
|
||||
mapIds internal.Pointer
|
||||
name bpfObjName
|
||||
}
|
||||
|
||||
type bpfProgTestRunAttr struct {
|
||||
fd uint32
|
||||
retval uint32
|
||||
dataSizeIn uint32
|
||||
dataSizeOut uint32
|
||||
dataIn internal.Pointer
|
||||
dataOut internal.Pointer
|
||||
repeat uint32
|
||||
duration uint32
|
||||
}
|
||||
|
||||
type bpfObjGetInfoByFDAttr struct {
|
||||
fd uint32
|
||||
infoLen uint32
|
||||
info internal.Pointer // May be either bpfMapInfo or bpfProgInfo
|
||||
}
|
||||
|
||||
type bpfGetFDByIDAttr struct {
|
||||
id uint32
|
||||
next uint32
|
||||
}
|
||||
|
||||
type bpfMapFreezeAttr struct {
|
||||
mapFd uint32
|
||||
}
|
||||
|
||||
type bpfObjGetNextIDAttr struct {
|
||||
startID uint32
|
||||
nextID uint32
|
||||
openFlags uint32
|
||||
}
|
||||
|
||||
func bpfProgLoad(attr *bpfProgLoadAttr) (*internal.FD, error) {
|
||||
for {
|
||||
fd, err := internal.BPF(internal.BPF_PROG_LOAD, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
|
||||
// As of ~4.20 the verifier can be interrupted by a signal,
|
||||
// and returns EAGAIN in that case.
|
||||
if err == unix.EAGAIN {
|
||||
continue
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return internal.NewFD(uint32(fd)), nil
|
||||
}
|
||||
}
|
||||
|
||||
func bpfProgTestRun(attr *bpfProgTestRunAttr) error {
|
||||
_, err := internal.BPF(internal.BPF_PROG_TEST_RUN, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
|
||||
return err
|
||||
}
|
||||
|
||||
func bpfMapCreate(attr *bpfMapCreateAttr) (*internal.FD, error) {
|
||||
fd, err := internal.BPF(internal.BPF_MAP_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
|
||||
if xerrors.Is(err, os.ErrPermission) {
|
||||
return nil, xerrors.New("permission denied or insufficient rlimit to lock memory for map")
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return internal.NewFD(uint32(fd)), nil
|
||||
}
|
||||
|
||||
var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() (bool, error) {
|
||||
inner, err := bpfMapCreate(&bpfMapCreateAttr{
|
||||
mapType: Array,
|
||||
keySize: 4,
|
||||
valueSize: 4,
|
||||
maxEntries: 1,
|
||||
})
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
defer inner.Close()
|
||||
|
||||
innerFd, _ := inner.Value()
|
||||
nested, err := bpfMapCreate(&bpfMapCreateAttr{
|
||||
mapType: ArrayOfMaps,
|
||||
keySize: 4,
|
||||
valueSize: 4,
|
||||
maxEntries: 1,
|
||||
innerMapFd: innerFd,
|
||||
})
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
_ = nested.Close()
|
||||
return true, nil
|
||||
})
|
||||
|
||||
var haveMapMutabilityModifiers = internal.FeatureTest("read- and write-only maps", "5.2", func() (bool, error) {
|
||||
// This checks BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG. Since
|
||||
// BPF_MAP_FREEZE appeared in 5.2 as well we don't do a separate check.
|
||||
m, err := bpfMapCreate(&bpfMapCreateAttr{
|
||||
mapType: Array,
|
||||
keySize: 4,
|
||||
valueSize: 4,
|
||||
maxEntries: 1,
|
||||
flags: unix.BPF_F_RDONLY_PROG,
|
||||
})
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
_ = m.Close()
|
||||
return true, nil
|
||||
})
|
||||
|
||||
func bpfMapLookupElem(m *internal.FD, key, valueOut internal.Pointer) error {
|
||||
fd, err := m.Value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfMapOpAttr{
|
||||
mapFd: fd,
|
||||
key: key,
|
||||
value: valueOut,
|
||||
}
|
||||
_, err = internal.BPF(internal.BPF_MAP_LOOKUP_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return wrapMapError(err)
|
||||
}
|
||||
|
||||
func bpfMapLookupAndDelete(m *internal.FD, key, valueOut internal.Pointer) error {
|
||||
fd, err := m.Value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfMapOpAttr{
|
||||
mapFd: fd,
|
||||
key: key,
|
||||
value: valueOut,
|
||||
}
|
||||
_, err = internal.BPF(internal.BPF_MAP_LOOKUP_AND_DELETE_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return wrapMapError(err)
|
||||
}
|
||||
|
||||
func bpfMapUpdateElem(m *internal.FD, key, valueOut internal.Pointer, flags uint64) error {
|
||||
fd, err := m.Value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfMapOpAttr{
|
||||
mapFd: fd,
|
||||
key: key,
|
||||
value: valueOut,
|
||||
flags: flags,
|
||||
}
|
||||
_, err = internal.BPF(internal.BPF_MAP_UPDATE_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return wrapMapError(err)
|
||||
}
|
||||
|
||||
func bpfMapDeleteElem(m *internal.FD, key internal.Pointer) error {
|
||||
fd, err := m.Value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfMapOpAttr{
|
||||
mapFd: fd,
|
||||
key: key,
|
||||
}
|
||||
_, err = internal.BPF(internal.BPF_MAP_DELETE_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return wrapMapError(err)
|
||||
}
|
||||
|
||||
func bpfMapGetNextKey(m *internal.FD, key, nextKeyOut internal.Pointer) error {
|
||||
fd, err := m.Value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfMapOpAttr{
|
||||
mapFd: fd,
|
||||
key: key,
|
||||
value: nextKeyOut,
|
||||
}
|
||||
_, err = internal.BPF(internal.BPF_MAP_GET_NEXT_KEY, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return wrapMapError(err)
|
||||
}
|
||||
|
||||
func objGetNextID(cmd internal.BPFCmd, start uint32) (uint32, error) {
|
||||
attr := bpfObjGetNextIDAttr{
|
||||
startID: start,
|
||||
}
|
||||
_, err := internal.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return attr.nextID, wrapObjError(err)
|
||||
}
|
||||
|
||||
func wrapObjError(err error) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if xerrors.Is(err, unix.ENOENT) {
|
||||
return xerrors.Errorf("%w", ErrNotExist)
|
||||
}
|
||||
|
||||
return xerrors.New(err.Error())
|
||||
}
|
||||
|
||||
func wrapMapError(err error) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if xerrors.Is(err, unix.ENOENT) {
|
||||
return ErrKeyNotExist
|
||||
}
|
||||
|
||||
if xerrors.Is(err, unix.EEXIST) {
|
||||
return ErrKeyExist
|
||||
}
|
||||
|
||||
return xerrors.New(err.Error())
|
||||
}
|
||||
|
||||
func bpfMapFreeze(m *internal.FD) error {
|
||||
fd, err := m.Value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
attr := bpfMapFreezeAttr{
|
||||
mapFd: fd,
|
||||
}
|
||||
_, err = internal.BPF(internal.BPF_MAP_FREEZE, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return err
|
||||
}
|
||||
|
||||
func bpfGetObjectInfoByFD(fd *internal.FD, info unsafe.Pointer, size uintptr) error {
|
||||
value, err := fd.Value()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// available from 4.13
|
||||
attr := bpfObjGetInfoByFDAttr{
|
||||
fd: value,
|
||||
infoLen: uint32(size),
|
||||
info: internal.NewPointer(info),
|
||||
}
|
||||
_, err = internal.BPF(internal.BPF_OBJ_GET_INFO_BY_FD, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
if err != nil {
|
||||
return xerrors.Errorf("fd %d: %w", fd, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func bpfGetProgInfoByFD(fd *internal.FD) (*bpfProgInfo, error) {
|
||||
var info bpfProgInfo
|
||||
if err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info)); err != nil {
|
||||
return nil, xerrors.Errorf("can't get program info: %w", err)
|
||||
}
|
||||
return &info, nil
|
||||
}
|
||||
|
||||
func bpfGetMapInfoByFD(fd *internal.FD) (*bpfMapInfo, error) {
|
||||
var info bpfMapInfo
|
||||
err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info))
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("can't get map info: %w", err)
|
||||
}
|
||||
return &info, nil
|
||||
}
|
||||
|
||||
var haveObjName = internal.FeatureTest("object names", "4.15", func() (bool, error) {
|
||||
attr := bpfMapCreateAttr{
|
||||
mapType: Array,
|
||||
keySize: 4,
|
||||
valueSize: 4,
|
||||
maxEntries: 1,
|
||||
mapName: newBPFObjName("feature_test"),
|
||||
}
|
||||
|
||||
fd, err := bpfMapCreate(&attr)
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
_ = fd.Close()
|
||||
return true, nil
|
||||
})
|
||||
|
||||
var objNameAllowsDot = internal.FeatureTest("dot in object names", "5.2", func() (bool, error) {
|
||||
if err := haveObjName(); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
attr := bpfMapCreateAttr{
|
||||
mapType: Array,
|
||||
keySize: 4,
|
||||
valueSize: 4,
|
||||
maxEntries: 1,
|
||||
mapName: newBPFObjName(".test"),
|
||||
}
|
||||
|
||||
fd, err := bpfMapCreate(&attr)
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
_ = fd.Close()
|
||||
return true, nil
|
||||
})
|
||||
|
||||
func bpfObjGetFDByID(cmd internal.BPFCmd, id uint32) (*internal.FD, error) {
|
||||
attr := bpfGetFDByIDAttr{
|
||||
id: id,
|
||||
}
|
||||
ptr, err := internal.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
|
||||
return internal.NewFD(uint32(ptr)), wrapObjError(err)
|
||||
}
|
|
@ -0,0 +1,196 @@
|
|||
package ebpf
|
||||
|
||||
//go:generate stringer -output types_string.go -type=MapType,ProgramType,AttachType
|
||||
|
||||
// MapType indicates the type map structure
|
||||
// that will be initialized in the kernel.
|
||||
type MapType uint32
|
||||
|
||||
// All the various map types that can be created
|
||||
const (
|
||||
UnspecifiedMap MapType = iota
|
||||
// Hash is a hash map
|
||||
Hash
|
||||
// Array is an array map
|
||||
Array
|
||||
// ProgramArray - A program array map is a special kind of array map whose map
|
||||
// values contain only file descriptors referring to other eBPF
|
||||
// programs. Thus, both the key_size and value_size must be
|
||||
// exactly four bytes. This map is used in conjunction with the
|
||||
// TailCall helper.
|
||||
ProgramArray
|
||||
// PerfEventArray - A perf event array is used in conjunction with PerfEventRead
|
||||
// and PerfEventOutput calls, to read the raw bpf_perf_data from the registers.
|
||||
PerfEventArray
|
||||
// PerCPUHash - This data structure is useful for people who have high performance
|
||||
// network needs and can reconcile adds at the end of some cycle, so that
|
||||
// hashes can be lock free without the use of XAdd, which can be costly.
|
||||
PerCPUHash
|
||||
// PerCPUArray - This data structure is useful for people who have high performance
|
||||
// network needs and can reconcile adds at the end of some cycle, so that
|
||||
// hashes can be lock free without the use of XAdd, which can be costly.
|
||||
// Each CPU gets a copy of this hash, the contents of all of which can be reconciled
|
||||
// later.
|
||||
PerCPUArray
|
||||
// StackTrace - This holds whole user and kernel stack traces, it can be retrieved with
|
||||
// GetStackID
|
||||
StackTrace
|
||||
// CGroupArray - This is a very niche structure used to help SKBInCGroup determine
|
||||
// if an skb is from a socket belonging to a specific cgroup
|
||||
CGroupArray
|
||||
// LRUHash - This allows you to create a small hash structure that will purge the
|
||||
// least recently used items rather than thow an error when you run out of memory
|
||||
LRUHash
|
||||
// LRUCPUHash - This is NOT like PerCPUHash, this structure is shared among the CPUs,
|
||||
// it has more to do with including the CPU id with the LRU calculation so that if a
|
||||
// particular CPU is using a value over-and-over again, then it will be saved, but if
|
||||
// a value is being retrieved a lot but sparsely across CPUs it is not as important, basically
|
||||
// giving weight to CPU locality over overall usage.
|
||||
LRUCPUHash
|
||||
// LPMTrie - This is an implementation of Longest-Prefix-Match Trie structure. It is useful,
|
||||
// for storing things like IP addresses which can be bit masked allowing for keys of differing
|
||||
// values to refer to the same reference based on their masks. See wikipedia for more details.
|
||||
LPMTrie
|
||||
// ArrayOfMaps - Each item in the array is another map. The inner map mustn't be a map of maps
|
||||
// itself.
|
||||
ArrayOfMaps
|
||||
// HashOfMaps - Each item in the hash map is another map. The inner map mustn't be a map of maps
|
||||
// itself.
|
||||
HashOfMaps
|
||||
// DevMap - Specialized map to store references to network devices.
|
||||
DevMap
|
||||
// SockMap - Specialized map to store references to sockets.
|
||||
SockMap
|
||||
// CPUMap - Specialized map to store references to CPUs.
|
||||
CPUMap
|
||||
// XSKMap - Specialized map for XDP programs to store references to open sockets.
|
||||
XSKMap
|
||||
// SockHash - Specialized hash to store references to sockets.
|
||||
SockHash
|
||||
// CGroupStorage - Special map for CGroups.
|
||||
CGroupStorage
|
||||
// ReusePortSockArray - Specialized map to store references to sockets that can be reused.
|
||||
ReusePortSockArray
|
||||
// PerCPUCGroupStorage - Special per CPU map for CGroups.
|
||||
PerCPUCGroupStorage
|
||||
// Queue - FIFO storage for BPF programs.
|
||||
Queue
|
||||
// Stack - LIFO storage for BPF programs.
|
||||
Stack
|
||||
// SkStorage - Specialized map for local storage at SK for BPF programs.
|
||||
SkStorage
|
||||
// DevMapHash - Hash-based indexing scheme for references to network devices.
|
||||
DevMapHash
|
||||
)
|
||||
|
||||
// hasPerCPUValue returns true if the Map stores a value per CPU.
|
||||
func (mt MapType) hasPerCPUValue() bool {
|
||||
if mt == PerCPUHash || mt == PerCPUArray || mt == LRUCPUHash {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ProgramType of the eBPF program
|
||||
type ProgramType uint32
|
||||
|
||||
// eBPF program types
|
||||
const (
|
||||
// Unrecognized program type
|
||||
UnspecifiedProgram ProgramType = iota
|
||||
// SocketFilter socket or seccomp filter
|
||||
SocketFilter
|
||||
// Kprobe program
|
||||
Kprobe
|
||||
// SchedCLS traffic control shaper
|
||||
SchedCLS
|
||||
// SchedACT routing control shaper
|
||||
SchedACT
|
||||
// TracePoint program
|
||||
TracePoint
|
||||
// XDP program
|
||||
XDP
|
||||
// PerfEvent program
|
||||
PerfEvent
|
||||
// CGroupSKB program
|
||||
CGroupSKB
|
||||
// CGroupSock program
|
||||
CGroupSock
|
||||
// LWTIn program
|
||||
LWTIn
|
||||
// LWTOut program
|
||||
LWTOut
|
||||
// LWTXmit program
|
||||
LWTXmit
|
||||
// SockOps program
|
||||
SockOps
|
||||
// SkSKB program
|
||||
SkSKB
|
||||
// CGroupDevice program
|
||||
CGroupDevice
|
||||
// SkMsg program
|
||||
SkMsg
|
||||
// RawTracepoint program
|
||||
RawTracepoint
|
||||
// CGroupSockAddr program
|
||||
CGroupSockAddr
|
||||
// LWTSeg6Local program
|
||||
LWTSeg6Local
|
||||
// LircMode2 program
|
||||
LircMode2
|
||||
// SkReuseport program
|
||||
SkReuseport
|
||||
// FlowDissector program
|
||||
FlowDissector
|
||||
// CGroupSysctl program
|
||||
CGroupSysctl
|
||||
// RawTracepointWritable program
|
||||
RawTracepointWritable
|
||||
// CGroupSockopt program
|
||||
CGroupSockopt
|
||||
// Tracing program
|
||||
Tracing
|
||||
)
|
||||
|
||||
// AttachType of the eBPF program, needed to differentiate allowed context accesses in
|
||||
// some newer program types like CGroupSockAddr. Should be set to AttachNone if not required.
|
||||
// Will cause invalid argument (EINVAL) at program load time if set incorrectly.
|
||||
type AttachType uint32
|
||||
|
||||
// AttachNone is an alias for AttachCGroupInetIngress for readability reasons
|
||||
const AttachNone AttachType = 0
|
||||
|
||||
const (
|
||||
AttachCGroupInetIngress AttachType = iota
|
||||
AttachCGroupInetEgress
|
||||
AttachCGroupInetSockCreate
|
||||
AttachCGroupSockOps
|
||||
AttachSkSKBStreamParser
|
||||
AttachSkSKBStreamVerdict
|
||||
AttachCGroupDevice
|
||||
AttachSkMsgVerdict
|
||||
AttachCGroupInet4Bind
|
||||
AttachCGroupInet6Bind
|
||||
AttachCGroupInet4Connect
|
||||
AttachCGroupInet6Connect
|
||||
AttachCGroupInet4PostBind
|
||||
AttachCGroupInet6PostBind
|
||||
AttachCGroupUDP4Sendmsg
|
||||
AttachCGroupUDP6Sendmsg
|
||||
AttachLircMode2
|
||||
AttachFlowDissector
|
||||
AttachCGroupSysctl
|
||||
AttachCGroupUDP4Recvmsg
|
||||
AttachCGroupUDP6Recvmsg
|
||||
AttachCGroupGetsockopt
|
||||
AttachCGroupSetsockopt
|
||||
AttachTraceRawTp
|
||||
AttachTraceFEntry
|
||||
AttachTraceFExit
|
||||
AttachModifyReturn
|
||||
AttachLSMMac
|
||||
AttachTraceIter
|
||||
)
|
||||
|
||||
// AttachFlags of the eBPF program used in BPF_PROG_ATTACH command
|
||||
type AttachFlags uint32
|
|
@ -0,0 +1,137 @@
|
|||
// Code generated by "stringer -output types_string.go -type=MapType,ProgramType,AttachType"; DO NOT EDIT.
|
||||
|
||||
package ebpf
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[UnspecifiedMap-0]
|
||||
_ = x[Hash-1]
|
||||
_ = x[Array-2]
|
||||
_ = x[ProgramArray-3]
|
||||
_ = x[PerfEventArray-4]
|
||||
_ = x[PerCPUHash-5]
|
||||
_ = x[PerCPUArray-6]
|
||||
_ = x[StackTrace-7]
|
||||
_ = x[CGroupArray-8]
|
||||
_ = x[LRUHash-9]
|
||||
_ = x[LRUCPUHash-10]
|
||||
_ = x[LPMTrie-11]
|
||||
_ = x[ArrayOfMaps-12]
|
||||
_ = x[HashOfMaps-13]
|
||||
_ = x[DevMap-14]
|
||||
_ = x[SockMap-15]
|
||||
_ = x[CPUMap-16]
|
||||
_ = x[XSKMap-17]
|
||||
_ = x[SockHash-18]
|
||||
_ = x[CGroupStorage-19]
|
||||
_ = x[ReusePortSockArray-20]
|
||||
_ = x[PerCPUCGroupStorage-21]
|
||||
_ = x[Queue-22]
|
||||
_ = x[Stack-23]
|
||||
_ = x[SkStorage-24]
|
||||
_ = x[DevMapHash-25]
|
||||
}
|
||||
|
||||
const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMapsDevMapSockMapCPUMapXSKMapSockHashCGroupStorageReusePortSockArrayPerCPUCGroupStorageQueueStackSkStorageDevMapHash"
|
||||
|
||||
var _MapType_index = [...]uint8{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136, 142, 149, 155, 161, 169, 182, 200, 219, 224, 229, 238, 248}
|
||||
|
||||
func (i MapType) String() string {
|
||||
if i >= MapType(len(_MapType_index)-1) {
|
||||
return "MapType(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _MapType_name[_MapType_index[i]:_MapType_index[i+1]]
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[UnspecifiedProgram-0]
|
||||
_ = x[SocketFilter-1]
|
||||
_ = x[Kprobe-2]
|
||||
_ = x[SchedCLS-3]
|
||||
_ = x[SchedACT-4]
|
||||
_ = x[TracePoint-5]
|
||||
_ = x[XDP-6]
|
||||
_ = x[PerfEvent-7]
|
||||
_ = x[CGroupSKB-8]
|
||||
_ = x[CGroupSock-9]
|
||||
_ = x[LWTIn-10]
|
||||
_ = x[LWTOut-11]
|
||||
_ = x[LWTXmit-12]
|
||||
_ = x[SockOps-13]
|
||||
_ = x[SkSKB-14]
|
||||
_ = x[CGroupDevice-15]
|
||||
_ = x[SkMsg-16]
|
||||
_ = x[RawTracepoint-17]
|
||||
_ = x[CGroupSockAddr-18]
|
||||
_ = x[LWTSeg6Local-19]
|
||||
_ = x[LircMode2-20]
|
||||
_ = x[SkReuseport-21]
|
||||
_ = x[FlowDissector-22]
|
||||
_ = x[CGroupSysctl-23]
|
||||
_ = x[RawTracepointWritable-24]
|
||||
_ = x[CGroupSockopt-25]
|
||||
_ = x[Tracing-26]
|
||||
}
|
||||
|
||||
const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockoptTracing"
|
||||
|
||||
var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258, 265}
|
||||
|
||||
func (i ProgramType) String() string {
|
||||
if i >= ProgramType(len(_ProgramType_index)-1) {
|
||||
return "ProgramType(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _ProgramType_name[_ProgramType_index[i]:_ProgramType_index[i+1]]
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[AttachNone-0]
|
||||
_ = x[AttachCGroupInetIngress-0]
|
||||
_ = x[AttachCGroupInetEgress-1]
|
||||
_ = x[AttachCGroupInetSockCreate-2]
|
||||
_ = x[AttachCGroupSockOps-3]
|
||||
_ = x[AttachSkSKBStreamParser-4]
|
||||
_ = x[AttachSkSKBStreamVerdict-5]
|
||||
_ = x[AttachCGroupDevice-6]
|
||||
_ = x[AttachSkMsgVerdict-7]
|
||||
_ = x[AttachCGroupInet4Bind-8]
|
||||
_ = x[AttachCGroupInet6Bind-9]
|
||||
_ = x[AttachCGroupInet4Connect-10]
|
||||
_ = x[AttachCGroupInet6Connect-11]
|
||||
_ = x[AttachCGroupInet4PostBind-12]
|
||||
_ = x[AttachCGroupInet6PostBind-13]
|
||||
_ = x[AttachCGroupUDP4Sendmsg-14]
|
||||
_ = x[AttachCGroupUDP6Sendmsg-15]
|
||||
_ = x[AttachLircMode2-16]
|
||||
_ = x[AttachFlowDissector-17]
|
||||
_ = x[AttachCGroupSysctl-18]
|
||||
_ = x[AttachCGroupUDP4Recvmsg-19]
|
||||
_ = x[AttachCGroupUDP6Recvmsg-20]
|
||||
_ = x[AttachCGroupGetsockopt-21]
|
||||
_ = x[AttachCGroupSetsockopt-22]
|
||||
_ = x[AttachTraceRawTp-23]
|
||||
_ = x[AttachTraceFEntry-24]
|
||||
_ = x[AttachTraceFExit-25]
|
||||
_ = x[AttachModifyReturn-26]
|
||||
_ = x[AttachLSMMac-27]
|
||||
_ = x[AttachTraceIter-28]
|
||||
}
|
||||
|
||||
const _AttachType_name = "AttachNoneAttachCGroupInetEgressAttachCGroupInetSockCreateAttachCGroupSockOpsAttachSkSKBStreamParserAttachSkSKBStreamVerdictAttachCGroupDeviceAttachSkMsgVerdictAttachCGroupInet4BindAttachCGroupInet6BindAttachCGroupInet4ConnectAttachCGroupInet6ConnectAttachCGroupInet4PostBindAttachCGroupInet6PostBindAttachCGroupUDP4SendmsgAttachCGroupUDP6SendmsgAttachLircMode2AttachFlowDissectorAttachCGroupSysctlAttachCGroupUDP4RecvmsgAttachCGroupUDP6RecvmsgAttachCGroupGetsockoptAttachCGroupSetsockoptAttachTraceRawTpAttachTraceFEntryAttachTraceFExitAttachModifyReturnAttachLSMMacAttachTraceIter"
|
||||
|
||||
var _AttachType_index = [...]uint16{0, 10, 32, 58, 77, 100, 124, 142, 160, 181, 202, 226, 250, 275, 300, 323, 346, 361, 380, 398, 421, 444, 466, 488, 504, 521, 537, 555, 567, 582}
|
||||
|
||||
func (i AttachType) String() string {
|
||||
if i >= AttachType(len(_AttachType_index)-1) {
|
||||
return "AttachType(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _AttachType_name[_AttachType_index[i]:_AttachType_index[i+1]]
|
||||
}
|
|
@ -1,90 +0,0 @@
|
|||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package util contains utility functions related to systemd that applications
|
||||
// can use to check things like whether systemd is running. Note that some of
|
||||
// these functions attempt to manually load systemd libraries at runtime rather
|
||||
// than linking against them.
|
||||
package util
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNoCGO = fmt.Errorf("go-systemd built with CGO disabled")
|
||||
)
|
||||
|
||||
// GetRunningSlice attempts to retrieve the name of the systemd slice in which
|
||||
// the current process is running.
|
||||
// This function is a wrapper around the libsystemd C library; if it cannot be
|
||||
// opened, an error is returned.
|
||||
func GetRunningSlice() (string, error) {
|
||||
return getRunningSlice()
|
||||
}
|
||||
|
||||
// RunningFromSystemService tries to detect whether the current process has
|
||||
// been invoked from a system service. The condition for this is whether the
|
||||
// process is _not_ a user process. User processes are those running in session
|
||||
// scopes or under per-user `systemd --user` instances.
|
||||
//
|
||||
// To avoid false positives on systems without `pam_systemd` (which is
|
||||
// responsible for creating user sessions), this function also uses a heuristic
|
||||
// to detect whether it's being invoked from a session leader process. This is
|
||||
// the case if the current process is executed directly from a service file
|
||||
// (e.g. with `ExecStart=/this/cmd`). Note that this heuristic will fail if the
|
||||
// command is instead launched in a subshell or similar so that it is not
|
||||
// session leader (e.g. `ExecStart=/bin/bash -c "/this/cmd"`)
|
||||
//
|
||||
// This function is a wrapper around the libsystemd C library; if this is
|
||||
// unable to successfully open a handle to the library for any reason (e.g. it
|
||||
// cannot be found), an error will be returned.
|
||||
func RunningFromSystemService() (bool, error) {
|
||||
return runningFromSystemService()
|
||||
}
|
||||
|
||||
// CurrentUnitName attempts to retrieve the name of the systemd system unit
|
||||
// from which the calling process has been invoked. It wraps the systemd
|
||||
// `sd_pid_get_unit` call, with the same caveat: for processes not part of a
|
||||
// systemd system unit, this function will return an error.
|
||||
func CurrentUnitName() (string, error) {
|
||||
return currentUnitName()
|
||||
}
|
||||
|
||||
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
||||
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
|
||||
// checks whether /run/systemd/system/ exists and is a directory.
|
||||
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
|
||||
func IsRunningSystemd() bool {
|
||||
fi, err := os.Lstat("/run/systemd/system")
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return fi.IsDir()
|
||||
}
|
||||
|
||||
// GetMachineID returns a host's 128-bit machine ID as a string. This functions
|
||||
// similarly to systemd's `sd_id128_get_machine`: internally, it simply reads
|
||||
// the contents of /etc/machine-id
|
||||
// http://www.freedesktop.org/software/systemd/man/sd_id128_get_machine.html
|
||||
func GetMachineID() (string, error) {
|
||||
machineID, err := ioutil.ReadFile("/etc/machine-id")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read /etc/machine-id: %v", err)
|
||||
}
|
||||
return strings.TrimSpace(string(machineID)), nil
|
||||
}
|
|
@ -1,175 +0,0 @@
|
|||
// Copyright 2016 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// +build cgo
|
||||
|
||||
package util
|
||||
|
||||
// #include <stdlib.h>
|
||||
// #include <sys/types.h>
|
||||
// #include <unistd.h>
|
||||
//
|
||||
// int
|
||||
// my_sd_pid_get_owner_uid(void *f, pid_t pid, uid_t *uid)
|
||||
// {
|
||||
// int (*sd_pid_get_owner_uid)(pid_t, uid_t *);
|
||||
//
|
||||
// sd_pid_get_owner_uid = (int (*)(pid_t, uid_t *))f;
|
||||
// return sd_pid_get_owner_uid(pid, uid);
|
||||
// }
|
||||
//
|
||||
// int
|
||||
// my_sd_pid_get_unit(void *f, pid_t pid, char **unit)
|
||||
// {
|
||||
// int (*sd_pid_get_unit)(pid_t, char **);
|
||||
//
|
||||
// sd_pid_get_unit = (int (*)(pid_t, char **))f;
|
||||
// return sd_pid_get_unit(pid, unit);
|
||||
// }
|
||||
//
|
||||
// int
|
||||
// my_sd_pid_get_slice(void *f, pid_t pid, char **slice)
|
||||
// {
|
||||
// int (*sd_pid_get_slice)(pid_t, char **);
|
||||
//
|
||||
// sd_pid_get_slice = (int (*)(pid_t, char **))f;
|
||||
// return sd_pid_get_slice(pid, slice);
|
||||
// }
|
||||
//
|
||||
// int
|
||||
// am_session_leader()
|
||||
// {
|
||||
// return (getsid(0) == getpid());
|
||||
// }
|
||||
import "C"
|
||||
import (
|
||||
"fmt"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"github.com/coreos/pkg/dlopen"
|
||||
)
|
||||
|
||||
var libsystemdNames = []string{
|
||||
// systemd < 209
|
||||
"libsystemd-login.so.0",
|
||||
"libsystemd-login.so",
|
||||
|
||||
// systemd >= 209 merged libsystemd-login into libsystemd proper
|
||||
"libsystemd.so.0",
|
||||
"libsystemd.so",
|
||||
}
|
||||
|
||||
func getRunningSlice() (slice string, err error) {
|
||||
var h *dlopen.LibHandle
|
||||
h, err = dlopen.GetHandle(libsystemdNames)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err1 := h.Close(); err1 != nil {
|
||||
err = err1
|
||||
}
|
||||
}()
|
||||
|
||||
sd_pid_get_slice, err := h.GetSymbolPointer("sd_pid_get_slice")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var s string
|
||||
sl := C.CString(s)
|
||||
defer C.free(unsafe.Pointer(sl))
|
||||
|
||||
ret := C.my_sd_pid_get_slice(sd_pid_get_slice, 0, &sl)
|
||||
if ret < 0 {
|
||||
err = fmt.Errorf("error calling sd_pid_get_slice: %v", syscall.Errno(-ret))
|
||||
return
|
||||
}
|
||||
|
||||
return C.GoString(sl), nil
|
||||
}
|
||||
|
||||
func runningFromSystemService() (ret bool, err error) {
|
||||
var h *dlopen.LibHandle
|
||||
h, err = dlopen.GetHandle(libsystemdNames)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err1 := h.Close(); err1 != nil {
|
||||
err = err1
|
||||
}
|
||||
}()
|
||||
|
||||
sd_pid_get_owner_uid, err := h.GetSymbolPointer("sd_pid_get_owner_uid")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var uid C.uid_t
|
||||
errno := C.my_sd_pid_get_owner_uid(sd_pid_get_owner_uid, 0, &uid)
|
||||
serrno := syscall.Errno(-errno)
|
||||
// when we're running from a unit file, sd_pid_get_owner_uid returns
|
||||
// ENOENT (systemd <220), ENXIO (systemd 220-223), or ENODATA
|
||||
// (systemd >=234)
|
||||
switch {
|
||||
case errno >= 0:
|
||||
ret = false
|
||||
case serrno == syscall.ENOENT, serrno == syscall.ENXIO, serrno == syscall.ENODATA:
|
||||
// Since the implementation of sessions in systemd relies on
|
||||
// the `pam_systemd` module, using the sd_pid_get_owner_uid
|
||||
// heuristic alone can result in false positives if that module
|
||||
// (or PAM itself) is not present or properly configured on the
|
||||
// system. As such, we also check if we're the session leader,
|
||||
// which should be the case if we're invoked from a unit file,
|
||||
// but not if e.g. we're invoked from the command line from a
|
||||
// user's login session
|
||||
ret = C.am_session_leader() == 1
|
||||
default:
|
||||
err = fmt.Errorf("error calling sd_pid_get_owner_uid: %v", syscall.Errno(-errno))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func currentUnitName() (unit string, err error) {
|
||||
var h *dlopen.LibHandle
|
||||
h, err = dlopen.GetHandle(libsystemdNames)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
if err1 := h.Close(); err1 != nil {
|
||||
err = err1
|
||||
}
|
||||
}()
|
||||
|
||||
sd_pid_get_unit, err := h.GetSymbolPointer("sd_pid_get_unit")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var s string
|
||||
u := C.CString(s)
|
||||
defer C.free(unsafe.Pointer(u))
|
||||
|
||||
ret := C.my_sd_pid_get_unit(sd_pid_get_unit, 0, &u)
|
||||
if ret < 0 {
|
||||
err = fmt.Errorf("error calling sd_pid_get_unit: %v", syscall.Errno(-ret))
|
||||
return
|
||||
}
|
||||
|
||||
unit = C.GoString(u)
|
||||
return
|
||||
}
|
|
@ -1,23 +0,0 @@
|
|||
// Copyright 2016 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// +build !cgo
|
||||
|
||||
package util
|
||||
|
||||
func getRunningSlice() (string, error) { return "", ErrNoCGO }
|
||||
|
||||
func runningFromSystemService() (bool, error) { return false, ErrNoCGO }
|
||||
|
||||
func currentUnitName() (string, error) { return "", ErrNoCGO }
|
|
@ -1,202 +0,0 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} {name of copyright owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
CoreOS Project
|
||||
Copyright 2014 CoreOS, Inc
|
||||
|
||||
This product includes software developed at CoreOS, Inc.
|
||||
(http://www.coreos.com/).
|
|
@ -1,82 +0,0 @@
|
|||
// Copyright 2016 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package dlopen provides some convenience functions to dlopen a library and
|
||||
// get its symbols.
|
||||
package dlopen
|
||||
|
||||
// #cgo LDFLAGS: -ldl
|
||||
// #include <stdlib.h>
|
||||
// #include <dlfcn.h>
|
||||
import "C"
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
var ErrSoNotFound = errors.New("unable to open a handle to the library")
|
||||
|
||||
// LibHandle represents an open handle to a library (.so)
|
||||
type LibHandle struct {
|
||||
Handle unsafe.Pointer
|
||||
Libname string
|
||||
}
|
||||
|
||||
// GetHandle tries to get a handle to a library (.so), attempting to access it
|
||||
// by the names specified in libs and returning the first that is successfully
|
||||
// opened. Callers are responsible for closing the handler. If no library can
|
||||
// be successfully opened, an error is returned.
|
||||
func GetHandle(libs []string) (*LibHandle, error) {
|
||||
for _, name := range libs {
|
||||
libname := C.CString(name)
|
||||
defer C.free(unsafe.Pointer(libname))
|
||||
handle := C.dlopen(libname, C.RTLD_LAZY)
|
||||
if handle != nil {
|
||||
h := &LibHandle{
|
||||
Handle: handle,
|
||||
Libname: name,
|
||||
}
|
||||
return h, nil
|
||||
}
|
||||
}
|
||||
return nil, ErrSoNotFound
|
||||
}
|
||||
|
||||
// GetSymbolPointer takes a symbol name and returns a pointer to the symbol.
|
||||
func (l *LibHandle) GetSymbolPointer(symbol string) (unsafe.Pointer, error) {
|
||||
sym := C.CString(symbol)
|
||||
defer C.free(unsafe.Pointer(sym))
|
||||
|
||||
C.dlerror()
|
||||
p := C.dlsym(l.Handle, sym)
|
||||
e := C.dlerror()
|
||||
if e != nil {
|
||||
return nil, fmt.Errorf("error resolving symbol %q: %v", symbol, errors.New(C.GoString(e)))
|
||||
}
|
||||
|
||||
return p, nil
|
||||
}
|
||||
|
||||
// Close closes a LibHandle.
|
||||
func (l *LibHandle) Close() error {
|
||||
C.dlerror()
|
||||
C.dlclose(l.Handle)
|
||||
e := C.dlerror()
|
||||
if e != nil {
|
||||
return fmt.Errorf("error closing %v: %v", l.Libname, errors.New(C.GoString(e)))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -1,56 +0,0 @@
|
|||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// +build linux
|
||||
|
||||
package dlopen
|
||||
|
||||
// #include <string.h>
|
||||
// #include <stdlib.h>
|
||||
//
|
||||
// int
|
||||
// my_strlen(void *f, const char *s)
|
||||
// {
|
||||
// size_t (*strlen)(const char *);
|
||||
//
|
||||
// strlen = (size_t (*)(const char *))f;
|
||||
// return strlen(s);
|
||||
// }
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func strlen(libs []string, s string) (int, error) {
|
||||
h, err := GetHandle(libs)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf(`couldn't get a handle to the library: %v`, err)
|
||||
}
|
||||
defer h.Close()
|
||||
|
||||
f := "strlen"
|
||||
cs := C.CString(s)
|
||||
defer C.free(unsafe.Pointer(cs))
|
||||
|
||||
strlen, err := h.GetSymbolPointer(f)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf(`couldn't get symbol %q: %v`, f, err)
|
||||
}
|
||||
|
||||
len := C.my_strlen(strlen, cs)
|
||||
|
||||
return int(len), nil
|
||||
}
|
|
@ -261,6 +261,7 @@ process := &libcontainer.Process{
|
|||
Stdin: os.Stdin,
|
||||
Stdout: os.Stdout,
|
||||
Stderr: os.Stderr,
|
||||
Init: true,
|
||||
}
|
||||
|
||||
err := container.Run(process)
|
||||
|
|
|
@ -6,6 +6,8 @@ import (
|
|||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
// IsEnabled returns true if apparmor is enabled for the host.
|
||||
|
@ -19,7 +21,7 @@ func IsEnabled() bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func setprocattr(attr, value string) error {
|
||||
func setProcAttr(attr, value string) error {
|
||||
// Under AppArmor you can only change your own attr, so use /proc/self/
|
||||
// instead of /proc/<tid>/ like libapparmor does
|
||||
path := fmt.Sprintf("/proc/self/attr/%s", attr)
|
||||
|
@ -30,6 +32,10 @@ func setprocattr(attr, value string) error {
|
|||
}
|
||||
defer f.Close()
|
||||
|
||||
if err := utils.EnsureProcHandle(f); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = fmt.Fprintf(f, "%s", value)
|
||||
return err
|
||||
}
|
||||
|
@ -37,7 +43,7 @@ func setprocattr(attr, value string) error {
|
|||
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
|
||||
func changeOnExec(name string) error {
|
||||
value := "exec " + name
|
||||
if err := setprocattr("exec", value); err != nil {
|
||||
if err := setProcAttr("exec", value); err != nil {
|
||||
return fmt.Errorf("apparmor failed to apply profile: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
|
|
@ -71,7 +71,11 @@ func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilitie
|
|||
}
|
||||
ambient = append(ambient, v)
|
||||
}
|
||||
pid, err := capability.NewPid(0)
|
||||
pid, err := capability.NewPid2(0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = pid.Load()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -37,8 +37,18 @@ type Manager interface {
|
|||
// restore the object later.
|
||||
GetPaths() map[string]string
|
||||
|
||||
// GetUnifiedPath returns the unified path when running in unified mode.
|
||||
// The value corresponds to the all values of GetPaths() map.
|
||||
//
|
||||
// GetUnifiedPath returns error when running in hybrid mode as well as
|
||||
// in legacy mode.
|
||||
GetUnifiedPath() (string, error)
|
||||
|
||||
// Sets the cgroup as configured.
|
||||
Set(container *configs.Config) error
|
||||
|
||||
// Gets the cgroup as configured.
|
||||
GetCgroups() (*configs.Cgroup, error)
|
||||
}
|
||||
|
||||
type NotFoundError struct {
|
||||
|
|
180
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
generated
vendored
Normal file
180
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
generated
vendored
Normal file
|
@ -0,0 +1,180 @@
|
|||
// Package devicefilter containes eBPF device filter program
|
||||
//
|
||||
// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
|
||||
//
|
||||
// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano)
|
||||
// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397
|
||||
package devicefilter
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
// license string format is same as kernel MODULE_LICENSE macro
|
||||
license = "Apache"
|
||||
)
|
||||
|
||||
// DeviceFilter returns eBPF device filter program and its license string
|
||||
func DeviceFilter(devices []*configs.Device) (asm.Instructions, string, error) {
|
||||
p := &program{}
|
||||
p.init()
|
||||
for i := len(devices) - 1; i >= 0; i-- {
|
||||
if err := p.appendDevice(devices[i]); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
}
|
||||
insts, err := p.finalize()
|
||||
return insts, license, err
|
||||
}
|
||||
|
||||
type program struct {
|
||||
insts asm.Instructions
|
||||
hasWildCard bool
|
||||
blockID int
|
||||
}
|
||||
|
||||
func (p *program) init() {
|
||||
// struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423
|
||||
/*
|
||||
u32 access_type
|
||||
u32 major
|
||||
u32 minor
|
||||
*/
|
||||
// R2 <- type (lower 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R2, asm.R1, 0, asm.Half))
|
||||
|
||||
// R3 <- access (upper 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R3, asm.R1, 0, asm.Word),
|
||||
// RSh: bitwise shift right
|
||||
asm.RSh.Imm32(asm.R3, 16))
|
||||
|
||||
// R4 <- major (u32 major at R1[4])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R4, asm.R1, 4, asm.Word))
|
||||
|
||||
// R5 <- minor (u32 minor at R1[8])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
|
||||
}
|
||||
|
||||
// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
|
||||
func (p *program) appendDevice(dev *configs.Device) error {
|
||||
if p.blockID < 0 {
|
||||
return errors.New("the program is finalized")
|
||||
}
|
||||
if p.hasWildCard {
|
||||
// All entries after wildcard entry are ignored
|
||||
return nil
|
||||
}
|
||||
|
||||
bpfType := int32(-1)
|
||||
hasType := true
|
||||
switch dev.Type {
|
||||
case 'c':
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
|
||||
case 'b':
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
|
||||
case 'a':
|
||||
hasType = false
|
||||
default:
|
||||
// if not specified in OCI json, typ is set to DeviceTypeAll
|
||||
return errors.Errorf("invalid DeviceType %q", string(dev.Type))
|
||||
}
|
||||
if dev.Major > math.MaxUint32 {
|
||||
return errors.Errorf("invalid major %d", dev.Major)
|
||||
}
|
||||
if dev.Minor > math.MaxUint32 {
|
||||
return errors.Errorf("invalid minor %d", dev.Major)
|
||||
}
|
||||
hasMajor := dev.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||
hasMinor := dev.Minor >= 0
|
||||
bpfAccess := int32(0)
|
||||
for _, r := range dev.Permissions {
|
||||
switch r {
|
||||
case 'r':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_READ
|
||||
case 'w':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_WRITE
|
||||
case 'm':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
|
||||
default:
|
||||
return errors.Errorf("unknown device access %v", r)
|
||||
}
|
||||
}
|
||||
// If the access is rwm, skip the check.
|
||||
hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
|
||||
|
||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
||||
nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1)
|
||||
prevBlockLastIdx := len(p.insts) - 1
|
||||
if hasType {
|
||||
p.insts = append(p.insts,
|
||||
// if (R2 != bpfType) goto next
|
||||
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasAccess {
|
||||
p.insts = append(p.insts,
|
||||
// if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next
|
||||
asm.Mov.Reg32(asm.R1, asm.R3),
|
||||
asm.And.Imm32(asm.R1, bpfAccess),
|
||||
asm.JEq.Imm(asm.R1, 0, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMajor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R4 != major) goto next
|
||||
asm.JNE.Imm(asm.R4, int32(dev.Major), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMinor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R5 != minor) goto next
|
||||
asm.JNE.Imm(asm.R5, int32(dev.Minor), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if !hasType && !hasAccess && !hasMajor && !hasMinor {
|
||||
p.hasWildCard = true
|
||||
}
|
||||
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
|
||||
// set blockSym to the first instruction we added in this iteration
|
||||
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
|
||||
p.blockID++
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *program) finalize() (asm.Instructions, error) {
|
||||
if p.hasWildCard {
|
||||
// acceptBlock with asm.Return() is already inserted
|
||||
return p.insts, nil
|
||||
}
|
||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
||||
p.insts = append(p.insts,
|
||||
// R0 <- 0
|
||||
asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
|
||||
asm.Return(),
|
||||
)
|
||||
p.blockID = -1
|
||||
return p.insts, nil
|
||||
}
|
||||
|
||||
func acceptBlock(accept bool) asm.Instructions {
|
||||
v := int32(0)
|
||||
if accept {
|
||||
v = 1
|
||||
}
|
||||
return []asm.Instruction{
|
||||
// R0 <- v
|
||||
asm.Mov.Imm32(asm.R0, v),
|
||||
asm.Return(),
|
||||
}
|
||||
}
|
45
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf.go
generated
vendored
Normal file
45
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf.go
generated
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
||||
//
|
||||
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
||||
//
|
||||
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
||||
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
|
||||
nilCloser := func() error {
|
||||
return nil
|
||||
}
|
||||
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
|
||||
// This limit is not inherited into the container.
|
||||
memlockLimit := &unix.Rlimit{
|
||||
Cur: unix.RLIM_INFINITY,
|
||||
Max: unix.RLIM_INFINITY,
|
||||
}
|
||||
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
|
||||
spec := &ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
Instructions: insts,
|
||||
License: license,
|
||||
}
|
||||
prog, err := ebpf.NewProgram(spec)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
if err := prog.Attach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
||||
return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
||||
}
|
||||
closer := func() error {
|
||||
if err := prog.Detach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
||||
return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return closer, nil
|
||||
}
|
|
@ -5,7 +5,6 @@ package fs
|
|||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
@ -18,7 +17,7 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
subsystems = subsystemSet{
|
||||
subsystemsLegacy = subsystemSet{
|
||||
&CpusetGroup{},
|
||||
&DevicesGroup{},
|
||||
&MemoryGroup{},
|
||||
|
@ -129,6 +128,10 @@ func isIgnorableError(rootless bool, err error) bool {
|
|||
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||
}
|
||||
|
||||
func (m *Manager) getSubsystems() subsystemSet {
|
||||
return subsystemsLegacy
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) (err error) {
|
||||
if m.Cgroups == nil {
|
||||
return nil
|
||||
|
@ -158,7 +161,7 @@ func (m *Manager) Apply(pid int) (err error) {
|
|||
return cgroups.EnterPid(m.Paths, pid)
|
||||
}
|
||||
|
||||
for _, sys := range subsystems {
|
||||
for _, sys := range m.getSubsystems() {
|
||||
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
|
||||
// create and join phase so that the cgroup hierarchy for a container can be
|
||||
// created then join consists of writing the process pids to cgroup.procs
|
||||
|
@ -209,12 +212,16 @@ func (m *Manager) GetPaths() map[string]string {
|
|||
return paths
|
||||
}
|
||||
|
||||
func (m *Manager) GetUnifiedPath() (string, error) {
|
||||
return "", errors.New("unified path is only supported when running in unified mode")
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for name, path := range m.Paths {
|
||||
sys, err := subsystems.Get(name)
|
||||
sys, err := m.getSubsystems().Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
}
|
||||
|
@ -226,14 +233,18 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
|||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
if container.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.Cgroups.Paths != nil {
|
||||
if m.Cgroups != nil && m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
paths := m.GetPaths()
|
||||
for _, sys := range subsystems {
|
||||
for _, sys := range m.getSubsystems() {
|
||||
path := paths[sys.Name()]
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
if m.Rootless && sys.Name() == "devices" {
|
||||
|
@ -262,11 +273,15 @@ func (m *Manager) Set(container *configs.Config) error {
|
|||
// Freeze toggles the container's freezer cgroup depending on the state
|
||||
// provided
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
if m.Cgroups == nil {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
|
||||
paths := m.GetPaths()
|
||||
dir := paths["freezer"]
|
||||
prevState := m.Cgroups.Resources.Freezer
|
||||
m.Cgroups.Resources.Freezer = state
|
||||
freezer, err := subsystems.Get("freezer")
|
||||
freezer, err := m.getSubsystems().Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -354,23 +369,6 @@ func (raw *cgroupData) join(subsystem string) (string, error) {
|
|||
return path, nil
|
||||
}
|
||||
|
||||
func writeFile(dir, file, data string) error {
|
||||
// Normally dir should not be empty, one case is that cgroup subsystem
|
||||
// is not mounted, we will get empty dir, and we want it fail here.
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", file)
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func readFile(dir, file string) (string, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join(dir, file))
|
||||
return string(data), err
|
||||
}
|
||||
|
||||
func removePath(p string, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -407,3 +405,7 @@ func CheckCpushares(path string, c uint64) error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@ import (
|
|||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -31,41 +32,41 @@ func (s *BlkioGroup) Apply(d *cgroupData) error {
|
|||
|
||||
func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.BlkioWeight != 0 {
|
||||
if err := writeFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.BlkioLeafWeight != 0 {
|
||||
if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, wd := range cgroup.Resources.BlkioWeightDevice {
|
||||
if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
||||
if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
|
||||
if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
|
||||
if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -51,12 +52,12 @@ func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error
|
|||
|
||||
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuRtPeriod != 0 {
|
||||
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuRtRuntime != 0 {
|
||||
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -65,17 +66,17 @@ func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
|
|||
|
||||
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuShares != 0 {
|
||||
if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuPeriod != 0 {
|
||||
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuQuota != 0 {
|
||||
if err := writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -98,7 +99,7 @@ func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := getCgroupParamKeyValue(sc.Text())
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
@ -51,7 +52,7 @@ func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
return err
|
||||
}
|
||||
|
||||
totalUsage, err := getCgroupParamUint(path, "cpuacct.usage")
|
||||
totalUsage, err := fscommon.GetCgroupParamUint(path, "cpuacct.usage")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -85,8 +86,8 @@ func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
|
|||
return 0, 0, err
|
||||
}
|
||||
fields := strings.Fields(string(data))
|
||||
if len(fields) != 4 {
|
||||
return 0, 0, fmt.Errorf("failure - %s is expected to have 4 fields", filepath.Join(path, cgroupCpuacctStat))
|
||||
if len(fields) < 4 {
|
||||
return 0, 0, fmt.Errorf("failure - %s is expected to have at least 4 fields", filepath.Join(path, cgroupCpuacctStat))
|
||||
}
|
||||
if fields[0] != userField {
|
||||
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField)
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
@ -31,12 +32,12 @@ func (s *CpusetGroup) Apply(d *cgroupData) error {
|
|||
|
||||
func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpusetCpus != "" {
|
||||
if err := writeFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpusetMems != "" {
|
||||
if err := writeFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
if err := fscommon.WriteFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -135,12 +136,12 @@ func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
|
|||
}
|
||||
|
||||
if s.isEmpty(currentCpus) {
|
||||
if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
|
||||
if err := fscommon.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if s.isEmpty(currentMems) {
|
||||
if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil {
|
||||
if err := fscommon.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ package fs
|
|||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
@ -37,7 +38,7 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
if dev.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
if err := writeFile(path, file, dev.CgroupString()); err != nil {
|
||||
if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -45,25 +46,25 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
}
|
||||
if cgroup.Resources.AllowAllDevices != nil {
|
||||
if *cgroup.Resources.AllowAllDevices == false {
|
||||
if err := writeFile(path, "devices.deny", "a"); err != nil {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.AllowedDevices {
|
||||
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := writeFile(path, "devices.allow", "a"); err != nil {
|
||||
if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.DeniedDevices {
|
||||
if err := writeFile(path, "devices.deny", dev.CgroupString()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -34,11 +35,11 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
// state, let's write again this state, hoping it's going to be properly
|
||||
// set this time. Otherwise, this loop could run infinitely, waiting for
|
||||
// a state change that would never happen.
|
||||
if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
state, err := readFile(path, "freezer.state")
|
||||
state, err := fscommon.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -28,7 +29,7 @@ func (s *HugetlbGroup) Apply(d *cgroupData) error {
|
|||
|
||||
func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
for _, hugetlb := range cgroup.Resources.HugetlbLimit {
|
||||
if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -44,21 +45,21 @@ func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
for _, pageSize := range HugePageSizes {
|
||||
usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".")
|
||||
value, err := getCgroupParamUint(path, usage)
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".")
|
||||
value, err = getCgroupParamUint(path, maxUsage)
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
}
|
||||
hugetlbStats.MaxUsage = value
|
||||
|
||||
failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".")
|
||||
value, err = getCgroupParamUint(path, failcnt)
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@ import (
|
|||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -84,28 +85,28 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
|||
// for memory and swap memory, so it won't fail because the new
|
||||
// value and the old value don't fit kernel's validation.
|
||||
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
|
||||
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwap != 0 {
|
||||
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -126,25 +127,25 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
}
|
||||
|
||||
if cgroup.Resources.MemoryReservation != 0 {
|
||||
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.KernelMemoryTCP != 0 {
|
||||
if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.OomKillDisable {
|
||||
if err := writeFile(path, "memory.oom_control", "1"); err != nil {
|
||||
if err := fscommon.WriteFile(path, "memory.oom_control", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
|
||||
return nil
|
||||
} else if *cgroup.Resources.MemorySwappiness <= 100 {
|
||||
if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
|
@ -171,7 +172,7 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := getCgroupParamKeyValue(sc.Text())
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
|
||||
}
|
||||
|
@ -201,7 +202,7 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
|
||||
|
||||
useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
|
||||
value, err := getCgroupParamUint(path, useHierarchy)
|
||||
value, err := fscommon.GetCgroupParamUint(path, useHierarchy)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -233,7 +234,7 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
|||
failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
|
||||
limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".")
|
||||
|
||||
value, err := getCgroupParamUint(path, usage)
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
|
@ -241,7 +242,7 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
|||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
}
|
||||
memoryData.Usage = value
|
||||
value, err = getCgroupParamUint(path, maxUsage)
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
|
@ -249,7 +250,7 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
|||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
}
|
||||
memoryData.MaxUsage = value
|
||||
value, err = getCgroupParamUint(path, failcnt)
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
|
@ -257,7 +258,7 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
|||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
}
|
||||
memoryData.Failcnt = value
|
||||
value, err = getCgroupParamUint(path, limit)
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -26,7 +27,7 @@ func (s *NetClsGroup) Apply(d *cgroupData) error {
|
|||
|
||||
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.NetClsClassid != 0 {
|
||||
if err := writeFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
|
||||
if err := fscommon.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ package fs
|
|||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -24,7 +25,7 @@ func (s *NetPrioGroup) Apply(d *cgroupData) error {
|
|||
|
||||
func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
for _, prioMap := range cgroup.Resources.NetPrioIfpriomap {
|
||||
if err := writeFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||
if err := fscommon.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
|
@ -35,7 +36,7 @@ func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := writeFile(path, "pids.max", limit); err != nil {
|
||||
if err := fscommon.WriteFile(path, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -48,12 +49,12 @@ func (s *PidsGroup) Remove(d *cgroupData) error {
|
|||
}
|
||||
|
||||
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
current, err := getCgroupParamUint(path, "pids.current")
|
||||
current, err := fscommon.GetCgroupParamUint(path, "pids.current")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.current - %s", err)
|
||||
}
|
||||
|
||||
maxString, err := getCgroupParamString(path, "pids.max")
|
||||
maxString, err := fscommon.GetCgroupParamString(path, "pids.max")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - %s", err)
|
||||
}
|
||||
|
@ -61,7 +62,7 @@ func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
// Default if pids.max == "max" is 0 -- which represents "no limit".
|
||||
var max uint64
|
||||
if maxString != "max" {
|
||||
max, err = parseUint(maxString, 10, 64)
|
||||
max, err = fscommon.ParseUint(maxString, 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
|
||||
}
|
||||
|
|
56
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
Normal file
56
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
Normal file
|
@ -0,0 +1,56 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func setCpu(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuWeight != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(cgroup.Resources.CpuWeight, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.CpuMax != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpu.max", cgroup.Resources.CpuMax); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
func statCpu(dirPath string, stats *cgroups.Stats) error {
|
||||
f, err := os.Open(filepath.Join(dirPath, "cpu.stat"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
switch t {
|
||||
case "usage_usec":
|
||||
stats.CpuStats.CpuUsage.TotalUsage = v * 1000
|
||||
|
||||
case "user_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = v * 1000
|
||||
|
||||
case "system_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
22
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
Normal file
22
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
Normal file
|
@ -0,0 +1,22 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func setCpuset(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpusetCpus != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpusetMems != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
99
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
Normal file
99
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
Normal file
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
const UnifiedMountpoint = "/sys/fs/cgroup"
|
||||
|
||||
func defaultDirPath(c *configs.Cgroup) (string, error) {
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return "", errors.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
|
||||
}
|
||||
if len(c.Paths) != 0 {
|
||||
// never set by specconv
|
||||
return "", errors.Errorf("cgroup: Paths is unsupported, use Path, got %+v", c)
|
||||
}
|
||||
|
||||
// XXX: Do not remove this code. Path safety is important! -- cyphar
|
||||
cgPath := libcontainerUtils.CleanPath(c.Path)
|
||||
cgParent := libcontainerUtils.CleanPath(c.Parent)
|
||||
cgName := libcontainerUtils.CleanPath(c.Name)
|
||||
|
||||
ownCgroup, err := parseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return _defaultDirPath(UnifiedMountpoint, cgPath, cgParent, cgName, ownCgroup)
|
||||
}
|
||||
|
||||
func _defaultDirPath(root, cgPath, cgParent, cgName, ownCgroup string) (string, error) {
|
||||
if (cgName != "" || cgParent != "") && cgPath != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
innerPath := cgPath
|
||||
if innerPath == "" {
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
if filepath.IsAbs(innerPath) {
|
||||
return filepath.Join(root, innerPath), nil
|
||||
}
|
||||
return filepath.Join(root, ownCgroup, innerPath), nil
|
||||
}
|
||||
|
||||
// parseCgroupFile parses /proc/PID/cgroup file and return string
|
||||
func parseCgroupFile(path string) (string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
func parseCgroupFromReader(r io.Reader) (string, error) {
|
||||
var (
|
||||
s = bufio.NewScanner(r)
|
||||
)
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
var (
|
||||
text = s.Text()
|
||||
parts = strings.SplitN(text, ":", 3)
|
||||
)
|
||||
if len(parts) < 3 {
|
||||
return "", errors.Errorf("invalid cgroup entry: %q", text)
|
||||
}
|
||||
// text is like "0::/user.slice/user-1001.slice/session-1.scope"
|
||||
if parts[0] == "0" && parts[1] == "" {
|
||||
return parts[2], nil
|
||||
}
|
||||
}
|
||||
return "", errors.New("cgroup path not found")
|
||||
}
|
73
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
Normal file
73
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
Normal file
|
@ -0,0 +1,73 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func isRWM(cgroupPermissions string) bool {
|
||||
r := false
|
||||
w := false
|
||||
m := false
|
||||
for _, rn := range cgroupPermissions {
|
||||
switch rn {
|
||||
case 'r':
|
||||
r = true
|
||||
case 'w':
|
||||
w = true
|
||||
case 'm':
|
||||
m = true
|
||||
}
|
||||
}
|
||||
return r && w && m
|
||||
}
|
||||
|
||||
// the logic is from crun
|
||||
// https://github.com/containers/crun/blob/0.10.2/src/libcrun/cgroup.c#L1644-L1652
|
||||
func canSkipEBPFError(cgroup *configs.Cgroup) bool {
|
||||
for _, dev := range cgroup.Resources.Devices {
|
||||
if dev.Allow || !isRWM(dev.Permissions) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func setDevices(dirPath string, cgroup *configs.Cgroup) error {
|
||||
devices := cgroup.Devices
|
||||
if allowAllDevices := cgroup.Resources.AllowAllDevices; allowAllDevices != nil {
|
||||
// never set by OCI specconv, but *allowAllDevices=false is still used by the integration test
|
||||
if *allowAllDevices == true {
|
||||
return errors.New("libcontainer AllowAllDevices is not supported, use Devices")
|
||||
}
|
||||
for _, ad := range cgroup.Resources.AllowedDevices {
|
||||
d := *ad
|
||||
d.Allow = true
|
||||
devices = append(devices, &d)
|
||||
}
|
||||
}
|
||||
if len(cgroup.Resources.DeniedDevices) != 0 {
|
||||
// never set by OCI specconv
|
||||
return errors.New("libcontainer DeniedDevices is not supported, use Devices")
|
||||
}
|
||||
insts, license, err := devicefilter.DeviceFilter(devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
return errors.Errorf("cannot get dir FD for %s", dirPath)
|
||||
}
|
||||
defer unix.Close(dirFD)
|
||||
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||
if !canSkipEBPFError(cgroup) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
53
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
Normal file
53
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
Normal file
|
@ -0,0 +1,53 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||
var desired int
|
||||
switch state {
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
case configs.Frozen:
|
||||
desired = 1
|
||||
case configs.Thawed:
|
||||
desired = 0
|
||||
default:
|
||||
return errors.Errorf("unknown freezer state %+v", state)
|
||||
}
|
||||
supportedErr := supportsFreezer(dirPath)
|
||||
if supportedErr != nil && desired != 0 {
|
||||
// can ignore error if desired == 1
|
||||
return errors.Wrap(supportedErr, "freezer not supported")
|
||||
}
|
||||
return freezeWithInt(dirPath, desired)
|
||||
}
|
||||
|
||||
func supportsFreezer(dirPath string) error {
|
||||
_, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
|
||||
return err
|
||||
}
|
||||
|
||||
// freeze writes desired int to "cgroup.freeze".
|
||||
func freezeWithInt(dirPath string, desired int) error {
|
||||
desiredS := strconv.Itoa(desired)
|
||||
if err := fscommon.WriteFile(dirPath, "cgroup.freeze", desiredS); err != nil {
|
||||
return err
|
||||
}
|
||||
got, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if gotS := strings.TrimSpace(string(got)); gotS != desiredS {
|
||||
return errors.Errorf("expected \"cgroup.freeze\" in %q to be %q, got %q", dirPath, desiredS, gotS)
|
||||
}
|
||||
return nil
|
||||
}
|
214
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
Normal file
214
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
Normal file
|
@ -0,0 +1,214 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// NewManager creates a manager for cgroup v2 unified hierarchy.
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
|
||||
// If dirPath is empty, it is automatically set using config.
|
||||
func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.Manager, error) {
|
||||
if config == nil {
|
||||
config = &configs.Cgroup{}
|
||||
}
|
||||
if dirPath != "" {
|
||||
if filepath.Clean(dirPath) != dirPath || !filepath.IsAbs(dirPath) {
|
||||
return nil, errors.Errorf("invalid dir path %q", dirPath)
|
||||
}
|
||||
} else {
|
||||
var err error
|
||||
dirPath, err = defaultDirPath(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
controllers, err := detectControllers(dirPath)
|
||||
if err != nil && !rootless {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
m := &manager{
|
||||
config: config,
|
||||
dirPath: dirPath,
|
||||
controllers: controllers,
|
||||
rootless: rootless,
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func detectControllers(dirPath string) (map[string]struct{}, error) {
|
||||
if err := os.MkdirAll(dirPath, 0755); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllersPath, err := securejoin.SecureJoin(dirPath, "cgroup.controllers")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllersData, err := ioutil.ReadFile(controllersPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllersFields := strings.Fields(string(controllersData))
|
||||
controllers := make(map[string]struct{}, len(controllersFields))
|
||||
for _, c := range controllersFields {
|
||||
controllers[c] = struct{}{}
|
||||
}
|
||||
return controllers, nil
|
||||
}
|
||||
|
||||
type manager struct {
|
||||
config *configs.Cgroup
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
dirPath string
|
||||
// controllers is content of "cgroup.controllers" file.
|
||||
// excludes pseudo-controllers ("devices" and "freezer").
|
||||
controllers map[string]struct{}
|
||||
rootless bool
|
||||
}
|
||||
|
||||
func (m *manager) Apply(pid int) error {
|
||||
if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil && !m.rootless {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *manager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||
var (
|
||||
st cgroups.Stats
|
||||
errs []error
|
||||
)
|
||||
// pids (since kernel 4.5)
|
||||
if _, ok := m.controllers["pids"]; ok {
|
||||
if err := statPids(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
} else {
|
||||
if err := statPidsWithoutController(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// memory (since kenrel 4.5)
|
||||
if _, ok := m.controllers["memory"]; ok {
|
||||
if err := statMemory(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if _, ok := m.controllers["io"]; ok {
|
||||
if err := statIo(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if _, ok := m.controllers["cpu"]; ok {
|
||||
if err := statCpu(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
if len(errs) > 0 && !m.rootless {
|
||||
return &st, errors.Errorf("error while statting cgroup v2: %+v", errs)
|
||||
}
|
||||
return &st, nil
|
||||
}
|
||||
|
||||
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||
if err := setFreezer(m.dirPath, state); err != nil {
|
||||
return err
|
||||
}
|
||||
m.config.Resources.Freezer = state
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) Destroy() error {
|
||||
return os.RemoveAll(m.dirPath)
|
||||
}
|
||||
|
||||
// GetPaths is for compatibility purpose and should be removed in future
|
||||
func (m *manager) GetPaths() map[string]string {
|
||||
paths := map[string]string{
|
||||
// pseudo-controller for compatibility
|
||||
"devices": m.dirPath,
|
||||
"freezer": m.dirPath,
|
||||
}
|
||||
for c := range m.controllers {
|
||||
paths[c] = m.dirPath
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *manager) GetUnifiedPath() (string, error) {
|
||||
return m.dirPath, nil
|
||||
}
|
||||
|
||||
func (m *manager) Set(container *configs.Config) error {
|
||||
if container == nil || container.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
var errs []error
|
||||
// pids (since kernel 4.5)
|
||||
if _, ok := m.controllers["pids"]; ok {
|
||||
if err := setPids(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// memory (since kernel 4.5)
|
||||
if _, ok := m.controllers["memory"]; ok {
|
||||
if err := setMemory(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if _, ok := m.controllers["io"]; ok {
|
||||
if err := setIo(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if _, ok := m.controllers["cpu"]; ok {
|
||||
if err := setCpu(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// devices (since kernel 4.15, pseudo-controller)
|
||||
if err := setDevices(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// cpuset (since kernel 5.0)
|
||||
if _, ok := m.controllers["cpuset"]; ok {
|
||||
if err := setCpuset(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// freezer (since kernel 5.2, pseudo-controller)
|
||||
if err := setFreezer(m.dirPath, container.Cgroups.Freezer); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if len(errs) > 0 && !m.rootless {
|
||||
return errors.Errorf("error while setting cgroup v2: %+v", errs)
|
||||
}
|
||||
m.config = container.Cgroups
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.config, nil
|
||||
}
|
124
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
Normal file
124
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
Normal file
|
@ -0,0 +1,124 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func setIo(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.BlkioWeight != 0 {
|
||||
filename := "io.bfq.weight"
|
||||
if err := fscommon.WriteFile(dirPath, filename, strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) {
|
||||
ret := map[string][]string{}
|
||||
p := filepath.Join(dirPath, name)
|
||||
f, err := os.Open(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.Fields(line)
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
ret[parts[0]] = parts[1:]
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func statIo(dirPath string, stats *cgroups.Stats) error {
|
||||
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
var ioServiceBytesRecursive []cgroups.BlkioStatEntry
|
||||
values, err := readCgroup2MapFile(dirPath, "io.stat")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for k, v := range values {
|
||||
d := strings.Split(k, ":")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
minor, err := strconv.ParseUint(d[0], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
major, err := strconv.ParseUint(d[1], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, item := range v {
|
||||
d := strings.Split(item, "=")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
op := d[0]
|
||||
|
||||
// Accommodate the cgroup v1 naming
|
||||
switch op {
|
||||
case "rbytes":
|
||||
op = "read"
|
||||
case "wbytes":
|
||||
op = "write"
|
||||
}
|
||||
|
||||
value, err := strconv.ParseUint(d[1], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entry := cgroups.BlkioStatEntry{
|
||||
Op: op,
|
||||
Major: major,
|
||||
Minor: minor,
|
||||
Value: value,
|
||||
}
|
||||
ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry)
|
||||
}
|
||||
}
|
||||
stats.BlkioStats = cgroups.BlkioStats{IoServiceBytesRecursive: ioServiceBytesRecursive}
|
||||
return nil
|
||||
}
|
103
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
Normal file
103
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
Normal file
|
@ -0,0 +1,103 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func setMemory(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.MemorySwap != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.swap.max", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.max", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// cgroup.Resources.KernelMemory is ignored
|
||||
|
||||
if cgroup.Resources.MemoryReservation != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.low", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statMemory(dirPath string, stats *cgroups.Stats) error {
|
||||
// Set stats from memory.stat.
|
||||
statsFile, err := os.Open(filepath.Join(dirPath, "memory.stat"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer statsFile.Close()
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to parse memory.stat (%q)", sc.Text())
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
|
||||
|
||||
memoryUsage, err := getMemoryDataV2(dirPath, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
swapUsage, err := getMemoryDataV2(dirPath, "swap")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData := cgroups.MemoryData{}
|
||||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = strings.Join([]string{"memory", name}, ".")
|
||||
}
|
||||
usage := strings.Join([]string{moduleName, "current"}, ".")
|
||||
limit := strings.Join([]string{moduleName, "max"}, ".")
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", usage)
|
||||
}
|
||||
memoryData.Usage = value
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", limit)
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
90
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
Normal file
90
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
Normal file
|
@ -0,0 +1,90 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func setPids(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if cgroup.Resources.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(dirPath, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func isNOTSUP(err error) bool {
|
||||
switch err := err.(type) {
|
||||
case *os.PathError:
|
||||
return err.Err == unix.ENOTSUP
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func statPidsWithoutController(dirPath string, stats *cgroups.Stats) error {
|
||||
// if the controller is not enabled, let's read PIDS from cgroups.procs
|
||||
// (or threads if cgroup.threads is enabled)
|
||||
contents, err := ioutil.ReadFile(filepath.Join(dirPath, "cgroup.procs"))
|
||||
if err != nil && isNOTSUP(err) {
|
||||
contents, err = ioutil.ReadFile(filepath.Join(dirPath, "cgroup.threads"))
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids := make(map[string]string)
|
||||
for _, i := range strings.Split(string(contents), "\n") {
|
||||
if i != "" {
|
||||
pids[i] = i
|
||||
}
|
||||
}
|
||||
stats.PidsStats.Current = uint64(len(pids))
|
||||
stats.PidsStats.Limit = 0
|
||||
return nil
|
||||
}
|
||||
|
||||
func statPids(dirPath string, stats *cgroups.Stats) error {
|
||||
current, err := fscommon.GetCgroupParamUint(dirPath, "pids.current")
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to parse pids.current")
|
||||
}
|
||||
|
||||
maxString, err := fscommon.GetCgroupParamString(dirPath, "pids.max")
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to parse pids.max")
|
||||
}
|
||||
|
||||
// Default if pids.max == "max" is 0 -- which represents "no limit".
|
||||
var max uint64
|
||||
if maxString != "max" {
|
||||
max, err = fscommon.ParseUint(maxString, 10, 64)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q",
|
||||
maxString, filepath.Join(dirPath, "pids.max"))
|
||||
}
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
stats.PidsStats.Limit = max
|
||||
return nil
|
||||
}
|
36
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/fscommon.go
generated
vendored
Normal file
36
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/fscommon.go
generated
vendored
Normal file
|
@ -0,0 +1,36 @@
|
|||
// +build linux
|
||||
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func WriteFile(dir, file, data string) error {
|
||||
if dir == "" {
|
||||
return errors.Errorf("no directory specified for %s", file)
|
||||
}
|
||||
path, err := securejoin.SecureJoin(dir, file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := ioutil.WriteFile(path, []byte(data), 0700); err != nil {
|
||||
return errors.Wrapf(err, "failed to write %q to %q", data, path)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func ReadFile(dir, file string) (string, error) {
|
||||
if dir == "" {
|
||||
return "", errors.Errorf("no directory specified for %s", file)
|
||||
}
|
||||
path, err := securejoin.SecureJoin(dir, file)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
data, err := ioutil.ReadFile(path)
|
||||
return string(data), err
|
||||
}
|
|
@ -1,11 +1,12 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
@ -17,7 +18,7 @@ var (
|
|||
|
||||
// Saturates negative values at zero and returns a uint64.
|
||||
// Due to kernel bugs, some of the memory cgroup stats can be negative.
|
||||
func parseUint(s string, base, bitSize int) (uint64, error) {
|
||||
func ParseUint(s string, base, bitSize int) (uint64, error) {
|
||||
value, err := strconv.ParseUint(s, base, bitSize)
|
||||
if err != nil {
|
||||
intValue, intErr := strconv.ParseInt(s, base, bitSize)
|
||||
|
@ -37,11 +38,11 @@ func parseUint(s string, base, bitSize int) (uint64, error) {
|
|||
|
||||
// Parses a cgroup param and returns as name, value
|
||||
// i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234
|
||||
func getCgroupParamKeyValue(t string) (string, uint64, error) {
|
||||
func GetCgroupParamKeyValue(t string) (string, uint64, error) {
|
||||
parts := strings.Fields(t)
|
||||
switch len(parts) {
|
||||
case 2:
|
||||
value, err := parseUint(parts[1], 10, 64)
|
||||
value, err := ParseUint(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("unable to convert param value (%q) to uint64: %v", parts[1], err)
|
||||
}
|
||||
|
@ -53,14 +54,18 @@ func getCgroupParamKeyValue(t string) (string, uint64, error) {
|
|||
}
|
||||
|
||||
// Gets a single uint64 value from the specified cgroup file.
|
||||
func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
|
||||
func GetCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
|
||||
fileName := filepath.Join(cgroupPath, cgroupFile)
|
||||
contents, err := ioutil.ReadFile(fileName)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
trimmed := strings.TrimSpace(string(contents))
|
||||
if trimmed == "max" {
|
||||
return math.MaxUint64, nil
|
||||
}
|
||||
|
||||
res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64)
|
||||
res, err := ParseUint(trimmed, 10, 64)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), fileName)
|
||||
}
|
||||
|
@ -68,7 +73,7 @@ func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
|
|||
}
|
||||
|
||||
// Gets a string value from the specified cgroup file
|
||||
func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) {
|
||||
func GetCgroupParamString(cgroupPath, cgroupFile string) (string, error) {
|
||||
contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile))
|
||||
if err != nil {
|
||||
return "", err
|
14
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go
generated
vendored
14
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go
generated
vendored
|
@ -1,4 +1,4 @@
|
|||
// +build !linux static_build
|
||||
// +build !linux
|
||||
|
||||
package systemd
|
||||
|
||||
|
@ -18,6 +18,10 @@ func UseSystemd() bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
@ -38,6 +42,10 @@ func (m *Manager) GetPaths() map[string]string {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetUnifiedPath() (string, error) {
|
||||
return "", fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
@ -53,3 +61,7 @@ func (m *Manager) Freeze(state configs.FreezerState) error {
|
|||
func Freeze(c *configs.Cgroup, state configs.FreezerState) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
|
158
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
generated
vendored
158
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
generated
vendored
|
@ -1,4 +1,4 @@
|
|||
// +build linux,!static_build
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
|
@ -14,7 +14,6 @@ import (
|
|||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/dbus"
|
||||
systemdUtil "github.com/coreos/go-systemd/util"
|
||||
"github.com/godbus/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
|
@ -22,7 +21,7 @@ import (
|
|||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type Manager struct {
|
||||
type LegacyManager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
|
@ -50,7 +49,7 @@ func (s subsystemSet) Get(name string) (subsystem, error) {
|
|||
return nil, errSubsystemDoesNotExist
|
||||
}
|
||||
|
||||
var subsystems = subsystemSet{
|
||||
var legacySubsystems = subsystemSet{
|
||||
&fs.CpusetGroup{},
|
||||
&fs.DevicesGroup{},
|
||||
&fs.MemoryGroup{},
|
||||
|
@ -72,11 +71,8 @@ const (
|
|||
)
|
||||
|
||||
var (
|
||||
connLock sync.Mutex
|
||||
theConn *systemdDbus.Conn
|
||||
hasStartTransientUnit bool
|
||||
hasStartTransientSliceUnit bool
|
||||
hasDelegateSlice bool
|
||||
connLock sync.Mutex
|
||||
theConn *systemdDbus.Conn
|
||||
)
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
|
@ -86,8 +82,23 @@ func newProp(name string, units interface{}) systemdDbus.Property {
|
|||
}
|
||||
}
|
||||
|
||||
// NOTE: This function comes from package github.com/coreos/go-systemd/util
|
||||
// It was borrowed here to avoid a dependency on cgo.
|
||||
//
|
||||
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
||||
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
|
||||
// checks whether /run/systemd/system/ exists and is a directory.
|
||||
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
|
||||
func isRunningSystemd() bool {
|
||||
fi, err := os.Lstat("/run/systemd/system")
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return fi.IsDir()
|
||||
}
|
||||
|
||||
func UseSystemd() bool {
|
||||
if !systemdUtil.IsRunningSystemd() {
|
||||
if !isRunningSystemd() {
|
||||
return false
|
||||
}
|
||||
|
||||
|
@ -100,70 +111,31 @@ func UseSystemd() bool {
|
|||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Assume we have StartTransientUnit
|
||||
hasStartTransientUnit = true
|
||||
|
||||
// But if we get UnknownMethod error we don't
|
||||
if _, err := theConn.StartTransientUnit("test.scope", "invalid", nil, nil); err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" {
|
||||
hasStartTransientUnit = false
|
||||
return hasStartTransientUnit
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assume we have the ability to start a transient unit as a slice
|
||||
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
|
||||
// For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299
|
||||
hasStartTransientSliceUnit = true
|
||||
|
||||
// To ensure simple clean-up, we create a slice off the root with no hierarchy
|
||||
slice := fmt.Sprintf("libcontainer_%d_systemd_test_default.slice", os.Getpid())
|
||||
if _, err := theConn.StartTransientUnit(slice, "replace", nil, nil); err != nil {
|
||||
if _, ok := err.(dbus.Error); ok {
|
||||
hasStartTransientSliceUnit = false
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i <= testSliceWait; i++ {
|
||||
if _, err := theConn.StopUnit(slice, "replace", nil); err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
|
||||
hasStartTransientSliceUnit = false
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
|
||||
// Not critical because of the stop unit logic above.
|
||||
theConn.StopUnit(slice, "replace", nil)
|
||||
|
||||
// Assume StartTransientUnit on a slice allows Delegate
|
||||
hasDelegateSlice = true
|
||||
dlSlice := newProp("Delegate", true)
|
||||
if _, err := theConn.StartTransientUnit(slice, "replace", []systemdDbus.Property{dlSlice}, nil); err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
// Starting with systemd v237, Delegate is not even a property of slices anymore,
|
||||
// so the D-Bus call fails with "InvalidArgs" error.
|
||||
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") || strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.InvalidArgs") {
|
||||
hasDelegateSlice = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Not critical because of the stop unit logic above.
|
||||
theConn.StopUnit(slice, "replace", nil)
|
||||
}
|
||||
return hasStartTransientUnit
|
||||
return true
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
|
||||
if !isRunningSystemd() {
|
||||
return nil, fmt.Errorf("systemd not running on this host, can't use systemd as a cgroups.Manager")
|
||||
}
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &UnifiedManager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &LegacyManager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.Cgroups
|
||||
unitName = getUnitName(c)
|
||||
|
@ -196,10 +168,6 @@ func (m *Manager) Apply(pid int) error {
|
|||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
|
||||
if !hasStartTransientSliceUnit {
|
||||
return fmt.Errorf("systemd version does not support ability to start a slice as transient unit")
|
||||
}
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
|
@ -212,12 +180,7 @@ func (m *Manager) Apply(pid int) error {
|
|||
}
|
||||
|
||||
// Check if we can delegate. This is only supported on systemd versions 218 and above.
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
if hasDelegateSlice {
|
||||
// systemd 237 and above no longer allows delegation on a slice
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
} else {
|
||||
if !strings.HasSuffix(unitName, ".slice") {
|
||||
// Assume scopes always support delegation.
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
@ -298,7 +261,7 @@ func (m *Manager) Apply(pid int) error {
|
|||
}
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, s := range subsystems {
|
||||
for _, s := range legacySubsystems {
|
||||
subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name())
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
|
@ -313,7 +276,7 @@ func (m *Manager) Apply(pid int) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
func (m *LegacyManager) Destroy() error {
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
|
@ -327,18 +290,23 @@ func (m *Manager) Destroy() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
func (m *LegacyManager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetUnifiedPath() (string, error) {
|
||||
return "", errors.New("unified path is only supported when running in unified mode")
|
||||
}
|
||||
|
||||
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
|
||||
path, err := getSubsystemPath(c, subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
@ -349,7 +317,7 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
|
|||
}
|
||||
|
||||
func joinCgroups(c *configs.Cgroup, pid int) error {
|
||||
for _, sys := range subsystems {
|
||||
for _, sys := range legacySubsystems {
|
||||
name := sys.Name()
|
||||
switch name {
|
||||
case "name=systemd":
|
||||
|
@ -444,14 +412,14 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
|||
return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
|
||||
}
|
||||
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
func (m *LegacyManager) Freeze(state configs.FreezerState) error {
|
||||
path, err := getSubsystemPath(m.Cgroups, "freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
prevState := m.Cgroups.Resources.Freezer
|
||||
m.Cgroups.Resources.Freezer = state
|
||||
freezer, err := subsystems.Get("freezer")
|
||||
freezer, err := legacySubsystems.Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -463,7 +431,7 @@ func (m *Manager) Freeze(state configs.FreezerState) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
func (m *LegacyManager) GetPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -471,7 +439,7 @@ func (m *Manager) GetPids() ([]int, error) {
|
|||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
func (m *LegacyManager) GetAllPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -479,12 +447,12 @@ func (m *Manager) GetAllPids() ([]int, error) {
|
|||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
func (m *LegacyManager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for name, path := range m.Paths {
|
||||
sys, err := subsystems.Get(name)
|
||||
sys, err := legacySubsystems.Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
}
|
||||
|
@ -496,13 +464,13 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
|||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
func (m *LegacyManager) Set(container *configs.Config) error {
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
for _, sys := range subsystems {
|
||||
for _, sys := range legacySubsystems {
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
path, err := getSubsystemPath(container.Cgroups, sys.Name())
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
|
@ -560,3 +528,7 @@ func isUnitExists(err error) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
}
|
||||
|
|
312
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unified_hierarchy.go
generated
vendored
Normal file
312
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unified_hierarchy.go
generated
vendored
Normal file
|
@ -0,0 +1,312 @@
|
|||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type UnifiedManager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.Cgroups
|
||||
unitName = getUnitName(c)
|
||||
slice = "system.slice"
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
for name, path := range c.Paths {
|
||||
_, err := getSubsystemPath(m.Cgroups, name)
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
m.Paths = paths
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
}
|
||||
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Check if we can delegate. This is only supported on systemd versions 218 and above.
|
||||
if !strings.HasSuffix(unitName, ".slice") {
|
||||
// Assume scopes always support delegation.
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("BlockIOAccounting", true))
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
if c.Resources.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(c.Resources.Memory)))
|
||||
}
|
||||
|
||||
if c.Resources.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", c.Resources.CpuShares))
|
||||
}
|
||||
|
||||
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
||||
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
|
||||
// always setting a property value ensures we can apply a quota and remove it later
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if c.Resources.CpuQuota > 0 {
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
properties = append(properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
|
||||
if c.Resources.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
|
||||
}
|
||||
|
||||
if c.Resources.PidsLimit > 0 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(c.Resources.PidsLimit)))
|
||||
}
|
||||
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached to the cgroup.
|
||||
if c.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
|
||||
select {
|
||||
case <-statusChan:
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName)
|
||||
}
|
||||
} else if !isUnitExists(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinCgroupsV2(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
path, err := getSubsystemPath(m.Cgroups, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = map[string]string{
|
||||
"pids": path,
|
||||
"memory": path,
|
||||
"io": path,
|
||||
"cpu": path,
|
||||
"devices": path,
|
||||
"cpuset": path,
|
||||
"freezer": path,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Destroy() error {
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
func (m *UnifiedManager) GetUnifiedPath() (string, error) {
|
||||
unifiedPath := ""
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
for k, v := range m.Paths {
|
||||
if unifiedPath == "" {
|
||||
unifiedPath = v
|
||||
} else if v != unifiedPath {
|
||||
return unifiedPath,
|
||||
errors.Errorf("expected %q path to be unified path %q, got %q", k, unifiedPath, v)
|
||||
}
|
||||
}
|
||||
if unifiedPath == "" {
|
||||
// FIXME: unified path could be detected even when no controller is available
|
||||
return unifiedPath, errors.New("cannot detect unified path")
|
||||
}
|
||||
return unifiedPath, nil
|
||||
}
|
||||
func createCgroupsv2Path(path string) (Err error) {
|
||||
content, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !filepath.HasPrefix(path, "/sys/fs/cgroup") {
|
||||
return fmt.Errorf("invalid cgroup path %s", path)
|
||||
}
|
||||
|
||||
res := ""
|
||||
for i, c := range strings.Split(strings.TrimSpace(string(content)), " ") {
|
||||
if i == 0 {
|
||||
res = fmt.Sprintf("+%s", c)
|
||||
} else {
|
||||
res = res + fmt.Sprintf(" +%s", c)
|
||||
}
|
||||
}
|
||||
resByte := []byte(res)
|
||||
|
||||
current := "/sys/fs"
|
||||
elements := strings.Split(path, "/")
|
||||
for i, e := range elements[3:] {
|
||||
current = filepath.Join(current, e)
|
||||
if i > 0 {
|
||||
if err := os.Mkdir(current, 0755); err != nil {
|
||||
if !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// If the directory was created, be sure it is not left around on errors.
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
os.Remove(current)
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
if i < len(elements[3:])-1 {
|
||||
if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), resByte, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinCgroupsV2(c *configs.Cgroup, pid int) error {
|
||||
path, err := getSubsystemPath(c, "memory")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return createCgroupsv2Path(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) fsManager() (cgroups.Manager, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fs2.NewManager(m.Cgroups, path, false)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Freeze(state configs.FreezerState) error {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Freeze(state)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPids() ([]int, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetAllPids() ([]int, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fsMgr.GetStats()
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Set(container *configs.Config) error {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Set(container)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
}
|
|
@ -11,6 +11,8 @@ import (
|
|||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
units "github.com/docker/go-units"
|
||||
|
@ -18,12 +20,40 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
CgroupNamePrefix = "name="
|
||||
CgroupProcesses = "cgroup.procs"
|
||||
CgroupNamePrefix = "name="
|
||||
CgroupProcesses = "cgroup.procs"
|
||||
unifiedMountpoint = "/sys/fs/cgroup"
|
||||
)
|
||||
|
||||
var (
|
||||
isUnifiedOnce sync.Once
|
||||
isUnified bool
|
||||
)
|
||||
|
||||
// HugePageSizeUnitList is a list of the units used by the linux kernel when
|
||||
// naming the HugePage control files.
|
||||
// https://www.kernel.org/doc/Documentation/cgroup-v1/hugetlb.txt
|
||||
// TODO Since the kernel only use KB, MB and GB; TB and PB should be removed,
|
||||
// depends on https://github.com/docker/go-units/commit/a09cd47f892041a4fac473133d181f5aea6fa393
|
||||
var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
|
||||
|
||||
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
|
||||
func IsCgroup2UnifiedMode() bool {
|
||||
isUnifiedOnce.Do(func() {
|
||||
var st syscall.Statfs_t
|
||||
if err := syscall.Statfs(unifiedMountpoint, &st); err != nil {
|
||||
panic("cannot statfs cgroup root")
|
||||
}
|
||||
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||
})
|
||||
return isUnified
|
||||
}
|
||||
|
||||
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
|
||||
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return unifiedMountpoint, nil
|
||||
}
|
||||
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
|
||||
return mnt, err
|
||||
}
|
||||
|
@ -42,6 +72,10 @@ func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string,
|
|||
}
|
||||
defer f.Close()
|
||||
|
||||
if IsCgroup2UnifiedMode() {
|
||||
subsystem = ""
|
||||
}
|
||||
|
||||
return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
|
||||
}
|
||||
|
||||
|
@ -50,12 +84,12 @@ func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsyst
|
|||
for scanner.Scan() {
|
||||
txt := scanner.Text()
|
||||
fields := strings.Fields(txt)
|
||||
if len(fields) < 5 {
|
||||
if len(fields) < 9 {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(fields[4], cgroupPath) {
|
||||
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
||||
if opt == subsystem {
|
||||
if (subsystem == "" && fields[9] == "cgroup2") || opt == subsystem {
|
||||
return fields[4], fields[3], nil
|
||||
}
|
||||
}
|
||||
|
@ -69,6 +103,19 @@ func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsyst
|
|||
}
|
||||
|
||||
func isSubsystemAvailable(subsystem string) bool {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
controllers, err := GetAllSubsystems()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, c := range controllers {
|
||||
if c == subsystem {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return false
|
||||
|
@ -113,7 +160,7 @@ func FindCgroupMountpointDir() (string, error) {
|
|||
return "", fmt.Errorf("Found no fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
if postSeparatorFields[0] == "cgroup" {
|
||||
if postSeparatorFields[0] == "cgroup" || postSeparatorFields[0] == "cgroup2" {
|
||||
// Check that the mount is properly formatted.
|
||||
if numPostFields < 3 {
|
||||
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||
|
@ -186,6 +233,19 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount,
|
|||
// GetCgroupMounts returns the mounts for the cgroup subsystems.
|
||||
// all indicates whether to return just the first instance or all the mounts.
|
||||
func GetCgroupMounts(all bool) ([]Mount, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
availableControllers, err := GetAllSubsystems()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := Mount{
|
||||
Mountpoint: unifiedMountpoint,
|
||||
Root: unifiedMountpoint,
|
||||
Subsystems: availableControllers,
|
||||
}
|
||||
return []Mount{m}, nil
|
||||
}
|
||||
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -206,6 +266,21 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
|
|||
|
||||
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
|
||||
func GetAllSubsystems() ([]string, error) {
|
||||
// /proc/cgroups is meaningless for v2
|
||||
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features
|
||||
if IsCgroup2UnifiedMode() {
|
||||
// "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers.
|
||||
// - devices: implemented in kernel 4.15
|
||||
// - freezer: implemented in kernel 5.2
|
||||
// We assume these are always available, as it is hard to detect availability.
|
||||
pseudo := []string{"devices", "freezer"}
|
||||
data, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
subsystems := append(pseudo, strings.Fields(string(data))...)
|
||||
return subsystems, nil
|
||||
}
|
||||
f, err := os.Open("/proc/cgroups")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -349,6 +424,9 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
|
|||
}
|
||||
|
||||
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "/", nil
|
||||
}
|
||||
|
||||
if p, ok := cgroups[subsystem]; ok {
|
||||
return p, nil
|
||||
|
@ -409,19 +487,26 @@ func RemovePaths(paths map[string]string) (err error) {
|
|||
}
|
||||
|
||||
func GetHugePageSize() ([]string, error) {
|
||||
var pageSizes []string
|
||||
sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
|
||||
files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
|
||||
if err != nil {
|
||||
return pageSizes, err
|
||||
return []string{}, err
|
||||
}
|
||||
var fileNames []string
|
||||
for _, st := range files {
|
||||
nameArray := strings.Split(st.Name(), "-")
|
||||
fileNames = append(fileNames, st.Name())
|
||||
}
|
||||
return getHugePageSizeFromFilenames(fileNames)
|
||||
}
|
||||
|
||||
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
|
||||
var pageSizes []string
|
||||
for _, fileName := range fileNames {
|
||||
nameArray := strings.Split(fileName, "-")
|
||||
pageSize, err := units.RAMInBytes(nameArray[1])
|
||||
if err != nil {
|
||||
return []string{}, err
|
||||
}
|
||||
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
|
||||
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, HugePageSizeUnitList)
|
||||
pageSizes = append(pageSizes, sizeString)
|
||||
}
|
||||
|
||||
|
|
|
@ -59,3 +59,8 @@ func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
|
|||
func (td *ThrottleDevice) String() string {
|
||||
return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)
|
||||
}
|
||||
|
||||
// StringName formats the struct to be writable to the cgroup specific file
|
||||
func (td *ThrottleDevice) StringName(name string) string {
|
||||
return fmt.Sprintf("%d:%d %s=%d", td.Major, td.Minor, name, td.Rate)
|
||||
}
|
||||
|
|
|
@ -119,4 +119,12 @@ type Resources struct {
|
|||
|
||||
// Set class identifier for container's network packets
|
||||
NetClsClassid uint32 `json:"net_cls_classid_u"`
|
||||
|
||||
// Used on cgroups v2:
|
||||
|
||||
// CpuWeight sets a proportional bandwidth limit.
|
||||
CpuWeight uint64 `json:"cpu_weight"`
|
||||
|
||||
// CpuMax sets she maximum bandwidth limit (format: max period).
|
||||
CpuMax string `json:"cpu_max"`
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
// +build !linux
|
||||
|
||||
package configs
|
||||
|
||||
// TODO Windows: This can ultimately be entirely factored out on Windows as
|
|
@ -44,6 +44,7 @@ const (
|
|||
Trap
|
||||
Allow
|
||||
Trace
|
||||
Log
|
||||
)
|
||||
|
||||
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
|
||||
|
|
|
@ -19,7 +19,7 @@ import (
|
|||
"syscall" // only for SysProcAttr and Signal
|
||||
"time"
|
||||
|
||||
"github.com/cyphar/filepath-securejoin"
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
|
@ -265,22 +265,24 @@ func (c *linuxContainer) Exec() error {
|
|||
|
||||
func (c *linuxContainer) exec() error {
|
||||
path := filepath.Join(c.root, execFifoFilename)
|
||||
pid := c.initProcess.pid()
|
||||
blockingFifoOpenCh := awaitFifoOpen(path)
|
||||
for {
|
||||
select {
|
||||
case result := <-blockingFifoOpenCh:
|
||||
return handleFifoResult(result)
|
||||
|
||||
fifoOpen := make(chan struct{})
|
||||
select {
|
||||
case <-awaitProcessExit(c.initProcess.pid(), fifoOpen):
|
||||
return errors.New("container process is already dead")
|
||||
case result := <-awaitFifoOpen(path):
|
||||
close(fifoOpen)
|
||||
if result.err != nil {
|
||||
return result.err
|
||||
case <-time.After(time.Millisecond * 100):
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil || stat.State == system.Zombie {
|
||||
// could be because process started, ran, and completed between our 100ms timeout and our system.Stat() check.
|
||||
// see if the fifo exists and has data (with a non-blocking open, which will succeed if the writing process is complete).
|
||||
if err := handleFifoResult(fifoOpen(path, false)); err != nil {
|
||||
return errors.New("container process is already dead")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
f := result.file
|
||||
defer f.Close()
|
||||
if err := readFromExecFifo(f); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.Remove(path)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -295,38 +297,39 @@ func readFromExecFifo(execFifo io.Reader) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} {
|
||||
isDead := make(chan struct{})
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-exit:
|
||||
return
|
||||
case <-time.After(time.Millisecond * 100):
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil || stat.State == system.Zombie {
|
||||
close(isDead)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
return isDead
|
||||
}
|
||||
|
||||
func awaitFifoOpen(path string) <-chan openResult {
|
||||
fifoOpened := make(chan openResult)
|
||||
go func() {
|
||||
f, err := os.OpenFile(path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
|
||||
return
|
||||
}
|
||||
fifoOpened <- openResult{file: f}
|
||||
result := fifoOpen(path, true)
|
||||
fifoOpened <- result
|
||||
}()
|
||||
return fifoOpened
|
||||
}
|
||||
|
||||
func fifoOpen(path string, block bool) openResult {
|
||||
flags := os.O_RDONLY
|
||||
if !block {
|
||||
flags |= syscall.O_NONBLOCK
|
||||
}
|
||||
f, err := os.OpenFile(path, flags, 0)
|
||||
if err != nil {
|
||||
return openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")}
|
||||
}
|
||||
return openResult{file: f}
|
||||
}
|
||||
|
||||
func handleFifoResult(result openResult) error {
|
||||
if result.err != nil {
|
||||
return result.err
|
||||
}
|
||||
f := result.file
|
||||
defer f.Close()
|
||||
if err := readFromExecFifo(f); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.Remove(f.Name())
|
||||
}
|
||||
|
||||
type openResult struct {
|
||||
file *os.File
|
||||
err error
|
||||
|
@ -337,6 +340,7 @@ func (c *linuxContainer) start(process *Process) error {
|
|||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "creating new parent process")
|
||||
}
|
||||
parent.forwardChildLogs()
|
||||
if err := parent.start(); err != nil {
|
||||
// terminate the process to ensure that it properly is reaped.
|
||||
if err := ignoreTerminateErrors(parent.terminate()); err != nil {
|
||||
|
@ -438,16 +442,24 @@ func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error {
|
|||
}
|
||||
|
||||
func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
|
||||
parentPipe, childPipe, err := utils.NewSockPair("init")
|
||||
parentInitPipe, childInitPipe, err := utils.NewSockPair("init")
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "creating new init pipe")
|
||||
}
|
||||
cmd, err := c.commandTemplate(p, childPipe)
|
||||
messageSockPair := filePair{parentInitPipe, childInitPipe}
|
||||
|
||||
parentLogPipe, childLogPipe, err := os.Pipe()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Unable to create the log pipe: %s", err)
|
||||
}
|
||||
logFilePair := filePair{parentLogPipe, childLogPipe}
|
||||
|
||||
cmd, err := c.commandTemplate(p, childInitPipe, childLogPipe)
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "creating new command template")
|
||||
}
|
||||
if !p.Init {
|
||||
return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
|
||||
return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair)
|
||||
}
|
||||
|
||||
// We only set up fifoFd if we're not doing a `runc exec`. The historic
|
||||
|
@ -458,10 +470,10 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
|
|||
if err := c.includeExecFifo(cmd); err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "including execfifo in cmd.Exec setup")
|
||||
}
|
||||
return c.newInitProcess(p, cmd, parentPipe, childPipe)
|
||||
return c.newInitProcess(p, cmd, messageSockPair, logFilePair)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) (*exec.Cmd, error) {
|
||||
cmd := exec.Command(c.initPath, c.initArgs[1:]...)
|
||||
cmd.Args[0] = c.initArgs[0]
|
||||
cmd.Stdin = p.Stdin
|
||||
|
@ -479,11 +491,18 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
|||
fmt.Sprintf("_LIBCONTAINER_CONSOLE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
|
||||
)
|
||||
}
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe)
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, childInitPipe)
|
||||
cmd.Env = append(cmd.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
|
||||
fmt.Sprintf("_LIBCONTAINER_STATEDIR=%s", c.root),
|
||||
)
|
||||
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, childLogPipe)
|
||||
cmd.Env = append(cmd.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_LOGPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
|
||||
fmt.Sprintf("_LIBCONTAINER_LOGLEVEL=%s", p.LogLevel),
|
||||
)
|
||||
|
||||
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
|
||||
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
|
||||
// even with the parent still running.
|
||||
|
@ -493,7 +512,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
|||
return cmd, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
|
||||
nsMaps := make(map[configs.NamespaceType]string)
|
||||
for _, ns := range c.config.Namespaces {
|
||||
|
@ -508,8 +527,8 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
|
|||
}
|
||||
init := &initProcess{
|
||||
cmd: cmd,
|
||||
childPipe: childPipe,
|
||||
parentPipe: parentPipe,
|
||||
messageSockPair: messageSockPair,
|
||||
logFilePair: logFilePair,
|
||||
manager: c.cgroupManager,
|
||||
intelRdtManager: c.intelRdtManager,
|
||||
config: c.newInitConfig(p),
|
||||
|
@ -522,7 +541,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
|
|||
return init, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
|
||||
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*setnsProcess, error) {
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
|
||||
state, err := c.currentState()
|
||||
if err != nil {
|
||||
|
@ -539,8 +558,8 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
|
|||
cgroupPaths: c.cgroupManager.GetPaths(),
|
||||
rootlessCgroups: c.config.RootlessCgroups,
|
||||
intelRdtPath: state.IntelRdtPath,
|
||||
childPipe: childPipe,
|
||||
parentPipe: parentPipe,
|
||||
messageSockPair: messageSockPair,
|
||||
logFilePair: logFilePair,
|
||||
config: c.newInitConfig(p),
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
|
@ -924,7 +943,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
|
||||
// Since a container can be C/R'ed multiple times,
|
||||
// the checkpoint directory may already exist.
|
||||
if err := os.Mkdir(criuOpts.ImagesDirectory, 0755); err != nil && !os.IsExist(err) {
|
||||
if err := os.Mkdir(criuOpts.ImagesDirectory, 0700); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -932,7 +951,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
|
||||
}
|
||||
|
||||
if err := os.Mkdir(criuOpts.WorkDirectory, 0755); err != nil && !os.IsExist(err) {
|
||||
if err := os.Mkdir(criuOpts.WorkDirectory, 0700); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -1095,7 +1114,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
return err
|
||||
}
|
||||
|
||||
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
|
||||
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0600)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -1160,7 +1179,7 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkMountDestination(c.config.Rootfs, dest); err != nil {
|
||||
if err := checkProcMount(c.config.Rootfs, dest, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Destination = dest
|
||||
|
@ -1230,7 +1249,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
|||
}
|
||||
// Since a container can be C/R'ed multiple times,
|
||||
// the work directory may already exist.
|
||||
if err := os.Mkdir(criuOpts.WorkDirectory, 0655); err != nil && !os.IsExist(err) {
|
||||
if err := os.Mkdir(criuOpts.WorkDirectory, 0700); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
workDir, err := os.Open(criuOpts.WorkDirectory)
|
||||
|
@ -1798,15 +1817,22 @@ func (c *linuxContainer) isPaused() (bool, error) {
|
|||
// A container doesn't have a freezer cgroup
|
||||
return false, nil
|
||||
}
|
||||
data, err := ioutil.ReadFile(filepath.Join(fcg, "freezer.state"))
|
||||
pausedState := "FROZEN"
|
||||
filename := "freezer.state"
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
filename = "cgroup.freeze"
|
||||
pausedState = "1"
|
||||
}
|
||||
|
||||
data, err := ioutil.ReadFile(filepath.Join(fcg, filename))
|
||||
if err != nil {
|
||||
// If freezer cgroup is not mounted, the container would just be not paused.
|
||||
if os.IsNotExist(err) {
|
||||
if os.IsNotExist(err) || err == syscall.ENODEV {
|
||||
return false, nil
|
||||
}
|
||||
return false, newSystemErrorWithCause(err, "checking if container is paused")
|
||||
}
|
||||
return bytes.Equal(bytes.TrimSpace(data), []byte("FROZEN")), nil
|
||||
return bytes.Equal(bytes.TrimSpace(data), []byte(pausedState)), nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) currentState() (*State, error) {
|
||||
|
|
|
@ -7,11 +7,11 @@ import (
|
|||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrNotADevice denotes that a file is not a valid linux device.
|
||||
ErrNotADevice = errors.New("not a device node")
|
||||
)
|
||||
|
||||
|
@ -21,7 +21,8 @@ var (
|
|||
ioutilReadDir = ioutil.ReadDir
|
||||
)
|
||||
|
||||
// Given the path to a device and its cgroup_permissions(which cannot be easily queried) look up the information about a linux device and return that information as a Device struct.
|
||||
// Given the path to a device and its cgroup_permissions(which cannot be easily queried) look up the
|
||||
// information about a linux device and return that information as a Device struct.
|
||||
func DeviceFromPath(path, permissions string) (*configs.Device, error) {
|
||||
var stat unix.Stat_t
|
||||
err := unixLstat(path, &stat)
|
||||
|
@ -60,25 +61,29 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
|
|||
}, nil
|
||||
}
|
||||
|
||||
// HostDevices returns all devices that can be found under /dev directory.
|
||||
func HostDevices() ([]*configs.Device, error) {
|
||||
return getDevices("/dev")
|
||||
return GetDevices("/dev")
|
||||
}
|
||||
|
||||
func getDevices(path string) ([]*configs.Device, error) {
|
||||
// GetDevices recursively traverses a directory specified by path
|
||||
// and returns all devices found there.
|
||||
func GetDevices(path string) ([]*configs.Device, error) {
|
||||
files, err := ioutilReadDir(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out := []*configs.Device{}
|
||||
var out []*configs.Device
|
||||
for _, f := range files {
|
||||
switch {
|
||||
case f.IsDir():
|
||||
switch f.Name() {
|
||||
// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
|
||||
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
|
||||
// ".udev" added to address https://github.com/opencontainers/runc/issues/2093
|
||||
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts", ".udev":
|
||||
continue
|
||||
default:
|
||||
sub, err := getDevices(filepath.Join(path, f.Name()))
|
||||
sub, err := GetDevices(filepath.Join(path, f.Name()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -14,12 +14,14 @@ import (
|
|||
"github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/libcontainer/mount"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
@ -51,12 +53,35 @@ func InitArgs(args ...string) func(*LinuxFactory) error {
|
|||
// SystemdCgroups is an options func to configure a LinuxFactory to return
|
||||
// containers that use systemd to create and manage cgroups.
|
||||
func SystemdCgroups(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &systemd.Manager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
systemdCgroupsManager, err := systemd.NewSystemdCgroupsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
l.NewCgroupsManager = systemdCgroupsManager
|
||||
return nil
|
||||
}
|
||||
|
||||
func getUnifiedPath(paths map[string]string) string {
|
||||
unifiedPath := ""
|
||||
for k, v := range paths {
|
||||
if unifiedPath == "" {
|
||||
unifiedPath = v
|
||||
} else if v != unifiedPath {
|
||||
panic(errors.Errorf("expected %q path to be unified path %q, got %q", k, unifiedPath, v))
|
||||
}
|
||||
}
|
||||
// can be empty
|
||||
return unifiedPath
|
||||
}
|
||||
|
||||
func cgroupfs2(l *LinuxFactory, rootless bool) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
m, err := fs2.NewManager(config, getUnifiedPath(paths), rootless)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return m
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -64,6 +89,9 @@ func SystemdCgroups(l *LinuxFactory) error {
|
|||
// that use the native cgroups filesystem implementation to create and manage
|
||||
// cgroups.
|
||||
func Cgroupfs(l *LinuxFactory) error {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return cgroupfs2(l, false)
|
||||
}
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
Cgroups: config,
|
||||
|
@ -80,6 +108,9 @@ func Cgroupfs(l *LinuxFactory) error {
|
|||
// during rootless container (including euid=0 in userns) setup (while still allowing cgroup usage if
|
||||
// they've been set up properly).
|
||||
func RootlessCgroupfs(l *LinuxFactory) error {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return cgroupfs2(l, true)
|
||||
}
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
Cgroups: config,
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue