// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package benchmarks

import (
	"fmt"
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/scheduler"
)

// BenchmarkSchedulerExample is an example of how to write a one-off
// benchmark for the Nomad scheduler. The starting state for your
// implementation will depend on the following environment variables:
//
// - NOMAD_BENCHMARK_DATADIR: path to data directory
// - NOMAD_BENCHMARK_SNAPSHOT: path to raft snapshot
// - neither: empty starting state
//
// You can run a profile for this benchmark with the usual -cpuprofile
// -memprofile flags.
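//
// A sketch of a possible invocation, run from this package's directory
// (the data directory path, bench count, and profile file names are
// illustrative assumptions, not values defined in this package):
//
//	NOMAD_BENCHMARK_DATADIR=/tmp/nomad-data \
//		go test -bench=BenchmarkSchedulerExample -benchtime=10x \
//		-cpuprofile=cpu.out -memprofile=mem.out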
func BenchmarkSchedulerExample(b *testing.B) {

	h := NewBenchmarkingHarness(b)
	var eval *structs.Evaluation

	// (implement me!) this is your setup for the state and the eval
	// you're going to process, all of which happens before benchmarking
	// starts. If you're benchmarking a real world datadir or snapshot,
	// you should assert your assumptions about the contents here.
	{
		upsertNodes(h, 5000, 100)

		iter, err := h.State.Nodes(nil)
		require.NoError(b, err)
		nodes := 0
		for {
			raw := iter.Next()
			if raw == nil {
				break
			}
			nodes++
		}
		require.Equal(b, 5000, nodes)
		job := generateJob(true, 600)
		eval = upsertJob(h, job)
	}

	// (implement me!) Note that h.Process doesn't return errors for
	// most states that result in blocked plans, so it's recommended
	// you write an assertion section here so that you're sure you're
	// benchmarking a successful run and not a failed plan.
	{
		err := h.Process(scheduler.NewServiceScheduler, eval)
		require.NoError(b, err)
		require.Len(b, h.Plans, 1)
		require.False(b, h.Plans[0].IsNoOp())
	}

	// Reset the timer so the setup and validation work above is not
	// counted in the measured benchmark time.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		err := h.Process(scheduler.NewServiceScheduler, eval)
		require.NoError(b, err)
	}
}

// BenchmarkServiceScheduler exercises the service scheduler at a
// variety of cluster sizes, with both spread and non-spread jobs.
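//
// A single case from the matrix below can be selected with go test's
// standard -bench regexp filter; Go replaces spaces in sub-benchmark
// names with underscores, so a hypothetical run of one spread case
// from this package's directory looks like:
//
//	go test -bench='BenchmarkServiceScheduler/5000_nodes_25_racks_600_allocs_spread'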
func BenchmarkServiceScheduler(b *testing.B) {

	clusterSizes := []int{1000, 5000, 10000}
	rackSets := []int{10, 25, 50, 75}
	jobSizes := []int{300, 600, 900, 1200}

	type benchmark struct {
		name        string
		clusterSize int
		racks       int
		jobSize     int
		withSpread  bool
	}

	benchmarks := []benchmark{}
	for _, clusterSize := range clusterSizes {
		for _, racks := range rackSets {
			for _, jobSize := range jobSizes {
				benchmarks = append(benchmarks,
					benchmark{
						name: fmt.Sprintf("%d nodes %d racks %d allocs spread",
							clusterSize, racks, jobSize,
						),
						clusterSize: clusterSize, racks: racks, jobSize: jobSize,
						withSpread: true,
					},
				)
				benchmarks = append(benchmarks,
					benchmark{
						name: fmt.Sprintf("%d nodes %d racks %d allocs no spread",
							clusterSize, racks, jobSize,
						),
						clusterSize: clusterSize, racks: racks, jobSize: jobSize,
						withSpread: false,
					},
				)
			}
		}
	}

	for _, bm := range benchmarks {
		b.Run(bm.name, func(b *testing.B) {
			h := scheduler.NewHarness(b)
			upsertNodes(h, bm.clusterSize, bm.racks)
			job := generateJob(bm.withSpread, bm.jobSize)
			eval := upsertJob(h, job)
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				err := h.Process(scheduler.NewServiceScheduler, eval)
				require.NoError(b, err)
			}
		})
	}
}

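// upsertJob writes the job into the harness state store and returns a
// pending job-register evaluation for it, ready to be handed to the
// scheduler under test.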
func upsertJob(h *scheduler.Harness, job *structs.Job) *structs.Evaluation {
	err := h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job)
	if err != nil {
		panic(err)
	}
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	err = h.State.UpsertEvals(structs.MsgTypeTestSetup,
		h.NextIndex(), []*structs.Evaluation{eval})
	if err != nil {
		panic(err)
	}
	return eval
}

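// generateJob returns a mock service job whose single task group runs
// jobSize allocations, optionally spread over the ${meta.rack} node
// attribute, with constraints, networks, and services stripped out.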
func generateJob(withSpread bool, jobSize int) *structs.Job {
	job := mock.Job()
	job.Datacenters = []string{"dc-1", "dc-2"}
	if withSpread {
		job.Spreads = []*structs.Spread{{Attribute: "${meta.rack}"}}
	}
	job.Constraints = []*structs.Constraint{}
	job.TaskGroups[0].Count = jobSize
	job.TaskGroups[0].Networks = nil
	job.TaskGroups[0].Services = []*structs.Service{}
	job.TaskGroups[0].Tasks[0].Resources = &structs.Resources{
		CPU:      6000,
		MemoryMB: 6000,
	}
	return job
}

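// upsertNodes writes count mock nodes into the harness state store,
// alternating them across two datacenters and assigning each one a
// ${meta.rack} value between r0 and r<racks-1>.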
func upsertNodes(h *scheduler.Harness, count, racks int) {

	datacenters := []string{"dc-1", "dc-2"}

	for i := 0; i < count; i++ {
		node := mock.Node()
		node.Datacenter = datacenters[i%2]
		node.Meta = map[string]string{}
		node.Meta["rack"] = fmt.Sprintf("r%d", i%racks)
		cpuShares := 14000
		memoryMB := 32000
		diskMB := 100 * 1024

		node.NodeResources = &structs.NodeResources{
			Cpu: structs.NodeCpuResources{
				CpuShares: int64(cpuShares),
			},
			Memory: structs.NodeMemoryResources{
				MemoryMB: int64(memoryMB),
			},
			Disk: structs.NodeDiskResources{
				DiskMB: int64(diskMB),
			},
			Networks: []*structs.NetworkResource{
				{
					Mode:   "host",
					Device: "eth0",
					CIDR:   "192.168.0.100/32",
					MBits:  1000,
				},
			},
		}

		err := h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)
		if err != nil {
			panic(err)
		}
	}
}