package nomad import ( "fmt" "io" "log" "time" "github.com/armon/go-metrics" "github.com/hashicorp/go-msgpack/codec" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/raft" ) var ( msgpackHandle = &codec.MsgpackHandle{ RawToString: true, WriteExt: true, } ) const ( // timeTableGranularity is the granularity of index to time tracking timeTableGranularity = 5 * time.Minute // timeTableLimit is the maximum limit of our tracking timeTableLimit = 24 * time.Hour ) // SnapshotType is prefixed to a record in the FSM snapshot // so that we can determine the type for restore type SnapshotType byte const ( NodeSnapshot SnapshotType = iota JobSnapshot IndexSnapshot EvalSnapshot AllocSnapshot TimeTableSnapshot ) // nomadFSM implements a finite state machine that is used // along with Raft to provide strong consistency. We implement // this outside the Server to avoid exposing this outside the package. type nomadFSM struct { evalBroker *EvalBroker logOutput io.Writer logger *log.Logger state *state.StateStore timetable *TimeTable } // nomadSnapshot is used to provide a snapshot of the current // state in a way that can be accessed concurrently with operations // that may modify the live state. type nomadSnapshot struct { snap *state.StateSnapshot timetable *TimeTable } // snapshotHeader is the first entry in our snapshot type snapshotHeader struct { } // NewFSMPath is used to construct a new FSM with a blank state func NewFSM(evalBroker *EvalBroker, logOutput io.Writer) (*nomadFSM, error) { // Create a state store state, err := state.NewStateStore(logOutput) if err != nil { return nil, err } fsm := &nomadFSM{ evalBroker: evalBroker, logOutput: logOutput, logger: log.New(logOutput, "", log.LstdFlags), state: state, timetable: NewTimeTable(timeTableGranularity, timeTableLimit), } return fsm, nil } // Close is used to cleanup resources associated with the FSM func (n *nomadFSM) Close() error { return nil } // State is used to return a handle to the current state func (n *nomadFSM) State() *state.StateStore { return n.state } // TimeTable returns the time table of transactions func (n *nomadFSM) TimeTable() *TimeTable { return n.timetable } func (n *nomadFSM) Apply(log *raft.Log) interface{} { buf := log.Data msgType := structs.MessageType(buf[0]) // Witness this write n.timetable.Witness(log.Index, time.Now().UTC()) // Check if this message type should be ignored when unknown. This is // used so that new commands can be added with developer control if older // versions can safely ignore the command, or if they should crash. ignoreUnknown := false if msgType&structs.IgnoreUnknownTypeFlag == structs.IgnoreUnknownTypeFlag { msgType &= ^structs.IgnoreUnknownTypeFlag ignoreUnknown = true } switch msgType { case structs.NodeRegisterRequestType: return n.applyRegisterNode(buf[1:], log.Index) case structs.NodeDeregisterRequestType: return n.applyDeregisterNode(buf[1:], log.Index) case structs.NodeUpdateStatusRequestType: return n.applyStatusUpdate(buf[1:], log.Index) case structs.JobRegisterRequestType: return n.applyRegisterJob(buf[1:], log.Index) case structs.JobDeregisterRequestType: return n.applyDeregisterJob(buf[1:], log.Index) case structs.EvalUpdateRequestType: return n.applyUpdateEval(buf[1:], log.Index) case structs.EvalDeleteRequestType: return n.applyDeleteEval(buf[1:], log.Index) case structs.AllocUpdateRequestType: return n.applyAllocUpdate(buf[1:], log.Index) case structs.AllocClientUpdateRequestType: return n.applyAllocClientUpdate(buf[1:], log.Index) default: if ignoreUnknown { n.logger.Printf("[WARN] nomad.fsm: ignoring unknown message type (%d), upgrade to newer version", msgType) return nil } else { panic(fmt.Errorf("failed to apply request: %#v", buf)) } } } func (n *nomadFSM) applyRegisterNode(buf []byte, index uint64) interface{} { defer metrics.MeasureSince([]string{"nomad", "fsm", "register_node"}, time.Now()) var req structs.NodeRegisterRequest if err := structs.Decode(buf, &req); err != nil { panic(fmt.Errorf("failed to decode request: %v", err)) } if err := n.state.RegisterNode(index, req.Node); err != nil { n.logger.Printf("[ERR] nomad.fsm: RegisterNode failed: %v", err) return err } return nil } func (n *nomadFSM) applyDeregisterNode(buf []byte, index uint64) interface{} { defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_node"}, time.Now()) var req structs.NodeDeregisterRequest if err := structs.Decode(buf, &req); err != nil { panic(fmt.Errorf("failed to decode request: %v", err)) } if err := n.state.DeregisterNode(index, req.NodeID); err != nil { n.logger.Printf("[ERR] nomad.fsm: DeregisterNode failed: %v", err) return err } return nil } func (n *nomadFSM) applyStatusUpdate(buf []byte, index uint64) interface{} { defer metrics.MeasureSince([]string{"nomad", "fsm", "node_status_update"}, time.Now()) var req structs.NodeUpdateStatusRequest if err := structs.Decode(buf, &req); err != nil { panic(fmt.Errorf("failed to decode request: %v", err)) } if err := n.state.UpdateNodeStatus(index, req.NodeID, req.Status); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpdateNodeStatus failed: %v", err) return err } return nil } func (n *nomadFSM) applyRegisterJob(buf []byte, index uint64) interface{} { defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now()) var req structs.JobRegisterRequest if err := structs.Decode(buf, &req); err != nil { panic(fmt.Errorf("failed to decode request: %v", err)) } if err := n.state.RegisterJob(index, req.Job); err != nil { n.logger.Printf("[ERR] nomad.fsm: RegisterJob failed: %v", err) return err } return nil } func (n *nomadFSM) applyDeregisterJob(buf []byte, index uint64) interface{} { defer metrics.MeasureSince([]string{"nomad", "fsm", "deregister_job"}, time.Now()) var req structs.JobDeregisterRequest if err := structs.Decode(buf, &req); err != nil { panic(fmt.Errorf("failed to decode request: %v", err)) } if err := n.state.DeregisterJob(index, req.JobID); err != nil { n.logger.Printf("[ERR] nomad.fsm: DeregisterJob failed: %v", err) return err } return nil } func (n *nomadFSM) applyUpdateEval(buf []byte, index uint64) interface{} { defer metrics.MeasureSince([]string{"nomad", "fsm", "update_eval"}, time.Now()) var req structs.EvalUpdateRequest if err := structs.Decode(buf, &req); err != nil { panic(fmt.Errorf("failed to decode request: %v", err)) } if err := n.state.UpsertEvals(index, req.Evals); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpsertEvals failed: %v", err) return err } for _, eval := range req.Evals { if eval.ShouldEnqueue() { if err := n.evalBroker.Enqueue(eval); err != nil { n.logger.Printf("[ERR] nomad.fsm: failed to enqueue evaluation %s: %v", eval.ID, err) return err } } } return nil } func (n *nomadFSM) applyDeleteEval(buf []byte, index uint64) interface{} { defer metrics.MeasureSince([]string{"nomad", "fsm", "delete_eval"}, time.Now()) var req structs.EvalDeleteRequest if err := structs.Decode(buf, &req); err != nil { panic(fmt.Errorf("failed to decode request: %v", err)) } if err := n.state.DeleteEval(index, req.Evals, req.Allocs); err != nil { n.logger.Printf("[ERR] nomad.fsm: DeleteEval failed: %v", err) return err } return nil } func (n *nomadFSM) applyAllocUpdate(buf []byte, index uint64) interface{} { defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update"}, time.Now()) var req structs.AllocUpdateRequest if err := structs.Decode(buf, &req); err != nil { panic(fmt.Errorf("failed to decode request: %v", err)) } if err := n.state.UpdateAllocations(index, req.Alloc); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpdateAllocations failed: %v", err) return err } return nil } func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} { defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_client_update"}, time.Now()) var req structs.AllocUpdateRequest if err := structs.Decode(buf, &req); err != nil { panic(fmt.Errorf("failed to decode request: %v", err)) } if len(req.Alloc) == 0 { return nil } if err := n.state.UpdateAllocFromClient(index, req.Alloc[0]); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpdateAllocFromClient failed: %v", err) return err } return nil } func (n *nomadFSM) Snapshot() (raft.FSMSnapshot, error) { // Create a new snapshot snap, err := n.state.Snapshot() if err != nil { return nil, err } ns := &nomadSnapshot{ snap: snap, timetable: n.timetable, } return ns, nil } func (n *nomadFSM) Restore(old io.ReadCloser) error { defer old.Close() // Create a new state store newState, err := state.NewStateStore(n.logOutput) if err != nil { return err } n.state = newState // Start the state restore restore, err := newState.Restore() if err != nil { return err } defer restore.Abort() // Create a decoder dec := codec.NewDecoder(old, msgpackHandle) // Read in the header var header snapshotHeader if err := dec.Decode(&header); err != nil { return err } // Populate the new state msgType := make([]byte, 1) for { // Read the message type _, err := old.Read(msgType) if err == io.EOF { break } else if err != nil { return err } // Decode switch SnapshotType(msgType[0]) { case TimeTableSnapshot: if err := n.timetable.Deserialize(dec); err != nil { return fmt.Errorf("time table deserialize failed: %v", err) } case NodeSnapshot: node := new(structs.Node) if err := dec.Decode(node); err != nil { return err } if err := restore.NodeRestore(node); err != nil { return err } case JobSnapshot: job := new(structs.Job) if err := dec.Decode(job); err != nil { return err } if err := restore.JobRestore(job); err != nil { return err } case EvalSnapshot: eval := new(structs.Evaluation) if err := dec.Decode(eval); err != nil { return err } if err := restore.EvalRestore(eval); err != nil { return err } case AllocSnapshot: alloc := new(structs.Allocation) if err := dec.Decode(alloc); err != nil { return err } if err := restore.AllocRestore(alloc); err != nil { return err } case IndexSnapshot: idx := new(state.IndexEntry) if err := dec.Decode(idx); err != nil { return err } if err := restore.IndexRestore(idx); err != nil { return err } default: return fmt.Errorf("Unrecognized snapshot type: %v", msgType) } } // Commit the state restore restore.Commit() return nil } func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now()) // Register the nodes encoder := codec.NewEncoder(sink, msgpackHandle) // Write the header header := snapshotHeader{} if err := encoder.Encode(&header); err != nil { sink.Cancel() return err } // Write the time table sink.Write([]byte{byte(TimeTableSnapshot)}) if err := s.timetable.Serialize(encoder); err != nil { sink.Cancel() return err } // Write all the data out if err := s.persistIndexes(sink, encoder); err != nil { sink.Cancel() return err } if err := s.persistNodes(sink, encoder); err != nil { sink.Cancel() return err } if err := s.persistJobs(sink, encoder); err != nil { sink.Cancel() return err } if err := s.persistEvals(sink, encoder); err != nil { sink.Cancel() return err } if err := s.persistAllocs(sink, encoder); err != nil { sink.Cancel() return err } return nil } func (s *nomadSnapshot) persistIndexes(sink raft.SnapshotSink, encoder *codec.Encoder) error { // Get all the indexes iter, err := s.snap.Indexes() if err != nil { return err } for { // Get the next item raw := iter.Next() if raw == nil { break } // Prepare the request struct idx := raw.(*state.IndexEntry) // Write out a node registration sink.Write([]byte{byte(IndexSnapshot)}) if err := encoder.Encode(idx); err != nil { return err } } return nil } func (s *nomadSnapshot) persistNodes(sink raft.SnapshotSink, encoder *codec.Encoder) error { // Get all the nodes nodes, err := s.snap.Nodes() if err != nil { return err } for { // Get the next item raw := nodes.Next() if raw == nil { break } // Prepare the request struct node := raw.(*structs.Node) // Write out a node registration sink.Write([]byte{byte(NodeSnapshot)}) if err := encoder.Encode(node); err != nil { return err } } return nil } func (s *nomadSnapshot) persistJobs(sink raft.SnapshotSink, encoder *codec.Encoder) error { // Get all the jobs jobs, err := s.snap.Jobs() if err != nil { return err } for { // Get the next item raw := jobs.Next() if raw == nil { break } // Prepare the request struct job := raw.(*structs.Job) // Write out a job registration sink.Write([]byte{byte(JobSnapshot)}) if err := encoder.Encode(job); err != nil { return err } } return nil } func (s *nomadSnapshot) persistEvals(sink raft.SnapshotSink, encoder *codec.Encoder) error { // Get all the evaluations evals, err := s.snap.Evals() if err != nil { return err } for { // Get the next item raw := evals.Next() if raw == nil { break } // Prepare the request struct eval := raw.(*structs.Evaluation) // Write out the evaluation sink.Write([]byte{byte(EvalSnapshot)}) if err := encoder.Encode(eval); err != nil { return err } } return nil } func (s *nomadSnapshot) persistAllocs(sink raft.SnapshotSink, encoder *codec.Encoder) error { // Get all the allocations allocs, err := s.snap.Allocs() if err != nil { return err } for { // Get the next item raw := allocs.Next() if raw == nil { break } // Prepare the request struct alloc := raw.(*structs.Allocation) // Write out the evaluation sink.Write([]byte{byte(AllocSnapshot)}) if err := encoder.Encode(alloc); err != nil { return err } } return nil } // Release is a no-op, as we just need to GC the pointer // to the state store snapshot. There is nothing to explicitly // cleanup. func (s *nomadSnapshot) Release() {}