Merge pull request #3821 from hashicorp/f-restart-stanza-fail-default
Change the default mode for client-side restarts from "delay" to "fail"
This commit is contained in:
commit
98f6872c85
|
@ -4,6 +4,9 @@ __BACKWARDS INCOMPATIBILITIES:__
|
|||
* discovery: Prevent absolute URLs in check paths. The documentation indicated
|
||||
that absolute URLs are not allowed, but it was not enforced. Absolute URLs
|
||||
in HTTP check paths will now fail to validate. [[GH-3685](https://github.com/hashicorp/nomad/issues/3685)]
|
||||
* jobspec: The default values for restart policy have changed. Restart policy mode defaults to "fail" and the
|
||||
attempts/time interval values have been changed to enable faster server-side rescheduling. See
|
||||
[restart stanza](https://www.nomadproject.io/docs/job-specification/restart.html) for more information.
|
||||
|
||||
IMPROVEMENTS:
|
||||
* core: A set of features (Autopilot) has been added to allow for automatic operator-friendly management of Nomad servers. For more information about Autopilot, see the [Autopilot Guide](https://www.nomadproject.io/guides/cluster/autopilot.html). [[GH-3670](https://github.com/hashicorp/nomad/pull/3670)]
|
||||
|
|
16
api/tasks.go
16
api/tasks.go
|
@ -340,17 +340,17 @@ func (g *TaskGroup) Canonicalize(job *Job) {
|
|||
switch *job.Type {
|
||||
case "service", "system":
|
||||
defaultRestartPolicy = &RestartPolicy{
|
||||
Delay: helper.TimeToPtr(15 * time.Second),
|
||||
Attempts: helper.IntToPtr(2),
|
||||
Interval: helper.TimeToPtr(1 * time.Minute),
|
||||
Mode: helper.StringToPtr("delay"),
|
||||
Delay: helper.TimeToPtr(structs.DefaultServiceJobRestartPolicy.Delay),
|
||||
Attempts: helper.IntToPtr(structs.DefaultServiceJobRestartPolicy.Attempts),
|
||||
Interval: helper.TimeToPtr(structs.DefaultServiceJobRestartPolicy.Interval),
|
||||
Mode: helper.StringToPtr(structs.DefaultServiceJobRestartPolicy.Mode),
|
||||
}
|
||||
default:
|
||||
defaultRestartPolicy = &RestartPolicy{
|
||||
Delay: helper.TimeToPtr(15 * time.Second),
|
||||
Attempts: helper.IntToPtr(15),
|
||||
Interval: helper.TimeToPtr(7 * 24 * time.Hour),
|
||||
Mode: helper.StringToPtr("delay"),
|
||||
Delay: helper.TimeToPtr(structs.DefaultBatchJobRestartPolicy.Delay),
|
||||
Attempts: helper.IntToPtr(structs.DefaultBatchJobRestartPolicy.Attempts),
|
||||
Interval: helper.TimeToPtr(structs.DefaultBatchJobRestartPolicy.Interval),
|
||||
Mode: helper.StringToPtr(structs.DefaultBatchJobRestartPolicy.Mode),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -183,18 +183,18 @@ job "example" {
|
|||
#
|
||||
restart {
|
||||
# The number of attempts to run the job within the specified interval.
|
||||
attempts = 10
|
||||
interval = "5m"
|
||||
attempts = 2
|
||||
interval = "30m"
|
||||
|
||||
# The "delay" parameter specifies the duration to wait before restarting
|
||||
# a task after it has failed.
|
||||
delay = "25s"
|
||||
delay = "15s"
|
||||
|
||||
# The "mode" parameter controls what happens when a task has restarted
|
||||
# "attempts" times within the interval. "delay" mode delays the next
|
||||
# restart until the next interval. "fail" mode does not restart the task
|
||||
# if "attempts" has been hit within the interval.
|
||||
mode = "delay"
|
||||
mode = "fail"
|
||||
}
|
||||
|
||||
# The "ephemeral_disk" stanza instructs Nomad to utilize an ephemeral disk
|
||||
|
|
|
@ -2510,17 +2510,17 @@ func (d *DispatchPayloadConfig) Validate() error {
|
|||
}
|
||||
|
||||
var (
|
||||
defaultServiceJobRestartPolicy = RestartPolicy{
|
||||
DefaultServiceJobRestartPolicy = RestartPolicy{
|
||||
Delay: 15 * time.Second,
|
||||
Attempts: 2,
|
||||
Interval: 1 * time.Minute,
|
||||
Mode: RestartPolicyModeDelay,
|
||||
Interval: 30 * time.Minute,
|
||||
Mode: RestartPolicyModeFail,
|
||||
}
|
||||
defaultBatchJobRestartPolicy = RestartPolicy{
|
||||
DefaultBatchJobRestartPolicy = RestartPolicy{
|
||||
Delay: 15 * time.Second,
|
||||
Attempts: 15,
|
||||
Interval: 7 * 24 * time.Hour,
|
||||
Mode: RestartPolicyModeDelay,
|
||||
Attempts: 3,
|
||||
Interval: 24 * time.Hour,
|
||||
Mode: RestartPolicyModeFail,
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -2604,10 +2604,10 @@ func (r *RestartPolicy) Validate() error {
|
|||
func NewRestartPolicy(jobType string) *RestartPolicy {
|
||||
switch jobType {
|
||||
case JobTypeService, JobTypeSystem:
|
||||
rp := defaultServiceJobRestartPolicy
|
||||
rp := DefaultServiceJobRestartPolicy
|
||||
return &rp
|
||||
case JobTypeBatch:
|
||||
rp := defaultBatchJobRestartPolicy
|
||||
rp := DefaultBatchJobRestartPolicy
|
||||
return &rp
|
||||
}
|
||||
return nil
|
||||
|
|
|
@ -91,10 +91,10 @@ Below is the JSON representation of the job outputted by `$ nomad init`:
|
|||
"Leader": false
|
||||
}],
|
||||
"RestartPolicy": {
|
||||
"Interval": 300000000000,
|
||||
"Attempts": 10,
|
||||
"Delay": 25000000000,
|
||||
"Mode": "delay"
|
||||
"Interval": 1800000000000,
|
||||
"Attempts": 2,
|
||||
"Delay": 15000000000,
|
||||
"Mode": "fail"
|
||||
},
|
||||
"EphemeralDisk": {
|
||||
"SizeMB": 300
|
||||
|
|
|
@ -17,7 +17,8 @@ description: |-
|
|||
</tr>
|
||||
</table>
|
||||
|
||||
The `restart` stanza configures a group's behavior on task failure.
|
||||
The `restart` stanza configures a group's behavior on task failure. Restarts
|
||||
happen on the client that is running the task.
|
||||
|
||||
```hcl
|
||||
job "docs" {
|
||||
|
@ -62,7 +63,7 @@ defaults by job type:
|
|||
attempts = 3
|
||||
delay = "15s"
|
||||
interval = "24h"
|
||||
mode = "delay"
|
||||
mode = "fail"
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -73,7 +74,7 @@ defaults by job type:
|
|||
interval = "30m"
|
||||
attempts = 2
|
||||
delay = "15s"
|
||||
mode = "delay"
|
||||
mode = "fail"
|
||||
}
|
||||
```
|
||||
|
||||
|
|
Loading…
Reference in a new issue