Merge pull request #3821 from hashicorp/f-restart-stanza-fail-default
Change the default mode for client side restarts to fail from delay
This commit is contained in:
commit
98f6872c85
|
@ -4,6 +4,9 @@ __BACKWARDS INCOMPATIBILITIES:__
|
||||||
* discovery: Prevent absolute URLs in check paths. The documentation indicated
|
* discovery: Prevent absolute URLs in check paths. The documentation indicated
|
||||||
that absolute URLs are not allowed, but it was not enforced. Absolute URLs
|
that absolute URLs are not allowed, but it was not enforced. Absolute URLs
|
||||||
in HTTP check paths will now fail to validate. [[GH-3685](https://github.com/hashicorp/nomad/issues/3685)]
|
in HTTP check paths will now fail to validate. [[GH-3685](https://github.com/hashicorp/nomad/issues/3685)]
|
||||||
|
* jobspec: The default values for restart policy have changed. Restart policy mode defaults to "fail" and the
|
||||||
|
attempts/time interval values have been changed to enable faster server side rescheduling. See
|
||||||
|
[restart stanza](https://www.nomadproject.io/docs/job-specification/restart.html) for more information.
|
||||||
|
|
||||||
IMPROVEMENTS:
|
IMPROVEMENTS:
|
||||||
* core: A set of features (Autopilot) has been added to allow for automatic operator-friendly management of Nomad servers. For more information about Autopilot, see the [Autopilot Guide](https://www.nomadproject.io/guides/cluster/autopilot.html). [[GH-3670](https://github.com/hashicorp/nomad/pull/3670)]
|
* core: A set of features (Autopilot) has been added to allow for automatic operator-friendly management of Nomad servers. For more information about Autopilot, see the [Autopilot Guide](https://www.nomadproject.io/guides/cluster/autopilot.html). [[GH-3670](https://github.com/hashicorp/nomad/pull/3670)]
|
||||||
|
|
16
api/tasks.go
16
api/tasks.go
|
@ -340,17 +340,17 @@ func (g *TaskGroup) Canonicalize(job *Job) {
|
||||||
switch *job.Type {
|
switch *job.Type {
|
||||||
case "service", "system":
|
case "service", "system":
|
||||||
defaultRestartPolicy = &RestartPolicy{
|
defaultRestartPolicy = &RestartPolicy{
|
||||||
Delay: helper.TimeToPtr(15 * time.Second),
|
Delay: helper.TimeToPtr(structs.DefaultServiceJobRestartPolicy.Delay),
|
||||||
Attempts: helper.IntToPtr(2),
|
Attempts: helper.IntToPtr(structs.DefaultServiceJobRestartPolicy.Attempts),
|
||||||
Interval: helper.TimeToPtr(1 * time.Minute),
|
Interval: helper.TimeToPtr(structs.DefaultServiceJobRestartPolicy.Interval),
|
||||||
Mode: helper.StringToPtr("delay"),
|
Mode: helper.StringToPtr(structs.DefaultServiceJobRestartPolicy.Mode),
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
defaultRestartPolicy = &RestartPolicy{
|
defaultRestartPolicy = &RestartPolicy{
|
||||||
Delay: helper.TimeToPtr(15 * time.Second),
|
Delay: helper.TimeToPtr(structs.DefaultBatchJobRestartPolicy.Delay),
|
||||||
Attempts: helper.IntToPtr(15),
|
Attempts: helper.IntToPtr(structs.DefaultBatchJobRestartPolicy.Attempts),
|
||||||
Interval: helper.TimeToPtr(7 * 24 * time.Hour),
|
Interval: helper.TimeToPtr(structs.DefaultBatchJobRestartPolicy.Interval),
|
||||||
Mode: helper.StringToPtr("delay"),
|
Mode: helper.StringToPtr(structs.DefaultBatchJobRestartPolicy.Mode),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -183,18 +183,18 @@ job "example" {
|
||||||
#
|
#
|
||||||
restart {
|
restart {
|
||||||
# The number of attempts to run the job within the specified interval.
|
# The number of attempts to run the job within the specified interval.
|
||||||
attempts = 10
|
attempts = 2
|
||||||
interval = "5m"
|
interval = "30m"
|
||||||
|
|
||||||
# The "delay" parameter specifies the duration to wait before restarting
|
# The "delay" parameter specifies the duration to wait before restarting
|
||||||
# a task after it has failed.
|
# a task after it has failed.
|
||||||
delay = "25s"
|
delay = "15s"
|
||||||
|
|
||||||
# The "mode" parameter controls what happens when a task has restarted
|
# The "mode" parameter controls what happens when a task has restarted
|
||||||
# "attempts" times within the interval. "delay" mode delays the next
|
# "attempts" times within the interval. "delay" mode delays the next
|
||||||
# restart until the next interval. "fail" mode does not restart the task
|
# restart until the next interval. "fail" mode does not restart the task
|
||||||
# if "attempts" has been hit within the interval.
|
# if "attempts" has been hit within the interval.
|
||||||
mode = "delay"
|
mode = "fail"
|
||||||
}
|
}
|
||||||
|
|
||||||
# The "ephemeral_disk" stanza instructs Nomad to utilize an ephemeral disk
|
# The "ephemeral_disk" stanza instructs Nomad to utilize an ephemeral disk
|
||||||
|
|
|
@ -2510,17 +2510,17 @@ func (d *DispatchPayloadConfig) Validate() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
defaultServiceJobRestartPolicy = RestartPolicy{
|
DefaultServiceJobRestartPolicy = RestartPolicy{
|
||||||
Delay: 15 * time.Second,
|
Delay: 15 * time.Second,
|
||||||
Attempts: 2,
|
Attempts: 2,
|
||||||
Interval: 1 * time.Minute,
|
Interval: 30 * time.Minute,
|
||||||
Mode: RestartPolicyModeDelay,
|
Mode: RestartPolicyModeFail,
|
||||||
}
|
}
|
||||||
defaultBatchJobRestartPolicy = RestartPolicy{
|
DefaultBatchJobRestartPolicy = RestartPolicy{
|
||||||
Delay: 15 * time.Second,
|
Delay: 15 * time.Second,
|
||||||
Attempts: 15,
|
Attempts: 3,
|
||||||
Interval: 7 * 24 * time.Hour,
|
Interval: 24 * time.Hour,
|
||||||
Mode: RestartPolicyModeDelay,
|
Mode: RestartPolicyModeFail,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -2604,10 +2604,10 @@ func (r *RestartPolicy) Validate() error {
|
||||||
func NewRestartPolicy(jobType string) *RestartPolicy {
|
func NewRestartPolicy(jobType string) *RestartPolicy {
|
||||||
switch jobType {
|
switch jobType {
|
||||||
case JobTypeService, JobTypeSystem:
|
case JobTypeService, JobTypeSystem:
|
||||||
rp := defaultServiceJobRestartPolicy
|
rp := DefaultServiceJobRestartPolicy
|
||||||
return &rp
|
return &rp
|
||||||
case JobTypeBatch:
|
case JobTypeBatch:
|
||||||
rp := defaultBatchJobRestartPolicy
|
rp := DefaultBatchJobRestartPolicy
|
||||||
return &rp
|
return &rp
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -91,10 +91,10 @@ Below is the JSON representation of the job outputted by `$ nomad init`:
|
||||||
"Leader": false
|
"Leader": false
|
||||||
}],
|
}],
|
||||||
"RestartPolicy": {
|
"RestartPolicy": {
|
||||||
"Interval": 300000000000,
|
"Interval": 1800000000000,
|
||||||
"Attempts": 10,
|
"Attempts": 2,
|
||||||
"Delay": 25000000000,
|
"Delay": 15000000000,
|
||||||
"Mode": "delay"
|
"Mode": "fail"
|
||||||
},
|
},
|
||||||
"EphemeralDisk": {
|
"EphemeralDisk": {
|
||||||
"SizeMB": 300
|
"SizeMB": 300
|
||||||
|
|
|
@ -17,7 +17,8 @@ description: |-
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
The `restart` stanza configures a group's behavior on task failure.
|
The `restart` stanza configures a group's behavior on task failure. Restarts
|
||||||
|
happen on the client that is running the task.
|
||||||
|
|
||||||
```hcl
|
```hcl
|
||||||
job "docs" {
|
job "docs" {
|
||||||
|
@ -62,7 +63,7 @@ defaults by job type:
|
||||||
attempts = 15
|
attempts = 15
|
||||||
delay = "15s"
|
delay = "15s"
|
||||||
interval = "168h"
|
interval = "168h"
|
||||||
mode = "delay"
|
mode = "fail"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -73,7 +74,7 @@ defaults by job type:
|
||||||
interval = "1m"
|
interval = "1m"
|
||||||
attempts = 2
|
attempts = 2
|
||||||
delay = "15s"
|
delay = "15s"
|
||||||
mode = "delay"
|
mode = "fail"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue