Merge pull request #3821 from hashicorp/f-restart-stanza-fail-default

Change the default mode for client side restarts to fail from delay
This commit is contained in:
Preetha 2018-02-01 16:39:04 -06:00 committed by GitHub
commit 98f6872c85
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 32 additions and 28 deletions

View File

@@ -4,6 +4,9 @@ __BACKWARDS INCOMPATIBILITIES:__
* discovery: Prevent absolute URLs in check paths. The documentation indicated * discovery: Prevent absolute URLs in check paths. The documentation indicated
that absolute URLs are not allowed, but it was not enforced. Absolute URLs that absolute URLs are not allowed, but it was not enforced. Absolute URLs
in HTTP check paths will now fail to validate. [[GH-3685](https://github.com/hashicorp/nomad/issues/3685)] in HTTP check paths will now fail to validate. [[GH-3685](https://github.com/hashicorp/nomad/issues/3685)]
* jobspec: The default values for the restart policy have changed. The restart policy mode now defaults to "fail", and the
default attempts and interval values have been changed to enable faster server-side rescheduling. See the
[restart stanza](https://www.nomadproject.io/docs/job-specification/restart.html) documentation for more information.
IMPROVEMENTS: IMPROVEMENTS:
* core: A set of features (Autopilot) has been added to allow for automatic operator-friendly management of Nomad servers. For more information about Autopilot, see the [Autopilot Guide](https://www.nomadproject.io/guides/cluster/autopilot.html). [[GH-3670](https://github.com/hashicorp/nomad/pull/3670)] * core: A set of features (Autopilot) has been added to allow for automatic operator-friendly management of Nomad servers. For more information about Autopilot, see the [Autopilot Guide](https://www.nomadproject.io/guides/cluster/autopilot.html). [[GH-3670](https://github.com/hashicorp/nomad/pull/3670)]

View File

@@ -340,17 +340,17 @@ func (g *TaskGroup) Canonicalize(job *Job) {
switch *job.Type { switch *job.Type {
case "service", "system": case "service", "system":
defaultRestartPolicy = &RestartPolicy{ defaultRestartPolicy = &RestartPolicy{
Delay: helper.TimeToPtr(15 * time.Second), Delay: helper.TimeToPtr(structs.DefaultServiceJobRestartPolicy.Delay),
Attempts: helper.IntToPtr(2), Attempts: helper.IntToPtr(structs.DefaultServiceJobRestartPolicy.Attempts),
Interval: helper.TimeToPtr(1 * time.Minute), Interval: helper.TimeToPtr(structs.DefaultServiceJobRestartPolicy.Interval),
Mode: helper.StringToPtr("delay"), Mode: helper.StringToPtr(structs.DefaultServiceJobRestartPolicy.Mode),
} }
default: default:
defaultRestartPolicy = &RestartPolicy{ defaultRestartPolicy = &RestartPolicy{
Delay: helper.TimeToPtr(15 * time.Second), Delay: helper.TimeToPtr(structs.DefaultBatchJobRestartPolicy.Delay),
Attempts: helper.IntToPtr(15), Attempts: helper.IntToPtr(structs.DefaultBatchJobRestartPolicy.Attempts),
Interval: helper.TimeToPtr(7 * 24 * time.Hour), Interval: helper.TimeToPtr(structs.DefaultBatchJobRestartPolicy.Interval),
Mode: helper.StringToPtr("delay"), Mode: helper.StringToPtr(structs.DefaultBatchJobRestartPolicy.Mode),
} }
} }

View File

@@ -183,18 +183,18 @@ job "example" {
# #
restart { restart {
# The number of attempts to run the job within the specified interval. # The number of attempts to run the job within the specified interval.
attempts = 10 attempts = 2
interval = "5m" interval = "15s"
# The "delay" parameter specifies the duration to wait before restarting # The "delay" parameter specifies the duration to wait before restarting
# a task after it has failed. # a task after it has failed.
delay = "25s" delay = "15s"
# The "mode" parameter controls what happens when a task has restarted # The "mode" parameter controls what happens when a task has restarted
# "attempts" times within the interval. "delay" mode delays the next # "attempts" times within the interval. "delay" mode delays the next
# restart until the next interval. "fail" mode does not restart the task # restart until the next interval. "fail" mode does not restart the task
# if "attempts" has been hit within the interval. # if "attempts" has been hit within the interval.
mode = "delay" mode = "fail"
} }
# The "ephemeral_disk" stanza instructs Nomad to utilize an ephemeral disk # The "ephemeral_disk" stanza instructs Nomad to utilize an ephemeral disk

View File

@@ -2510,17 +2510,17 @@ func (d *DispatchPayloadConfig) Validate() error {
} }
var ( var (
defaultServiceJobRestartPolicy = RestartPolicy{ DefaultServiceJobRestartPolicy = RestartPolicy{
Delay: 15 * time.Second, Delay: 15 * time.Second,
Attempts: 2, Attempts: 2,
Interval: 1 * time.Minute, Interval: 30 * time.Minute,
Mode: RestartPolicyModeDelay, Mode: RestartPolicyModeFail,
} }
defaultBatchJobRestartPolicy = RestartPolicy{ DefaultBatchJobRestartPolicy = RestartPolicy{
Delay: 15 * time.Second, Delay: 15 * time.Second,
Attempts: 15, Attempts: 3,
Interval: 7 * 24 * time.Hour, Interval: 24 * time.Hour,
Mode: RestartPolicyModeDelay, Mode: RestartPolicyModeFail,
} }
) )
@@ -2604,10 +2604,10 @@ func (r *RestartPolicy) Validate() error {
func NewRestartPolicy(jobType string) *RestartPolicy { func NewRestartPolicy(jobType string) *RestartPolicy {
switch jobType { switch jobType {
case JobTypeService, JobTypeSystem: case JobTypeService, JobTypeSystem:
rp := defaultServiceJobRestartPolicy rp := DefaultServiceJobRestartPolicy
return &rp return &rp
case JobTypeBatch: case JobTypeBatch:
rp := defaultBatchJobRestartPolicy rp := DefaultBatchJobRestartPolicy
return &rp return &rp
} }
return nil return nil

View File

@@ -91,10 +91,10 @@ Below is the JSON representation of the job outputted by `$ nomad init`:
"Leader": false "Leader": false
}], }],
"RestartPolicy": { "RestartPolicy": {
"Interval": 300000000000, "Interval": 1800000000000,
"Attempts": 10, "Attempts": 2,
"Delay": 25000000000, "Delay": 15000000000,
"Mode": "delay" "Mode": "fail"
}, },
"EphemeralDisk": { "EphemeralDisk": {
"SizeMB": 300 "SizeMB": 300

View File

@@ -17,7 +17,8 @@ description: |-
</tr> </tr>
</table> </table>
The `restart` stanza configures a group's behavior on task failure. The `restart` stanza configures a group's behavior on task failure. Restarts
happen on the client that is running the task.
```hcl ```hcl
job "docs" { job "docs" {
@@ -62,7 +63,7 @@ defaults by job type:
attempts = 15 attempts = 15
delay = "15s" delay = "15s"
interval = "168h" interval = "168h"
mode = "delay" mode = "fail"
} }
``` ```
@@ -73,7 +74,7 @@ defaults by job type:
interval = "1m" interval = "1m"
attempts = 2 attempts = 2
delay = "15s" delay = "15s"
mode = "delay" mode = "fail"
} }
``` ```