driver/docker: Don't pull InfraImage if it exists (#13265)

Co-authored-by: James Rasell <jrasell@hashicorp.com>
This commit is contained in:
Ted Behling 2022-07-07 11:44:06 -04:00 committed by GitHub
parent d6ef3432c5
commit 6a032a54d2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 69 additions and 13 deletions

3
.changelog/13265.txt Normal file
View File

@ -0,0 +1,3 @@
```release-note:improvement
driver/docker: Eliminate excess Docker registry pulls for the `infra_image` when it already exists locally.
```

View File

@ -28,16 +28,7 @@ func (d *Driver) CreateNetwork(allocID string, createSpec *drivers.NetworkCreate
return nil, false, fmt.Errorf("failed to connect to docker daemon: %s", err)
}
repo, _ := parseDockerImage(d.config.InfraImage)
authOptions, err := firstValidAuth(repo, []authBackend{
authFromDockerConfig(d.config.Auth.Config),
authFromHelper(d.config.Auth.Helper),
})
if err != nil {
d.logger.Debug("auth failed for infra container image pull", "image", d.config.InfraImage, "error", err)
}
_, err = d.coordinator.PullImage(d.config.InfraImage, authOptions, allocID, noopLogEventFn, d.config.infraImagePullTimeoutDuration, d.config.pullActivityTimeoutDuration)
if err != nil {
if err := d.pullInfraImage(allocID); err != nil {
return nil, false, err
}
@ -101,10 +92,28 @@ func (d *Driver) DestroyNetwork(allocID string, spec *drivers.NetworkIsolationSp
return fmt.Errorf("failed to connect to docker daemon: %s", err)
}
return client.RemoveContainer(docker.RemoveContainerOptions{
if err := client.RemoveContainer(docker.RemoveContainerOptions{
Force: true,
ID: spec.Labels[dockerNetSpecLabelKey],
})
}); err != nil {
return err
}
if d.config.GC.Image {
// The Docker image ID is needed in order to correctly update the image
// reference count. Any error finding this, however, should not result
// in an error shutting down the allocrunner.
dockerImage, err := client.InspectImage(d.config.InfraImage)
if err != nil {
d.logger.Warn("InspectImage failed for infra_image container destroy",
"image", d.config.InfraImage, "error", err)
return nil
}
d.coordinator.RemoveImage(dockerImage.ID, allocID)
}
return nil
}
// createSandboxContainerConfig creates a docker container configuration which
@ -124,3 +133,46 @@ func (d *Driver) createSandboxContainerConfig(allocID string, createSpec *driver
},
}, nil
}
// pullInfraImage conditionally pulls the `infra_image` from the Docker registry
// only if its name uses the "latest" tag or the image doesn't already exist locally.
func (d *Driver) pullInfraImage(allocID string) error {
repo, tag := parseDockerImage(d.config.InfraImage)
// There's a (narrow) time-of-check-time-of-use race here. If we call
// InspectImage and then a concurrent task shutdown happens before we call
// IncrementImageReference, we could end up removing the image, and it
// would no longer exist by the time we get to PullImage below.
d.coordinator.imageLock.Lock()
if tag != "latest" {
dockerImage, err := client.InspectImage(d.config.InfraImage)
if err != nil {
d.logger.Debug("InspectImage failed for infra_image container pull",
"image", d.config.InfraImage, "error", err)
} else if dockerImage != nil {
// Image exists, so no pull is attempted; just increment its reference
// count and unlock the image lock.
d.coordinator.incrementImageReferenceImpl(dockerImage.ID, d.config.InfraImage, allocID)
d.coordinator.imageLock.Unlock()
return nil
}
}
// At this point we have performed all the image work needed, so unlock. It
// is possible in environments with slow networks that the image pull may
// take a while, so while defer unlock would be best, this allows us to
// remove the lock sooner.
d.coordinator.imageLock.Unlock()
authOptions, err := firstValidAuth(repo, []authBackend{
authFromDockerConfig(d.config.Auth.Config),
authFromHelper(d.config.Auth.Helper),
})
if err != nil {
d.logger.Debug("auth failed for infra_image container pull", "image", d.config.InfraImage, "error", err)
}
_, err = d.coordinator.PullImage(d.config.InfraImage, authOptions, allocID, noopLogEventFn, d.config.infraImagePullTimeoutDuration, d.config.pullActivityTimeoutDuration)
return err
}

View File

@ -981,7 +981,8 @@ host system.
- `infra_image` - This is the Docker image to use when creating the parent
container necessary when sharing network namespaces between tasks. Defaults to
`gcr.io/google_containers/pause-<goarch>:3.1`.
`gcr.io/google_containers/pause-<goarch>:3.1`. The image will only be pulled from
the container registry if its tag is `latest` or the image doesn't yet exist locally.
- `infra_image_pull_timeout` - A time duration that controls how long Nomad will
wait before cancelling an in-progress pull of the Docker image as specified in