-
Notifications
You must be signed in to change notification settings - Fork 656
Fix container auto-delete on rapid stop/start #841
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -90,19 +90,12 @@ public actor ContainersService { | |
| ) | ||
| ) | ||
| results[config.id] = state | ||
| let plugin = runtimePlugins.first { $0.name == config.runtimeHandler } | ||
| guard let plugin else { | ||
| guard runtimePlugins.first(where: { $0.name == config.runtimeHandler }) != nil else { | ||
| throw ContainerizationError( | ||
| .internalError, | ||
| message: "failed to find runtime plugin \(config.runtimeHandler)" | ||
| ) | ||
| } | ||
| try Self.registerService( | ||
| plugin: plugin, | ||
| loader: loader, | ||
| configuration: config, | ||
| path: dir | ||
| ) | ||
| } catch { | ||
| try? FileManager.default.removeItem(at: dir) | ||
| log.warning("failed to load container bundle at \(dir.path)") | ||
|
|
@@ -159,10 +152,7 @@ public actor ContainersService { | |
| ) | ||
| } | ||
|
|
||
| let runtimePlugin = self.runtimePlugins.filter { | ||
| $0.name == configuration.runtimeHandler | ||
| }.first | ||
| guard let runtimePlugin else { | ||
| guard self.runtimePlugins.first(where: { $0.name == configuration.runtimeHandler }) != nil else { | ||
| throw ContainerizationError( | ||
| .notFound, | ||
| message: "unable to locate runtime plugin \(configuration.runtimeHandler)" | ||
|
|
@@ -185,13 +175,6 @@ public actor ContainersService { | |
| try bundle.setContainerRootFs(cloning: imageFs) | ||
| try bundle.write(filename: "options.json", value: options) | ||
|
|
||
| try Self.registerService( | ||
| plugin: runtimePlugin, | ||
| loader: self.pluginLoader, | ||
| configuration: configuration, | ||
| path: path | ||
| ) | ||
|
|
||
| let snapshot = ContainerSnapshot( | ||
| configuration: configuration, | ||
| status: .stopped, | ||
|
|
@@ -223,6 +206,16 @@ public actor ContainersService { | |
| return | ||
| } | ||
|
|
||
| let path = self.containerRoot.appendingPathComponent(id) | ||
| let bundle = ContainerClient.Bundle(path: path) | ||
| let config = try bundle.configuration | ||
| try Self.registerService( | ||
| plugin: self.runtimePlugins.first { $0.name == config.runtimeHandler }!, | ||
| loader: self.pluginLoader, | ||
| configuration: config, | ||
| path: path | ||
| ) | ||
|
|
||
| let runtime = state.snapshot.configuration.runtimeHandler | ||
| let sandboxClient = try await SandboxClient.create( | ||
| id: id, | ||
|
|
@@ -457,15 +450,23 @@ public actor ContainersService { | |
|
|
||
| await self.exitMonitor.stopTracking(id: id) | ||
|
|
||
| // Try and shutdown the runtime helper. | ||
| do { | ||
| self.log.info("Shutting down sandbox service for \(id)") | ||
| // Shutdown and deregister the sandbox service | ||
| self.log.info("Shutting down sandbox service for \(id)") | ||
|
|
||
| let client = try state.getClient() | ||
| try await client.shutdown() | ||
| } catch { | ||
| self.log.error("failed to shutdown sandbox service for \(id): \(error)") | ||
| } | ||
| let path = self.containerRoot.appendingPathComponent(id) | ||
| let bundle = ContainerClient.Bundle(path: path) | ||
| let config = try bundle.configuration | ||
| let label = Self.fullLaunchdServiceLabel( | ||
| runtimeName: config.runtimeHandler, | ||
| instanceId: id | ||
| ) | ||
|
|
||
| let client = try state.getClient() | ||
| try await client.shutdown() | ||
|
Comment on lines
+464
to
+465
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I still think shutdown shouldn't be fatal, and we can leave what we had, as it's mostly best-effort cleanup on the sandbox service's end. I just meant that we shouldn't ignore all of the errors, as your original change did.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We're in a weird spot if deregistration fails, though. Curious about your thoughts on how to handle that.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ahh, if deregistration fails, we could catch the error and, as a fallback, attempt to kill the process with SIGKILL and then deregister again? |
||
|
|
||
| // Deregister the service, launchd will terminate the process | ||
| try ServiceManager.deregister(fullServiceLabel: label) | ||
| self.log.info("Deregistered sandbox service for \(id)") | ||
|
|
||
| state.snapshot.status = .stopped | ||
| state.snapshot.networks = [] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -272,15 +272,6 @@ public actor SandboxService { | |
| case .created, .stopped(_), .stopping: | ||
| await self.setState(.shuttingDown) | ||
|
|
||
| Task { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it that containers won't ever stop gracefully anymore? To ensure containers and attached volumes preserve written data, don't we want something like:
This isn't unlike what launchd does for macOS services
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Truthfully, we should ensure the container (singular, as that's all we support currently, but this can be expanded to whatever N is) is dead in shutdown. If the sandbox service is still servicing any containers, we should return an error. Shutdown should solely be a final little handshake to clean up some possible resources before getting killed.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We should return an error for what? That the container cannot be stopped? What should the UX be for How best do we handle cases where a container (or pod) workload processes don't shut down in a timely manner? If
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No, if there are still containers running in the handler for |
||
| do { | ||
| try await Task.sleep(for: .seconds(5)) | ||
| } catch { | ||
| self.log.error("failed to sleep before shutting down SandboxService: \(error)") | ||
| } | ||
| self.log.info("Shutting down SandboxService") | ||
| exit(0) | ||
| } | ||
| default: | ||
| throw ContainerizationError( | ||
| .invalidState, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: more concise here and on L155: