Sanity check to make sure servers stop gracefully if daemon closes

This commit is contained in:
2026-06-05 17:27:05 -05:00
parent d29d51cede
commit 8e575cab6e

View File

@@ -7,18 +7,15 @@ import (
"os" "os"
"os/signal" "os/signal"
"path/filepath" "path/filepath"
"sync"
"syscall" "syscall"
"time"
) )
type CommandPayload struct { type CommandPayload struct {
Command string `json:"command"` Command string `json:"command"`
} }
type DaemonServer struct {
cfg *AppConfig
procManager *ProcessManager
}
type InstanceStatusResponse struct { type InstanceStatusResponse struct {
Name string `json:"name"` Name string `json:"name"`
Version string `json:"version"` Version string `json:"version"`
@@ -26,6 +23,11 @@ type InstanceStatusResponse struct {
Status string `json:"status"` Status string `json:"status"`
} }
// DaemonServer holds the state shared by the daemon's methods: the
// application configuration and the process manager through which running
// instances are tracked and sent commands.
type DaemonServer struct {
// cfg is the application configuration supplied to StartDaemon.
cfg *AppConfig
// procManager is the process manager consulted for ActiveInstances and used
// to send commands (such as "/stop") to instances.
procManager *ProcessManager
}
func StartDaemon(cfg *AppConfig) error { func StartDaemon(cfg *AppConfig) error {
ds := &DaemonServer{ ds := &DaemonServer{
cfg: cfg, cfg: cfg,
@@ -50,15 +52,73 @@ func StartDaemon(cfg *AppConfig) error {
go func() { go func() {
fmt.Printf("Engine daemon actively listening on http://%s\n", cfg.Daemon.ListenAddress) fmt.Printf("Engine daemon actively listening on http://%s\n", cfg.Daemon.ListenAddress)
if err := server.ListenAndServe(); err != http.ErrServerClosed { if err := server.ListenAndServe(); err != http.ErrServerClosed {
fmt.Errorf("Daemon runtime failure: %v", err) fmt.Printf("Daemon runtime failure: %v\n", err)
} }
}() }()
<-sigChan <-sigChan
fmt.Println("\nShutting down supervisor daemon threads...") fmt.Println("\n[Daemon] Shutdown signal caught! Initializing graceful teardown sequence...")
_ = server.Close()
ds.shutdownAllRunningServers()
fmt.Println("[Daemon] All threads gracefully shut down. Exiting supervisor cleanly.")
return nil return nil
} }
// shutdownAllRunningServers sends "/stop" to every instance currently present
// in the process manager's ActiveInstances and blocks until each one has either
// disappeared from ActiveInstances or exceeded its shutdown deadline.
//
// Every instance is handled in its own goroutine so a slow instance does not
// delay the others. If the "/stop" command cannot be delivered, the failure is
// logged and that instance is not waited on. The method returns only after all
// goroutines have finished; it reports progress on stdout and returns nothing.
func (ds *DaemonServer) shutdownAllRunningServers() {
	const (
		// pollInterval is how often ActiveInstances is re-checked per instance.
		pollInterval = 250 * time.Millisecond
		// stopTimeout is the longest a single instance is waited on.
		stopTimeout = 15 * time.Second
	)

	// Snapshot the names under the shared (read) lock: the map is only read
	// here, so the exclusive lock is unnecessary. The lock is released before
	// any waiting happens (presumably the process manager removes entries as
	// instances exit and needs the lock to do so — confirm).
	ds.procManager.RLock()
	activeNames := make([]string, 0, len(ds.procManager.ActiveInstances))
	for name := range ds.procManager.ActiveInstances {
		activeNames = append(activeNames, name)
	}
	ds.procManager.RUnlock()

	if len(activeNames) == 0 {
		fmt.Println("[Daemon] No active server instances to tear down.")
		return
	}

	fmt.Printf("[Daemon] Flushing stop instructions to %d running instance(s)...\n", len(activeNames))

	var wg sync.WaitGroup
	for _, name := range activeNames {
		wg.Add(1)
		// The name is passed as an argument so each goroutine owns its copy
		// regardless of the Go version's loop-variable semantics.
		go func(instanceName string) {
			defer wg.Done()

			fmt.Printf("[Daemon] Sending graceful /stop to instance '%s'...\n", instanceName)
			if err := ds.procManager.SendCommand(instanceName, "/stop"); err != nil {
				fmt.Printf("[Daemon Error] Could not send stop to %s: %v\n", instanceName, err)
				return
			}

			ticker := time.NewTicker(pollInterval)
			defer ticker.Stop()
			// Created once, outside the loop, so the deadline is absolute
			// rather than being reset on every poll.
			timeout := time.After(stopTimeout)

			for {
				select {
				case <-ticker.C:
					ds.procManager.RLock()
					_, running := ds.procManager.ActiveInstances[instanceName]
					ds.procManager.RUnlock()
					if !running {
						fmt.Printf("[Daemon] Instance '%s' has successfully exited.\n", instanceName)
						return
					}
				case <-timeout:
					fmt.Printf("[Daemon Warning] Instance '%s' timed out while trying to stop safely.\n", instanceName)
					return
				}
			}
		}(name)
	}
	wg.Wait()
}
func (ds *DaemonServer) handleCreate(w http.ResponseWriter, r *http.Request) { func (ds *DaemonServer) handleCreate(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost { if r.Method != http.MethodPost {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)