From 8e575cab6ed3dcc3401c2db1ae8b69217a15b78f Mon Sep 17 00:00:00 2001
From: chris bell
Date: Fri, 5 Jun 2026 17:27:05 -0500
Subject: [PATCH] Sanity check to make sure servers stop gracefully if daemon closes

---
Review notes (this section is not part of the commit message):
* Line structure of the patch restored. Indentation and alignment whitespace
  in context lines is reconstructed per gofmt; verify against daemon.go
  before applying (or apply with --ignore-whitespace).
* The error from server.Close() is now reported instead of discarded.
* shutdownAllRunningServers now returns how many instances were not
  confirmed stopped, so the final log line no longer claims a clean
  shutdown after a timeout or a failed /stop.
* The name snapshot uses RLock, matching the read-only poll below it.
* The post-image blob id in the index line predates these review edits.

 daemon.go | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 87 insertions(+), 7 deletions(-)

diff --git a/daemon.go b/daemon.go
index f1d651b..a59fab6 100644
--- a/daemon.go
+++ b/daemon.go
@@ -7,18 +7,15 @@ import (
 	"os"
 	"os/signal"
 	"path/filepath"
+	"sync"
 	"syscall"
+	"time"
 )
 
 type CommandPayload struct {
 	Command string `json:"command"`
 }
 
-type DaemonServer struct {
-	cfg         *AppConfig
-	procManager *ProcessManager
-}
-
 type InstanceStatusResponse struct {
 	Name    string `json:"name"`
 	Version string `json:"version"`
@@ -26,6 +23,11 @@ type InstanceStatusResponse struct {
 	Status  string `json:"status"`
 }
 
+type DaemonServer struct {
+	cfg         *AppConfig
+	procManager *ProcessManager
+}
+
 func StartDaemon(cfg *AppConfig) error {
 	ds := &DaemonServer{
 		cfg:         cfg,
@@ -50,15 +52,93 @@ func StartDaemon(cfg *AppConfig) error {
 	go func() {
 		fmt.Printf("Engine daemon actively listening on http://%s\n", cfg.Daemon.ListenAddress)
 		if err := server.ListenAndServe(); err != http.ErrServerClosed {
-			fmt.Errorf("Daemon runtime failure: %v", err)
+			fmt.Printf("Daemon runtime failure: %v\n", err)
 		}
 	}()
 
 	<-sigChan
-	fmt.Println("\nShutting down supervisor daemon threads...")
+	fmt.Println("\n[Daemon] Shutdown signal caught! Initializing graceful teardown sequence...")
+	// Stop serving API requests before tearing the instances down.
+	if err := server.Close(); err != nil {
+		fmt.Printf("[Daemon Error] Could not close HTTP server: %v\n", err)
+	}
+	if failed := ds.shutdownAllRunningServers(); failed > 0 {
+		fmt.Printf("[Daemon Warning] %d instance(s) did not stop cleanly. Exiting supervisor.\n", failed)
+	} else {
+		fmt.Println("[Daemon] All threads gracefully shut down. Exiting supervisor cleanly.")
+	}
 	return nil
 }
 
+// shutdownAllRunningServers sends "/stop" to every instance listed in
+// procManager.ActiveInstances and waits for each one to leave that map,
+// giving up on an instance after 15 seconds. Instances are stopped
+// concurrently. It returns how many instances were not confirmed stopped.
+func (ds *DaemonServer) shutdownAllRunningServers() int {
+	// The map is only read here, so a read lock is enough for the snapshot.
+	ds.procManager.RLock()
+	activeNames := make([]string, 0, len(ds.procManager.ActiveInstances))
+	for name := range ds.procManager.ActiveInstances {
+		activeNames = append(activeNames, name)
+	}
+	ds.procManager.RUnlock()
+
+	if len(activeNames) == 0 {
+		fmt.Println("[Daemon] No active server instances to tear down.")
+		return 0
+	}
+
+	fmt.Printf("[Daemon] Flushing stop instructions to %d running instance(s)...\n", len(activeNames))
+
+	// Each goroutine writes only its own slot, so the slice needs no lock.
+	stopped := make([]bool, len(activeNames))
+	var wg sync.WaitGroup
+	for i, name := range activeNames {
+		wg.Add(1)
+		go func(slot int, instanceName string) {
+			defer wg.Done()
+
+			fmt.Printf("[Daemon] Sending graceful /stop to instance '%s'...\n", instanceName)
+			if err := ds.procManager.SendCommand(instanceName, "/stop"); err != nil {
+				fmt.Printf("[Daemon Error] Could not send stop to %s: %v\n", instanceName, err)
+				return
+			}
+
+			// Poll until the instance disappears from ActiveInstances.
+			ticker := time.NewTicker(250 * time.Millisecond)
+			defer ticker.Stop()
+			timeout := time.After(15 * time.Second)
+
+			for {
+				select {
+				case <-ticker.C:
+					ds.procManager.RLock()
+					_, running := ds.procManager.ActiveInstances[instanceName]
+					ds.procManager.RUnlock()
+
+					if !running {
+						fmt.Printf("[Daemon] Instance '%s' has successfully exited.\n", instanceName)
+						stopped[slot] = true
+						return
+					}
+				case <-timeout:
+					fmt.Printf("[Daemon Warning] Instance '%s' timed out while trying to stop safely.\n", instanceName)
+					return
+				}
+			}
+		}(i, name)
+	}
+	wg.Wait()
+
+	failed := 0
+	for _, ok := range stopped {
+		if !ok {
+			failed++
+		}
+	}
+	return failed
+}
+
 func (ds *DaemonServer) handleCreate(w http.ResponseWriter, r *http.Request) {
 	if r.Method != http.MethodPost {
 		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)