From db56fc58de471bebd182059680c8737442547630 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20M=C3=B6rling?= Date: Thu, 21 May 2026 16:39:01 +0200 Subject: [PATCH] Add deploy failure diagnostics and safer backend health check. Production deploy failed with no backend logs before rollback. Print backend and postgres logs on failure, wait longer for JVM startup, and probe /api/payment/swish-info instead of vehicle lookup (no external scrape). - Document proof-first troubleshooting in README - No volume reset workflow; fix only after reading job logs --- .forgejo/workflows/deploy.yml | 21 +++++++++++++++++---- README.md | 14 ++++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/.forgejo/workflows/deploy.yml b/.forgejo/workflows/deploy.yml index 8ff68af..9bc878b 100644 --- a/.forgejo/workflows/deploy.yml +++ b/.forgejo/workflows/deploy.yml @@ -64,12 +64,12 @@ jobs: - name: Health checks with rollback run: | echo "Waiting for services to start..." - sleep 20 + sleep 30 BACKEND_OK=false - for i in 1 2 3 4 5; do + for i in 1 2 3 4 5 6 7 8 9 10; do if docker run --rm --network bilhej-prod_default curlimages/curl:8.5.0 \ - -s http://bilhej-backend-prod:8080/api/vehicles/ABC123 > /dev/null; then + -sf http://bilhej-backend-prod:8080/api/payment/swish-info > /dev/null; then echo "Backend is healthy" BACKEND_OK=true break @@ -93,12 +93,25 @@ jobs: if [ "$BACKEND_OK" != "true" ] || [ "$FRONTEND_OK" != "true" ]; then echo "" echo "═══════════════════════════════════════════════════" - echo " HEALTH CHECK FAILED — ROLLING BACK DEPLOYMENT" + echo " HEALTH CHECK FAILED — DIAGNOSTICS" + echo "═══════════════════════════════════════════════════" + echo "" + docker compose -p bilhej-prod -f docker-compose.prod.yml ps + echo "" + echo "--- Backend logs ---" + docker logs bilhej-backend-prod 2>&1 | tail -80 || true + echo "" + echo "--- Postgres logs ---" + docker logs bilhej-postgres-prod 2>&1 | tail -30 || true + echo "" + echo "═══════════════════════════════════════════════════" + echo " ROLLING BACK DEPLOYMENT" echo "═══════════════════════════════════════════════════" echo "" docker compose -p bilhej-prod -f docker-compose.prod.yml down echo "" echo "Rolled back. Containers stopped. DB volume preserved." + echo "Read Backend logs above to find the root cause before redeploying." exit 1 fi diff --git a/README.md b/README.md index c3fb243..e4f2a66 100644 --- a/README.md +++ b/README.md @@ -311,6 +311,20 @@ Before the first deploy, complete these steps on the production server (`srvr.nu 3. Enter a version tag (e.g., `v0.1.0`). 4. Click **Run workflow**. +### Deploy failed (backend health check) + +If the job passes the frontend check but the backend never becomes healthy: + +1. Open the failed job log and read **Backend logs** (printed before rollback). +2. Match the error to a fix — do not guess: + - **`password authentication failed`** — DB credentials in the running stack do not match + what Postgres was initialized with; fix credentials or Postgres password to match (only + wipe the volume if you accept losing prod data). + - **`Production requires ADMIN_EMAIL and ADMIN_PASSWORD`** — add those Forgejo secrets. + - **Flyway / migration errors** — fix schema or migration history before redeploying. +3. **DBeaver from your laptop** — prod Postgres binds to `127.0.0.1:5433` on the server only. + Use an SSH tunnel, then host `localhost` port `5433` (not `192.168.0.59` directly). + ### What Happens | Step | Action |