diff --git a/cloud-init/user-data.yaml.tpl b/cloud-init/user-data.yaml.tpl
index 1bb3b4f..73ef206 100644
--- a/cloud-init/user-data.yaml.tpl
+++ b/cloud-init/user-data.yaml.tpl
@@ -123,6 +123,30 @@ write_files:
       [Install]
       WantedBy=timers.target
 
+  - path: /etc/systemd/system/forgejo-reboot.service
+    content: |
+      [Unit]
+      Description=Apply staged COS updates by rebooting
+
+      [Service]
+      Type=oneshot
+      ExecStart=/sbin/shutdown -r +0
+
+  - path: /etc/systemd/system/forgejo-reboot.timer
+    content: |
+      [Unit]
+      Description=Nightly reboot (lands 30 min after Watchtower so container updates apply first)
+
+      [Timer]
+      OnCalendar=*-*-* 04:30:00
+      # Persistent=false on purpose: a catch-up run would reboot the VM as soon
+      # as the timer is (re)activated after a missed 04:30, e.g. right after a
+      # late boot or a manual "systemctl enable --now".
+      Persistent=false
+
+      [Install]
+      WantedBy=timers.target
+
 runcmd:
   - mkdir -p /mnt/disks/forgejo-data
   - if ! blkid /dev/disk/by-id/google-forgejo-data; then mkfs.ext4 -F /dev/disk/by-id/google-forgejo-data; fi
@@ -130,3 +154,4 @@ runcmd:
   - mkdir -p /mnt/disks/forgejo-data/forgejo /mnt/disks/forgejo-data/caddy
   - systemctl enable --now forgejo-stack.service
   - systemctl enable --now forgejo-backup.timer
+  - systemctl enable --now forgejo-reboot.timer
diff --git a/docs/runbook.md b/docs/runbook.md
index 8cef969..c75e3ca 100644
--- a/docs/runbook.md
+++ b/docs/runbook.md
@@ -39,6 +39,30 @@ Single container only:
 docker restart forgejo
 ```
 
+## How updates work
+
+| Layer | Mechanism | Schedule |
+|---|---|---|
+| Host OS (COS) | `cos-update-strategy=update_enabled` stages updates onto the inactive A/B partition; reboot applies them. | Applied on the nightly reboot below. |
+| Forgejo & Caddy patch updates | Watchtower pulls new image digests for the pinned tags (`forgejo:11`, `caddy:2-alpine`). | 04:00 UTC daily (inside the watchtower container; cron `0 0 4 * * *`). |
+| Forgejo major version (e.g. 11→12) | Bump `var.forgejo_image` in tfvars and `terraform apply` — VM is replaced, data disk persists, first boot runs DB migrations. | Manual / deliberate. |
+| Watchtower itself | Not pinned: `containrrr/watchtower` with no tag (= `latest`), self-updates with `--cleanup`. | 04:00 UTC daily. |
+| Backups | `forgejo-backup.service` via timer. | 03:30 UTC daily. |
+| Reboot to apply COS updates | `forgejo-reboot.service` runs `shutdown -r +0`. Containers come back via `forgejo-stack.service` + `--restart=unless-stopped`. | 04:30 UTC daily. ~30–60s downtime. |
+
+Nightly order: backup at 03:30 → container update check at 04:00 → reboot at 04:30. Backups always land before any reboot, so a bad update can be rolled back from GCS.
+
+### Disable the nightly reboot
+
+If the reboot ever causes trouble, turn it off without affecting backups or container updates:
+
+```bash
+gcloud compute ssh forgejo --zone=us-east1-b --tunnel-through-iap \
+  --command='sudo systemctl disable --now forgejo-reboot.timer'
+```
+
+Re-enable with `enable --now` instead of `disable --now`. Cloud-init will re-enable it on the next VM replacement regardless.
+
 ## Update containers immediately
 
 Watchtower pulls new images at 04:00 UTC by default. To force now:
@@ -106,5 +130,5 @@ Rotating `SECRET_KEY` invalidates 2FA and some encrypted DB fields. Read the For
 
 ## Cost / billing watch
 
-- Set a project budget alert at $10/month in Cloud Billing (manual; not in Terraform by design — the budget API requires the billing-account-admin role).
+- A $10/month project budget is managed by `terraform/budget.tf`. Email alerts at 50%, 90%, 100% (current spend) and 100% (forecasted) go to `admin_email`. Adjust the threshold via `budget_amount_usd` in tfvars.
 - Skim the billing report monthly. Egress is the most likely surprise.
diff --git a/terraform/budget.tf b/terraform/budget.tf
new file mode 100644
index 0000000..3b31b28
--- /dev/null
+++ b/terraform/budget.tf
@@ -0,0 +1,63 @@
+# Budget alerting for the Forgejo project: enables the Billing Budgets API,
+# creates an email notification channel, and defines the monthly budget.
+
+resource "google_project_service" "billingbudgets" {
+  service            = "billingbudgets.googleapis.com"
+  disable_on_destroy = false
+}
+
+# Look up the numeric project number: the budget filter below is documented
+# as taking `projects/{project_number}` rather than the project ID.
+data "google_project" "budget_scope" {
+  project_id = var.project_id
+}
+
+resource "google_monitoring_notification_channel" "email" {
+  display_name = "Forgejo budget alerts"
+  type         = "email"
+  labels = {
+    email_address = var.admin_email
+  }
+}
+
+resource "google_billing_budget" "forgejo" {
+  billing_account = var.billing_account
+  display_name    = "Forgejo project (${var.project_id})"
+
+  budget_filter {
+    projects = ["projects/${data.google_project.budget_scope.number}"]
+  }
+
+  amount {
+    specified_amount {
+      # `units` is the whole-dollar amount as a string; budget_amount_usd is validated to be an integer.
+      currency_code = "USD"
+      units         = tostring(var.budget_amount_usd)
+    }
+  }
+
+  # Alerts on current spend at 50%, 90% and 100% of the budget.
+  threshold_rules {
+    threshold_percent = 0.5
+  }
+  threshold_rules {
+    threshold_percent = 0.9
+  }
+  threshold_rules {
+    threshold_percent = 1.0
+  }
+  # Additional alert when forecasted spend reaches 100% of the budget.
+  threshold_rules {
+    threshold_percent = 1.0
+    spend_basis       = "FORECASTED_SPEND"
+  }
+
+  all_updates_rule {
+    monitoring_notification_channels = [
+      google_monitoring_notification_channel.email.id,
+    ]
+    disable_default_iam_recipients = false
+  }
+
+  depends_on = [google_project_service.billingbudgets]
+}
diff --git a/terraform/variables.tf b/terraform/variables.tf
index 292e2bd..9f3e81d 100644
--- a/terraform/variables.tf
+++ b/terraform/variables.tf
@@ -22,7 +22,23 @@ variable "domain" {
 
 variable "admin_email" {
   type        = string
-  description = "Google account that gets IAP SSH access"
+  description = "Google account that gets IAP SSH access and budget alert emails"
+}
+
+variable "billing_account" {
+  type        = string
+  description = "Billing account ID (format: XXXXXX-XXXXXX-XXXXXX) for the budget alert"
+}
+
+variable "budget_amount_usd" {
+  type        = number
+  default     = 10
+  description = "Monthly budget in USD (whole dollars); alerts fire at 50%, 90%, 100% of current spend and 100% of forecasted spend"
+
+  validation {
+    condition     = var.budget_amount_usd > 0 && floor(var.budget_amount_usd) == var.budget_amount_usd
+    error_message = "The budget_amount_usd value must be a positive whole number of dollars."
+  }
 }
 
 variable "forgejo_image" {
diff --git a/terraform/versions.tf b/terraform/versions.tf
index 23190bd..dc87d67 100644
--- a/terraform/versions.tf
+++ b/terraform/versions.tf
@@ -10,7 +10,12 @@ terraform {
 }
 
 provider "google" {
-  project = var.project_id
-  region  = var.region
-  zone    = var.zone
+  # user_project_override + billing_project make API calls bill/quota against
+  # this project; the provider docs require both for google_billing_budget
+  # when applying with user credentials.
+  project               = var.project_id
+  region                = var.region
+  zone                  = var.zone
+  user_project_override = true
+  billing_project       = var.project_id
 }