1
0
Fork 0

initial commit

Signed-off-by: Jason Hall <imjasonh@gmail.com>
This commit is contained in:
Jason Hall 2026-05-07 20:02:59 -04:00
commit 4dc1b58f2f
20 changed files with 1398 additions and 0 deletions

17
.gitignore vendored Normal file
View file

@ -0,0 +1,17 @@
# Terraform
**/.terraform/
**/.terraform.lock.hcl
*.tfstate
*.tfstate.*
*.tfplan
crash.log
crash.*.log
# Local variable files (may contain project IDs, domains, etc.)
*.tfvars
# Editor/OS
.DS_Store
.idea/
.vscode/
*.swp

43
README.md Normal file
View file

@ -0,0 +1,43 @@
# forge
Self-hosted [Forgejo](https://forgejo.org/) on GCP. e2-micro VM on Container-Optimized OS, Caddy for HTTPS, IAP for admin SSH, nightly backups to GCS. Targets ~$3–5/month.
See [plan.md](plan.md) for the full design rationale.
## Quick start
```bash
# 1. Set the active project, enable APIs
gcloud config set project YOUR_PROJECT
gcloud services enable compute.googleapis.com secretmanager.googleapis.com \
iap.googleapis.com storage.googleapis.com
# 2. Generate the Forgejo secrets in Secret Manager (one-time)
./scripts/bootstrap-secrets.sh
# 3. Configure Terraform (terraform.tfvars is gitignored)
$EDITOR terraform/terraform.tfvars
# 4. Apply
cd terraform
terraform init
terraform apply
```
Point your domain's A record at the `static_ip` output, then visit `https://<your-domain>` to run the Forgejo installer.
## Day two
- [docs/runbook.md](docs/runbook.md) — admin SSH, container ops, backups, upgrades
- [docs/disaster-recovery.md](docs/disaster-recovery.md) — recovery scenarios
- [plan.md](plan.md) — full design, cost breakdown, security checklist
## Layout
```
terraform/ GCP infrastructure (VM, network, IAM, GCS, optional DNS)
cloud-init/ user-data.yaml.tpl — systemd units that boot Forgejo + Caddy + Watchtower
config/ Caddyfile template (reference; the live copy is embedded in cloud-init)
scripts/ bootstrap-secrets.sh, backup.sh, restore.sh, test-restore.sh
docs/ runbook + disaster recovery
```

View file

@ -0,0 +1,132 @@
#cloud-config
# Notes on Container-Optimized OS (COS):
# - /var is mounted noexec, so executable scripts must live under /var/lib/google
#   (one of the exec-allowed writable paths on COS).
# - Mount units use systemd-escape(1) naming: /mnt/disks/forgejo-data becomes
#   mnt-disks-forgejo\x2ddata.mount. We avoid hardcoding the escaped name in
#   dependencies by using RequiresMountsFor=, which lets systemd resolve it.
write_files:
  # Mount unit for the attached data disk. nofail keeps a missing disk from
  # blocking boot; forgejo-stack.service still waits on the mount itself.
  - path: /etc/systemd/system/mnt-disks-forgejo\x2ddata.mount
    content: |
      [Unit]
      Description=Mount Forgejo data disk
      Before=docker.service
      [Mount]
      What=/dev/disk/by-id/google-forgejo-data
      Where=/mnt/disks/forgejo-data
      Type=ext4
      Options=defaults,nofail
      [Install]
      WantedBy=multi-user.target
  # Caddy reverse-proxy config; Caddy's ACME/TLS state lives on the data disk
  # (the /data volume below), so certs survive VM replacement.
  - path: /var/lib/forgejo/Caddyfile
    content: |
      ${domain} {
        reverse_proxy forgejo:3000
        encode gzip
      }
  # Fetches the two Forgejo secrets from Secret Manager using the VM service
  # account's metadata-server token, and writes a root-only env file in /run.
  - path: /var/lib/google/forgejo/fetch-secrets.sh
    permissions: '0755'
    content: |
      #!/bin/bash
      set -euo pipefail
      TOKEN=$(curl -sf -H "Metadata-Flavor: Google" \
        "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" \
        | python3 -c "import sys,json;print(json.load(sys.stdin)['access_token'])")
      fetch() {
        curl -sf -H "Authorization: Bearer $TOKEN" \
          "https://secretmanager.googleapis.com/v1/projects/${project_id}/secrets/$1/versions/latest:access" \
          | python3 -c "import sys,json,base64;print(base64.b64decode(json.load(sys.stdin)['payload']['data']).decode())"
      }
      mkdir -p /run
      # 077: the env file holds secrets; root-readable only.
      umask 077
      {
        echo "FORGEJO__security__SECRET_KEY=$(fetch forgejo-secret-key)"
        echo "FORGEJO__security__INTERNAL_TOKEN=$(fetch forgejo-internal-token)"
      } > /run/forgejo-secrets.env
  # Oneshot unit that starts the three containers. RequiresMountsFor= pulls
  # in and orders after the data-disk mount without hardcoding its escaped name.
  - path: /etc/systemd/system/forgejo-stack.service
    content: |
      [Unit]
      Description=Forgejo + Caddy + Watchtower
      After=network-online.target docker.service
      RequiresMountsFor=/mnt/disks/forgejo-data
      Wants=network-online.target
      [Service]
      Type=oneshot
      RemainAfterExit=true
      ExecStartPre=/var/lib/google/forgejo/fetch-secrets.sh
      ExecStartPre=-/usr/bin/docker network create web
      ExecStart=/usr/bin/docker run -d --name caddy --network web \
        -p 80:80 -p 443:443 \
        -v /mnt/disks/forgejo-data/caddy:/data \
        -v /var/lib/forgejo/Caddyfile:/etc/caddy/Caddyfile:ro \
        --restart=unless-stopped \
        ${caddy_image}
      ExecStart=/usr/bin/docker run -d --name forgejo --network web \
        -e FORGEJO__server__DISABLE_SSH=true \
        -e FORGEJO__server__ROOT_URL=https://${domain}/ \
        -e FORGEJO__service__DISABLE_REGISTRATION=true \
        -e FORGEJO__database__DB_TYPE=sqlite3 \
        --env-file /run/forgejo-secrets.env \
        -v /mnt/disks/forgejo-data/forgejo:/data \
        --restart=unless-stopped \
        ${forgejo_image}
      ExecStart=/usr/bin/docker run -d --name watchtower \
        -v /var/run/docker.sock:/var/run/docker.sock \
        --restart=unless-stopped \
        containrrr/watchtower --cleanup --schedule "0 0 4 * * *"
      ExecStop=/usr/bin/docker stop watchtower forgejo caddy
      [Install]
      WantedBy=multi-user.target
  # Nightly backup: consistent SQLite snapshot, tarball of the data dir,
  # upload to GCS via a throwaway cloud-sdk container.
  - path: /var/lib/google/forgejo/backup.sh
    permissions: '0755'
    content: |
      #!/bin/bash
      set -euo pipefail
      STAMP=$(date -u +%Y%m%dT%H%M%SZ)
      docker exec forgejo sqlite3 /data/gitea/gitea.db ".backup '/data/gitea/snapshot.db'"
      tar czf /tmp/forgejo-$STAMP.tar.gz -C /mnt/disks/forgejo-data forgejo
      docker run --rm -v /tmp:/tmp google/cloud-sdk:slim \
        gsutil cp /tmp/forgejo-$STAMP.tar.gz gs://${gcs_backup_bucket}/
      rm /tmp/forgejo-$STAMP.tar.gz
      docker exec forgejo rm -f /data/gitea/snapshot.db
  - path: /etc/systemd/system/forgejo-backup.service
    content: |
      [Unit]
      Description=Backup Forgejo to GCS
      After=forgejo-stack.service
      Requires=forgejo-stack.service
      [Service]
      Type=oneshot
      ExecStart=/var/lib/google/forgejo/backup.sh
  - path: /etc/systemd/system/forgejo-backup.timer
    content: |
      [Unit]
      Description=Nightly Forgejo backup
      [Timer]
      OnCalendar=*-*-* 03:30:00
      Persistent=true
      [Install]
      WantedBy=timers.target
runcmd:
  - mkdir -p /mnt/disks/forgejo-data
  # Format only on first boot: blkid exits non-zero when the disk carries no
  # filesystem signature yet.
  - if ! blkid /dev/disk/by-id/google-forgejo-data; then mkfs.ext4 -F /dev/disk/by-id/google-forgejo-data; fi
  - systemctl daemon-reload
  # Activate the mount BEFORE creating the container bind-mount directories.
  # Created earlier, they would land on the boot disk under the mountpoint and
  # be shadowed once the mount activates. systemctl translates the plain path
  # to the escaped mount unit name (mnt-disks-forgejo\x2ddata.mount) for us.
  - systemctl start /mnt/disks/forgejo-data
  - mkdir -p /mnt/disks/forgejo-data/forgejo /mnt/disks/forgejo-data/caddy
  - systemctl enable --now forgejo-stack.service
  - systemctl enable --now forgejo-backup.timer

4
config/Caddyfile.tpl Normal file
View file

@ -0,0 +1,4 @@
${domain} {
reverse_proxy forgejo:3000
encode gzip
}

101
docs/disaster-recovery.md Normal file
View file

@ -0,0 +1,101 @@
# Disaster recovery
What to do when things go wrong, in rough order of severity.
## Pre-requisite: verify backups are real
Before you need them. Run monthly:
```bash
./scripts/test-restore.sh
```
This pulls the latest GCS backup, boots Forgejo against it in a throwaway local container, and probes the API. If it fails, fix backups before you have an actual incident.
## VM is unreachable but the disk is fine
Symptoms: Forgejo doesn't load, `gcloud compute ssh ... --tunnel-through-iap` times out, but `forgejo-data` disk and `forgejo-ip` static IP both still exist.
Recovery:
```bash
cd terraform
terraform apply -replace=google_compute_instance.forgejo
```
The data disk has `prevent_destroy = true` and is reattached; cloud-init re-bootstraps the stack against the existing data. The static IP is preserved, so DNS keeps working.
## Persistent disk is corrupted or accidentally deleted
1. (If still present and corrupt) remove `prevent_destroy` from `google_compute_disk.forgejo_data`, then `terraform apply` to destroy and recreate. **Re-add `prevent_destroy` immediately afterward.**
2. SSH to the VM.
3. Copy `scripts/restore.sh` to the VM (it is in the repo, not on the VM — see [runbook.md](runbook.md) for the `gcloud compute scp` command) and run `sudo bash /tmp/restore.sh <latest-backup>.tar.gz` — restores from GCS into the fresh disk.
## Whole GCP project is lost
Worst case, but recoverable from GCS-side backups *if* you copied them out before deleting the project.
1. **Before deleting the old project**: copy the latest backup to durable storage you control.
```bash
gsutil cp gs://OLD_PROJECT-forgejo-backups/forgejo-LATEST.tar.gz ~/Backups/
```
2. Create a new GCP project, enable APIs.
3. `./scripts/bootstrap-secrets.sh` — this generates *new* `SECRET_KEY` and `INTERNAL_TOKEN`. If you saved the originals to a password manager, manually upload those instead so encrypted DB fields survive (see below).
4. Update `project_id` in `terraform.tfvars`.
5. `terraform apply`.
6. Upload the saved tarball to the new bucket: `gsutil cp ~/Backups/forgejo-LATEST.tar.gz gs://NEW_PROJECT-forgejo-backups/`.
7. SSH to the VM and run `restore.sh`.
### Preserving SECRET_KEY across projects
Forgejo uses `SECRET_KEY` to encrypt some DB fields (2FA tokens, OAuth tokens, mirror credentials). Rotating it leaves repos and accounts intact but breaks those features.
For bit-exact recovery, save the secrets to a password manager when you first create them:
```bash
gcloud secrets versions access latest --secret=forgejo-secret-key
gcloud secrets versions access latest --secret=forgejo-internal-token
```
To restore them in a new project, *skip* `bootstrap-secrets.sh` and create the secrets manually with the saved values:
```bash
echo -n "OLD_SECRET_KEY_VALUE" | gcloud secrets create forgejo-secret-key \
--replication-policy=automatic --data-file=-
echo -n "OLD_INTERNAL_TOKEN_VALUE" | gcloud secrets create forgejo-internal-token \
--replication-policy=automatic --data-file=-
```
## Backup itself is corrupt
This is what `scripts/test-restore.sh` exists to catch *before* an incident.
If the latest is corrupt, list older versions:
```bash
gsutil ls -l gs://YOUR_PROJECT-forgejo-backups/
```
Backups are kept 30 days (lifecycle rule in `backups.tf`). Within that window, fall back to an earlier nightly tarball.
If all backups in the bucket are corrupt: there is no recovery beyond what's still on the data disk. This is why monthly verification matters.
## Domain / DNS lost
The static IP (`google_compute_address.forgejo`) is reserved separately from the VM and persists across VM replacements. You only lose it if you `terraform destroy` or manually release it.
To re-point: set your registrar's A record (or Cloud DNS if `manage_dns = true`) to the value of `terraform output static_ip`.
Caddy will re-issue a Let's Encrypt cert automatically once DNS resolves and ports 80/443 are reachable. ACME state lives in the data disk (`/mnt/disks/forgejo-data/caddy`), so existing certs survive VM replacements within their validity period.
## Compromise / suspected intrusion
1. Cut public network access immediately:
```bash
gcloud compute firewall-rules update allow-https --disabled
```
(Or `terraform` it: temporarily set `source_ranges` to your IP only.)
2. SSH in via IAP, snapshot evidence: `docker logs forgejo > /tmp/forensics.log`, copy `/mnt/disks/forgejo-data/forgejo` aside.
3. Rotate every secret: `forgejo-secret-key`, `forgejo-internal-token`, all Forgejo user passwords + PATs, your Google account password.
4. Review `gcloud logging read 'resource.type=gce_instance'` for unexpected access.
5. If unsure of the compromise vector, treat the disk as tainted: nuke the VM and restore from a backup taken *before* the suspected breach.

110
docs/runbook.md Normal file
View file

@ -0,0 +1,110 @@
# Runbook
Common operations against the running Forgejo VM.
## Admin SSH
Public port 22 is closed. Use IAP tunneling:
```bash
gcloud compute ssh forgejo --zone=us-east1-b --tunnel-through-iap
```
Your Google account needs:
- `roles/iap.tunnelResourceAccessor` on the instance (granted by Terraform via `var.admin_email`)
- `roles/compute.osLogin` on the project (same)
- 2FA on the Google account (manual, but strongly recommended — IAP is only as strong as your login)
## Inspect the stack
```bash
docker ps # caddy, forgejo, watchtower expected
docker logs --tail 200 forgejo
docker logs --tail 200 caddy
docker logs --tail 200 watchtower
journalctl -u forgejo-stack.service -n 200
journalctl -u forgejo-backup.service -n 50
systemctl list-timers forgejo-backup.timer
```
## Restart the stack
```bash
sudo systemctl restart forgejo-stack.service
```
Single container only:
```bash
docker restart forgejo
```
## Update containers immediately
Watchtower pulls new images at 04:00 UTC by default. To force now:
```bash
docker run --rm -v /var/run/docker.sock:/var/run/docker.sock containrrr/watchtower --run-once --cleanup
# or, manually:
docker pull codeberg.org/forgejo/forgejo:11
sudo systemctl restart forgejo-stack.service
```
## Run a backup on demand
```bash
sudo /var/lib/google/forgejo/backup.sh
gsutil ls gs://YOUR_PROJECT-forgejo-backups/
```
## Restore from a backup
`scripts/restore.sh` is in the repo, not on the VM. Copy it over and run:
```bash
gcloud compute scp scripts/restore.sh forgejo:/tmp/restore.sh \
--zone=us-east1-b --tunnel-through-iap
gcloud compute ssh forgejo --zone=us-east1-b --tunnel-through-iap \
--command='sudo bash /tmp/restore.sh forgejo-20260507T033000Z.tar.gz'
```
For a clean-environment dry run, use `scripts/test-restore.sh` from your workstation — it pulls the latest backup, boots Forgejo against it in a throwaway container, and probes the API.
## Forgejo major version upgrade
1. Read the [release notes](https://codeberg.org/forgejo/forgejo/releases) for breaking changes.
2. Take a manual backup (`sudo /var/lib/google/forgejo/backup.sh`).
3. Bump `forgejo_image` in `terraform.tfvars` (e.g. `codeberg.org/forgejo/forgejo:12`).
4. `terraform apply` — replaces the VM. The data disk persists; first boot runs DB migrations.
5. Watch `docker logs forgejo` to confirm migrations and startup.
## Resize the data disk
GCP supports online disk growth:
```bash
gcloud compute disks resize forgejo-data --zone=us-east1-b --size=40
```
Then on the VM:
```bash
sudo resize2fs /dev/disk/by-id/google-forgejo-data
```
Update `size = 40` in `terraform/main.tf` afterward to keep state in sync.
## Rotate secrets
```bash
# Add a new version (the latest is read at boot):
openssl rand -hex 32 | gcloud secrets versions add forgejo-secret-key --data-file=-
sudo systemctl restart forgejo-stack.service
```
Rotating `SECRET_KEY` invalidates 2FA and some encrypted DB fields. Read the Forgejo docs before rotating.
## Cost / billing watch
- Set a project budget alert at $10/month in Cloud Billing (manual; not in Terraform by design — the budget API requires the billing-account-admin role).
- Skim the billing report monthly. Egress is the most likely surprise.

613
plan.md Normal file
View file

@ -0,0 +1,613 @@
# Self-Hosted Forgejo on GCP: Complete Plan
A declarative, low-cost, low-maintenance plan for running a personal Forgejo instance on Google Cloud Platform using Container-Optimized OS, Caddy for HTTPS, and IAP for admin access.
## Goals and constraints
- **Cost**: minimize monthly spend; target ~$2–4/month
- **Maintenance**: minimal ongoing effort; OS and app patches should apply automatically
- **Security**: minimal attack surface; no public SSH; principle of least privilege for service accounts
- **Reproducibility**: entire stack defined in code; `terraform apply` from a clean project produces a working instance
- **Personal scale**: low traffic, single user, occasional pushes
## Architectural decisions
| Decision | Choice | Rationale |
|---|---|---|
| Compute | e2-micro VM in us-west1, us-central1, or us-east1 | Always-free tier covers the full month |
| OS | Container-Optimized OS (COS) | Read-only root, automatic patching by Google, minimal attack surface, container-first |
| Database | SQLite on persistent disk | Free, sufficient for personal scale, simple to back up |
| Repo storage | Local persistent disk | Fast, reliable, survives VM replacement |
| TLS | Caddy with Let's Encrypt | Auto-renewing certs with one-line config |
| Git access | HTTPS only with personal access token | No SSH port conflicts, no client-side gcloud setup |
| Admin SSH | IAP TCP forwarding | Public port 22 closed; SSH via authenticated Google tunnel |
| App updates | Watchtower with pinned major version tag | Patch updates automatic; major upgrades deliberate |
| OS updates | COS auto-update | Google manages OS patching |
| Backups | Nightly SQLite snapshot + repo tarball to GCS | Survives disk loss, accidental deletion, region failure |
| Secrets | Google Secret Manager, fetched at boot | Out of Terraform state, out of git, encrypted at rest |
| Infrastructure | Terraform | Declarative, replayable, well-documented for GCP |
| VM bootstrap | cloud-init via instance metadata | Native COS support, idempotent on VM replacement |
## Cost estimate
| Item | Monthly cost |
|---|---|
| e2-micro VM (always-free region) | $0 |
| 30 GB standard persistent disk (boot + data combined under 30 GB free tier) | $0 |
| Static external IP attached to running VM | ~$2.92 |
| GCS storage for backups (~1 GB, 30-day retention) | ~$0.05 |
| Secret Manager (2 secrets, low access volume) | ~$0.06 |
| Cloud DNS (optional; can use registrar's DNS) | $0.20 or $0 |
| Egress beyond 1 GB free | $0–2 depending on usage |
| **Total** | **~$3–5/month** |
Set a billing budget alert at $10/month to catch surprises early. GCP has no hard spending limit.
## Network exposure
| Port | Protocol | Source | Purpose |
|---|---|---|---|
| 80 | TCP | 0.0.0.0/0 | Caddy HTTP → HTTPS redirect, ACME HTTP-01 challenge |
| 443 | TCP | 0.0.0.0/0 | Caddy HTTPS → Forgejo |
| 22 | TCP | 35.235.240.0/20 (IAP only) | Admin SSH via IAP tunnel |
| All others | — | — | Default deny |
## Repository layout
```
forgejo-infra/
├── terraform/
│ ├── main.tf # VM, disk, instance config
│ ├── network.tf # Firewall rules, static IP
│ ├── iam.tf # Service account, IAP bindings
│ ├── secrets.tf # Secret Manager references (values out-of-band)
│ ├── backups.tf # GCS bucket, lifecycle rules
│ ├── dns.tf # Optional Cloud DNS record
│ ├── variables.tf
│ ├── outputs.tf
│ └── versions.tf
├── cloud-init/
│ └── user-data.yaml.tpl # Systemd units, container startup, backup timer
├── config/
│ └── Caddyfile.tpl # TLS reverse proxy config
├── scripts/
│ ├── bootstrap-secrets.sh # One-time: generate and upload secrets
│ ├── backup.sh # Run on VM via systemd timer
│ ├── restore.sh # Manual recovery from GCS tarball
│ └── test-restore.sh # Verify a backup is restorable
├── docs/
│ ├── runbook.md # Common operations, troubleshooting
│ └── disaster-recovery.md # Step-by-step recovery procedures
├── .gitignore
└── README.md
```
## Terraform: key resources
### main.tf
```hcl
resource "google_compute_disk" "forgejo_data" {
name = "forgejo-data"
type = "pd-standard"
size = 20
zone = var.zone
lifecycle { prevent_destroy = true }
}
resource "google_compute_instance" "forgejo" {
name = "forgejo"
machine_type = "e2-micro"
zone = var.zone
tags = ["forgejo"]
boot_disk {
initialize_params {
image = "cos-cloud/cos-stable"
size = 10
type = "pd-standard"
}
}
attached_disk {
source = google_compute_disk.forgejo_data.id
device_name = "forgejo-data"
}
network_interface {
network = "default"
access_config {
nat_ip = google_compute_address.forgejo.address
}
}
metadata = {
user-data = templatefile("${path.module}/../cloud-init/user-data.yaml.tpl", {
domain = var.domain
forgejo_image = var.forgejo_image
caddy_image = var.caddy_image
gcs_backup_bucket = google_storage_bucket.backups.name
project_id = var.project_id
})
google-logging-enabled = "true"
cos-update-strategy = "update_enabled"
enable-oslogin = "TRUE"
}
service_account {
email = google_service_account.forgejo.email
scopes = ["cloud-platform"]
}
allow_stopping_for_update = true
}
```
### network.tf
```hcl
resource "google_compute_address" "forgejo" {
name = "forgejo-ip"
region = var.region
}
resource "google_compute_firewall" "https" {
name = "allow-https"
network = "default"
direction = "INGRESS"
allow {
protocol = "tcp"
ports = ["80", "443"]
}
source_ranges = ["0.0.0.0/0"]
target_tags = ["forgejo"]
}
resource "google_compute_firewall" "iap_ssh" {
name = "allow-iap-ssh"
network = "default"
direction = "INGRESS"
allow {
protocol = "tcp"
ports = ["22"]
}
source_ranges = ["35.235.240.0/20"]
target_tags = ["forgejo"]
}
```
### iam.tf
```hcl
resource "google_service_account" "forgejo" {
account_id = "forgejo-vm"
display_name = "Forgejo VM service account"
}
resource "google_secret_manager_secret_iam_member" "forgejo_secrets" {
for_each = toset(["forgejo-secret-key", "forgejo-internal-token"])
project = var.project_id
secret_id = each.value
role = "roles/secretmanager.secretAccessor"
member = "serviceAccount:${google_service_account.forgejo.email}"
}
resource "google_storage_bucket_iam_member" "backups_writer" {
bucket = google_storage_bucket.backups.name
role = "roles/storage.objectAdmin"
member = "serviceAccount:${google_service_account.forgejo.email}"
}
resource "google_iap_tunnel_instance_iam_member" "ssh_admin" {
project = var.project_id
zone = var.zone
instance = google_compute_instance.forgejo.name
role = "roles/iap.tunnelResourceAccessor"
member = "user:${var.admin_email}"
}
resource "google_project_iam_member" "ssh_os_login" {
project = var.project_id
role = "roles/compute.osLogin"
member = "user:${var.admin_email}"
}
```
### backups.tf
```hcl
resource "google_storage_bucket" "backups" {
name = "${var.project_id}-forgejo-backups"
location = var.region
storage_class = "STANDARD"
uniform_bucket_level_access = true
lifecycle_rule {
condition { age = 30 }
action { type = "Delete" }
}
versioning { enabled = false }
}
```
### secrets.tf
```hcl
# Secrets are created out-of-band by scripts/bootstrap-secrets.sh
# This file only declares them as data sources and grants access (in iam.tf)
data "google_secret_manager_secret" "secret_key" {
secret_id = "forgejo-secret-key"
}
data "google_secret_manager_secret" "internal_token" {
secret_id = "forgejo-internal-token"
}
```
### variables.tf
```hcl
variable "project_id" { type = string }
variable "region" {
  type    = string
  default = "us-central1"
}
variable "zone" {
  type    = string
  default = "us-central1-a"
}
variable "domain" { type = string }
variable "admin_email" { type = string }
variable "forgejo_image" {
type = string
default = "codeberg.org/forgejo/forgejo:11"
}
variable "caddy_image" {
type = string
default = "caddy:2-alpine"
}
```
### outputs.tf
```hcl
output "static_ip" {
value = google_compute_address.forgejo.address
description = "Point your domain's A record at this address"
}
output "ssh_command" {
value = "gcloud compute ssh forgejo --zone=${var.zone} --tunnel-through-iap"
description = "Admin SSH via IAP tunnel"
}
```
## Cloud-init: user-data.yaml.tpl
```yaml
#cloud-config
write_files:
- path: /etc/systemd/system/forgejo-data.mount
content: |
[Unit]
Description=Mount Forgejo data disk
Before=docker.service
[Mount]
What=/dev/disk/by-id/google-forgejo-data
Where=/mnt/disks/forgejo-data
Type=ext4
Options=defaults,nofail
[Install]
WantedBy=multi-user.target
- path: /var/lib/forgejo/Caddyfile
content: |
${domain} {
reverse_proxy forgejo:3000
encode gzip
}
- path: /var/lib/forgejo/fetch-secrets.sh
permissions: '0755'
content: |
#!/bin/bash
set -euo pipefail
TOKEN=$(curl -sf -H "Metadata-Flavor: Google" \
"http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" \
| python3 -c "import sys,json;print(json.load(sys.stdin)['access_token'])")
fetch() {
curl -sf -H "Authorization: Bearer $TOKEN" \
"https://secretmanager.googleapis.com/v1/projects/${project_id}/secrets/$1/versions/latest:access" \
| python3 -c "import sys,json,base64;print(base64.b64decode(json.load(sys.stdin)['payload']['data']).decode())"
}
mkdir -p /run
umask 077
{
echo "FORGEJO__security__SECRET_KEY=$(fetch forgejo-secret-key)"
echo "FORGEJO__security__INTERNAL_TOKEN=$(fetch forgejo-internal-token)"
} > /run/forgejo-secrets.env
- path: /etc/systemd/system/forgejo-stack.service
content: |
[Unit]
Description=Forgejo + Caddy + Watchtower
After=forgejo-data.mount network-online.target docker.service
Requires=forgejo-data.mount
Wants=network-online.target
[Service]
Type=oneshot
RemainAfterExit=true
ExecStartPre=/var/lib/forgejo/fetch-secrets.sh
ExecStartPre=-/usr/bin/docker network create web
ExecStart=/usr/bin/docker run -d --name caddy --network web \
-p 80:80 -p 443:443 \
-v /mnt/disks/forgejo-data/caddy:/data \
-v /var/lib/forgejo/Caddyfile:/etc/caddy/Caddyfile:ro \
--restart=unless-stopped \
${caddy_image}
ExecStart=/usr/bin/docker run -d --name forgejo --network web \
-e FORGEJO__server__DISABLE_SSH=true \
-e FORGEJO__server__ROOT_URL=https://${domain}/ \
-e FORGEJO__service__DISABLE_REGISTRATION=true \
-e FORGEJO__database__DB_TYPE=sqlite3 \
--env-file /run/forgejo-secrets.env \
-v /mnt/disks/forgejo-data/forgejo:/data \
--restart=unless-stopped \
${forgejo_image}
ExecStart=/usr/bin/docker run -d --name watchtower \
-v /var/run/docker.sock:/var/run/docker.sock \
--restart=unless-stopped \
containrrr/watchtower --cleanup --schedule "0 0 4 * * *"
ExecStop=/usr/bin/docker stop watchtower forgejo caddy
[Install]
WantedBy=multi-user.target
- path: /var/lib/forgejo/backup.sh
permissions: '0755'
content: |
#!/bin/bash
set -euo pipefail
STAMP=$(date -u +%Y%m%dT%H%M%SZ)
BACKUP_DIR=/mnt/disks/forgejo-data/forgejo
docker exec forgejo sqlite3 /data/gitea/gitea.db ".backup '/data/gitea/snapshot.db'"
tar czf /tmp/forgejo-$STAMP.tar.gz -C /mnt/disks/forgejo-data forgejo
docker run --rm -v /tmp:/tmp google/cloud-sdk:slim \
gsutil cp /tmp/forgejo-$STAMP.tar.gz gs://${gcs_backup_bucket}/
rm /tmp/forgejo-$STAMP.tar.gz
docker exec forgejo rm -f /data/gitea/snapshot.db
- path: /etc/systemd/system/forgejo-backup.service
content: |
[Unit]
Description=Backup Forgejo to GCS
After=forgejo-stack.service
Requires=forgejo-stack.service
[Service]
Type=oneshot
ExecStart=/var/lib/forgejo/backup.sh
- path: /etc/systemd/system/forgejo-backup.timer
content: |
[Unit]
Description=Nightly Forgejo backup
[Timer]
OnCalendar=*-*-* 03:30:00
Persistent=true
[Install]
WantedBy=timers.target
runcmd:
- mkdir -p /mnt/disks/forgejo-data
- if ! blkid /dev/disk/by-id/google-forgejo-data; then mkfs.ext4 -F /dev/disk/by-id/google-forgejo-data; fi
- systemctl daemon-reload
- systemctl enable --now forgejo-data.mount
- mkdir -p /mnt/disks/forgejo-data/forgejo /mnt/disks/forgejo-data/caddy
- systemctl enable --now forgejo-stack.service
- systemctl enable --now forgejo-backup.timer
```
## Bootstrap procedure
### One-time setup (before first `terraform apply`)
1. **Create the GCP project** and enable required APIs:
```bash
gcloud services enable \
compute.googleapis.com \
secretmanager.googleapis.com \
iap.googleapis.com \
storage.googleapis.com
```
2. **Generate and upload secrets** (`scripts/bootstrap-secrets.sh`):
```bash
#!/bin/bash
set -euo pipefail
for SECRET in forgejo-secret-key forgejo-internal-token; do
if ! gcloud secrets describe "$SECRET" >/dev/null 2>&1; then
openssl rand -hex 32 | gcloud secrets create "$SECRET" --data-file=-
echo "Created $SECRET"
else
echo "$SECRET already exists, skipping"
fi
done
```
3. **Configure Terraform variables** in `terraform.tfvars`:
```hcl
project_id = "your-project-id"
domain = "git.yourdomain.com"
admin_email = "you@yourdomain.com"
```
### First deploy
```bash
cd terraform/
terraform init
terraform plan
terraform apply
```
Note the `static_ip` output. Point your domain's A record at it. Wait for DNS propagation (a few minutes typically).
### Forgejo first-run installer
Visit `https://yourdomain` in a browser. Forgejo's installer will appear. Configure:
- Database: SQLite3 (path `/data/gitea/gitea.db`)
- Site title: whatever you want
- Server domain: your domain
- Server base URL: `https://yourdomain/`
- Disable self-registration: yes
- Create the admin user
After this, the installer is locked. Subsequent VM replacements (terraform-driven) will keep the database and skip the installer.
### Generate a personal access token
In Forgejo: Settings → Applications → Generate New Token. Scope it minimally (read/write repository is usually enough). Configure your local git client:
```bash
git config --global credential.helper store
# On first push, enter username and the PAT as password; it'll be saved.
```
## Operations
### Admin SSH
```bash
gcloud compute ssh forgejo --zone=us-central1-a --tunnel-through-iap
```
### Inspect containers
```bash
docker ps
docker logs forgejo
docker logs caddy
journalctl -u forgejo-stack.service
```
### Force an update of containers
```bash
docker run --rm -v /var/run/docker.sock:/var/run/docker.sock containrrr/watchtower --run-once --cleanup
# or
docker pull codeberg.org/forgejo/forgejo:11
sudo systemctl restart forgejo-stack.service
```
### Run a manual backup
```bash
sudo /var/lib/forgejo/backup.sh
gsutil ls gs://YOUR_PROJECT-forgejo-backups/
```
### Restore from backup (`scripts/restore.sh`)
```bash
#!/bin/bash
set -euo pipefail
BACKUP=$1 # e.g. forgejo-20260507T033000Z.tar.gz
sudo systemctl stop forgejo-stack.service
gsutil cp "gs://YOUR_PROJECT-forgejo-backups/$BACKUP" /tmp/
sudo rm -rf /mnt/disks/forgejo-data/forgejo
sudo tar xzf "/tmp/$BACKUP" -C /mnt/disks/forgejo-data/
sudo systemctl start forgejo-stack.service
```
### Major version upgrade of Forgejo
1. Read the [Forgejo release notes](https://codeberg.org/forgejo/forgejo/releases) for breaking changes
2. Take a manual backup
3. Update the `forgejo_image` variable in Terraform (e.g. `codeberg.org/forgejo/forgejo:12`)
4. `terraform apply` — this will replace the VM
5. The persistent disk persists; first boot will run any DB migrations
## Disaster recovery
### Scenario: VM is unrecoverable
`terraform apply` recreates the VM. The persistent disk has `prevent_destroy`, so it survives. Forgejo comes back up with all data intact.
### Scenario: Persistent disk is corrupted or deleted
1. Remove `prevent_destroy` from the data disk resource (if needed)
2. `terraform apply` to create a fresh disk
3. SSH in and run the restore script with the latest GCS backup
### Scenario: Whole project is lost
1. Create a new GCP project
2. Run bootstrap-secrets.sh in the new project (generates new secrets — DB tables encrypted with the old SECRET_KEY for things like 2FA will need re-setup, but repos and basic data are fine)
3. Update `project_id` in tfvars
4. `terraform apply`
5. Manually copy the latest backup tarball from old project's GCS bucket to new one (do this BEFORE deleting the old project)
6. Run restore script
**Note**: rotating `SECRET_KEY` invalidates 2FA tokens and some encrypted fields. For a true bit-exact recovery, also back up the secrets to a password manager you control.
### Scenario: Backup itself is corrupt
This is why we test restores. `scripts/test-restore.sh` should:
1. Spin up a temporary VM (or use a local Docker setup)
2. Restore the latest backup
3. Verify Forgejo starts and at least one repo is browsable
4. Tear down
Run this monthly. Calendar reminder.
## Security checklist
- [x] Public SSH (port 22 from 0.0.0.0/0) blocked at firewall
- [x] Admin SSH only via IAP tunnel
- [x] OS Login enabled (no SSH keys in metadata)
- [x] HTTPS-only; HTTP redirects to HTTPS via Caddy
- [x] Forgejo registration disabled
- [x] Service account has minimum required permissions (Secret Manager read for two specific secrets, Storage write to one specific bucket)
- [x] Secrets in Secret Manager, not in Terraform state or git
- [x] COS auto-updates enabled for OS patching
- [x] Watchtower for application patch updates
- [x] Major version upgrades pinned (no `:latest`)
- [x] Billing budget alert at $10/month
- [x] Backups encrypted at rest in GCS (default), 30-day retention
- [ ] **Manual: enable 2FA on your GCP account** (the IAP gate is only as strong as your Google login)
- [ ] **Manual: enable 2FA on your Forgejo admin account** after first login
- [ ] **Manual: store secret values in a password manager** for cross-project recovery
## Maintenance schedule
| Frequency | Task |
|---|---|
| Continuous | Watchtower handles app patch updates; COS handles OS patches |
| Daily | Automatic backup at 03:30 UTC |
| Monthly | Run `test-restore.sh` to verify backups are restorable |
| Monthly | Review GCP billing for anomalies |
| Quarterly | Review Forgejo release notes; consider major version upgrade |
| Annually | Rotate `SECRET_KEY` and `INTERNAL_TOKEN` (requires care; see Forgejo docs) |
| Annually | Review IAM bindings; remove anything unused |
## Open questions and future work
- **Email notifications**: Forgejo can send issue/PR emails. Easiest path is configuring SMTP via a free-tier transactional email provider (e.g. Brevo, SendGrid). Not covered here; add as `FORGEJO__mailer__*` env vars when needed.
- **Forgejo Actions (CI)**: Runs on dedicated runners. The e2-micro is too small to host runners. If wanted, run a runner on a separate cheap host or skip CI.
- **Repo size growth**: 30 GB persistent disk holds a lot of personal repos but isn't infinite. Monitor with a simple disk-usage alert. Resizing the disk is online and non-disruptive on GCP.
- **Multiple users**: this design assumes one user. Adding more is fine (Forgejo handles it natively) but reconsider the registration-disabled and HTTPS-token approach if multiple humans need access.
- **Geographic redundancy**: not in scope. Backups in GCS are regional; for multi-region durability use a multi-region bucket (slightly more expensive).
## Appendix: useful references
- [Forgejo documentation](https://forgejo.org/docs/)
- [Forgejo Docker image](https://codeberg.org/forgejo/-/packages/container/forgejo/)
- [Container-Optimized OS overview](https://cloud.google.com/container-optimized-os/docs/concepts/features-and-benefits)
- [IAP for TCP forwarding](https://cloud.google.com/iap/docs/using-tcp-forwarding)
- [Caddy documentation](https://caddyserver.com/docs/)
- [GCP free tier](https://cloud.google.com/free/docs/free-cloud-features)
- [Watchtower](https://containrrr.dev/watchtower/)

16
scripts/backup.sh Executable file
View file

@ -0,0 +1,16 @@
#!/bin/bash
# Run on the VM via the forgejo-backup.timer systemd unit.
# Snapshots the SQLite DB, tars the data dir, and uploads to GCS.
# Note: the canonical copy of this script is embedded in cloud-init/user-data.yaml.tpl
# at /var/lib/forgejo/backup.sh. This file is kept for readability and ad-hoc reuse.
set -euo pipefail

: "${GCS_BACKUP_BUCKET:?GCS_BACKUP_BUCKET must be set}"

STAMP=$(date -u +%Y%m%dT%H%M%SZ)
TARBALL="/tmp/forgejo-${STAMP}.tar.gz"

# Always clean up the local tarball and the in-container DB snapshot, even if
# the upload fails partway through — otherwise a transient GCS error under
# `set -e` leaks a tarball into /tmp (and a snapshot into /data) on every run.
cleanup() {
  rm -f "$TARBALL"
  docker exec forgejo rm -f /data/gitea/snapshot.db >/dev/null 2>&1 || true
}
trap cleanup EXIT

# Point-in-time copy of the SQLite DB. The live gitea.db may be mid-write
# while Forgejo is running; snapshot.db (made via sqlite3 .backup) is the
# consistent copy to restore from.
docker exec forgejo sqlite3 /data/gitea/gitea.db ".backup '/data/gitea/snapshot.db'"

tar czf "$TARBALL" -C /mnt/disks/forgejo-data forgejo

# gsutil runs in a container because COS has no package manager to install it.
docker run --rm -v /tmp:/tmp google/cloud-sdk:slim \
  gsutil cp "$TARBALL" "gs://${GCS_BACKUP_BUCKET}/"

25
scripts/bootstrap-secrets.sh Executable file
View file

@ -0,0 +1,25 @@
#!/bin/bash
# Generate and upload Forgejo secrets to Google Secret Manager.
# Run once per project before the first `terraform apply`.
# Idempotent: skips secrets that already exist.
set -euo pipefail

ACTIVE_PROJECT=$(gcloud config get-value project 2>/dev/null)
if [[ -z "$ACTIVE_PROJECT" ]]; then
  echo "ERROR: no active gcloud project. Run 'gcloud config set project YOUR_PROJECT' first." >&2
  exit 1
fi

SECRETS=(forgejo-secret-key forgejo-internal-token)
for SECRET in "${SECRETS[@]}"; do
  # Idempotency guard: never overwrite an existing secret.
  if gcloud secrets describe "$SECRET" >/dev/null 2>&1; then
    echo "$SECRET already exists, skipping"
    continue
  fi
  # 32 random bytes, hex-encoded, streamed straight into Secret Manager
  # so the value never touches disk.
  openssl rand -hex 32 \
    | gcloud secrets create "$SECRET" --replication-policy=automatic --data-file=-
  echo "Created $SECRET"
done

echo
echo "Tip: also save these values in your password manager for cross-project recovery."
echo " gcloud secrets versions access latest --secret=forgejo-secret-key"
echo " gcloud secrets versions access latest --secret=forgejo-internal-token"

30
scripts/restore.sh Executable file
View file

@ -0,0 +1,30 @@
#!/bin/bash
# Restore a Forgejo backup tarball from GCS over the live data dir.
# Run on the VM via SSH. Stops the stack, restores, restarts.
#
# Usage: sudo ./restore.sh forgejo-20260507T033000Z.tar.gz
set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 <backup-filename>" >&2
  echo "  e.g. $0 forgejo-20260507T033000Z.tar.gz" >&2
  exit 1
fi
BACKUP=$1

# Derive the bucket name from the project ID, matching terraform/backups.tf.
BUCKET=$(curl -sf -H "Metadata-Flavor: Google" \
  "http://metadata.google.internal/computeMetadata/v1/project/project-id")-forgejo-backups

read -r -p "About to restore '$BACKUP' from gs://$BUCKET/ over /mnt/disks/forgejo-data/forgejo. Continue? [y/N] " ans
[[ "$ans" == "y" || "$ans" == "Y" ]] || exit 1

systemctl stop forgejo-stack.service

# Download before touching any live data, so a missing object aborts cleanly.
docker run --rm -v /tmp:/tmp google/cloud-sdk:slim \
  gsutil cp "gs://${BUCKET}/${BACKUP}" /tmp/

# Move the current data dir aside instead of deleting it up front: if the
# tarball turns out to be corrupt, the previous data is still recoverable
# rather than already rm -rf'd.
OLD="/mnt/disks/forgejo-data/forgejo.pre-restore.$(date -u +%s)"
if [[ -d /mnt/disks/forgejo-data/forgejo ]]; then
  mv /mnt/disks/forgejo-data/forgejo "$OLD"
fi
if ! tar xzf "/tmp/${BACKUP}" -C /mnt/disks/forgejo-data/; then
  echo "ERROR: extraction failed; previous data preserved at $OLD" >&2
  exit 1
fi
rm -rf "$OLD" "/tmp/${BACKUP}"

systemctl start forgejo-stack.service
echo "Restore complete. Check 'docker logs forgejo' for migration output."

63
scripts/test-restore.sh Executable file
View file

@ -0,0 +1,63 @@
#!/bin/bash
# Verify the latest GCS backup is restorable in a throwaway local Docker setup.
# Run from a workstation with gcloud + docker. Does not touch the production VM.
#
# Usage: ./test-restore.sh [project-id]
#        project-id defaults to the active gcloud project
set -euo pipefail

PROJECT=${1:-$(gcloud config get-value project 2>/dev/null)}
if [[ -z "$PROJECT" ]]; then
  echo "ERROR: no project specified and no active gcloud project." >&2
  exit 1
fi
BUCKET="${PROJECT}-forgejo-backups"

WORKDIR=$(mktemp -d -t forgejo-test-restore.XXXXXX)
# Tear down the test container and the scratch dir no matter how we exit.
trap 'docker rm -f forgejo-test >/dev/null 2>&1 || true; rm -rf "$WORKDIR"' EXIT
echo "Workdir: $WORKDIR"

# The `|| true` matters: with pipefail, grep exits nonzero when nothing
# matches, which would abort the script here and skip the friendly error below.
LATEST=$(gsutil ls "gs://${BUCKET}/" | { grep '\.tar\.gz$' || true; } | sort | tail -1)
if [[ -z "$LATEST" ]]; then
  echo "ERROR: no backups found in gs://${BUCKET}/" >&2
  exit 1
fi
echo "Latest backup: $LATEST"

gsutil cp "$LATEST" "$WORKDIR/backup.tar.gz"
tar xzf "$WORKDIR/backup.tar.gz" -C "$WORKDIR"

# Sanity-check the tarball layout before spending time booting a container.
if [[ ! -d "$WORKDIR/forgejo" ]]; then
  echo "ERROR: tarball does not contain a 'forgejo' directory" >&2
  exit 1
fi
if [[ ! -f "$WORKDIR/forgejo/gitea/gitea.db" ]]; then
  echo "ERROR: SQLite DB missing from backup" >&2
  exit 1
fi

# Boot Forgejo against the restored data and probe it.
docker run -d --rm --name forgejo-test \
  -p 13000:3000 \
  -v "$WORKDIR/forgejo:/data" \
  -e FORGEJO__server__DISABLE_SSH=true \
  -e FORGEJO__database__DB_TYPE=sqlite3 \
  codeberg.org/forgejo/forgejo:11 >/dev/null

echo "Waiting for Forgejo to start..."
for i in $(seq 1 30); do
  # Capture the response so we don't hit the endpoint twice per attempt.
  if VERSION=$(curl -sf http://localhost:13000/api/v1/version); then
    echo "OK: Forgejo responded with version: $VERSION"
    # URL is quoted so `?` can't be glob-expanded by the shell.
    REPOS=$(curl -s 'http://localhost:13000/api/v1/repos/search?limit=1' | python3 -c 'import sys,json; d=json.load(sys.stdin); print(len(d.get("data", [])))')
    echo "OK: API repos endpoint returned $REPOS result(s)"
    echo "PASS: backup is restorable"
    exit 0
  fi
  sleep 2
done

echo "FAIL: Forgejo did not respond within 60s" >&2
docker logs forgejo-test >&2 || true
exit 1

19
terraform/backups.tf Normal file
View file

@ -0,0 +1,19 @@
# GCS bucket for nightly Forgejo backups (written by backup.sh on the VM,
# read back by restore.sh / test-restore.sh).
resource "google_storage_bucket" "backups" {
  name          = "${var.project_id}-forgejo-backups"
  location      = var.region
  storage_class = "STANDARD"

  # IAM-only access control; no legacy per-object ACLs.
  uniform_bucket_level_access = true

  # Backups contain the entire Forgejo data dir (private repos, SQLite DB,
  # config with secrets) — hard-block any future grant to allUsers/allAuthenticatedUsers.
  public_access_prevention = "enforced"

  # Retain 30 nightly backups, then let GCS delete them automatically.
  lifecycle_rule {
    condition {
      age = 30
    }
    action {
      type = "Delete"
    }
  }

  # Versioning is redundant here: each backup is a uniquely-named tarball and
  # the 30-day lifecycle rule already bounds retention.
  versioning {
    enabled = false
  }
}

9
terraform/dns.tf Normal file
View file

@ -0,0 +1,9 @@
# Optional Cloud DNS A record pointing the Forgejo domain at the static IP.
# Only created when manage_dns = true; otherwise create the A record manually
# at your DNS provider using the `static_ip` output.
resource "google_dns_record_set" "forgejo" {
  count = var.manage_dns ? 1 : 0
  name = "${var.domain}." # Cloud DNS requires the trailing dot (FQDN form)
  type = "A"
  ttl = 300
  managed_zone = var.dns_managed_zone
  rrdatas = [google_compute_address.forgejo.address]
}

32
terraform/iam.tf Normal file
View file

@ -0,0 +1,32 @@
# Dedicated identity the Forgejo VM runs as. It gets only the narrow bindings
# below (secret accessor + backup bucket access), not the default compute SA's
# project-wide editor role.
resource "google_service_account" "forgejo" {
  account_id = "forgejo-vm"
  display_name = "Forgejo VM service account"
}
# Let the VM read the two Forgejo secrets created out-of-band by
# scripts/bootstrap-secrets.sh. Accessor only: the VM can fetch secret
# payloads but cannot modify or delete them.
resource "google_secret_manager_secret_iam_member" "forgejo_secrets" {
  for_each = toset(["forgejo-secret-key", "forgejo-internal-token"])
  project = var.project_id
  secret_id = each.value
  role = "roles/secretmanager.secretAccessor"
  member = "serviceAccount:${google_service_account.forgejo.email}"
}
# Bucket-scoped grant so backup.sh can upload and restore.sh can download.
# NOTE(review): objectAdmin also permits overwriting/deleting existing backups
# from the VM. If a compromised VM tampering with history is a concern,
# consider objectCreator + objectViewer and let the lifecycle rule do the
# deleting — confirm that doesn't break any overwrite the scripts rely on.
resource "google_storage_bucket_iam_member" "backups_writer" {
  bucket = google_storage_bucket.backups.name
  role = "roles/storage.objectAdmin"
  member = "serviceAccount:${google_service_account.forgejo.email}"
}
# Allow only the admin account to open IAP TCP tunnels to this instance.
# Pairs with the allow-iap-ssh firewall rule in network.tf: SSH is reachable
# exclusively through the IAP tunnel, never from the public internet.
resource "google_iap_tunnel_instance_iam_member" "ssh_admin" {
  project = var.project_id
  zone = var.zone
  instance = google_compute_instance.forgejo.name
  role = "roles/iap.tunnelResourceAccessor"
  member = "user:${var.admin_email}"
}
# OS Login identity for the admin. The runbook and scripts/restore.sh require
# root on the VM (`sudo ./restore.sh`, systemctl), and plain
# roles/compute.osLogin grants login WITHOUT sudo — the admin variant is
# needed for `sudo` to work over the IAP SSH tunnel.
resource "google_project_iam_member" "ssh_os_login" {
  project = var.project_id
  role    = "roles/compute.osAdminLogin"
  member  = "user:${var.admin_email}"
}

62
terraform/main.tf Normal file
View file

@ -0,0 +1,62 @@
# Persistent data disk mounted at /mnt/disks/forgejo-data on the VM
# (repos + SQLite DB). Kept separate from the boot disk so the instance can
# be recreated/upgraded without losing data. Resizing is online on GCP.
resource "google_compute_disk" "forgejo_data" {
  name = "forgejo-data"
  type = "pd-standard"
  size = 20
  zone = var.zone
  # Guard the source of truth: `terraform destroy` (or a plan that would
  # replace the disk) must fail rather than delete it.
  lifecycle {
    prevent_destroy = true
  }
}
# The single e2-micro VM running the whole stack (Forgejo + Caddy + Watchtower)
# on Container-Optimized OS, configured entirely via cloud-init user-data.
resource "google_compute_instance" "forgejo" {
  name = "forgejo"
  machine_type = "e2-micro"
  zone = var.zone
  # Tag targeted by the firewall rules in network.tf (HTTPS + IAP SSH).
  tags = ["forgejo"]
  boot_disk {
    initialize_params {
      image = "cos-cloud/cos-stable"
      size = 10
      type = "pd-standard"
    }
  }
  # Data disk declared separately with prevent_destroy; cloud-init mounts it
  # by its device_name.
  attached_disk {
    source = google_compute_disk.forgejo_data.id
    device_name = "forgejo-data"
  }
  network_interface {
    network = "default"
    access_config {
      nat_ip = google_compute_address.forgejo.address
    }
  }
  metadata = {
    # COS consumes `user-data` as cloud-init; the template embeds the systemd
    # units, Caddyfile, and backup script.
    user-data = templatefile("${path.module}/../cloud-init/user-data.yaml.tpl", {
      domain = var.domain
      forgejo_image = var.forgejo_image
      caddy_image = var.caddy_image
      gcs_backup_bucket = google_storage_bucket.backups.name
      project_id = var.project_id
    })
    google-logging-enabled = "true"
    # Let COS auto-apply OS updates (reboots into the updated image).
    cos-update-strategy = "update_enabled"
    enable-oslogin = "TRUE"
  }
  service_account {
    # Broad OAuth scope; effective permissions are bounded by the IAM roles
    # granted to this service account in iam.tf.
    email = google_service_account.forgejo.email
    scopes = ["cloud-platform"]
  }
  # Metadata/machine-type changes may stop+start the VM instead of forcing
  # a destroy/recreate.
  allow_stopping_for_update = true
  depends_on = [
    google_secret_manager_secret_iam_member.forgejo_secrets,
    google_storage_bucket_iam_member.backups_writer,
  ]
}

32
terraform/network.tf Normal file
View file

@ -0,0 +1,32 @@
# Static external IP so the domain's A record survives VM recreation.
resource "google_compute_address" "forgejo" {
  name = "forgejo-ip"
  region = var.region
}
# Public web ingress to instances tagged "forgejo".
# Port 80 stays open alongside 443 — presumably for Caddy's HTTP->HTTPS
# redirect and ACME HTTP-01 challenges; confirm against the Caddyfile.
resource "google_compute_firewall" "https" {
  name = "allow-https"
  network = "default"
  direction = "INGRESS"
  allow {
    protocol = "tcp"
    ports = ["80", "443"]
  }
  source_ranges = ["0.0.0.0/0"]
  target_tags = ["forgejo"]
}
# SSH is only reachable from 35.235.240.0/20 — Google's published source
# range for IAP TCP forwarding — so port 22 is never exposed to the internet.
# Who may use the tunnel is further restricted by the IAP IAM binding in iam.tf.
resource "google_compute_firewall" "iap_ssh" {
  name = "allow-iap-ssh"
  network = "default"
  direction = "INGRESS"
  allow {
    protocol = "tcp"
    ports = ["22"]
  }
  source_ranges = ["35.235.240.0/20"]
  target_tags = ["forgejo"]
}

14
terraform/outputs.tf Normal file
View file

@ -0,0 +1,14 @@
# Post-apply handles: the IP to put in DNS, the one-liner for admin SSH,
# and the bucket the nightly backups land in.
output "static_ip" {
  value = google_compute_address.forgejo.address
  description = "Point your domain's A record at this address"
}
output "ssh_command" {
  value = "gcloud compute ssh forgejo --zone=${var.zone} --tunnel-through-iap"
  description = "Admin SSH via IAP tunnel"
}
output "backup_bucket" {
  value = google_storage_bucket.backups.name
  description = "GCS bucket holding nightly backups"
}

10
terraform/secrets.tf Normal file
View file

@ -0,0 +1,10 @@
# Secrets are created out-of-band by scripts/bootstrap-secrets.sh.
# This file only declares them as data sources; the IAM bindings live in iam.tf.
# Side benefit: if bootstrap-secrets.sh hasn't been run yet, these lookups
# make `terraform plan` fail early with a clear error instead of deploying a
# VM that can't fetch its secrets at boot.
data "google_secret_manager_secret" "secret_key" {
  secret_id = "forgejo-secret-key"
}
data "google_secret_manager_secret" "internal_token" {
  secret_id = "forgejo-internal-token"
}

50
terraform/variables.tf Normal file
View file

@ -0,0 +1,50 @@
# --- Required inputs (set in terraform.tfvars, which is gitignored) ---
variable "project_id" {
  type = string
  description = "GCP project ID"
}
variable "region" {
  type = string
  default = "us-central1"
  description = "GCP region (use us-west1, us-central1, or us-east1 for the always-free e2-micro)"
}
variable "zone" {
  type = string
  default = "us-central1-a"
  description = "GCP zone within region"
}
variable "domain" {
  type = string
  description = "Domain name for the Forgejo instance (e.g. git.example.com)"
}
variable "admin_email" {
  type = string
  description = "Google account that gets IAP SSH access"
}
# --- Container images: pinned to a major version so Watchtower can apply
# --- patch/minor updates without a surprise major upgrade ---
variable "forgejo_image" {
  type = string
  default = "codeberg.org/forgejo/forgejo:11"
  description = "Forgejo container image, pinned to a major version"
}
variable "caddy_image" {
  type = string
  default = "caddy:2-alpine"
  description = "Caddy container image, pinned to a major version"
}
# --- Optional Cloud DNS management (see dns.tf) ---
variable "manage_dns" {
  type = bool
  default = false
  description = "If true, manage an A record in Cloud DNS. Requires dns_managed_zone."
}
variable "dns_managed_zone" {
  type = string
  default = ""
  description = "Cloud DNS managed zone name (only used when manage_dns = true)"
}

16
terraform/versions.tf Normal file
View file

@ -0,0 +1,16 @@
# Toolchain pins: Terraform 1.6+ and the 6.x google provider line.
terraform {
  required_version = ">= 1.6"
  required_providers {
    google = {
      source = "hashicorp/google"
      version = "~> 6.0"
    }
  }
}
# Default project/region/zone so individual resources don't repeat them.
provider "google" {
  project = var.project_id
  region = var.region
  zone = var.zone
}