170 lines
4.4 KiB
Terraform
170 lines
4.4 KiB
Terraform
|
|
# HTTPS uptime probe for Forgejo.
# Issues GET /api/v1/version on port 443 of var.domain every 60s with a 10s
# timeout; TLS is used and the certificate is validated as part of the check.
resource "google_monitoring_uptime_check_config" "forgejo" {
  display_name = "Forgejo /api/v1/version"
  period       = "60s"
  timeout      = "10s"

  # What is being probed: a URL-type resource identified by project and host.
  monitored_resource {
    type = "uptime_url"
    labels = {
      host       = var.domain
      project_id = var.project_id
    }
  }

  # How it is probed.
  http_check {
    request_method = "GET"
    path           = "/api/v1/version"
    port           = "443"
    use_ssl        = true
    validate_ssl   = true
  }
}
|
||
|
|
|
||
|
|
# Alert: the Forgejo uptime check is failing.
# Watches the check_passed metric of the "forgejo" uptime check and notifies
# the email channel; open incidents auto-close after 1800s.
resource "google_monitoring_alert_policy" "forgejo_down" {
  display_name          = "Forgejo is down"
  combiner              = "OR"
  notification_channels = [google_monitoring_notification_channel.email.id]

  alert_strategy {
    auto_close = "1800s"
  }

  conditions {
    display_name = "Uptime check failing for 5+ min"

    condition_threshold {
      # Restrict to the check_passed series produced by this file's uptime check.
      filter = "resource.type=\"uptime_url\" AND metric.type=\"monitoring.googleapis.com/uptime_check/check_passed\" AND metric.label.\"check_id\"=\"${google_monitoring_uptime_check_config.forgejo.uptime_check_id}\""

      # Fire when the per-host count of failing series stays above 1 for 300s.
      # NOTE(review): "> 1" means at least two series must be failing; presumably
      # each series is one checker location, which filters single-location blips.
      comparison      = "COMPARISON_GT"
      threshold_value = 1
      duration        = "300s"

      trigger {
        count = 1
      }

      aggregations {
        # Take the most recent sample in each 1200s window, then count the
        # series whose value is false, grouped by monitored host.
        alignment_period     = "1200s"
        per_series_aligner   = "ALIGN_NEXT_OLDER"
        cross_series_reducer = "REDUCE_COUNT_FALSE"
        group_by_fields      = ["resource.label.host"]
      }
    }
  }
}
|
||
|
|
|
||
|
|
# Alert: the TLS certificate seen by the Forgejo uptime check is close to expiry.
# Notifies the email channel; open incidents auto-close after 86400s.
resource "google_monitoring_alert_policy" "tls_cert_expiry" {
  display_name          = "Forgejo TLS cert expiring soon"
  combiner              = "OR"
  notification_channels = [google_monitoring_notification_channel.email.id]

  alert_strategy {
    auto_close = "86400s"
  }

  conditions {
    display_name = "Cert expires in <7 days"

    condition_threshold {
      # Restrict to the cert-expiry series produced by this file's uptime check.
      filter = "resource.type=\"uptime_url\" AND metric.type=\"monitoring.googleapis.com/uptime_check/time_until_ssl_cert_expires\" AND metric.label.\"check_id\"=\"${google_monitoring_uptime_check_config.forgejo.uptime_check_id}\""

      # Fire when the value stays below 7 for 600s.
      # NOTE(review): the display name implies the metric is reported in days -
      # confirm against the metric descriptor.
      comparison      = "COMPARISON_LT"
      threshold_value = 7
      duration        = "600s"

      aggregations {
        # Most recent sample in each 1200s window; no cross-series reduction,
        # so every series is evaluated on its own.
        alignment_period   = "1200s"
        per_series_aligner = "ALIGN_NEXT_OLDER"
      }
    }
  }
}
|
||
|
|
|
||
|
|
# Log-based counter metric: counts GCE instance log entries whose
# jsonPayload.MESSAGE starts with "DISK_HIGH:".
# NOTE(review): the producer of the DISK_HIGH line is not defined in this
# file - confirm what emits it and at which threshold.
resource "google_logging_metric" "forgejo_disk_high" {
  name = "forgejo_disk_high"

  metric_descriptor {
    metric_kind = "DELTA"
    value_type  = "INT64"
  }

  filter = join(" AND ", [
    "resource.type=\"gce_instance\"",
    "jsonPayload.MESSAGE=~\"^DISK_HIGH:\"",
  ])
}
|
||
|
|
|
||
|
|
# Alert: a DISK_HIGH log line was emitted by a GCE instance.
# Driven by the forgejo_disk_high log-based metric; notifies the email channel
# and auto-closes open incidents after 86400s.
# NOTE(review): the ">80%" in the display name is decided by whatever emits the
# DISK_HIGH log line, which is not defined in this file.
resource "google_monitoring_alert_policy" "disk_full" {
  display_name = "Forgejo data disk >80% full"
  combiner     = "OR"

  conditions {
    display_name = "DISK_HIGH log line emitted"

    condition_threshold {
      # Referencing the metric resource by attribute keeps the name in one place
      # and gives Terraform an implicit dependency, so no depends_on is needed.
      filter = "resource.type=\"gce_instance\" AND metric.type=\"logging.googleapis.com/user/${google_logging_metric.forgejo_disk_high.name}\""

      # Fire as soon as at least one matching log entry is seen.
      duration        = "0s"
      comparison      = "COMPARISON_GT"
      threshold_value = 0

      aggregations {
        alignment_period = "3600s"
        # ALIGN_SUM adds up the counter's values, i.e. the number of matching
        # log entries in the window. ALIGN_COUNT would count data points
        # instead, so zero-valued points would still satisfy "> 0".
        per_series_aligner   = "ALIGN_SUM"
        cross_series_reducer = "REDUCE_SUM"
      }
    }
  }

  notification_channels = [google_monitoring_notification_channel.email.id]

  alert_strategy {
    auto_close = "86400s"
  }
}
|
||
|
|
|
||
|
|
# Log-based counter metric: counts GCE instance entries in the cos_containers
# log whose text or JSON message matches "panic" or "FATAL".
resource "google_logging_metric" "forgejo_panic" {
  name = "forgejo_panic"

  metric_descriptor {
    metric_kind = "DELTA"
    value_type  = "INT64"
  }

  filter = join(" AND ", [
    "resource.type=\"gce_instance\"",
    "logName=~\"projects/.*/logs/cos_containers\"",
    # Any one of these payload matches is enough; the group is parenthesised
    # so the OR binds tighter than the surrounding ANDs.
    format("(%s)", join(" OR ", [
      "textPayload=~\"panic\"",
      "textPayload=~\"FATAL\"",
      "jsonPayload.message=~\"panic\"",
      "jsonPayload.message=~\"FATAL\"",
    ])),
  ])
}
|
||
|
|
|
||
|
|
# Alert: a panic or FATAL line appeared in the container logs.
# Driven by the forgejo_panic log-based metric; notifies the email channel and
# auto-closes open incidents after 3600s.
resource "google_monitoring_alert_policy" "forgejo_panic" {
  display_name = "Forgejo container panic/FATAL"
  combiner     = "OR"

  conditions {
    display_name = "panic or FATAL in container logs"

    condition_threshold {
      # Referencing the metric resource by attribute keeps the name in one place
      # and gives Terraform an implicit dependency, so no depends_on is needed.
      filter = "resource.type=\"gce_instance\" AND metric.type=\"logging.googleapis.com/user/${google_logging_metric.forgejo_panic.name}\""

      # Fire as soon as at least one matching log entry is seen.
      duration        = "0s"
      comparison      = "COMPARISON_GT"
      threshold_value = 0

      aggregations {
        alignment_period = "300s"
        # ALIGN_SUM adds up the counter's values, i.e. the number of matching
        # log entries in the window. ALIGN_COUNT would count data points
        # instead, so zero-valued points would still satisfy "> 0".
        per_series_aligner   = "ALIGN_SUM"
        cross_series_reducer = "REDUCE_SUM"
      }
    }
  }

  notification_channels = [google_monitoring_notification_channel.email.id]

  alert_strategy {
    auto_close = "3600s"
  }
}
|