feat: close preprod observability loop

This commit is contained in:
2026-05-08 15:48:56 -04:00
parent 8bcff96821
commit 986c7efea6
14 changed files with 618 additions and 2 deletions

View File

@@ -11,6 +11,16 @@ groups:
summary: Socialize API telemetry is missing
description: No API request telemetry has been received for 5 minutes. The API or telemetry pipeline may be down.
# Fires when a target scraped by the `preprod-uptime` job reports
# probe_success == 0 continuously for 2 minutes.
- alert: SocializePreprodEndpointDown
  expr: 'probe_success{job="preprod-uptime"} == 0'
  for: 2m
  labels:
    severity: critical
    service: socialize-preprod
  annotations:
    summary: Preprod endpoint is down
    # Quoted because the value starts with a `{{ ... }}` template expression,
    # which YAML would otherwise try to read as a flow mapping.
    description: "{{ $labels.instance }} has failed blackbox checks for 2 minutes."
- alert: SocializeApiHighErrorRate
expr: |
(
@@ -56,6 +66,26 @@ groups:
summary: Socialize core usage is quiet
description: No content, comment, approval, or feedback activity has been observed over the last 12 hours.
# Fires when socialize_workflow_stale_in_approval stays above zero for
# 30 minutes. The staleness threshold itself is applied wherever the metric
# is produced; it is not visible in this rule.
- alert: SocializeContentStaleInApproval
  expr: 'socialize_workflow_stale_in_approval > 0'
  for: 30m
  labels:
    severity: warning
    service: socialize-api
  annotations:
    summary: Content is stale in approval
    description: >-
      One or more content items have been in approval longer than the
      configured threshold.
# Informational: fires when the window="24h" series of
# socialize_workflow_active_workspaces stays below 1 for a full hour.
- alert: SocializeNoActiveWorkspaces
  expr: 'socialize_workflow_active_workspaces{window="24h"} < 1'
  for: 1h
  labels:
    severity: info
    service: socialize-api
  annotations:
    summary: No active workspaces in the last 24 hours
    description: >-
      No workspace has content workflow activity in the last 24 hours.
- alert: SocializeFeedbackBugSubmitted
expr: sum(increase(socialize_feedback_submitted_total{feedback_type="Bug"}[15m])) > 0
for: 0m