feat: add preprod observability foundation
This commit is contained in:
413
deploy/observability/grafana/dashboards/socialize-overview.json
Normal file
413
deploy/observability/grafana/dashboards/socialize-overview.json
Normal file
@@ -0,0 +1,413 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(http_server_request_duration_seconds_count{service_name=\"socialize-api\"}[5m]))",
|
||||
"legendFormat": "requests/sec"
|
||||
}
|
||||
],
|
||||
"title": "API Requests/sec",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(http_server_request_duration_seconds_count{service_name=\"socialize-api\", http_response_status_code=~\"5..\"}[5m])) / clamp_min(sum(rate(http_server_request_duration_seconds_count{service_name=\"socialize-api\"}[5m])), 0.001)",
|
||||
"legendFormat": "5xx rate"
|
||||
}
|
||||
],
|
||||
"title": "API 5xx Rate",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum by (le) (rate(http_server_request_duration_seconds_bucket{service_name=\"socialize-api\"}[5m])))",
|
||||
"legendFormat": "p95"
|
||||
}
|
||||
],
|
||||
"title": "API p95 Latency",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(ALERTS{alertstate=\"firing\"})",
|
||||
"legendFormat": "firing"
|
||||
}
|
||||
],
|
||||
"title": "Firing Alerts",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 4
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(http_server_request_duration_seconds_count{service_name=\"socialize-api\"}[5m])) by (http_request_method, http_route)",
|
||||
"legendFormat": "{{http_request_method}} {{http_route}}"
|
||||
}
|
||||
],
|
||||
"title": "Request Rate By Endpoint",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 4
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.95, sum by (le, http_route) (rate(http_server_request_duration_seconds_bucket{service_name=\"socialize-api\"}[5m])))",
|
||||
"legendFormat": "{{http_route}}"
|
||||
}
|
||||
],
|
||||
"title": "p95 Latency By Endpoint",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 12
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(socialize_login_attempts_total[24h])) by (outcome)",
|
||||
"legendFormat": "login {{outcome}}"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(socialize_organizations_created_total[24h]))",
|
||||
"legendFormat": "organizations"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(socialize_workspaces_created_total[24h]))",
|
||||
"legendFormat": "workspaces"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(socialize_content_items_created_total[24h]))",
|
||||
"legendFormat": "content"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(socialize_comments_created_total[24h]))",
|
||||
"legendFormat": "comments"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(socialize_approval_decisions_submitted_total[24h]))",
|
||||
"legendFormat": "approvals"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(socialize_feedback_submitted_total[24h]))",
|
||||
"legendFormat": "feedback"
|
||||
}
|
||||
],
|
||||
"title": "Usage Signals, 24h Rolling",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 12
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(socialize_email_delivery_total[1h])) by (outcome, provider)",
|
||||
"legendFormat": "email {{provider}} {{outcome}}"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(socialize_blob_storage_operations_total[1h])) by (operation, outcome)",
|
||||
"legendFormat": "blob {{operation}} {{outcome}}"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(socialize_background_job_runs_total[1h])) by (job, outcome)",
|
||||
"legendFormat": "job {{job}} {{outcome}}"
|
||||
}
|
||||
],
|
||||
"title": "Operational Events, 1h Rolling",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "Prometheus"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 20
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"showHeader": true
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ALERTS{alertstate=\"firing\"}",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"legendFormat": "{{alertname}}"
|
||||
}
|
||||
],
|
||||
"title": "Firing Alerts",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "Loki"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 27
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": false,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": true,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": false
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "{platform=\"docker\", compose_service=\"api\"}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "API Logs",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"tags": [
|
||||
"socialize",
|
||||
"preprod"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Socialize Overview",
|
||||
"uid": "socialize-overview",
|
||||
"version": 2,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: Socialize
|
||||
orgId: 1
|
||||
folder: Socialize
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 30
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
@@ -0,0 +1,26 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
uid: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
|
||||
- name: Loki
|
||||
uid: Loki
|
||||
type: loki
|
||||
access: proxy
|
||||
url: http://loki:3100
|
||||
|
||||
- name: Tempo
|
||||
uid: Tempo
|
||||
type: tempo
|
||||
access: proxy
|
||||
url: http://tempo:3200
|
||||
jsonData:
|
||||
tracesToLogsV2:
|
||||
datasourceUid: Loki
|
||||
serviceMap:
|
||||
datasourceUid: Prometheus
|
||||
Reference in New Issue
Block a user