feat: add preprod observability foundation
This commit is contained in:
31
README.md
31
README.md
@@ -82,6 +82,37 @@ The deploy workflow writes the remote `.env` file and syncs `deploy/compose.yml`
|
|||||||
before running the server deploy script.
|
before running the server deploy script.
|
||||||
Use the raw Resend API key value for `RESEND_API_KEY`, without a `Bearer ` prefix.
|
Use the raw Resend API key value for `RESEND_API_KEY`, without a `Bearer ` prefix.
|
||||||
|
|
||||||
|
## Preprod Observability
|
||||||
|
|
||||||
|
The optional observability overlay runs a self-hosted Grafana stack for preproduction:
|
||||||
|
|
||||||
|
- Grafana `13.0.1`: dashboards
|
||||||
|
- Prometheus `v3.11.3`: metrics and local alert rules
|
||||||
|
- Loki `3.7.1`: Docker/container logs
|
||||||
|
- Tempo `2.10.3`: traces
|
||||||
|
- Grafana Alloy `v1.16.0`: OTLP receiver and Docker log collector
|
||||||
|
|
||||||
|
Start the app with observability:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose -f deploy/compose.yml -f deploy/observability/compose.observability.yml up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
Grafana is exposed at:
|
||||||
|
|
||||||
|
```txt
|
||||||
|
http://127.0.0.1:3000
|
||||||
|
```
|
||||||
|
|
||||||
|
Default credentials are `admin` / `admin` unless `GRAFANA_ADMIN_USER` and
|
||||||
|
`GRAFANA_ADMIN_PASSWORD` are set. Set `GRAFANA_HTTP_BIND=0.0.0.0` only when the
|
||||||
|
preprod network boundary is trusted or protected by a reverse proxy/VPN.
|
||||||
|
|
||||||
|
Set a non-default `GRAFANA_ADMIN_PASSWORD` before exposing Grafana outside the
|
||||||
|
host. Prometheus alert rules are provisioned under
|
||||||
|
`deploy/observability/prometheus/rules/`; notification delivery is intentionally
|
||||||
|
left to the preprod operations environment.
|
||||||
|
|
||||||
## Solution
|
## Solution
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
using System.Text;
|
using System.Text;
|
||||||
using Socialize.Api.Data;
|
using Socialize.Api.Data;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
using Socialize.Api.Infrastructure.Security;
|
using Socialize.Api.Infrastructure.Security;
|
||||||
using Microsoft.EntityFrameworkCore;
|
using Microsoft.EntityFrameworkCore;
|
||||||
using Microsoft.AspNetCore.Authentication;
|
using Microsoft.AspNetCore.Authentication;
|
||||||
@@ -20,7 +21,10 @@ internal static class ApplicationRegistration
|
|||||||
services.AddHttpContextAccessor();
|
services.AddHttpContextAccessor();
|
||||||
|
|
||||||
services.AddHealthChecks()
|
services.AddHealthChecks()
|
||||||
.AddDbContextCheck<AppDbContext>();
|
.AddCheck("self", () => Microsoft.Extensions.Diagnostics.HealthChecks.HealthCheckResult.Healthy(), tags: ["live"])
|
||||||
|
.AddDbContextCheck<AppDbContext>("postgres", tags: ["ready"])
|
||||||
|
.AddCheck<LocalBlobStorageHealthCheck>("local_blob_storage", tags: ["ready"])
|
||||||
|
.AddCheck<EmailerConfigurationHealthCheck>("emailer_configuration", tags: ["ready"]);
|
||||||
|
|
||||||
services.AddHttpClient();
|
services.AddHttpClient();
|
||||||
services.AddScoped<AccessScopeService>();
|
services.AddScoped<AccessScopeService>();
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
using Socialize.Api.Infrastructure.BlobStorage.Configuration;
|
using Socialize.Api.Infrastructure.BlobStorage.Configuration;
|
||||||
using Socialize.Api.Infrastructure.BlobStorage.Contracts;
|
using Socialize.Api.Infrastructure.BlobStorage.Contracts;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
|
|
||||||
namespace Socialize.Api.Infrastructure.BlobStorage.Services;
|
namespace Socialize.Api.Infrastructure.BlobStorage.Services;
|
||||||
|
|
||||||
@@ -8,7 +9,8 @@ internal sealed class LocalBlobStorage(
|
|||||||
IWebHostEnvironment environment,
|
IWebHostEnvironment environment,
|
||||||
IHttpContextAccessor httpContextAccessor,
|
IHttpContextAccessor httpContextAccessor,
|
||||||
IOptions<LocalBlobStorageOptions> options,
|
IOptions<LocalBlobStorageOptions> options,
|
||||||
ILogger<LocalBlobStorage> logger)
|
ILogger<LocalBlobStorage> logger,
|
||||||
|
SocializeMetrics metrics)
|
||||||
: IBlobStorage
|
: IBlobStorage
|
||||||
{
|
{
|
||||||
private const long MaxUploadSize = 10 * 1024 * 1024;
|
private const long MaxUploadSize = 10 * 1024 * 1024;
|
||||||
@@ -30,6 +32,8 @@ internal sealed class LocalBlobStorage(
|
|||||||
Stream stream,
|
Stream stream,
|
||||||
string contentType,
|
string contentType,
|
||||||
CancellationToken ct = default)
|
CancellationToken ct = default)
|
||||||
|
{
|
||||||
|
try
|
||||||
{
|
{
|
||||||
stream.Position = 0;
|
stream.Position = 0;
|
||||||
|
|
||||||
@@ -55,14 +59,33 @@ internal sealed class LocalBlobStorage(
|
|||||||
|
|
||||||
string fileUri = BuildPublicUrl(relativePath);
|
string fileUri = BuildPublicUrl(relativePath);
|
||||||
LogUploadedFile(logger, blobName, containerName, contentType, fileUri, null);
|
LogUploadedFile(logger, blobName, containerName, contentType, fileUri, null);
|
||||||
|
metrics.RecordBlobStorageOperation("upload", true);
|
||||||
|
|
||||||
return fileUri;
|
return fileUri;
|
||||||
}
|
}
|
||||||
|
catch (InvalidOperationException)
|
||||||
|
{
|
||||||
|
metrics.RecordBlobStorageOperation("upload", false);
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
catch (IOException)
|
||||||
|
{
|
||||||
|
metrics.RecordBlobStorageOperation("upload", false);
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
catch (UnauthorizedAccessException)
|
||||||
|
{
|
||||||
|
metrics.RecordBlobStorageOperation("upload", false);
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public async Task<MemoryStream> DownloadFileAsync(
|
public async Task<MemoryStream> DownloadFileAsync(
|
||||||
string containerName,
|
string containerName,
|
||||||
string blobName,
|
string blobName,
|
||||||
CancellationToken ct = default)
|
CancellationToken ct = default)
|
||||||
|
{
|
||||||
|
try
|
||||||
{
|
{
|
||||||
string filePath = Path.Combine(GetRootPath(), GetSafeRelativePath(containerName, blobName));
|
string filePath = Path.Combine(GetRootPath(), GetSafeRelativePath(containerName, blobName));
|
||||||
|
|
||||||
@@ -75,9 +98,31 @@ internal sealed class LocalBlobStorage(
|
|||||||
await using FileStream fileStream = File.OpenRead(filePath);
|
await using FileStream fileStream = File.OpenRead(filePath);
|
||||||
await fileStream.CopyToAsync(memoryStream, ct);
|
await fileStream.CopyToAsync(memoryStream, ct);
|
||||||
memoryStream.Position = 0;
|
memoryStream.Position = 0;
|
||||||
|
metrics.RecordBlobStorageOperation("download", true);
|
||||||
|
|
||||||
return memoryStream;
|
return memoryStream;
|
||||||
}
|
}
|
||||||
|
catch (InvalidOperationException)
|
||||||
|
{
|
||||||
|
metrics.RecordBlobStorageOperation("download", false);
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
catch (FileNotFoundException)
|
||||||
|
{
|
||||||
|
metrics.RecordBlobStorageOperation("download", false);
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
catch (IOException)
|
||||||
|
{
|
||||||
|
metrics.RecordBlobStorageOperation("download", false);
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
catch (UnauthorizedAccessException)
|
||||||
|
{
|
||||||
|
metrics.RecordBlobStorageOperation("download", false);
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
internal string GetRootPath()
|
internal string GetRootPath()
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,8 +1,11 @@
|
|||||||
using Socialize.Api.Infrastructure.Emailer.Contracts;
|
using Socialize.Api.Infrastructure.Emailer.Contracts;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
|
|
||||||
namespace Socialize.Api.Infrastructure.Emailer.Services;
|
namespace Socialize.Api.Infrastructure.Emailer.Services;
|
||||||
|
|
||||||
internal class LoggerEmailSender(ILogger<IEmailSender> logger)
|
internal class LoggerEmailSender(
|
||||||
|
ILogger<IEmailSender> logger,
|
||||||
|
SocializeMetrics metrics)
|
||||||
: IEmailSender
|
: IEmailSender
|
||||||
{
|
{
|
||||||
private static readonly Action<ILogger, string, string, string, string, Exception?> LogDevelopmentEmail =
|
private static readonly Action<ILogger, string, string, string, string, Exception?> LogDevelopmentEmail =
|
||||||
@@ -14,6 +17,7 @@ internal class LoggerEmailSender(ILogger<IEmailSender> logger)
|
|||||||
public Task SendEmailAsync(string email, string subject, string message)
|
public Task SendEmailAsync(string email, string subject, string message)
|
||||||
{
|
{
|
||||||
LogDevelopmentEmail(logger, email, subject, Environment.NewLine, message, null);
|
LogDevelopmentEmail(logger, email, subject, Environment.NewLine, message, null);
|
||||||
|
metrics.RecordEmailDelivery("logger", true);
|
||||||
|
|
||||||
return Task.CompletedTask;
|
return Task.CompletedTask;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ using System.Text;
|
|||||||
using System.Text.Json;
|
using System.Text.Json;
|
||||||
using Socialize.Api.Infrastructure.Emailer.Configuration;
|
using Socialize.Api.Infrastructure.Emailer.Configuration;
|
||||||
using Socialize.Api.Infrastructure.Emailer.Contracts;
|
using Socialize.Api.Infrastructure.Emailer.Contracts;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
|
|
||||||
namespace Socialize.Api.Infrastructure.Emailer.Services;
|
namespace Socialize.Api.Infrastructure.Emailer.Services;
|
||||||
@@ -11,13 +12,16 @@ internal class ResendEmailSender : IEmailSender
|
|||||||
{
|
{
|
||||||
private static readonly Uri EndpointUri = new("https://api.resend.com/emails");
|
private static readonly Uri EndpointUri = new("https://api.resend.com/emails");
|
||||||
private readonly HttpClient _httpClient;
|
private readonly HttpClient _httpClient;
|
||||||
|
private readonly SocializeMetrics _metrics;
|
||||||
private readonly EmailerOptions _options;
|
private readonly EmailerOptions _options;
|
||||||
|
|
||||||
public ResendEmailSender(
|
public ResendEmailSender(
|
||||||
IHttpClientFactory httpClientFactory,
|
IHttpClientFactory httpClientFactory,
|
||||||
IOptions<EmailerOptions> options)
|
IOptions<EmailerOptions> options,
|
||||||
|
SocializeMetrics metrics)
|
||||||
{
|
{
|
||||||
_httpClient = httpClientFactory.CreateClient();
|
_httpClient = httpClientFactory.CreateClient();
|
||||||
|
_metrics = metrics;
|
||||||
_options = options.Value;
|
_options = options.Value;
|
||||||
|
|
||||||
string apiKey = NormalizeApiKey(_options.ApiKey);
|
string apiKey = NormalizeApiKey(_options.ApiKey);
|
||||||
@@ -49,6 +53,8 @@ internal class ResendEmailSender : IEmailSender
|
|||||||
|
|
||||||
string json = JsonSerializer.Serialize(payload);
|
string json = JsonSerializer.Serialize(payload);
|
||||||
using StringContent content = new(json, Encoding.UTF8, "application/json");
|
using StringContent content = new(json, Encoding.UTF8, "application/json");
|
||||||
|
try
|
||||||
|
{
|
||||||
using HttpResponseMessage response = await _httpClient.PostAsync(EndpointUri, content);
|
using HttpResponseMessage response = await _httpClient.PostAsync(EndpointUri, content);
|
||||||
|
|
||||||
if (!response.IsSuccessStatusCode)
|
if (!response.IsSuccessStatusCode)
|
||||||
@@ -57,6 +63,24 @@ internal class ResendEmailSender : IEmailSender
|
|||||||
throw new InvalidOperationException(
|
throw new InvalidOperationException(
|
||||||
$"Resend email failed: {response.StatusCode} - {body}");
|
$"Resend email failed: {response.StatusCode} - {body}");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_metrics.RecordEmailDelivery("resend", true);
|
||||||
|
}
|
||||||
|
catch (HttpRequestException)
|
||||||
|
{
|
||||||
|
_metrics.RecordEmailDelivery("resend", false);
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
catch (TaskCanceledException)
|
||||||
|
{
|
||||||
|
_metrics.RecordEmailDelivery("resend", false);
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
catch (InvalidOperationException)
|
||||||
|
{
|
||||||
|
_metrics.RecordEmailDelivery("resend", false);
|
||||||
|
throw;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static string NormalizeApiKey(string? apiKey)
|
private static string NormalizeApiKey(string? apiKey)
|
||||||
|
|||||||
@@ -0,0 +1,29 @@
|
|||||||
|
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
|
using Socialize.Api.Infrastructure.Emailer.Configuration;
|
||||||
|
|
||||||
|
namespace Socialize.Api.Infrastructure.Observability;
|
||||||
|
|
||||||
|
/// <summary>
/// Health check that reports whether the emailer configuration is usable.
/// </summary>
/// <remarks>
/// In the Development environment the check is always healthy and the emailer options are
/// not read. In every other environment both <c>ApiKey</c> and <c>FromEmail</c> must be
/// non-blank. The check never contacts the email provider.
/// </remarks>
internal sealed class EmailerConfigurationHealthCheck(
    IWebHostEnvironment environment,
    IOptions<EmailerOptions> options)
    : IHealthCheck
{
    /// <summary>Evaluates the emailer configuration synchronously and wraps the outcome in a completed task.</summary>
    /// <param name="context">Health check context supplied by the framework (unused).</param>
    /// <param name="cancellationToken">Cancellation token supplied by the framework (unused; no I/O is performed).</param>
    /// <returns>A completed task holding the healthy or unhealthy result.</returns>
    public Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
        => Task.FromResult(Evaluate());

    /// <summary>Produces the health result for the current environment and options.</summary>
    private HealthCheckResult Evaluate()
    {
        if (environment.IsDevelopment())
        {
            return HealthCheckResult.Healthy("Development email sender logs email instead of delivering it.");
        }

        // Options are resolved only outside Development, exactly as the check requires them.
        EmailerOptions settings = options.Value;
        bool isComplete =
            !string.IsNullOrWhiteSpace(settings.ApiKey) &&
            !string.IsNullOrWhiteSpace(settings.FromEmail);

        return isComplete
            ? HealthCheckResult.Healthy("Emailer configuration is present.")
            : HealthCheckResult.Unhealthy("Emailer API key or from address is missing.");
    }
}
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
|
using Socialize.Api.Infrastructure.BlobStorage.Configuration;
|
||||||
|
using Socialize.Api.Infrastructure.BlobStorage.Services;
|
||||||
|
|
||||||
|
namespace Socialize.Api.Infrastructure.Observability;
|
||||||
|
|
||||||
|
/// <summary>
/// Health check that verifies the local blob storage root is configured and writable.
/// </summary>
/// <remarks>
/// The check fails fast when <c>RequestPath</c> is blank. Otherwise it ensures the root
/// directory exists, writes a small probe file into it and removes the probe again.
/// </remarks>
internal sealed class LocalBlobStorageHealthCheck(
    LocalBlobStorage blobStorage,
    IOptions<LocalBlobStorageOptions> options)
    : IHealthCheck
{
    /// <summary>Probes the blob storage root by writing and deleting a temporary file.</summary>
    /// <param name="context">Health check context supplied by the framework (unused).</param>
    /// <param name="cancellationToken">Token that cancels the probe write.</param>
    /// <returns>
    /// Healthy when the probe file could be written and removed; unhealthy when the request
    /// path is not configured or an <see cref="IOException"/> or
    /// <see cref="UnauthorizedAccessException"/> occurs while probing.
    /// </returns>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        string rootPath = blobStorage.GetRootPath();
        if (string.IsNullOrWhiteSpace(options.Value.RequestPath))
        {
            return HealthCheckResult.Unhealthy("Local blob storage request path is not configured.");
        }

        try
        {
            Directory.CreateDirectory(rootPath);

            // A unique name per invocation keeps overlapping probes (several health endpoints
            // polled at the same time) from writing and deleting each other's file, which
            // would surface as a spurious "not writable" result.
            string probePath = Path.Combine(rootPath, $".healthcheck-{Guid.NewGuid():N}");
            try
            {
                await File.WriteAllTextAsync(
                    probePath,
                    DateTimeOffset.UtcNow.ToString("O", System.Globalization.CultureInfo.InvariantCulture),
                    cancellationToken);
            }
            finally
            {
                // Always clean up so a failed or cancelled write does not leave probe files behind.
                File.Delete(probePath);
            }

            return HealthCheckResult.Healthy("Local blob storage is writable.");
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            return HealthCheckResult.Unhealthy("Local blob storage is not writable.", ex);
        }
    }
}
|
||||||
@@ -0,0 +1,161 @@
|
|||||||
|
using System.Text.Json;
|
||||||
|
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
|
||||||
|
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||||
|
using Npgsql;
|
||||||
|
using OpenTelemetry.Logs;
|
||||||
|
using OpenTelemetry.Metrics;
|
||||||
|
using OpenTelemetry.Resources;
|
||||||
|
using OpenTelemetry.Trace;
|
||||||
|
|
||||||
|
namespace Socialize.Api.Infrastructure.Observability;
|
||||||
|
|
||||||
|
/// <summary>
/// Extension methods that register logging, tracing, metrics and health-check endpoints
/// for the API host.
/// </summary>
internal static class ObservabilityRegistration
{
    private const string DefaultServiceName = "socialize-api";

    /// <summary>
    /// Configures JSON console logging with activity tracking, registers
    /// <see cref="SocializeMetrics"/> as a singleton and sets up OpenTelemetry tracing and metrics.
    /// OTLP exporters are added only when an OTLP endpoint value is present in configuration.
    /// </summary>
    /// <param name="builder">The application builder to configure.</param>
    /// <returns>The same <paramref name="builder"/>, to allow chaining.</returns>
    public static WebApplicationBuilder AddObservability(this WebApplicationBuilder builder)
    {
        ConfigurationManager configuration = builder.Configuration;
        string serviceName = GetConfigurationValue(configuration, "OTEL_SERVICE_NAME", DefaultServiceName);
        string serviceVersion = typeof(Program).Assembly.GetName().Version?.ToString() ?? "unknown";
        bool exportToOtlp = HasOtlpEndpoint(configuration);

        ConfigureLogging(builder.Logging, serviceName, serviceVersion, exportToOtlp);

        builder.Services.AddSingleton<SocializeMetrics>();
        builder.Services
            .AddOpenTelemetry()
            .ConfigureResource(resource => resource.AddService(serviceName, serviceVersion: serviceVersion))
            .WithTracing(tracing =>
            {
                tracing.AddSource(SocializeMetrics.ActivitySourceName);
                tracing.AddAspNetCoreInstrumentation(aspNetCore => aspNetCore.RecordException = true);
                tracing.AddHttpClientInstrumentation();
                tracing.AddNpgsql();

                if (exportToOtlp)
                {
                    tracing.AddOtlpExporter();
                }
            })
            .WithMetrics(metrics =>
            {
                metrics.AddMeter(SocializeMetrics.MeterName);
                metrics.AddAspNetCoreInstrumentation();
                metrics.AddHttpClientInstrumentation();
                metrics.AddRuntimeInstrumentation();

                if (exportToOtlp)
                {
                    metrics.AddOtlpExporter();
                }
            });

        return builder;
    }

    /// <summary>Adds <see cref="RequestLoggingScopeMiddleware"/> to the request pipeline.</summary>
    /// <param name="app">The application pipeline builder.</param>
    /// <returns>The pipeline builder returned by the middleware registration.</returns>
    public static IApplicationBuilder UseObservabilityLoggingScope(this IApplicationBuilder app)
        => app.UseMiddleware<RequestLoggingScopeMiddleware>();

    /// <summary>
    /// Maps three JSON health endpoints: <c>/health</c> (every registered check),
    /// <c>/health/live</c> (checks tagged <c>live</c>) and <c>/health/ready</c>
    /// (checks tagged <c>ready</c>).
    /// </summary>
    /// <param name="endpoints">The endpoint route builder.</param>
    /// <returns>The same <paramref name="endpoints"/>, to allow chaining.</returns>
    public static IEndpointRouteBuilder MapObservabilityHealthChecks(this IEndpointRouteBuilder endpoints)
    {
        endpoints.MapHealthChecks(
            "/health",
            new HealthCheckOptions { ResponseWriter = WriteHealthResponseAsync });
        endpoints.MapHealthChecks("/health/live", CreateTaggedOptions("live"));
        endpoints.MapHealthChecks("/health/ready", CreateTaggedOptions("ready"));

        return endpoints;
    }

    /// <summary>Applies activity tracking, JSON console output and, when enabled, OTLP log export.</summary>
    private static void ConfigureLogging(
        ILoggingBuilder logging,
        string serviceName,
        string serviceVersion,
        bool exportToOtlp)
    {
        logging.Configure(factory =>
            factory.ActivityTrackingOptions =
                ActivityTrackingOptions.TraceId |
                ActivityTrackingOptions.SpanId |
                ActivityTrackingOptions.ParentId);

        logging.AddJsonConsole(console =>
        {
            console.IncludeScopes = true;
            console.TimestampFormat = "yyyy-MM-ddTHH:mm:ss.fffZ";
            console.UseUtcTimestamp = true;
            console.JsonWriterOptions = new JsonWriterOptions { Indented = false };
        });

        if (!exportToOtlp)
        {
            return;
        }

        logging.AddOpenTelemetry(openTelemetry =>
        {
            openTelemetry.IncludeFormattedMessage = true;
            openTelemetry.IncludeScopes = true;
            openTelemetry.ParseStateValues = true;
            openTelemetry.SetResourceBuilder(BuildResource(serviceName, serviceVersion));
            openTelemetry.AddOtlpExporter();
        });
    }

    /// <summary>Builds health-check options that run only the checks carrying <paramref name="tag"/>.</summary>
    private static HealthCheckOptions CreateTaggedOptions(string tag)
    {
        return new HealthCheckOptions
        {
            Predicate = registration => registration.Tags.Contains(tag, StringComparer.Ordinal),
            ResponseWriter = WriteHealthResponseAsync,
        };
    }

    /// <summary>Creates the resource description (service name and version) attached to exported logs.</summary>
    private static ResourceBuilder BuildResource(string serviceName, string serviceVersion)
    {
        ResourceBuilder resource = ResourceBuilder.CreateDefault();
        return resource.AddService(serviceName, serviceVersion: serviceVersion);
    }

    /// <summary>
    /// Returns true when either <c>OTEL_EXPORTER_OTLP_ENDPOINT</c> or <c>Otlp:Endpoint</c>
    /// holds a non-blank value.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the exporters above are added with default options, so a value supplied
    /// only through <c>Otlp:Endpoint</c> enables export but is not passed to the exporter
    /// here - confirm that is intended.
    /// </remarks>
    private static bool HasOtlpEndpoint(ConfigurationManager configuration)
    {
        string[] endpointKeys = ["OTEL_EXPORTER_OTLP_ENDPOINT", "Otlp:Endpoint"];
        return endpointKeys.Any(key => !string.IsNullOrWhiteSpace(configuration[key]));
    }

    /// <summary>Reads a configuration value, substituting <paramref name="fallback"/> when it is missing or blank.</summary>
    private static string GetConfigurationValue(
        ConfigurationManager configuration,
        string key,
        string fallback)
    {
        string? configured = configuration[key];
        if (string.IsNullOrWhiteSpace(configured))
        {
            return fallback;
        }

        return configured;
    }

    /// <summary>
    /// Writes the health report as JSON: overall status, one entry per check (name, status,
    /// description, duration in milliseconds) and the total duration in milliseconds.
    /// </summary>
    private static async Task WriteHealthResponseAsync(HttpContext context, HealthReport report)
    {
        context.Response.ContentType = "application/json";

        var checks = report.Entries.Select(entry => new
        {
            name = entry.Key,
            status = entry.Value.Status.ToString(),
            description = entry.Value.Description,
            duration = entry.Value.Duration.TotalMilliseconds,
        });

        var payload = new
        {
            status = report.Status.ToString(),
            checks,
            duration = report.TotalDuration.TotalMilliseconds,
        };

        await JsonSerializer.SerializeAsync(
            context.Response.Body,
            payload,
            cancellationToken: context.RequestAborted);
    }
}
|
||||||
@@ -0,0 +1,61 @@
|
|||||||
|
using System.Diagnostics;
|
||||||
|
using Socialize.Api.Infrastructure.Security;
|
||||||
|
|
||||||
|
namespace Socialize.Api.Infrastructure.Observability;
|
||||||
|
|
||||||
|
/// <summary>
/// Middleware that opens a logging scope for each request so log entries written further
/// down the pipeline carry trace, request, user and tenant identifiers.
/// </summary>
internal sealed class RequestLoggingScopeMiddleware(
    RequestDelegate next,
    ILogger<RequestLoggingScopeMiddleware> logger)
{
    // Scope key -> route/query parameter name, in the order the entries are added to the scope.
    private static readonly (string ScopeKey, string RequestKey)[] IdentifierKeys =
    [
        ("organization.id", "organizationId"),
        ("workspace.id", "workspaceId"),
        ("client.id", "clientId"),
        ("campaign.id", "campaignId"),
        ("content_item.id", "contentItemId"),
    ];

    /// <summary>Builds the scope dictionary, begins the scope and invokes the next middleware inside it.</summary>
    /// <param name="context">The current HTTP context.</param>
    public async Task InvokeAsync(HttpContext context)
    {
        Activity? activity = Activity.Current;

        // Without an ambient activity the ASP.NET Core trace identifier stands in for the trace id.
        Dictionary<string, object?> scope = new()
        {
            ["trace_id"] = activity?.TraceId.ToString() ?? context.TraceIdentifier,
            ["span_id"] = activity?.SpanId.ToString(),
            ["http.method"] = context.Request.Method,
            ["url.path"] = context.Request.Path.Value,
        };

        if (context.User.Identity?.IsAuthenticated == true)
        {
            scope["user.id"] = context.User.GetUserId();
            scope["user.email"] = context.User.GetEmail();
        }

        foreach ((string scopeKey, string requestKey) in IdentifierKeys)
        {
            AddGuidIfPresent(scope, scopeKey, context, requestKey);
        }

        using (logger.BeginScope(scope))
        {
            await next(context);
        }
    }

    /// <summary>
    /// Adds <paramref name="scopeKey"/> to the scope when the request carries a route or query
    /// value named <paramref name="requestKey"/> that parses as a <see cref="Guid"/>.
    /// </summary>
    private static void AddGuidIfPresent(
        Dictionary<string, object?> scope,
        string scopeKey,
        HttpContext context,
        string requestKey)
    {
        string? raw = GetRouteOrQueryValue(context, requestKey);
        if (!Guid.TryParse(raw, out Guid parsed))
        {
            return;
        }

        scope[scopeKey] = parsed;
    }

    /// <summary>Returns the route value for <paramref name="key"/> when present, otherwise the query-string value, otherwise null.</summary>
    private static string? GetRouteOrQueryValue(HttpContext context, string key)
    {
        HttpRequest request = context.Request;

        if (request.RouteValues.TryGetValue(key, out object? fromRoute) && fromRoute is not null)
        {
            return Convert.ToString(fromRoute, System.Globalization.CultureInfo.InvariantCulture);
        }

        if (request.Query.TryGetValue(key, out Microsoft.Extensions.Primitives.StringValues fromQuery))
        {
            return fromQuery.ToString();
        }

        return null;
    }
}
|
||||||
@@ -0,0 +1,158 @@
|
|||||||
|
using System.Diagnostics;
|
||||||
|
using System.Diagnostics.Metrics;
|
||||||
|
|
||||||
|
namespace Socialize.Api.Infrastructure.Observability;
|
||||||
|
|
||||||
|
/// <summary>
/// Owns the application's <see cref="System.Diagnostics.Metrics.Meter"/> and
/// <see cref="System.Diagnostics.ActivitySource"/> and exposes one method per business
/// counter so callers never deal with instrument or tag names directly.
/// </summary>
/// <remarks>
/// Every identifier tag (<c>organization.id</c>, <c>workspace.id</c>) is recorded as a string,
/// so a given attribute key always carries the same value type regardless of which counter
/// emitted it.
/// </remarks>
internal sealed class SocializeMetrics : IDisposable
{
    public const string MeterName = "Socialize.Api";
    public const string ActivitySourceName = "Socialize.Api";

    private const string OrganizationIdTag = "organization.id";
    private const string WorkspaceIdTag = "workspace.id";
    private const string OutcomeTag = "outcome";

    private readonly Counter<long> _approvalDecisionCounter;
    private readonly Counter<long> _backgroundJobRunCounter;
    private readonly Counter<long> _blobStorageOperationCounter;
    private readonly Counter<long> _commentCreatedCounter;
    private readonly Counter<long> _contentItemCreatedCounter;
    private readonly Counter<long> _emailDeliveryCounter;
    private readonly Counter<long> _feedbackSubmittedCounter;
    private readonly Counter<long> _loginAttemptCounter;
    private readonly Counter<long> _organizationCreatedCounter;
    private readonly Counter<long> _workspaceCreatedCounter;
    private readonly Counter<long> _workspaceInviteCreatedCounter;

    /// <summary>Creates the meter, the activity source and every counter instrument.</summary>
    public SocializeMetrics()
    {
        Meter = new Meter(MeterName);
        ActivitySource = new ActivitySource(ActivitySourceName);

        _loginAttemptCounter = Meter.CreateCounter<long>(
            "socialize.login.attempts",
            description: "Login attempts partitioned by outcome.");
        _organizationCreatedCounter = Meter.CreateCounter<long>(
            "socialize.organizations.created",
            description: "Organizations created.");
        _workspaceCreatedCounter = Meter.CreateCounter<long>(
            "socialize.workspaces.created",
            description: "Workspaces created.");
        _contentItemCreatedCounter = Meter.CreateCounter<long>(
            "socialize.content_items.created",
            description: "Content items created.");
        _commentCreatedCounter = Meter.CreateCounter<long>(
            "socialize.comments.created",
            description: "Comments created.");
        _approvalDecisionCounter = Meter.CreateCounter<long>(
            "socialize.approval_decisions.submitted",
            description: "Approval decisions submitted.");
        _feedbackSubmittedCounter = Meter.CreateCounter<long>(
            "socialize.feedback.submitted",
            description: "Feedback reports submitted.");
        _workspaceInviteCreatedCounter = Meter.CreateCounter<long>(
            "socialize.workspace_invites.created",
            description: "Workspace invites created.");
        _emailDeliveryCounter = Meter.CreateCounter<long>(
            "socialize.email.delivery",
            description: "Email delivery attempts partitioned by outcome and provider.");
        _blobStorageOperationCounter = Meter.CreateCounter<long>(
            "socialize.blob_storage.operations",
            description: "Blob storage operations partitioned by operation and outcome.");
        _backgroundJobRunCounter = Meter.CreateCounter<long>(
            "socialize.background_job.runs",
            description: "Background job runs partitioned by job and outcome.");
    }

    /// <summary>The meter that owns every counter created by this class.</summary>
    public Meter Meter { get; }

    /// <summary>The activity source created alongside the meter.</summary>
    public ActivitySource ActivitySource { get; }

    /// <summary>Counts one login attempt, tagged with its outcome and the supplied reason.</summary>
    public void RecordLoginAttempt(bool succeeded, string reason)
    {
        _loginAttemptCounter.Add(1, Outcome(succeeded), Tag("reason", reason));
    }

    /// <summary>Counts one created organization.</summary>
    public void RecordOrganizationCreated(Guid organizationId)
    {
        _organizationCreatedCounter.Add(1, Id(OrganizationIdTag, organizationId));
    }

    /// <summary>Counts one created workspace within an organization.</summary>
    public void RecordWorkspaceCreated(Guid organizationId, Guid workspaceId)
    {
        _workspaceCreatedCounter.Add(
            1,
            Id(OrganizationIdTag, organizationId),
            Id(WorkspaceIdTag, workspaceId));
    }

    /// <summary>Counts one created content item.</summary>
    public void RecordContentItemCreated(Guid workspaceId)
    {
        _contentItemCreatedCounter.Add(1, Id(WorkspaceIdTag, workspaceId));
    }

    /// <summary>Counts one created comment and whether it carried an attachment.</summary>
    public void RecordCommentCreated(Guid workspaceId, bool hasAttachment)
    {
        _commentCreatedCounter.Add(
            1,
            Id(WorkspaceIdTag, workspaceId),
            Tag("has_attachment", hasAttachment));
    }

    /// <summary>Counts one submitted approval decision.</summary>
    public void RecordApprovalDecisionSubmitted(Guid workspaceId, string decision)
    {
        _approvalDecisionCounter.Add(
            1,
            Id(WorkspaceIdTag, workspaceId),
            Tag("decision", decision));
    }

    /// <summary>
    /// Counts one submitted feedback report. A missing workspace is recorded as the literal
    /// <c>none</c>.
    /// </summary>
    public void RecordFeedbackSubmitted(string type, Guid? workspaceId)
    {
        _feedbackSubmittedCounter.Add(
            1,
            Tag("feedback.type", type),
            Tag(WorkspaceIdTag, workspaceId?.ToString() ?? "none"));
    }

    /// <summary>Counts one created workspace invite for the given role.</summary>
    public void RecordWorkspaceInviteCreated(Guid workspaceId, string role)
    {
        _workspaceInviteCreatedCounter.Add(
            1,
            Id(WorkspaceIdTag, workspaceId),
            Tag("role", role));
    }

    /// <summary>Counts one email delivery attempt for the given provider.</summary>
    public void RecordEmailDelivery(string provider, bool succeeded)
    {
        _emailDeliveryCounter.Add(1, Tag("provider", provider), Outcome(succeeded));
    }

    /// <summary>Counts one blob storage operation (for example an upload or download).</summary>
    public void RecordBlobStorageOperation(string operation, bool succeeded)
    {
        _blobStorageOperationCounter.Add(1, Tag("operation", operation), Outcome(succeeded));
    }

    /// <summary>Counts one background job run.</summary>
    public void RecordBackgroundJobRun(string job, bool succeeded)
    {
        _backgroundJobRunCounter.Add(1, Tag("job", job), Outcome(succeeded));
    }

    /// <summary>Disposes the meter and the activity source.</summary>
    public void Dispose()
    {
        Meter.Dispose();
        ActivitySource.Dispose();
    }

    private static KeyValuePair<string, object?> Tag(string key, object? value) => new(key, value);

    // Identifiers are emitted in their default string form rather than as boxed Guid values,
    // matching the string already used for the optional workspace id on feedback.
    private static KeyValuePair<string, object?> Id(string key, Guid id) => new(key, id.ToString());

    private static KeyValuePair<string, object?> Outcome(bool succeeded)
        => new(OutcomeTag, succeeded ? "success" : "failure");
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
using FastEndpoints;
|
using FastEndpoints;
|
||||||
using Microsoft.EntityFrameworkCore;
|
using Microsoft.EntityFrameworkCore;
|
||||||
using Socialize.Api.Data;
|
using Socialize.Api.Data;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
using Socialize.Api.Infrastructure.Security;
|
using Socialize.Api.Infrastructure.Security;
|
||||||
using Socialize.Api.Modules.ContentItems.Data;
|
using Socialize.Api.Modules.ContentItems.Data;
|
||||||
using Socialize.Api.Modules.ContentItems.Contracts;
|
using Socialize.Api.Modules.ContentItems.Contracts;
|
||||||
@@ -37,7 +38,8 @@ internal class SubmitApprovalDecisionHandler(
|
|||||||
AccessScopeService accessScopeService,
|
AccessScopeService accessScopeService,
|
||||||
ApprovalWorkflowRuntimeService approvalWorkflowRuntimeService,
|
ApprovalWorkflowRuntimeService approvalWorkflowRuntimeService,
|
||||||
IContentItemActivityWriter activityWriter,
|
IContentItemActivityWriter activityWriter,
|
||||||
INotificationEventWriter notificationEventWriter)
|
INotificationEventWriter notificationEventWriter,
|
||||||
|
SocializeMetrics metrics)
|
||||||
: Endpoint<SubmitApprovalDecisionRequest, ApprovalRequestDto>
|
: Endpoint<SubmitApprovalDecisionRequest, ApprovalRequestDto>
|
||||||
{
|
{
|
||||||
public override void Configure()
|
public override void Configure()
|
||||||
@@ -157,6 +159,7 @@ internal class SubmitApprovalDecisionHandler(
|
|||||||
$$"""{"stage":"{{approval.Stage}}","status":"{{contentItem.Status}}"}"""),
|
$$"""{"stage":"{{approval.Stage}}","status":"{{contentItem.Status}}"}"""),
|
||||||
ct);
|
ct);
|
||||||
}
|
}
|
||||||
|
metrics.RecordApprovalDecisionSubmitted(approval.WorkspaceId, normalizedDecision);
|
||||||
|
|
||||||
List<ApprovalDecision> decisions = await dbContext.ApprovalDecisions
|
List<ApprovalDecision> decisions = await dbContext.ApprovalDecisions
|
||||||
.Where(candidate => candidate.ApprovalRequestId == approval.Id)
|
.Where(candidate => candidate.ApprovalRequestId == approval.Id)
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
|
|
||||||
namespace Socialize.Api.Modules.CalendarIntegrations.Services;
|
namespace Socialize.Api.Modules.CalendarIntegrations.Services;
|
||||||
|
|
||||||
internal sealed class CalendarImportBackgroundService(
|
internal sealed class CalendarImportBackgroundService(
|
||||||
IServiceScopeFactory scopeFactory,
|
IServiceScopeFactory scopeFactory,
|
||||||
|
SocializeMetrics metrics,
|
||||||
ILogger<CalendarImportBackgroundService> logger)
|
ILogger<CalendarImportBackgroundService> logger)
|
||||||
: BackgroundService
|
: BackgroundService
|
||||||
{
|
{
|
||||||
@@ -22,6 +25,7 @@ internal sealed class CalendarImportBackgroundService(
|
|||||||
using IServiceScope scope = scopeFactory.CreateScope();
|
using IServiceScope scope = scopeFactory.CreateScope();
|
||||||
CalendarImportSyncService syncService = scope.ServiceProvider.GetRequiredService<CalendarImportSyncService>();
|
CalendarImportSyncService syncService = scope.ServiceProvider.GetRequiredService<CalendarImportSyncService>();
|
||||||
await syncService.RefreshDueSourcesAsync(stoppingToken);
|
await syncService.RefreshDueSourcesAsync(stoppingToken);
|
||||||
|
metrics.RecordBackgroundJobRun(nameof(CalendarImportBackgroundService), true);
|
||||||
}
|
}
|
||||||
catch (OperationCanceledException ex) when (stoppingToken.IsCancellationRequested)
|
catch (OperationCanceledException ex) when (stoppingToken.IsCancellationRequested)
|
||||||
{
|
{
|
||||||
@@ -30,6 +34,7 @@ internal sealed class CalendarImportBackgroundService(
|
|||||||
#pragma warning disable CA1031 // Background service should log and continue after unexpected sync failures.
|
#pragma warning disable CA1031 // Background service should log and continue after unexpected sync failures.
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
|
metrics.RecordBackgroundJobRun(nameof(CalendarImportBackgroundService), false);
|
||||||
logger.LogError(ex, "Calendar import background sync failed.");
|
logger.LogError(ex, "Calendar import background sync failed.");
|
||||||
}
|
}
|
||||||
#pragma warning restore CA1031
|
#pragma warning restore CA1031
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ using FastEndpoints;
|
|||||||
using Microsoft.EntityFrameworkCore;
|
using Microsoft.EntityFrameworkCore;
|
||||||
using Socialize.Api.Data;
|
using Socialize.Api.Data;
|
||||||
using Socialize.Api.Infrastructure.BlobStorage.Contracts;
|
using Socialize.Api.Infrastructure.BlobStorage.Contracts;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
using Socialize.Api.Infrastructure.Security;
|
using Socialize.Api.Infrastructure.Security;
|
||||||
using Socialize.Api.Modules.ContentItems.Contracts;
|
using Socialize.Api.Modules.ContentItems.Contracts;
|
||||||
using Socialize.Api.Modules.ContentItems.Data;
|
using Socialize.Api.Modules.ContentItems.Data;
|
||||||
@@ -34,7 +35,8 @@ internal class CreateCommentHandler(
|
|||||||
AccessScopeService accessScopeService,
|
AccessScopeService accessScopeService,
|
||||||
IBlobStorage blobStorage,
|
IBlobStorage blobStorage,
|
||||||
IContentItemActivityWriter activityWriter,
|
IContentItemActivityWriter activityWriter,
|
||||||
INotificationEventWriter notificationEventWriter)
|
INotificationEventWriter notificationEventWriter,
|
||||||
|
SocializeMetrics metrics)
|
||||||
: Endpoint<CreateCommentRequest, CommentDto>
|
: Endpoint<CreateCommentRequest, CommentDto>
|
||||||
{
|
{
|
||||||
public override void Configure()
|
public override void Configure()
|
||||||
@@ -156,6 +158,7 @@ internal class CreateCommentHandler(
|
|||||||
|
|
||||||
dbContext.Comments.Add(comment);
|
dbContext.Comments.Add(comment);
|
||||||
await dbContext.SaveChangesAsync(ct);
|
await dbContext.SaveChangesAsync(ct);
|
||||||
|
metrics.RecordCommentCreated(comment.WorkspaceId, comment.AttachmentBlobName is not null);
|
||||||
|
|
||||||
string? authorPortraitUrl = await dbContext.Users
|
string? authorPortraitUrl = await dbContext.Users
|
||||||
.Where(candidate => candidate.Id == comment.AuthorUserId)
|
.Where(candidate => candidate.Id == comment.AuthorUserId)
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
using FastEndpoints;
|
using FastEndpoints;
|
||||||
using Microsoft.EntityFrameworkCore;
|
using Microsoft.EntityFrameworkCore;
|
||||||
using Socialize.Api.Data;
|
using Socialize.Api.Data;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
using Socialize.Api.Infrastructure.Security;
|
using Socialize.Api.Infrastructure.Security;
|
||||||
using Socialize.Api.Modules.ContentItems.Contracts;
|
using Socialize.Api.Modules.ContentItems.Contracts;
|
||||||
using Socialize.Api.Modules.Notifications.Contracts;
|
using Socialize.Api.Modules.Notifications.Contracts;
|
||||||
@@ -39,7 +40,8 @@ internal class CreateContentItemHandler(
|
|||||||
AppDbContext dbContext,
|
AppDbContext dbContext,
|
||||||
AccessScopeService accessScopeService,
|
AccessScopeService accessScopeService,
|
||||||
IContentItemActivityWriter activityWriter,
|
IContentItemActivityWriter activityWriter,
|
||||||
INotificationEventWriter notificationEventWriter)
|
INotificationEventWriter notificationEventWriter,
|
||||||
|
SocializeMetrics metrics)
|
||||||
: Endpoint<CreateContentItemRequest, ContentItemDto>
|
: Endpoint<CreateContentItemRequest, ContentItemDto>
|
||||||
{
|
{
|
||||||
public override void Configure()
|
public override void Configure()
|
||||||
@@ -123,6 +125,7 @@ internal class CreateContentItemHandler(
|
|||||||
CreatedAt = DateTimeOffset.UtcNow,
|
CreatedAt = DateTimeOffset.UtcNow,
|
||||||
});
|
});
|
||||||
await dbContext.SaveChangesAsync(ct);
|
await dbContext.SaveChangesAsync(ct);
|
||||||
|
metrics.RecordContentItemCreated(item.WorkspaceId);
|
||||||
|
|
||||||
await activityWriter.WriteAsync(
|
await activityWriter.WriteAsync(
|
||||||
new ContentItemActivityWriteModel(
|
new ContentItemActivityWriteModel(
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
using FastEndpoints;
|
using FastEndpoints;
|
||||||
using Socialize.Api.Data;
|
using Socialize.Api.Data;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
using Socialize.Api.Infrastructure.Security;
|
using Socialize.Api.Infrastructure.Security;
|
||||||
using Socialize.Api.Modules.Feedback.Contracts;
|
using Socialize.Api.Modules.Feedback.Contracts;
|
||||||
using Socialize.Api.Modules.Feedback.Data;
|
using Socialize.Api.Modules.Feedback.Data;
|
||||||
@@ -45,7 +46,8 @@ internal class SubmitFeedbackRequestValidator
|
|||||||
|
|
||||||
internal class SubmitFeedbackHandler(
|
internal class SubmitFeedbackHandler(
|
||||||
AppDbContext dbContext,
|
AppDbContext dbContext,
|
||||||
FeedbackNotificationService notificationService)
|
FeedbackNotificationService notificationService,
|
||||||
|
SocializeMetrics metrics)
|
||||||
: Endpoint<SubmitFeedbackRequest, FeedbackReportDto>
|
: Endpoint<SubmitFeedbackRequest, FeedbackReportDto>
|
||||||
{
|
{
|
||||||
public override void Configure()
|
public override void Configure()
|
||||||
@@ -93,6 +95,7 @@ internal class SubmitFeedbackHandler(
|
|||||||
dbContext.FeedbackReports.Add(report);
|
dbContext.FeedbackReports.Add(report);
|
||||||
await notificationService.AddNewReportNotificationsAsync(report, ct);
|
await notificationService.AddNewReportNotificationsAsync(report, ct);
|
||||||
await dbContext.SaveChangesAsync(ct);
|
await dbContext.SaveChangesAsync(ct);
|
||||||
|
metrics.RecordFeedbackSubmitted(report.Type.ToString(), report.WorkspaceId);
|
||||||
|
|
||||||
await SendAsync(report.ToDto(), StatusCodes.Status201Created, ct);
|
await SendAsync(report.ToDto(), StatusCodes.Status201Created, ct);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
using FastEndpoints;
|
using FastEndpoints;
|
||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
using Socialize.Api.Infrastructure.Security;
|
using Socialize.Api.Infrastructure.Security;
|
||||||
using Socialize.Api.Modules.Identity.Data;
|
using Socialize.Api.Modules.Identity.Data;
|
||||||
using Socialize.Api.Modules.Identity.Configuration;
|
using Socialize.Api.Modules.Identity.Configuration;
|
||||||
@@ -21,7 +22,8 @@ internal record LoginResponse(
|
|||||||
internal class LoginHandler(
|
internal class LoginHandler(
|
||||||
UserManager userManager,
|
UserManager userManager,
|
||||||
IOptionsSnapshot<JwtOptions> jwtOptions,
|
IOptionsSnapshot<JwtOptions> jwtOptions,
|
||||||
AccessTokenFactory accessTokenFactory)
|
AccessTokenFactory accessTokenFactory,
|
||||||
|
SocializeMetrics metrics)
|
||||||
: Endpoint<LoginRequest, LoginResponse>
|
: Endpoint<LoginRequest, LoginResponse>
|
||||||
{
|
{
|
||||||
public override void Configure()
|
public override void Configure()
|
||||||
@@ -40,6 +42,7 @@ internal class LoginHandler(
|
|||||||
user ??= await userManager.FindByNameAsync(request.Email);
|
user ??= await userManager.FindByNameAsync(request.Email);
|
||||||
if (user is null)
|
if (user is null)
|
||||||
{
|
{
|
||||||
|
metrics.RecordLoginAttempt(false, "unknown_user");
|
||||||
await SendStringAsync(
|
await SendStringAsync(
|
||||||
"Invalid email or password",
|
"Invalid email or password",
|
||||||
401,
|
401,
|
||||||
@@ -51,6 +54,7 @@ internal class LoginHandler(
|
|||||||
bool isPasswordValid = await userManager.CheckPasswordAsync(user, request.Password);
|
bool isPasswordValid = await userManager.CheckPasswordAsync(user, request.Password);
|
||||||
if (!isPasswordValid)
|
if (!isPasswordValid)
|
||||||
{
|
{
|
||||||
|
metrics.RecordLoginAttempt(false, "invalid_password");
|
||||||
await SendStringAsync(
|
await SendStringAsync(
|
||||||
"Invalid email or password",
|
"Invalid email or password",
|
||||||
401,
|
401,
|
||||||
@@ -61,6 +65,7 @@ internal class LoginHandler(
|
|||||||
// Check if the email is confirmed
|
// Check if the email is confirmed
|
||||||
if (!user.EmailConfirmed)
|
if (!user.EmailConfirmed)
|
||||||
{
|
{
|
||||||
|
metrics.RecordLoginAttempt(false, "email_unconfirmed");
|
||||||
await SendStringAsync(
|
await SendStringAsync(
|
||||||
"Email not verified. Please check your email for verification instructions.",
|
"Email not verified. Please check your email for verification instructions.",
|
||||||
401,
|
401,
|
||||||
@@ -76,6 +81,7 @@ internal class LoginHandler(
|
|||||||
|
|
||||||
// Generate JWT token
|
// Generate JWT token
|
||||||
string accessToken = await accessTokenFactory.CreateAsync(user);
|
string accessToken = await accessTokenFactory.CreateAsync(user);
|
||||||
|
metrics.RecordLoginAttempt(true, "success");
|
||||||
|
|
||||||
await SendOkAsync(
|
await SendOkAsync(
|
||||||
new LoginResponse(accessToken, user.RefreshToken),
|
new LoginResponse(accessToken, user.RefreshToken),
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
using FastEndpoints;
|
using FastEndpoints;
|
||||||
using Microsoft.EntityFrameworkCore;
|
using Microsoft.EntityFrameworkCore;
|
||||||
using Socialize.Api.Data;
|
using Socialize.Api.Data;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
using Socialize.Api.Infrastructure.Security;
|
using Socialize.Api.Infrastructure.Security;
|
||||||
using Socialize.Api.Modules.Organizations.Data;
|
using Socialize.Api.Modules.Organizations.Data;
|
||||||
using Socialize.Api.Modules.Organizations.Services;
|
using Socialize.Api.Modules.Organizations.Services;
|
||||||
@@ -21,7 +22,8 @@ internal class CreateOrganizationRequestValidator
|
|||||||
}
|
}
|
||||||
|
|
||||||
internal class CreateOrganizationHandler(
|
internal class CreateOrganizationHandler(
|
||||||
AppDbContext dbContext)
|
AppDbContext dbContext,
|
||||||
|
SocializeMetrics metrics)
|
||||||
: Endpoint<CreateOrganizationRequest, OrganizationDto>
|
: Endpoint<CreateOrganizationRequest, OrganizationDto>
|
||||||
{
|
{
|
||||||
public override void Configure()
|
public override void Configure()
|
||||||
@@ -66,6 +68,7 @@ internal class CreateOrganizationHandler(
|
|||||||
dbContext.Organizations.Add(organization);
|
dbContext.Organizations.Add(organization);
|
||||||
dbContext.OrganizationMemberships.Add(ownerMembership);
|
dbContext.OrganizationMemberships.Add(ownerMembership);
|
||||||
await dbContext.SaveChangesAsync(ct);
|
await dbContext.SaveChangesAsync(ct);
|
||||||
|
metrics.RecordOrganizationCreated(organization.Id);
|
||||||
|
|
||||||
await SendAsync(
|
await SendAsync(
|
||||||
OrganizationDto.FromOrganization(
|
OrganizationDto.FromOrganization(
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
using Socialize.Api.Modules.ReleaseCommunications.Configuration;
|
using Socialize.Api.Modules.ReleaseCommunications.Configuration;
|
||||||
|
|
||||||
namespace Socialize.Api.Modules.ReleaseCommunications.Services;
|
namespace Socialize.Api.Modules.ReleaseCommunications.Services;
|
||||||
@@ -6,6 +7,7 @@ namespace Socialize.Api.Modules.ReleaseCommunications.Services;
|
|||||||
internal sealed class ReleaseUpdateEmailDigestBackgroundService(
|
internal sealed class ReleaseUpdateEmailDigestBackgroundService(
|
||||||
IServiceScopeFactory scopeFactory,
|
IServiceScopeFactory scopeFactory,
|
||||||
IOptions<ReleaseCommunicationEmailOptions> options,
|
IOptions<ReleaseCommunicationEmailOptions> options,
|
||||||
|
SocializeMetrics metrics,
|
||||||
ILogger<ReleaseUpdateEmailDigestBackgroundService> logger)
|
ILogger<ReleaseUpdateEmailDigestBackgroundService> logger)
|
||||||
: BackgroundService
|
: BackgroundService
|
||||||
{
|
{
|
||||||
@@ -42,6 +44,7 @@ internal sealed class ReleaseUpdateEmailDigestBackgroundService(
|
|||||||
TimeSpan.FromHours(options.Value.DigestIntervalHours),
|
TimeSpan.FromHours(options.Value.DigestIntervalHours),
|
||||||
force: false,
|
force: false,
|
||||||
ct: stoppingToken);
|
ct: stoppingToken);
|
||||||
|
metrics.RecordBackgroundJobRun(nameof(ReleaseUpdateEmailDigestBackgroundService), true);
|
||||||
if (sentCount > 0 && logger.IsEnabled(LogLevel.Information))
|
if (sentCount > 0 && logger.IsEnabled(LogLevel.Information))
|
||||||
{
|
{
|
||||||
logger.LogInformation("Sent {SentCount} release update digest emails.", sentCount);
|
logger.LogInformation("Sent {SentCount} release update digest emails.", sentCount);
|
||||||
@@ -54,6 +57,7 @@ internal sealed class ReleaseUpdateEmailDigestBackgroundService(
|
|||||||
#pragma warning disable CA1031
|
#pragma warning disable CA1031
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
|
metrics.RecordBackgroundJobRun(nameof(ReleaseUpdateEmailDigestBackgroundService), false);
|
||||||
logger.LogError(ex, "Release update digest service failed.");
|
logger.LogError(ex, "Release update digest service failed.");
|
||||||
}
|
}
|
||||||
#pragma warning restore CA1031
|
#pragma warning restore CA1031
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
using FastEndpoints;
|
using FastEndpoints;
|
||||||
using Microsoft.EntityFrameworkCore;
|
using Microsoft.EntityFrameworkCore;
|
||||||
using Socialize.Api.Data;
|
using Socialize.Api.Data;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
using Socialize.Api.Infrastructure.Security;
|
using Socialize.Api.Infrastructure.Security;
|
||||||
using Socialize.Api.Modules.Workspaces.Data;
|
using Socialize.Api.Modules.Workspaces.Data;
|
||||||
|
|
||||||
@@ -24,7 +25,8 @@ internal class CreateWorkspaceRequestValidator
|
|||||||
|
|
||||||
internal class CreateWorkspaceHandler(
|
internal class CreateWorkspaceHandler(
|
||||||
AppDbContext dbContext,
|
AppDbContext dbContext,
|
||||||
AccessScopeService accessScopeService)
|
AccessScopeService accessScopeService,
|
||||||
|
SocializeMetrics metrics)
|
||||||
: Endpoint<CreateWorkspaceRequest, WorkspaceDto>
|
: Endpoint<CreateWorkspaceRequest, WorkspaceDto>
|
||||||
{
|
{
|
||||||
public override void Configure()
|
public override void Configure()
|
||||||
@@ -65,6 +67,7 @@ internal class CreateWorkspaceHandler(
|
|||||||
|
|
||||||
dbContext.Workspaces.Add(workspace);
|
dbContext.Workspaces.Add(workspace);
|
||||||
await dbContext.SaveChangesAsync(ct);
|
await dbContext.SaveChangesAsync(ct);
|
||||||
|
metrics.RecordWorkspaceCreated(workspace.OrganizationId, workspace.Id);
|
||||||
|
|
||||||
WorkspaceDto dto = WorkspaceDto.FromWorkspace(workspace, []);
|
WorkspaceDto dto = WorkspaceDto.FromWorkspace(workspace, []);
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
using FastEndpoints;
|
using FastEndpoints;
|
||||||
using Microsoft.EntityFrameworkCore;
|
using Microsoft.EntityFrameworkCore;
|
||||||
using Socialize.Api.Data;
|
using Socialize.Api.Data;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
using Socialize.Api.Infrastructure.Security;
|
using Socialize.Api.Infrastructure.Security;
|
||||||
using Socialize.Api.Modules.Identity.Contracts;
|
using Socialize.Api.Modules.Identity.Contracts;
|
||||||
using Socialize.Api.Modules.Workspaces.Data;
|
using Socialize.Api.Modules.Workspaces.Data;
|
||||||
@@ -31,7 +32,8 @@ internal class CreateWorkspaceInviteRequestValidator
|
|||||||
|
|
||||||
internal class CreateWorkspaceInviteHandler(
|
internal class CreateWorkspaceInviteHandler(
|
||||||
AppDbContext dbContext,
|
AppDbContext dbContext,
|
||||||
AccessScopeService accessScopeService)
|
AccessScopeService accessScopeService,
|
||||||
|
SocializeMetrics metrics)
|
||||||
: Endpoint<CreateWorkspaceInviteRequest, WorkspaceInviteDto>
|
: Endpoint<CreateWorkspaceInviteRequest, WorkspaceInviteDto>
|
||||||
{
|
{
|
||||||
public override void Configure()
|
public override void Configure()
|
||||||
@@ -91,6 +93,7 @@ internal class CreateWorkspaceInviteHandler(
|
|||||||
|
|
||||||
dbContext.WorkspaceInvites.Add(invite);
|
dbContext.WorkspaceInvites.Add(invite);
|
||||||
await dbContext.SaveChangesAsync(ct);
|
await dbContext.SaveChangesAsync(ct);
|
||||||
|
metrics.RecordWorkspaceInviteCreated(invite.WorkspaceId, invite.Role);
|
||||||
|
|
||||||
await SendAsync(
|
await SendAsync(
|
||||||
new WorkspaceInviteDto(
|
new WorkspaceInviteDto(
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ using Socialize;
|
|||||||
using Socialize.Api.Infrastructure.BlobStorage.Configuration;
|
using Socialize.Api.Infrastructure.BlobStorage.Configuration;
|
||||||
using Socialize.Api.Infrastructure.BlobStorage.Services;
|
using Socialize.Api.Infrastructure.BlobStorage.Services;
|
||||||
using Socialize.Api.Infrastructure;
|
using Socialize.Api.Infrastructure;
|
||||||
|
using Socialize.Api.Infrastructure.Observability;
|
||||||
using Socialize.Api.Infrastructure.TestData;
|
using Socialize.Api.Infrastructure.TestData;
|
||||||
using Socialize.Api.Modules.Approvals;
|
using Socialize.Api.Modules.Approvals;
|
||||||
using Socialize.Api.Modules.Assets;
|
using Socialize.Api.Modules.Assets;
|
||||||
@@ -44,6 +45,8 @@ builder.Services.AddCors(options =>
|
|||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
builder.AddObservability();
|
||||||
|
|
||||||
// Add services to the container.
|
// Add services to the container.
|
||||||
builder.Services.AddWebServices();
|
builder.Services.AddWebServices();
|
||||||
builder.Services.AddAuthorizationAndAuthentication(builder.Configuration);
|
builder.Services.AddAuthorizationAndAuthentication(builder.Configuration);
|
||||||
@@ -110,6 +113,7 @@ app.UseCors("AllowAll");
|
|||||||
|
|
||||||
app.UseAuthentication();
|
app.UseAuthentication();
|
||||||
app.UseAuthorization();
|
app.UseAuthorization();
|
||||||
|
app.UseObservabilityLoggingScope();
|
||||||
|
|
||||||
// Initialize and seed the db.
|
// Initialize and seed the db.
|
||||||
await app.UseAppDataAsync();
|
await app.UseAppDataAsync();
|
||||||
@@ -122,7 +126,7 @@ if (!app.Environment.IsDevelopment())
|
|||||||
app.UseHsts();
|
app.UseHsts();
|
||||||
}
|
}
|
||||||
|
|
||||||
app.UseHealthChecks("/health");
|
app.MapObservabilityHealthChecks();
|
||||||
|
|
||||||
LocalBlobStorageOptions localBlobStorageOptions = app.Services
|
LocalBlobStorageOptions localBlobStorageOptions = app.Services
|
||||||
.GetRequiredService<IOptions<LocalBlobStorageOptions>>()
|
.GetRequiredService<IOptions<LocalBlobStorageOptions>>()
|
||||||
|
|||||||
@@ -28,7 +28,13 @@
|
|||||||
<PackageReference Include="Microsoft.EntityFrameworkCore.Relational" Version="10.0.0" />
|
<PackageReference Include="Microsoft.EntityFrameworkCore.Relational" Version="10.0.0" />
|
||||||
<PackageReference Include="Microsoft.Extensions.Diagnostics.HealthChecks.EntityFrameworkCore"
|
<PackageReference Include="Microsoft.Extensions.Diagnostics.HealthChecks.EntityFrameworkCore"
|
||||||
Version="10.0.0" />
|
Version="10.0.0" />
|
||||||
|
<PackageReference Include="Npgsql.OpenTelemetry" Version="10.0.2" />
|
||||||
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="10.0.0" />
|
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="10.0.0" />
|
||||||
|
<PackageReference Include="OpenTelemetry.Exporter.OpenTelemetryProtocol" Version="1.15.3" />
|
||||||
|
<PackageReference Include="OpenTelemetry.Extensions.Hosting" Version="1.15.3" />
|
||||||
|
<PackageReference Include="OpenTelemetry.Instrumentation.AspNetCore" Version="1.15.2" />
|
||||||
|
<PackageReference Include="OpenTelemetry.Instrumentation.Http" Version="1.15.1" />
|
||||||
|
<PackageReference Include="OpenTelemetry.Instrumentation.Runtime" Version="1.15.1" />
|
||||||
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="10.0.0">
|
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="10.0.0">
|
||||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||||
<PrivateAssets>all</PrivateAssets>
|
<PrivateAssets>all</PrivateAssets>
|
||||||
|
|||||||
@@ -3,8 +3,9 @@ services:
|
|||||||
image: postgres:16
|
image: postgres:16
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
env_file:
|
env_file:
|
||||||
- /etc/socialize/socialize.env
|
- path: /etc/socialize/socialize.env
|
||||||
- .deploy.env
|
- path: .deploy.env
|
||||||
|
required: false
|
||||||
environment:
|
environment:
|
||||||
POSTGRES_DB: ${POSTGRES_DB}
|
POSTGRES_DB: ${POSTGRES_DB}
|
||||||
POSTGRES_USER: ${POSTGRES_USER}
|
POSTGRES_USER: ${POSTGRES_USER}
|
||||||
@@ -23,8 +24,9 @@ services:
|
|||||||
image: git.mapachotes.com/jbourdon/socialize-api:${SOCIALIZE_IMAGE_TAG}
|
image: git.mapachotes.com/jbourdon/socialize-api:${SOCIALIZE_IMAGE_TAG}
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
env_file:
|
env_file:
|
||||||
- /etc/socialize/socialize.env
|
- path: /etc/socialize/socialize.env
|
||||||
- .deploy.env
|
- path: .deploy.env
|
||||||
|
required: false
|
||||||
environment:
|
environment:
|
||||||
ASPNETCORE_ENVIRONMENT: ${ASPNETCORE_ENVIRONMENT}
|
ASPNETCORE_ENVIRONMENT: ${ASPNETCORE_ENVIRONMENT}
|
||||||
ASPNETCORE_URLS: ${ASPNETCORE_URLS}
|
ASPNETCORE_URLS: ${ASPNETCORE_URLS}
|
||||||
|
|||||||
95
deploy/observability/alloy/config.alloy
Normal file
95
deploy/observability/alloy/config.alloy
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
// Alloy's own logging: info level, logfmt encoding.
logging {
  level  = "info"
  format = "logfmt"
}

// ---------------------------------------------------------------------------
// OTLP pipeline: api -> receiver -> (metric label transform) -> batch -> sinks
// ---------------------------------------------------------------------------

// OTLP entry point, listening for gRPC on 4317 and HTTP on 4318 on all
// interfaces. Metrics are routed through the label transform; traces go
// directly to the batch processor. No logs output is wired here.
otelcol.receiver.otlp "api" {
  grpc {
    endpoint = "0.0.0.0:4317"
  }

  http {
    endpoint = "0.0.0.0:4318"
  }

  output {
    metrics = [otelcol.processor.transform.metric_labels.input]
    traces  = [otelcol.processor.batch.default.input]
  }
}

// Copies the service.name and deployment.environment resource attributes onto
// every metric datapoint so they are available as datapoint attributes
// downstream. With error_mode = "ignore" a failing statement does not stop
// processing (see the component reference for exact semantics).
otelcol.processor.transform "metric_labels" {
  error_mode = "ignore"

  metric_statements {
    context    = "datapoint"
    statements = [
      `set(attributes["service.name"], resource.attributes["service.name"])`,
      `set(attributes["deployment.environment"], resource.attributes["deployment.environment"])`,
    ]
  }

  output {
    metrics = [otelcol.processor.batch.default.input]
  }
}

// Shared batcher for both signals: metrics continue to the Prometheus
// exporter, traces continue to the Tempo OTLP exporter.
otelcol.processor.batch "default" {
  output {
    metrics = [otelcol.exporter.prometheus.local.input]
    traces  = [otelcol.exporter.otlp.tempo.input]
  }
}

// Converts OTLP metrics to Prometheus samples and hands them to remote_write.
otelcol.exporter.prometheus "local" {
  forward_to = [prometheus.remote_write.local.receiver]
}

// Pushes samples to the Prometheus remote-write endpoint on the compose network.
prometheus.remote_write "local" {
  endpoint {
    url = "http://prometheus:9090/api/v1/write"
  }
}

// Sends traces to Tempo over OTLP/gRPC. tls.insecure = true: the connection to
// tempo:4317 is made without transport security.
otelcol.exporter.otlp "tempo" {
  client {
    endpoint = "tempo:4317"

    tls {
      insecure = true
    }
  }
}

// ---------------------------------------------------------------------------
// Docker log pipeline: docker discovery -> loki.source.docker -> loki.write
// ---------------------------------------------------------------------------

// Discovers containers through the Docker socket.
discovery.docker "linux" {
  host = "unix:///var/run/docker.sock"
}

// Holds relabel rules only: targets is empty and just the exported `rules`
// are consumed by loki.source.docker below.
discovery.relabel "docker_logs" {
  targets = []

  // service_name <- container name; the regex captures what follows the leading "/".
  rule {
    source_labels = ["__meta_docker_container_name"]
    regex         = "/(.*)"
    target_label  = "service_name"
  }

  // compose_service <- value of the com.docker.compose.service container label.
  rule {
    source_labels = ["__meta_docker_container_label_com_docker_compose_service"]
    target_label  = "compose_service"
  }
}

// Reads logs of the discovered containers, adds a static platform="docker"
// label, applies the relabel rules above and forwards entries to Loki.
loki.source.docker "default" {
  host          = "unix:///var/run/docker.sock"
  targets       = discovery.docker.linux.targets
  labels        = {"platform" = "docker"}
  relabel_rules = discovery.relabel.docker_logs.rules
  forward_to    = [loki.write.local.receiver]
}

// Pushes log entries to the Loki push API on the compose network.
loki.write "local" {
  endpoint {
    url = "http://loki:3100/loki/api/v1/push"
  }
}
|
||||||
94
deploy/observability/compose.observability.yml
Normal file
94
deploy/observability/compose.observability.yml
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
# Observability overlay. Intended to be layered on top of deploy/compose.yml:
#   docker compose -f deploy/compose.yml -f deploy/observability/compose.observability.yml up -d
# NOTE(review): the ./observability/... bind-mount paths below presumably
# resolve against the Compose project directory (deploy/ with the command
# above) rather than this file's own folder - confirm before moving the file.
services:
  # Adds OTLP export settings to the api service; no image is declared here.
  api:
    environment:
      OTEL_SERVICE_NAME: socialize-api
      OTEL_EXPORTER_OTLP_ENDPOINT: http://alloy:4317
      OTEL_EXPORTER_OTLP_PROTOCOL: grpc
      OTEL_RESOURCE_ATTRIBUTES: deployment.environment=preprod
    depends_on:
      alloy:
        condition: service_started

  # Dashboards. Credentials fall back to admin/admin when the GRAFANA_ADMIN_*
  # variables are unset; the published port binds to 127.0.0.1 unless
  # GRAFANA_HTTP_BIND overrides it.
  grafana:
    image: grafana/grafana:13.0.1
    restart: unless-stopped
    environment:
      GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin}
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin}
      GF_USERS_ALLOW_SIGN_UP: "false"
    volumes:
      - grafana-data:/var/lib/grafana
      - ./observability/grafana/provisioning:/etc/grafana/provisioning:ro
      - ./observability/grafana/dashboards:/var/lib/grafana/dashboards:ro
    ports:
      - "${GRAFANA_HTTP_BIND:-127.0.0.1}:3000:3000"
    depends_on:
      - prometheus
      - loki
      - tempo
    networks:
      - internal

  # Metrics store. The remote-write receiver is enabled so Alloy can push
  # samples; retention defaults to 15d unless PROMETHEUS_RETENTION is set.
  prometheus:
    image: prom/prometheus:v3.11.3
    restart: unless-stopped
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --storage.tsdb.retention.time=${PROMETHEUS_RETENTION:-15d}
      - --web.enable-remote-write-receiver
    volumes:
      - prometheus-data:/prometheus
      - ./observability/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./observability/prometheus/rules:/etc/prometheus/rules:ro
    networks:
      - internal

  # Log store, configured from the mounted local-config.yml.
  loki:
    image: grafana/loki:3.7.1
    restart: unless-stopped
    command: -config.file=/etc/loki/local-config.yml
    volumes:
      - loki-data:/loki
      - ./observability/loki/local-config.yml:/etc/loki/local-config.yml:ro
    networks:
      - internal

  # Trace store, configured from the mounted tempo.yml.
  tempo:
    image: grafana/tempo:2.10.3
    restart: unless-stopped
    command: -config.file=/etc/tempo.yml
    volumes:
      - tempo-data:/var/tempo
      - ./observability/tempo/tempo.yml:/etc/tempo.yml:ro
    networks:
      - internal

  # Collector: OTLP receiver (4317/4318) and Docker log reader. The Docker
  # socket is mounted read-only; ports are exposed to other services only,
  # not published on the host.
  alloy:
    image: grafana/alloy:v1.16.0
    restart: unless-stopped
    command:
      - run
      - --server.http.listen-addr=0.0.0.0:12345
      - --storage.path=/var/lib/alloy/data
      - /etc/alloy/config.alloy
    volumes:
      - alloy-data:/var/lib/alloy/data
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./observability/alloy/config.alloy:/etc/alloy/config.alloy:ro
    expose:
      - "4317"
      - "4318"
      - "12345"
    networks:
      - internal

# Named volumes, one per stateful component.
volumes:
  grafana-data:
  prometheus-data:
  loki-data:
  tempo-data:
  alloy-data:

# Network shared by the observability services.
networks:
  internal:
|
||||||
413
deploy/observability/grafana/dashboards/socialize-overview.json
Normal file
413
deploy/observability/grafana/dashboards/socialize-overview.json
Normal file
@@ -0,0 +1,413 @@
|
|||||||
|
{
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"id": null,
|
||||||
|
"links": [],
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 6,
|
||||||
|
"x": 0,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "area",
|
||||||
|
"justifyMode": "auto",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "auto"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(http_server_request_duration_seconds_count{service_name=\"socialize-api\"}[5m]))",
|
||||||
|
"legendFormat": "requests/sec"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "API Requests/sec",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "percentunit"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 6,
|
||||||
|
"x": 6,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 2,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "area",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "auto"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(http_server_request_duration_seconds_count{service_name=\"socialize-api\", http_response_status_code=~\"5..\"}[5m])) / clamp_min(sum(rate(http_server_request_duration_seconds_count{service_name=\"socialize-api\"}[5m])), 0.001)",
|
||||||
|
"legendFormat": "5xx rate"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "API 5xx Rate",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "s"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 6,
|
||||||
|
"x": 12,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "area",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "auto"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "histogram_quantile(0.95, sum by (le) (rate(http_server_request_duration_seconds_bucket{service_name=\"socialize-api\"}[5m])))",
|
||||||
|
"legendFormat": "p95"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "API p95 Latency",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 6,
|
||||||
|
"x": 18,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "area",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "auto"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(ALERTS{alertstate=\"firing\"})",
|
||||||
|
"legendFormat": "firing"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Firing Alerts",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 4
|
||||||
|
},
|
||||||
|
"id": 5,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "list",
|
||||||
|
"placement": "bottom"
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(http_server_request_duration_seconds_count{service_name=\"socialize-api\"}[5m])) by (http_request_method, http_route)",
|
||||||
|
"legendFormat": "{{http_request_method}} {{http_route}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Request Rate By Endpoint",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"unit": "s"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 4
|
||||||
|
},
|
||||||
|
"id": 6,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "list",
|
||||||
|
"placement": "bottom"
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "histogram_quantile(0.95, sum by (le, http_route) (rate(http_server_request_duration_seconds_bucket{service_name=\"socialize-api\"}[5m])))",
|
||||||
|
"legendFormat": "{{http_route}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "p95 Latency By Endpoint",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 12
|
||||||
|
},
|
||||||
|
"id": 7,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "list",
|
||||||
|
"placement": "bottom"
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(increase(socialize_login_attempts_total[24h])) by (outcome)",
|
||||||
|
"legendFormat": "login {{outcome}}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(increase(socialize_organizations_created_total[24h]))",
|
||||||
|
"legendFormat": "organizations"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(increase(socialize_workspaces_created_total[24h]))",
|
||||||
|
"legendFormat": "workspaces"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(increase(socialize_content_items_created_total[24h]))",
|
||||||
|
"legendFormat": "content"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(increase(socialize_comments_created_total[24h]))",
|
||||||
|
"legendFormat": "comments"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(increase(socialize_approval_decisions_submitted_total[24h]))",
|
||||||
|
"legendFormat": "approvals"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(increase(socialize_feedback_submitted_total[24h]))",
|
||||||
|
"legendFormat": "feedback"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Usage Signals, 24h Rolling",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 12
|
||||||
|
},
|
||||||
|
"id": 8,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "list",
|
||||||
|
"placement": "bottom"
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(increase(socialize_email_delivery_total[1h])) by (outcome, provider)",
|
||||||
|
"legendFormat": "email {{provider}} {{outcome}}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(increase(socialize_blob_storage_operations_total[1h])) by (operation, outcome)",
|
||||||
|
"legendFormat": "blob {{operation}} {{outcome}}"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(increase(socialize_background_job_runs_total[1h])) by (job, outcome)",
|
||||||
|
"legendFormat": "job {{job}} {{outcome}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Operational Events, 1h Rolling",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "Prometheus"
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 7,
|
||||||
|
"w": 24,
|
||||||
|
"x": 0,
|
||||||
|
"y": 20
|
||||||
|
},
|
||||||
|
"id": 9,
|
||||||
|
"options": {
|
||||||
|
"showHeader": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "ALERTS{alertstate=\"firing\"}",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true,
|
||||||
|
"legendFormat": "{{alertname}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Firing Alerts",
|
||||||
|
"type": "table"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "loki",
|
||||||
|
"uid": "Loki"
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 9,
|
||||||
|
"w": 24,
|
||||||
|
"x": 0,
|
||||||
|
"y": 27
|
||||||
|
},
|
||||||
|
"id": 10,
|
||||||
|
"options": {
|
||||||
|
"dedupStrategy": "none",
|
||||||
|
"enableLogDetails": true,
|
||||||
|
"prettifyLogMessage": false,
|
||||||
|
"showCommonLabels": false,
|
||||||
|
"showLabels": false,
|
||||||
|
"showTime": true,
|
||||||
|
"sortOrder": "Descending",
|
||||||
|
"wrapLogMessage": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "{platform=\"docker\", compose_service=\"api\"}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "API Logs",
|
||||||
|
"type": "logs"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "30s",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"tags": [
|
||||||
|
"socialize",
|
||||||
|
"preprod"
|
||||||
|
],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-6h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {},
|
||||||
|
"timezone": "",
|
||||||
|
"title": "Socialize Overview",
|
||||||
|
"uid": "socialize-overview",
|
||||||
|
"version": 2,
|
||||||
|
"weekStart": ""
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
providers:
|
||||||
|
- name: Socialize
|
||||||
|
orgId: 1
|
||||||
|
folder: Socialize
|
||||||
|
type: file
|
||||||
|
disableDeletion: false
|
||||||
|
updateIntervalSeconds: 30
|
||||||
|
options:
|
||||||
|
path: /var/lib/grafana/dashboards
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
datasources:
|
||||||
|
- name: Prometheus
|
||||||
|
uid: Prometheus
|
||||||
|
type: prometheus
|
||||||
|
access: proxy
|
||||||
|
url: http://prometheus:9090
|
||||||
|
isDefault: true
|
||||||
|
|
||||||
|
- name: Loki
|
||||||
|
uid: Loki
|
||||||
|
type: loki
|
||||||
|
access: proxy
|
||||||
|
url: http://loki:3100
|
||||||
|
|
||||||
|
- name: Tempo
|
||||||
|
uid: Tempo
|
||||||
|
type: tempo
|
||||||
|
access: proxy
|
||||||
|
url: http://tempo:3200
|
||||||
|
jsonData:
|
||||||
|
tracesToLogsV2:
|
||||||
|
datasourceUid: Loki
|
||||||
|
serviceMap:
|
||||||
|
datasourceUid: Prometheus
|
||||||
32
deploy/observability/loki/local-config.yml
Normal file
32
deploy/observability/loki/local-config.yml
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
auth_enabled: false
|
||||||
|
|
||||||
|
server:
|
||||||
|
http_listen_port: 3100
|
||||||
|
|
||||||
|
common:
|
||||||
|
path_prefix: /loki
|
||||||
|
replication_factor: 1
|
||||||
|
ring:
|
||||||
|
kvstore:
|
||||||
|
store: inmemory
|
||||||
|
|
||||||
|
schema_config:
|
||||||
|
configs:
|
||||||
|
- from: 2024-01-01
|
||||||
|
store: tsdb
|
||||||
|
object_store: filesystem
|
||||||
|
schema: v13
|
||||||
|
index:
|
||||||
|
prefix: index_
|
||||||
|
period: 24h
|
||||||
|
|
||||||
|
storage_config:
|
||||||
|
filesystem:
|
||||||
|
directory: /loki/chunks
|
||||||
|
|
||||||
|
limits_config:
|
||||||
|
allow_structured_metadata: true
|
||||||
|
volume_enabled: true
|
||||||
|
|
||||||
|
analytics:
|
||||||
|
reporting_enabled: false
|
||||||
17
deploy/observability/prometheus/prometheus.yml
Normal file
17
deploy/observability/prometheus/prometheus.yml
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
evaluation_interval: 15s
|
||||||
|
|
||||||
|
rule_files:
|
||||||
|
- /etc/prometheus/rules/*.yml
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: prometheus
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- prometheus:9090
|
||||||
|
|
||||||
|
- job_name: alloy
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- alloy:12345
|
||||||
97
deploy/observability/prometheus/rules/socialize-alerts.yml
Normal file
97
deploy/observability/prometheus/rules/socialize-alerts.yml
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
groups:
|
||||||
|
- name: socialize-preprod
|
||||||
|
rules:
|
||||||
|
- alert: SocializeApiTelemetryMissing
|
||||||
|
expr: absent(http_server_request_duration_seconds_count{service_name="socialize-api"})
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
service: socialize-api
|
||||||
|
annotations:
|
||||||
|
summary: Socialize API telemetry is missing
|
||||||
|
description: No API request telemetry has been received for 5 minutes. The API or telemetry pipeline may be down.
|
||||||
|
|
||||||
|
- alert: SocializeApiHighErrorRate
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
sum(rate(http_server_request_duration_seconds_count{service_name="socialize-api", http_response_status_code=~"5.."}[5m]))
|
||||||
|
/
|
||||||
|
clamp_min(sum(rate(http_server_request_duration_seconds_count{service_name="socialize-api"}[5m])), 0.001)
|
||||||
|
) > 0.05
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
service: socialize-api
|
||||||
|
annotations:
|
||||||
|
summary: Socialize API 5xx rate is high
|
||||||
|
description: More than 5% of API requests are returning 5xx responses over 5 minutes.
|
||||||
|
|
||||||
|
- alert: SocializeApiHighLatency
|
||||||
|
expr: |
|
||||||
|
histogram_quantile(
|
||||||
|
0.95,
|
||||||
|
sum by (le) (rate(http_server_request_duration_seconds_bucket{service_name="socialize-api"}[5m]))
|
||||||
|
) > 2
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: socialize-api
|
||||||
|
annotations:
|
||||||
|
summary: Socialize API p95 latency is high
|
||||||
|
description: API p95 latency has been above 2 seconds for 10 minutes.
|
||||||
|
|
||||||
|
- alert: SocializeCoreUsageQuiet
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
sum(increase(socialize_content_items_created_total[12h]))
|
||||||
|
+ sum(increase(socialize_comments_created_total[12h]))
|
||||||
|
+ sum(increase(socialize_approval_decisions_submitted_total[12h]))
|
||||||
|
+ sum(increase(socialize_feedback_submitted_total[12h]))
|
||||||
|
) < 1
|
||||||
|
for: 30m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: socialize-api
|
||||||
|
annotations:
|
||||||
|
summary: Socialize core usage is quiet
|
||||||
|
description: No content, comment, approval, or feedback activity has been observed over the last 12 hours.
|
||||||
|
|
||||||
|
- alert: SocializeFeedbackBugSubmitted
|
||||||
|
expr: sum(increase(socialize_feedback_submitted_total{feedback_type="Bug"}[15m])) > 0
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: info
|
||||||
|
service: socialize-api
|
||||||
|
annotations:
|
||||||
|
summary: New bug feedback submitted
|
||||||
|
description: A user submitted bug feedback in the last 15 minutes.
|
||||||
|
|
||||||
|
- alert: SocializeEmailDeliveryFailures
|
||||||
|
expr: sum(increase(socialize_email_delivery_total{outcome="failure"}[15m])) > 0
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: socialize-api
|
||||||
|
annotations:
|
||||||
|
summary: Email delivery failures detected
|
||||||
|
description: One or more email delivery attempts failed in the last 15 minutes.
|
||||||
|
|
||||||
|
- alert: SocializeBlobStorageFailures
|
||||||
|
expr: sum(increase(socialize_blob_storage_operations_total{outcome="failure"}[15m])) > 0
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: socialize-api
|
||||||
|
annotations:
|
||||||
|
summary: Blob storage failures detected
|
||||||
|
description: One or more blob storage operations failed in the last 15 minutes.
|
||||||
|
|
||||||
|
- alert: SocializeBackgroundJobFailures
|
||||||
|
expr: sum(increase(socialize_background_job_runs_total{outcome="failure"}[30m])) > 0
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: socialize-api
|
||||||
|
annotations:
|
||||||
|
summary: Background job failures detected
|
||||||
|
description: One or more background jobs failed in the last 30 minutes.
|
||||||
25
deploy/observability/tempo/tempo.yml
Normal file
25
deploy/observability/tempo/tempo.yml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
server:
|
||||||
|
http_listen_port: 3200
|
||||||
|
|
||||||
|
distributor:
|
||||||
|
receivers:
|
||||||
|
otlp:
|
||||||
|
protocols:
|
||||||
|
grpc:
|
||||||
|
endpoint: 0.0.0.0:4317
|
||||||
|
http:
|
||||||
|
endpoint: 0.0.0.0:4318
|
||||||
|
|
||||||
|
storage:
|
||||||
|
trace:
|
||||||
|
backend: local
|
||||||
|
local:
|
||||||
|
path: /var/tempo/traces
|
||||||
|
|
||||||
|
compactor:
|
||||||
|
compaction:
|
||||||
|
block_retention: 168h
|
||||||
|
|
||||||
|
metrics_generator:
|
||||||
|
storage:
|
||||||
|
path: /var/tempo/generator/wal
|
||||||
80
docs/FEATURES/observability.md
Normal file
80
docs/FEATURES/observability.md
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
# Observability
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Draft
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Give the SaaS operator preproduction visibility into whether Socialize is healthy and whether real users are exercising core workflows.
|
||||||
|
|
||||||
|
This feature is operator-facing. It is not a client-facing analytics suite or status page.
|
||||||
|
|
||||||
|
## Initial Scope
|
||||||
|
|
||||||
|
- structured backend logs suitable for centralized log search
|
||||||
|
- OpenTelemetry traces and metrics emitted by the API
|
||||||
|
- self-hosted Grafana observability stack for preproduction
|
||||||
|
- health, readiness, and liveness endpoints
|
||||||
|
- aggregate product usage counters for core workflow actions
|
||||||
|
- dashboards and alerts for app health and adoption signals
|
||||||
|
|
||||||
|
## Operational Signals
|
||||||
|
|
||||||
|
Health signals should cover:
|
||||||
|
|
||||||
|
- API availability
|
||||||
|
- Postgres connectivity
|
||||||
|
- request rate, latency, and error rate
|
||||||
|
- slow endpoints
|
||||||
|
- outbound HTTP failures
|
||||||
|
- background service failures
|
||||||
|
- email delivery failures
|
||||||
|
- blob storage failures
|
||||||
|
- authentication failures
|
||||||
|
|
||||||
|
Usage signals should cover aggregate counts for:
|
||||||
|
|
||||||
|
- login attempts and successful logins
|
||||||
|
- organizations and workspaces created
|
||||||
|
- content items created
|
||||||
|
- comments created
|
||||||
|
- approval decisions submitted
|
||||||
|
- feedback reports submitted
|
||||||
|
- workspace invites created
|
||||||
|
|
||||||
|
## Privacy And Safety Rules
|
||||||
|
|
||||||
|
- Do not log request bodies, access tokens, refresh tokens, passwords, uploaded file contents, screenshots, or raw customer content.
|
||||||
|
- Usage metrics are aggregate operational signals, not behavioral tracking.
|
||||||
|
- User, organization, and workspace identifiers may be included as structured attributes when already available to backend code.
|
||||||
|
- The first implementation targets preproduction and self-hosted Docker infrastructure only.
|
||||||
|
|
||||||
|
## Deployment Shape
|
||||||
|
|
||||||
|
The application emits OpenTelemetry over OTLP to a local collector.
|
||||||
|
|
||||||
|
The preproduction observability stack runs as an optional Docker Compose overlay with:
|
||||||
|
|
||||||
|
- Grafana for dashboards and visibility into firing alerts
|
||||||
|
- Prometheus for metrics and local alert rules
|
||||||
|
- Loki for logs
|
||||||
|
- Tempo for traces
|
||||||
|
- Grafana Alloy for log collection and telemetry routing
|
||||||
|
|
||||||
|
The normal application compose file must remain usable without the observability overlay.
|
||||||
|
|
||||||
|
## Alerting
|
||||||
|
|
||||||
|
Preproduction alerting should start with local Prometheus alert rules. Notification routing is a separate operational setup step because the first preproduction target may use email, chat, or a private incident channel.
|
||||||
|
|
||||||
|
Initial alerts should cover:
|
||||||
|
|
||||||
|
- app telemetry missing
|
||||||
|
- high API error rate
|
||||||
|
- high API p95 latency
|
||||||
|
- core usage unexpectedly quiet
|
||||||
|
- feedback bug reports submitted
|
||||||
|
- email delivery failures
|
||||||
|
- blob storage failures
|
||||||
|
- background job failures
|
||||||
44
docs/TASKS/observability/001-observability-foundation.md
Normal file
44
docs/TASKS/observability/001-observability-foundation.md
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
# Observability 001: Preprod Foundation
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Add the first preproduction observability foundation for Socialize so the operator can tell whether the app is healthy and whether core workflows are being used.
|
||||||
|
|
||||||
|
## Feature Spec
|
||||||
|
|
||||||
|
- `docs/FEATURES/observability.md`
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
- Add backend OpenTelemetry registration for traces and metrics.
|
||||||
|
- Add structured JSON console logging with request correlation context.
|
||||||
|
- Add aggregate custom counters for core usage events.
|
||||||
|
- Expand health endpoints with liveness and readiness checks.
|
||||||
|
- Add an optional Docker Compose observability overlay for Grafana, Prometheus, Loki, Tempo, and Alloy.
|
||||||
|
- Add basic Grafana datasource/dashboard provisioning.
|
||||||
|
|
||||||
|
## Likely Files
|
||||||
|
|
||||||
|
- `backend/src/Socialize.Api/Program.cs`
|
||||||
|
- `backend/src/Socialize.Api/ApplicationRegistration.cs`
|
||||||
|
- `backend/src/Socialize.Api/Infrastructure/Observability/*`
|
||||||
|
- selected backend handlers for usage counters
|
||||||
|
- `backend/src/Socialize.Api/Socialize.Api.csproj`
|
||||||
|
- `deploy/observability/*`
|
||||||
|
- `README.md`
|
||||||
|
|
||||||
|
## Out Of Scope
|
||||||
|
|
||||||
|
- Client-facing analytics or status page.
|
||||||
|
- Frontend behavioral analytics.
|
||||||
|
- Cloud telemetry providers.
|
||||||
|
- Long-term telemetry retention policy.
|
||||||
|
- Full product analytics warehouse.
|
||||||
|
|
||||||
|
## Validation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
dotnet build backend/Socialize.slnx
|
||||||
|
dotnet test backend/Socialize.slnx
|
||||||
|
docker compose -f deploy/compose.yml -f deploy/observability/compose.observability.yml config
|
||||||
|
```
|
||||||
32
docs/TASKS/observability/002-alerts-dashboard-hardening.md
Normal file
32
docs/TASKS/observability/002-alerts-dashboard-hardening.md
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# Observability 002: Alerts And Dashboard Hardening
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Make the preproduction observability stack actionable by adding alert rules, better operator dashboards, pinned image versions, and operational counters for services that commonly fail silently.
|
||||||
|
|
||||||
|
## Feature Spec
|
||||||
|
|
||||||
|
- `docs/FEATURES/observability.md`
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
- Pin Grafana, Prometheus, Loki, Tempo, and Alloy image tags in the observability compose overlay.
|
||||||
|
- Add Prometheus alert rules for API health, error rate, latency, usage silence, feedback bugs, email failures, blob failures, and background job failures.
|
||||||
|
- Expand the Grafana dashboard with health, usage, operational failure, alert, log, and trace-oriented panels.
|
||||||
|
- Add backend counters for email delivery, blob storage operations, and background job runs.
|
||||||
|
- Document alerting and safe Grafana exposure expectations.
|
||||||
|
|
||||||
|
## Out Of Scope
|
||||||
|
|
||||||
|
- Notification delivery integration for alerts.
|
||||||
|
- Client-facing status page.
|
||||||
|
- Cloud observability backends.
|
||||||
|
- Full product analytics or session tracking.
|
||||||
|
|
||||||
|
## Validation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
dotnet build backend/Socialize.slnx
|
||||||
|
dotnet test backend/Socialize.slnx
|
||||||
|
docker compose -f deploy/compose.yml -f deploy/observability/compose.observability.yml config
|
||||||
|
```
|
||||||
Reference in New Issue
Block a user