# infra/nix/monitoring/configuration.nix
#
# 398 lines
# 11 KiB
# Nix

{ config, pkgs, ... }:
{
imports = [
./hardware.nix
./networking.nix # generated at runtime by nixos-infect
];
# age secrets referenced by the Healthchecks settings further down
# (secret key, SMTP password, Telegram bot token). All three are owned by
# the "healthchecks" user.
age.secrets.healthchecks-secret = {
  owner = "healthchecks";
  file = ../../secrets/monitoring-healthchecks-secret.age;
};

age.secrets.healthchecks-smtp = {
  owner = "healthchecks";
  file = ../../secrets/fastmail-smtp.age;
};

age.secrets.healthchecks-telegram = {
  owner = "healthchecks";
  file = ../../secrets/healthchecks-telegram.age;
};
# Web config file handed to Prometheus (services.prometheus.webConfigFile)
# and to Alertmanager (via --web.config.file in its extraFlags).
age.secrets.prometheus-webconfig-secret = {
  file = ../../secrets/monitoring-prometheus-webconfig.age;
  owner = "prometheus";
  # Read-only for everyone. The previous "775" additionally granted
  # write-to-group and execute-to-all on a secret file, which nothing needs.
  # World-readability is kept because Alertmanager also reads this file and
  # presumably runs as a different user than "prometheus" — TODO confirm,
  # then tighten to 0440 with a shared group if possible.
  mode = "0444";
};
# Password Prometheus uses for basic auth against Alertmanager
# (referenced as password_file in services.prometheus.alertmanagers).
age.secrets.prometheus-password-secret = {
  owner = "prometheus";
  file = ../../secrets/monitoring-prometheus-password.age;
};

# OAuth client secret that Grafana reads through its $__file{} provider.
age.secrets.grafana-client-secret = {
  owner = "grafana";
  file = ../../secrets/monitoring-grafana-client-secret.age;
};
# Base system: wipe /tmp on every boot and use compressed RAM swap.
boot.tmp.cleanOnBoot = true;
zramSwap.enable = true;

networking = {
  hostName = "monitoring";
  domain = "";
  # NOTE(review): the host firewall is switched off. Services in this file
  # that do not set an explicit loopback listen address (e.g. Loki on 3030
  # with auth_enabled = false) are presumably reachable from every
  # interface — confirm that upstream filtering exists.
  firewall.enable = false;
};

# Remote administration: OpenSSH plus the single key authorised for root.
services.openssh.enable = true;
users.users.root.openssh.authorizedKeys.keys = [
  ''ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBDjEgtIWPA5Ncs/KOcMeT6Q/HACJJetDOLjMvXXwUE+08oTX1EpHrWPpy8J+UHKIyErCNPYq8dgtrbhnMRlxHqI=''
];
# Grafana listens on loopback only (127.0.0.1:2342) and is published by the
# nginx virtual host defined in this file. Sign-in goes through authentik
# using Grafana's generic OAuth integration.
services.grafana = {
  enable = true;
  settings = {
    feature_toggles.publicDashboards = true;

    # Debug logging is enabled for the generic OAuth logger only.
    log.filters = "oauth.generic_oauth:debug";

    server = {
      root_url = "https://grafana.gmem.ca";
      domain = "grafana.gmem.ca";
      http_addr = "127.0.0.1";
      http_port = 2342;
    };

    auth = {
      # Skip Grafana's own login form and go straight to the OAuth provider.
      oauth_auto_login = true;
      signout_redirect_url = "https://authentik.gmem.ca/application/o/grafana/end-session/";
    };

    "auth.generic_oauth" = {
      enabled = true;
      name = "authentik";

      client_id = "VbOQzwuf0UK9AUGrWvaVaWWHvX2fJsZChxJNGt61";
      # The secret is not embedded in the config; Grafana reads it from the
      # age secret path at runtime via $__file{}.
      client_secret = "$__file{${config.age.secrets.grafana-client-secret.path}}";

      auth_url = "https://authentik.gmem.ca/application/o/authorize/";
      token_url = "https://authentik.gmem.ca/application/o/token/";
      api_url = "https://authentik.gmem.ca/application/o/userinfo/";
      scopes = "openid email grafana-user";

      # Group membership decides the Grafana role: "Grafana Admins" -> Admin,
      # "Grafana Editors" -> Editor, anything else -> Viewer.
      role_attribute_path = "contains(info.groups[*], 'Grafana Admins') && 'Admin' || contains(info.groups[*], 'Grafana Editors') && 'Editor' || 'Viewer'";
      role_attribute_strict = true;
    };
  };
};
# Single-instance Loki: in-memory ring, replication factor 1, and both index
# (boltdb-shipper) and chunks stored on the local filesystem under
# /var/lib/loki. Multi-tenant auth is disabled.
services.loki = {
  enable = true;
  configuration = {
    auth_enabled = false;

    # Promtail's client URL in this file is derived from this port.
    server = {
      http_listen_port = 3030;
    };

    ingester = {
      lifecycler = {
        address = "127.0.0.1";
        ring = {
          replication_factor = 1;
          kvstore.store = "inmemory";
        };
      };
      chunk_idle_period = "1h";
      max_chunk_age = "1h";
      chunk_target_size = 999999;
      chunk_retain_period = "30s";
      max_transfer_retries = 0;
    };

    schema_config.configs = [
      {
        from = "2022-06-06";
        store = "boltdb-shipper";
        object_store = "filesystem";
        schema = "v11";
        index = {
          prefix = "index_";
          period = "24h";
        };
      }
    ];

    storage_config = {
      filesystem.directory = "/var/lib/loki/chunks";
      boltdb_shipper = {
        active_index_directory = "/var/lib/loki/boltdb-shipper-active";
        cache_location = "/var/lib/loki/boltdb-shipper-cache";
        cache_ttl = "24h";
        shared_store = "filesystem";
      };
    };

    # Samples older than 168h (7 days) are refused at ingestion.
    limits_config = {
      reject_old_samples = true;
      reject_old_samples_max_age = "168h";
    };

    chunk_store_config.max_look_back_period = "0s";

    # Retention-based deletion is turned off.
    table_manager = {
      retention_deletes_enabled = false;
      retention_period = "0s";
    };

    compactor = {
      working_directory = "/var/lib/loki";
      shared_store = "filesystem";
      compactor_ring.kvstore.store = "inmemory";
    };
  };
};
# Promtail ships this host's systemd journal to the local Loki instance.
services.promtail = {
  enable = true;
  configuration = {
    server = {
      http_listen_port = 3031;
      # 0 lets promtail pick/disable the gRPC port rather than fixing one.
      grpc_listen_port = 0;
    };

    positions = {
      # Read offsets must survive reboots and restarts. The previous location,
      # /tmp/positions.yaml, is wiped at boot (boot.tmp.cleanOnBoot), after
      # which promtail re-reads up to `max_age` of journal history. This path
      # is the NixOS promtail module's default cache location — verify
      # against the nixpkgs release in use.
      filename = "/var/cache/promtail/positions.yaml";
    };

    # Push endpoint is derived from Loki's configured HTTP port.
    clients = [
      {
        url = "http://127.0.0.1:${toString config.services.loki.configuration.server.http_listen_port}/loki/api/v1/push";
      }
    ];

    scrape_configs = [
      {
        job_name = "journal";
        journal = {
          max_age = "12h";
          labels = {
            job = "systemd-journal";
            host = "monitoring";
          };
        };
        # Expose the originating systemd unit as the `unit` label.
        relabel_configs = [
          {
            source_labels = [ "__journal__systemd_unit" ];
            target_label = "unit";
          }
        ];
      }
    ];
  };
};
# Bridge that receives Alertmanager webhooks on 127.0.0.1:8111 and republishes
# them to the "alerts" topic on ntfy.gmem.ca.
services.alertmanager-ntfy = {
  enable = true;
  settings = {
    http = {
      # Must match the webhook URL in the Alertmanager receiver config.
      addr = "127.0.0.1:8111";
    };
    ntfy = {
      baseurl = "https://ntfy.gmem.ca";
      notification = {
        topic = "alerts";
        # Firing alerts are sent with "high" priority, everything else
        # (e.g. resolved) with "default".
        priority = ''
          status == "firing" ? "high" : "default"
        '';
        templates = {
          title = ''{{ if eq .Status "resolved" }}Resolved: {{ end }}{{ index .Annotations "summary" }}'';
          description = ''{{ index .Annotations "description" }}'';
          # Deep link to the dashboard named in the alert's `dashboard`
          # annotation. Uses https to match Grafana's root_url; the plain
          # http link only worked via nginx's forced redirect.
          click = ''https://grafana.gmem.ca/d/{{ index .Annotations "dashboard" }}'';
        };
      };
    };
  };
};
# Prometheus server together with its alerting rules, the bundled
# Alertmanager, all scrape jobs, and the local node exporter.
services.prometheus = {
  enable = true;
  # The same web config file is also passed to Alertmanager below.
  webConfigFile = config.age.secrets.prometheus-webconfig-secret.path;
  globalConfig = {
    scrape_interval = "15s";
  };

  # Alertmanager endpoint that fired alerts are sent to; the basic-auth
  # password is read from an age secret file.
  alertmanagers = [
    {
      basic_auth = {
        username = "homelab";
        password_file = config.age.secrets.prometheus-password-secret.path;
      };
      static_configs = [
        {
          targets = [
            "localhost:9093"
          ];
        }
      ];
    }
  ];

  # Rule groups are written as a Nix attrset and serialised to JSON.
  # The `dashboard` annotation is the Grafana dashboard id that the
  # alertmanager-ntfy click template turns into a link.
  rules = [
    (builtins.toJSON {
      groups = [
        {
          name = "internet_connection";
          rules = [
            {
              alert = "LowInternetDownload";
              # 500000000 bit/s = 500 Mb/s, sustained for two hours.
              expr = ''speedtest_download_bits_per_second{} < 500000000'';
              for = "2h";
              labels.severity = "page";
              annotations = {
                summary = "Internet speed low";
                description = "Home internet speed dropped below 500Mb/s.";
                dashboard = "o9mIe_Aik";
              };
            }
          ];
        }
        {
          name = "healthchecks";
          rules = [
            {
              alert = "HealthcheckFailedCheckin";
              expr = ''hc_check_up < 1'';
              for = "5m";
              labels.severity = "page";
              annotations = {
                summary = "{{ $labels.name }} healthcheck failed";
                description = "The {{ $labels.name }} healthcheck failed to check in.";
                dashboard = "f594ea85-45f2-4019-b988-2d17638b5cf3";
              };
            }
          ];
        }
      ];
    })
  ];

  alertmanager = {
    enable = true;
    extraFlags = [ "--web.config.file=${config.age.secrets.prometheus-webconfig-secret.path}" ];
    webExternalUrl = "https://alerts.gmem.ca";
    # YAML is indentation-sensitive: `route` settings and the receiver's
    # `webhook_configs` must be nested under their parents, otherwise they
    # are parsed as unrelated top-level keys. Nix strips the common leading
    # indentation of this string, so only the relative nesting matters.
    configText = ''
      global: {}
      # The directory from which notification templates are read.
      templates:
        - '/etc/alertmanager/template/*.tmpl'
      # The root route on which each incoming alert enters.
      route:
        group_by: ['alertname', 'cluster', 'service']
        group_wait: 0s
        group_interval: 5m
        repeat_interval: 3h
        # A default receiver
        receiver: ntfy
      receivers:
        - name: ntfy
          webhook_configs:
            - url: http://localhost:8111/hook
    '';
  };

  port = 9001;
  extraFlags = [ "--web.enable-remote-write-receiver" ];

  scrapeConfigs = [
    {
      job_name = "proxmox";
      metrics_path = "/pve";
      params = { "target" = [ "localhost" ]; };
      static_configs = [ { targets = [ "proxmox:9221" ]; } ];
    }
    {
      job_name = "personal_hardware";
      static_configs = [ { targets = [ "london:9100" "vancouver:9100" "localhost:9100" ]; } ];
    }
    {
      # Scraped hourly with a one-minute timeout instead of the global 15s.
      job_name = "speedtest-exporter";
      scrape_interval = "1h";
      scrape_timeout = "1m";
      static_configs = [ { targets = [ "vancouver:9798" ]; } ];
    }
    {
      job_name = "syncthing";
      static_configs = [ { targets = [ "vancouver:8384" "london:8384" ]; } ];
    }
    {
      job_name = "forgejo";
      static_configs = [ { targets = [ "git.gmem.ca" ]; } ];
    }
    {
      job_name = "coredns";
      static_configs = [ { targets = [ "vancouver:9253" ]; } ];
    }
    {
      job_name = "healthchecks";
      scrape_interval = "60s";
      # NOTE(review): the last path segment looks like a read-only API key
      # committed in plain text — confirm whether it should be a secret.
      metrics_path = "/projects/5f1de50f-a52d-4215-961f-aae7cc6cf6c9/metrics/TbMoU7SUdknzMe-H5Q4HzmKl3itOIrJk";
      static_configs = [ { targets = [ "localhost:8000" ]; } ];
    }
    {
      job_name = "vrc_events";
      scrape_interval = "60s";
      static_configs = [ { targets = [ "vancouver:6534" ]; } ];
    }
    {
      job_name = "blackbox";
      metrics_path = "/probe";
      # blackbox_exporter selects its prober through the `module` query
      # parameter. The previous key, `modules`, is not a parameter it reads,
      # so the request silently fell back to the exporter's default module.
      params = { "module" = [ "http_2xx" ]; };
      static_configs = [ { targets = [ "google.com" "gabrielsimmer.com" "artbybecki.com" ]; } ];
      # Standard exporter indirection: the listed site becomes the `target`
      # query parameter and the `instance` label, while the actual scrape
      # address is rewritten to the exporter at vancouver:9115.
      relabel_configs = [
        { source_labels = [ "__address__" ]; target_label = "__param_target"; }
        { source_labels = [ "__param_target" ]; target_label = "instance"; }
        { source_labels = [ ]; target_label = "__address__"; replacement = "vancouver:9115"; }
      ];
    }
  ];

  # Node exporter for this host, bound to loopback; scraped by the
  # "personal_hardware" job as localhost:9100.
  exporters.node = {
    enable = true;
    listenAddress = "127.0.0.1";
    enabledCollectors = [
      "systemd"
      "processes"
    ];
  };
};
services.tailscale.enable = true;

# Healthchecks instance served at healthchecks.gmem.ca. Every credential
# (*_FILE settings) points at an age secret path rather than an inline value.
services.healthchecks = {
  enable = true;
  # package = healthchecks-edge;
  settings = {
    # Site identity
    SITE_NAME = "Archs Healthchecks";
    SITE_ROOT = "https://healthchecks.gmem.ca";
    SECRET_KEY_FILE = config.age.secrets.healthchecks-secret.path;

    # Outgoing mail
    DEFAULT_FROM_EMAIL = "healthchecks@gmem.ca";
    EMAIL_HOST = "smtp.fastmail.com";
    EMAIL_HOST_USER = "g@gmem.ca";
    EMAIL_HOST_PASSWORD_FILE = config.age.secrets.healthchecks-smtp.path;

    # Telegram integration
    TELEGRAM_BOT_NAME = "arch_healthchecks_bot";
    TELEGRAM_TOKEN_FILE = config.age.secrets.healthchecks-telegram.path;
  };
};
# nginx reverse proxy: serves Grafana and Healthchecks over HTTPS with ACME
# certificates, forwarding each virtual host to a port on 127.0.0.1.
services.nginx =
  let
    grafanaServer = config.services.grafana.settings.server;

    # Virtual host that requests an ACME certificate, forces HTTPS, and
    # proxies everything (websocket upgrades included) to the given local port.
    proxiedHost = port: {
      enableACME = true;
      forceSSL = true;
      locations."/" = {
        proxyPass = "http://127.0.0.1:${toString port}";
        proxyWebsockets = true;
      };
    };
  in
  {
    enable = true;

    recommendedGzipSettings = true;
    recommendedBrotliSettings = true;
    recommendedZstdSettings = true;
    recommendedOptimisation = true;
    recommendedTlsSettings = true;
    recommendedProxySettings = true;

    virtualHosts = {
      # Grafana's vhost name and upstream port follow the Grafana settings;
      # it is also the default server.
      ${grafanaServer.domain} = (proxiedHost grafanaServer.http_port) // {
        default = true;
      };
      "healthchecks.gmem.ca" = proxiedHost 8000;
    };
  };
# ACME defaults used for the certificates requested by the nginx vhosts
# (enableACME = true).
security.acme = {
  acceptTerms = true;
  defaults.email = "acme@gmem.ca";
};

# NixOS state version this host is pinned to.
system.stateVersion = "23.11";
}