infra/nix/monitoring/configuration.nix

507 lines
14 KiB
Nix

{
config,
pkgs,
...
}: {
imports = [
./hardware.nix
./networking.nix # generated at runtime by nixos-infect
];
# agenix secrets. Each entry points at an encrypted .age file in the repo and
# names the service account that should own the decrypted copy.

# Consumed by services.healthchecks (SECRET_KEY_FILE).
age.secrets.healthchecks-secret = {
  file = ../../secrets/monitoring-healthchecks-secret.age;
  owner = "healthchecks";
};
# Consumed by services.healthchecks (EMAIL_HOST_PASSWORD_FILE).
age.secrets.healthchecks-smtp = {
  file = ../../secrets/fastmail-smtp.age;
  owner = "healthchecks";
};
# Consumed by services.healthchecks (TELEGRAM_TOKEN_FILE).
age.secrets.healthchecks-telegram = {
  file = ../../secrets/healthchecks-telegram.age;
  owner = "healthchecks";
};
# Web config shared by Prometheus (webConfigFile) and Alertmanager
# (--web.config.file).
age.secrets.prometheus-webconfig-secret = {
  file = ../../secrets/monitoring-prometheus-webconfig.age;
  owner = "prometheus";
  # Was "775": a secret never needs to be executable or group-writable.
  # It stays world-readable because two services load it.
  # NOTE(review): presumably Alertmanager does not run as `prometheus`; if a
  # shared group can be arranged, tighten this further to "0440".
  mode = "0444";
};
# Password Prometheus uses for basic auth when talking to Alertmanager.
age.secrets.prometheus-password-secret = {
  file = ../../secrets/monitoring-prometheus-password.age;
  owner = "prometheus";
};
# OAuth client secret read by Grafana through its $__file{} provider.
age.secrets.grafana-client-secret = {
  file = ../../secrets/monitoring-grafana-client-secret.age;
  owner = "grafana";
};
# Base system housekeeping.
nix.settings.auto-optimise-store = true;
# /tmp is emptied on every boot, so nothing that must survive a reboot may
# live there.
boot.tmp.cleanOnBoot = true;
zramSwap.enable = true;

networking = {
  hostName = "monitoring";
  domain = "";
  # NOTE(review): the firewall is switched off entirely, so every service
  # that binds a non-loopback address is reachable from any network this
  # host is attached to. Confirm this is intentional.
  firewall.enable = false;
};

# Remote administration: SSH with a single authorised key for root.
services.openssh.enable = true;
users.users.root.openssh.authorizedKeys.keys = [
  "ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBDjEgtIWPA5Ncs/KOcMeT6Q/HACJJetDOLjMvXXwUE+08oTX1EpHrWPpy8J+UHKIyErCNPYq8dgtrbhnMRlxHqI="
];
# Grafana, bound to loopback and published through the nginx vhost whose name
# is taken from settings.server.domain. Login is delegated to authentik via
# generic OAuth.
services.grafana = let
  # Public hostname Grafana is served under.
  grafanaHost = "grafana.gmem.ca";
  # Common prefix of the authentik OAuth2 endpoints used below.
  authentikOAuth = "https://authentik.gmem.ca/application/o";
in {
  enable = true;
  settings = {
    feature_toggles.publicDashboards = true;

    # NOTE(review): debug-level logging for the OAuth client; looks like a
    # troubleshooting leftover — confirm it should stay enabled.
    log.filters = "oauth.generic_oauth:debug";

    server = {
      domain = grafanaHost;
      root_url = "https://${grafanaHost}";
      http_addr = "127.0.0.1";
      http_port = 2342;
    };

    auth = {
      oauth_auto_login = true;
      signout_redirect_url = "${authentikOAuth}/grafana/end-session/";
    };

    "auth.generic_oauth" = {
      enabled = true;
      name = "authentik";
      client_id = "VbOQzwuf0UK9AUGrWvaVaWWHvX2fJsZChxJNGt61";
      # Resolved by Grafana at runtime from the agenix-decrypted file, so the
      # secret itself never appears in the generated configuration.
      client_secret = "$__file{${config.age.secrets.grafana-client-secret.path}}";
      scopes = "openid email grafana-user";
      auth_url = "${authentikOAuth}/authorize/";
      token_url = "${authentikOAuth}/token/";
      api_url = "${authentikOAuth}/userinfo/";
      # Group membership decides the role: 'Grafana Admins' -> Admin,
      # 'Grafana Editors' -> Editor, anything else -> Viewer.
      role_attribute_path = "contains(info.groups[*], 'Grafana Admins') && 'Admin' || contains(info.groups[*], 'Grafana Editors') && 'Editor' || 'Viewer'";
      role_attribute_strict = true;
    };
  };
};
# Single-node Loki: in-memory rings, filesystem chunk storage, no tenant auth.
services.loki = let
  # Root directory for every on-disk path configured below.
  stateDir = "/var/lib/loki";
  # Ring key-value store shared by the ingester and the compactor.
  inMemoryRing = {kvstore.store = "inmemory";};
  # Index layout used by both schema periods.
  dailyIndex = {
    prefix = "index_";
    period = "24h";
  };
  # Directory settings for an index shipper; `kind` is "boltdb" or "tsdb".
  shipperDirs = kind: {
    active_index_directory = "${stateDir}/${kind}-shipper-active";
    cache_location = "${stateDir}/${kind}-shipper-cache";
    cache_ttl = "24h";
  };
in {
  enable = true;
  configuration = {
    auth_enabled = false;
    # Promtail's client URL is derived from this port.
    server.http_listen_port = 3030;

    ingester = {
      lifecycler = {
        address = "127.0.0.1";
        ring = inMemoryRing // {replication_factor = 1;};
      };
      chunk_idle_period = "1h";
      max_chunk_age = "1h";
      chunk_target_size = 999999;
      chunk_retain_period = "30s";
    };

    # Two schema periods: boltdb-shipper/v11 from 2022-06-06, then tsdb/v13
    # from 2024-05-01. The older entry has to stay so earlier data remains
    # readable.
    schema_config.configs = [
      {
        from = "2022-06-06";
        store = "boltdb-shipper";
        object_store = "filesystem";
        schema = "v11";
        index = dailyIndex;
      }
      {
        from = "2024-05-01";
        store = "tsdb";
        object_store = "filesystem";
        schema = "v13";
        index = dailyIndex;
      }
    ];

    storage_config = {
      boltdb_shipper = shipperDirs "boltdb";
      tsdb_shipper = shipperDirs "tsdb";
      filesystem.directory = "${stateDir}/chunks";
    };

    limits_config = {
      reject_old_samples = true;
      reject_old_samples_max_age = "168h";
    };

    # Retention deletes are disabled.
    table_manager = {
      retention_deletes_enabled = false;
      retention_period = "0s";
    };

    compactor = {
      working_directory = stateDir;
      compactor_ring = inMemoryRing;
    };
  };
};
# Promtail ships this host's systemd journal to the local Loki instance.
services.promtail = {
  enable = true;
  configuration = {
    server = {
      http_listen_port = 3031;
      grpc_listen_port = 0;
    };

    # Read offsets must survive reboots. They used to live in
    # /tmp/positions.yaml, which is wiped on every boot because
    # boot.tmp.cleanOnBoot is enabled in this file, so promtail lost its place
    # in the journal each time.
    # NOTE(review): assumes the NixOS promtail unit provides
    # /var/cache/promtail as its cache directory — confirm against the module.
    positions.filename = "/var/cache/promtail/positions.yaml";

    # Push to Loki on loopback; the port follows the Loki configuration.
    clients = [
      {
        url = "http://127.0.0.1:${toString config.services.loki.configuration.server.http_listen_port}/loki/api/v1/push";
      }
    ];

    scrape_configs = [
      {
        job_name = "journal";
        journal = {
          max_age = "12h";
          labels = {
            job = "systemd-journal";
            # Follows the configured hostname (currently "monitoring")
            # instead of repeating the literal.
            host = config.networking.hostName;
          };
        };
        # Expose the originating systemd unit as the `unit` label.
        relabel_configs = [
          {
            source_labels = ["__journal__systemd_unit"];
            target_label = "unit";
          }
        ];
      }
    ];
  };
};
# Bridge that receives Alertmanager webhooks on loopback and republishes them
# as ntfy notifications on the "alerts" topic.
services.alertmanager-ntfy = {
  enable = true;
  settings = {
    # Alertmanager's webhook receiver posts to this address (path /hook).
    http.addr = "127.0.0.1:8111";

    ntfy = {
      baseurl = "https://ntfy.gmem.ca";
      notification = {
        topic = "alerts";
        # Firing alerts are sent with high priority, everything else default.
        priority = ''
          status == "firing" ? "high" : "default"
        '';
        templates = {
          title = ''{{ if eq .Status "resolved" }}Resolved: {{ end }}{{ index .Annotations "summary" }}'';
          description = ''{{ index .Annotations "description" }}'';
          # Link straight to the dashboard named in the alert's `dashboard`
          # annotation. Uses https to match Grafana's root_url and the
          # forceSSL nginx vhost (was http://, which relied on a redirect).
          click = ''https://grafana.gmem.ca/d/{{ index .Annotations "dashboard" }}'';
        };
      };
    };
  };
};
# Prometheus server, its alert rules, the bundled Alertmanager, all scrape
# jobs, and the exporters that run on this host.
services.prometheus = let
  # Web config (TLS / basic auth) shared by Prometheus and Alertmanager.
  webConfig = config.age.secrets.prometheus-webconfig-secret.path;

  # Endpoints probed through every blackbox exporter.
  probeTargets = ["floofy.tech" "1.1.1.1" "waterwolf.club"];

  # Build one blackbox scrape job.
  #   location        - value of the `location` label, also the job name suffix
  #   exporterAddress - host:port of the blackbox exporter doing the probing
  mkBlackboxJob = location: exporterAddress: {
    job_name = "blackbox_${location}";
    metrics_path = "/probe";
    # The exporter's query parameter is `module`. The previous key `modules`
    # was not recognised, so the exporter silently used its default module.
    params.module = ["http_2xx"];
    static_configs = [
      {
        labels = {inherit location;};
        targets = probeTargets;
      }
    ];
    relabel_configs = [
      # The listed target becomes the ?target= query parameter ...
      {
        source_labels = ["__address__"];
        target_label = "__param_target";
      }
      # ... and the `instance` label ...
      {
        source_labels = ["__param_target"];
        target_label = "instance";
      }
      # ... while the request itself goes to the blackbox exporter.
      {
        source_labels = [];
        target_label = "__address__";
        replacement = exporterAddress;
      }
    ];
  };
in {
  enable = true;
  port = 9001;
  webConfigFile = webConfig;
  extraFlags = ["--web.enable-remote-write-receiver"];
  globalConfig.scrape_interval = "15s";

  # Where Prometheus delivers alerts; presumably the Alertmanager instance
  # enabled further down, which is protected by the same web config.
  alertmanagers = [
    {
      basic_auth = {
        username = "homelab";
        password_file = config.age.secrets.prometheus-password-secret.path;
      };
      static_configs = [
        {
          targets = [
            "localhost:9093"
          ];
        }
      ];
    }
  ];

  # Alert rules, serialised to JSON. The `dashboard` annotation is consumed
  # by the ntfy bridge's click template.
  rules = [
    (builtins.toJSON {
      groups = [
        {
          name = "internet_connection";
          rules = [
            {
              alert = "LowInternetDownload";
              expr = "speedtest_download_bits_per_second{} < 500000000";
              for = "2h";
              labels.severity = "page";
              annotations = {
                summary = "Internet speed low";
                description = "Home internet speed dropped below 500Mb/s.";
                dashboard = "o9mIe_Aik";
              };
            }
          ];
        }
        {
          name = "healthchecks";
          rules = [
            {
              alert = "HealthcheckFailedCheckin";
              expr = "hc_check_up < 1";
              for = "5m";
              labels.severity = "page";
              annotations = {
                summary = "{{ $labels.name }} healthcheck failed";
                description = "The {{ $labels.name }} healthcheck failed to check in.";
                dashboard = "f594ea85-45f2-4019-b988-2d17638b5cf3";
              };
            }
          ];
        }
      ];
    })
  ];

  alertmanager = {
    enable = true;
    extraFlags = ["--web.config.file=${webConfig}"];
    webExternalUrl = "https://alerts.gmem.ca";
    # Structured configuration instead of a YAML string (configText): the
    # module serialises it, so correctness no longer depends on hand-kept
    # indentation inside a string literal.
    configuration = {
      global = {};
      # The directory from which notification templates are read.
      templates = ["/etc/alertmanager/template/*.tmpl"];
      # The root route on which each incoming alert enters.
      route = {
        group_by = ["alertname" "cluster" "service"];
        group_wait = "0s";
        group_interval = "5m";
        repeat_interval = "3h";
        # A default receiver
        receiver = "ntfy";
      };
      receivers = [
        {
          name = "ntfy";
          # The alertmanager-ntfy bridge listening on loopback.
          webhook_configs = [{url = "http://localhost:8111/hook";}];
        }
      ];
    };
  };

  scrapeConfigs = [
    {
      job_name = "proxmox";
      metrics_path = "/pve";
      params.target = ["localhost"];
      static_configs = [{targets = ["proxmox:9221"];}];
    }
    {
      job_name = "personal_hardware";
      static_configs = [{targets = ["london:9100" "vancouver:9100" "localhost:9100"];}];
    }
    {
      job_name = "speedtest-exporter";
      scrape_interval = "1h";
      scrape_timeout = "1m";
      static_configs = [{targets = ["vancouver:9798"];}];
    }
    {
      job_name = "syncthing";
      static_configs = [{targets = ["vancouver:8384" "london:8384"];}];
    }
    {
      # NOTE(review): no scheme or port is given, so Prometheus defaults
      # apply — confirm git.gmem.ca serves metrics that way.
      job_name = "forgejo";
      static_configs = [{targets = ["git.gmem.ca"];}];
    }
    {
      job_name = "healthchecks";
      scrape_interval = "60s";
      # NOTE(review): this path appears to embed a project API key in plain
      # text, which then ends up in the repository and the generated
      # configuration — consider rotating it and loading it from a secret.
      metrics_path = "/projects/5f1de50f-a52d-4215-961f-aae7cc6cf6c9/metrics/TbMoU7SUdknzMe-H5Q4HzmKl3itOIrJk";
      static_configs = [{targets = ["localhost:8000"];}];
    }
    {
      job_name = "vrc_events";
      scrape_interval = "60s";
      static_configs = [{targets = ["vancouver:6534"];}];
    }
    {
      job_name = "dnsmasq";
      scrape_interval = "10s";
      static_configs = [{targets = ["100.102.19.124:9153" "100.92.113.87:9153"];}];
    }
    # Same probe targets seen from two vantage points.
    (mkBlackboxJob "home" "vancouver:9115")
    (mkBlackboxJob "hetzner" "127.0.0.1:9115")
    {
      job_name = "haproxy";
      scrape_interval = "10s";
      static_configs = [{targets = ["100.87.208.14:8404"];}];
    }
  ];

  # Node exporter for this host; loopback only, scraped as localhost:9100 by
  # the personal_hardware job.
  exporters.node = {
    enable = true;
    listenAddress = "127.0.0.1";
    enabledCollectors = [
      "systemd"
      "processes"
    ];
  };

  # Blackbox exporter backing the blackbox_hetzner job. Its configuration
  # file lives outside this repository.
  exporters.blackbox = {
    enable = true;
    configFile = "/var/lib/blackbox/config.yml";
  };
};
# Join the tailnet; several scrape targets are addressed by tailnet
# hostnames / 100.x addresses.
services.tailscale = {
  enable = true;
};
# Self-hosted Healthchecks, published at healthchecks.gmem.ca through nginx.
services.healthchecks = let
  # Path of the decrypted agenix secret with the given name.
  secretFile = name: config.age.secrets.${name}.path;
in {
  enable = true;
  # package = healthchecks-edge;
  settings = {
    SITE_NAME = "Archs Healthchecks";
    SITE_ROOT = "https://healthchecks.gmem.ca";
    SECRET_KEY_FILE = secretFile "healthchecks-secret";

    # Outgoing mail.
    EMAIL_HOST = "smtp.fastmail.com";
    EMAIL_HOST_USER = "g@gmem.ca";
    EMAIL_HOST_PASSWORD_FILE = secretFile "healthchecks-smtp";
    DEFAULT_FROM_EMAIL = "healthchecks@gmem.ca";

    # Telegram integration.
    TELEGRAM_BOT_NAME = "arch_healthchecks_bot";
    TELEGRAM_TOKEN_FILE = secretFile "healthchecks-telegram";
  };
};
# Uptime Kuma status pages. The nginx vhosts uptime.gmem.ca and
# status.floofy.tech read this PORT setting to build their upstream URL.
services.uptime-kuma.enable = true;
services.uptime-kuma.settings.PORT = "4000";
# nginx reverse proxy: terminates TLS (ACME certificates) and forwards each
# public hostname to a service listening on loopback.
services.nginx = let
  # Virtual host that forces HTTPS and proxies everything, websockets
  # included, to `upstream`.
  proxiedVhost = upstream: {
    enableACME = true;
    forceSSL = true;
    locations."/" = {
      proxyPass = upstream;
      proxyWebsockets = true;
    };
  };

  grafanaServer = config.services.grafana.settings.server;
  grafanaUpstream = "http://127.0.0.1:${toString grafanaServer.http_port}";
  # Both status hostnames are served by the same Uptime Kuma instance.
  uptimeKumaUpstream = "http://127.0.0.1:${toString config.services.uptime-kuma.settings.PORT}";
in {
  enable = true;

  recommendedGzipSettings = true;
  recommendedBrotliSettings = true;
  recommendedZstdSettings = true;
  recommendedOptimisation = true;
  recommendedTlsSettings = true;
  recommendedProxySettings = true;

  virtualHosts = {
    # Grafana is also the default vhost.
    ${grafanaServer.domain} = (proxiedVhost grafanaUpstream) // {default = true;};
    "healthchecks.gmem.ca" = proxiedVhost "http://127.0.0.1:8000";
    "uptime.gmem.ca" = proxiedVhost uptimeKumaUpstream;
    "status.floofy.tech" = proxiedVhost uptimeKumaUpstream;
  };
};
# ACME account used for the certificates requested by the nginx vhosts.
security.acme = {
  acceptTerms = true;
  defaults.email = "acme@gmem.ca";
};

# NOTE(review): stateVersion conventionally stays at the release the host was
# first installed with — do not bump it as part of a routine upgrade.
system.stateVersion = "23.11";
}