server: add metric for goroutine panics

This is useful to set up an alert when a panic occurs.
This commit is contained in:
Simon Ser 2023-06-02 11:02:23 +02:00
parent 64196d5b6e
commit 2351fc8b83

View file

@ -175,6 +175,7 @@ type Server struct {
downstreamInMessagesTotal prometheus.Counter downstreamInMessagesTotal prometheus.Counter
upstreamConnectErrorsTotal prometheus.Counter upstreamConnectErrorsTotal prometheus.Counter
workerPanicsTotal prometheus.Counter
} }
webPush *database.WebPushConfig webPush *database.WebPushConfig
@ -282,6 +283,11 @@ func (s *Server) registerMetrics() {
Name: "soju_upstream_connect_errors_total", Name: "soju_upstream_connect_errors_total",
Help: "Total number of upstream connection errors", Help: "Total number of upstream connection errors",
}) })
s.metrics.workerPanicsTotal = factory.NewCounter(prometheus.CounterOpts{
Name: "soju_worker_panics_total",
Help: "Total number of panics in worker goroutines",
})
} }
func (s *Server) loadWebPushConfig(ctx context.Context) error { func (s *Server) loadWebPushConfig(ctx context.Context) error {
@ -426,6 +432,7 @@ func (s *Server) addUserLocked(user *database.User) *user {
defer func() { defer func() {
if err := recover(); err != nil { if err := recover(); err != nil {
s.Logger.Printf("panic serving user %q: %v\n%v", user.Username, err, string(debug.Stack())) s.Logger.Printf("panic serving user %q: %v\n%v", user.Username, err, string(debug.Stack()))
s.metrics.workerPanicsTotal.Inc()
} }
s.lock.Lock() s.lock.Lock()