Skip to content

Commit

Permalink
Further prom metrics and alerts
Browse files Browse the repository at this point in the history
  • Loading branch information
sergiorua committed Jun 10, 2023
1 parent e169280 commit 958707a
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 70 deletions.
48 changes: 0 additions & 48 deletions controllers/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ limitations under the License.
*/
package controllers

import "github.com/prometheus/client_golang/prometheus"

const (
leaseIdLabel = "vals-operator.digitalis.io/lease-id"
leaseDurationLabel = "vals-operator.digitalis.io/lease-duration"
Expand All @@ -30,49 +28,3 @@ const (
managedByLabel = "app.kubernetes.io/managed-by"
k8sSecretPrefix = "ref+k8s://"
)

// Prometheus collectors exported by the controllers package.
var (
// SecretFailures is a counter named "vals_operator_secret_failures";
// per its help text it counts errors generating secrets.
SecretFailures = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "vals_operator_secret_failures",
Help: "Number of errors generating secrets",
},
)
// DbSecretFailures is a counter named "vals_operator_dbsecret_failures";
// per its help text it counts errors generating DB secrets.
DbSecretFailures = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "vals_operator_dbsecret_failures",
Help: "Number of errors generating DB secrets",
},
)
// SecretError is a gauge vector labelled by "secret" and "namespace";
// per its help text the value is the timestamp of the last failed update.
SecretError = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "vals_operator_secret_error",
Help: "Reports timestamp from when a secret last failed to be updated",
}, []string{"secret", "namespace"})
// DbSecretError is the DB-secret counterpart of SecretError, with the
// same "secret" and "namespace" labels.
DbSecretError = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "vals_operator_dbsecret_error",
Help: "Reports timestamp from when a DB secret last failed to be updated",
}, []string{"secret", "namespace"})

// SecretInfo is a gauge vector labelled by "secret" and "namespace";
// per its help text the value is the time the secret was last updated.
SecretInfo = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "vals_operator_secret_info",
Help: "Tracks secret, timestamp is when it was last updated",
}, []string{"secret", "namespace"})
// DbSecretInfo is the DB-secret counterpart of SecretInfo, with the same
// "secret" and "namespace" labels.
DbSecretInfo = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "vals_operator_dbsecret_info",
Help: "Tracks database secret, timestamp is when it was last updated",
}, []string{"secret", "namespace"})
// DbSecretExpireTime is a gauge vector labelled by "secret" and
// "namespace" that holds an expiry-related time for a DB secret.
// NOTE(review): the help text is garbled and the unit of the stored
// value is not evident from this declaration - confirm against callers.
DbSecretExpireTime = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "vals_operator_dbsecret_expire_time",
Help: "Reports if the when the secret expired last",
}, []string{"secret", "namespace"})
// VaultError is a gauge vector labelled by "addr"; per its help text the
// value is a timestamp recorded when the Vault backend is used and fails.
VaultError = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "vals_operator_vault_error",
Help: "Timestamp if Vault backend is used and fails",
}, []string{"addr"})
)
13 changes: 7 additions & 6 deletions controllers/dbsecret_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/predicate"

digitalisiov1beta1 "digitalis.io/vals-operator/apis/digitalis.io/v1beta1"
dmetrics "digitalis.io/vals-operator/metrics"
"digitalis.io/vals-operator/utils"
"digitalis.io/vals-operator/vault"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -199,16 +200,16 @@ func (r *DbSecretReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
creds, err := vault.GetDbCredentials(dbSecret.Spec.Vault.Role, dbSecret.Spec.Vault.Mount)
if err != nil {
r.Log.Error(err, "Failed to obtain credentials from Vault", "name", dbSecret.Name, "namespace", dbSecret.Namespace)
DbSecretFailures.Inc()
DbSecretError.WithLabelValues(dbSecret.Name, dbSecret.Namespace).SetToCurrentTime()
dmetrics.DbSecretFailures.Inc()
dmetrics.DbSecretError.WithLabelValues(dbSecret.Name, dbSecret.Namespace).SetToCurrentTime()
return ctrl.Result{}, err
}

err = r.upsertSecret(&dbSecret, creds, currentSecret)
if err != nil {
r.Log.Error(err, "Failed to create secret", "name", dbSecret.Name, "namespace", dbSecret.Namespace)
DbSecretFailures.Inc()
DbSecretError.WithLabelValues(dbSecret.Name, dbSecret.Namespace).SetToCurrentTime()
dmetrics.DbSecretFailures.Inc()
dmetrics.DbSecretError.WithLabelValues(dbSecret.Name, dbSecret.Namespace).SetToCurrentTime()
return ctrl.Result{}, nil
}

Expand Down Expand Up @@ -381,8 +382,8 @@ func (r *DbSecretReconciler) upsertSecret(sDef *digitalisiov1beta1.DbSecret, cre
if err != nil {
f = float64(time.Now().UnixNano())
}
DbSecretExpireTime.WithLabelValues(secret.Name, secret.Namespace).Set(f)
DbSecretInfo.WithLabelValues(secret.Name, secret.Namespace).SetToCurrentTime()
dmetrics.DbSecretExpireTime.WithLabelValues(secret.Name, secret.Namespace).Set(f)
dmetrics.DbSecretInfo.WithLabelValues(secret.Name, secret.Namespace).SetToCurrentTime()

if r.recordingEnabled(sDef) {
r.Recorder.Event(sDef, corev1.EventTypeNormal, "Updated", "Secret created or updated")
Expand Down
7 changes: 4 additions & 3 deletions controllers/valssecret_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import (
secretv1 "digitalis.io/vals-operator/apis/digitalis.io/v1"
valsDb "digitalis.io/vals-operator/db"
dbType "digitalis.io/vals-operator/db/types"
dmetrics "digitalis.io/vals-operator/metrics"
"digitalis.io/vals-operator/utils"
sprig "github.com/Masterminds/sprig/v3"
)
Expand Down Expand Up @@ -300,13 +301,13 @@ func (r *ValsSecretReconciler) upsertSecret(sDef *secretv1.ValsSecret, data map[
msg := fmt.Sprintf("Secret %s not saved %v", secret.Name, err)
r.Recorder.Event(sDef, corev1.EventTypeNormal, "Failed", msg)
}
SecretFailures.Inc()
SecretError.WithLabelValues(secret.Name, secret.Namespace).SetToCurrentTime()
dmetrics.SecretFailures.Inc()
dmetrics.SecretError.WithLabelValues(secret.Name, secret.Namespace).SetToCurrentTime()
return err
}

/* Prometheus */
SecretInfo.WithLabelValues(secret.Name, secret.Namespace).SetToCurrentTime()
dmetrics.SecretInfo.WithLabelValues(secret.Name, secret.Namespace).SetToCurrentTime()

if r.recordingEnabled(sDef) {
r.Recorder.Event(sDef, corev1.EventTypeNormal, "Updated", "Secret created or updated")
Expand Down
17 changes: 9 additions & 8 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
secretv1 "digitalis.io/vals-operator/apis/digitalis.io/v1"
digitalisiov1beta1 "digitalis.io/vals-operator/apis/digitalis.io/v1beta1"
"digitalis.io/vals-operator/controllers"
dmetrics "digitalis.io/vals-operator/metrics"
"digitalis.io/vals-operator/vault"
"sigs.k8s.io/controller-runtime/pkg/metrics"
//+kubebuilder:scaffold:imports
Expand All @@ -57,14 +58,14 @@ func init() {
utilruntime.Must(digitalisiov1beta1.AddToScheme(scheme))

metrics.Registry.MustRegister(
controllers.SecretFailures,
controllers.DbSecretFailures,
controllers.SecretError,
controllers.DbSecretError,
controllers.DbSecretExpireTime,
controllers.DbSecretInfo,
controllers.SecretInfo,
controllers.VaultError
dmetrics.SecretFailures,
dmetrics.DbSecretFailures,
dmetrics.SecretError,
dmetrics.DbSecretError,
dmetrics.DbSecretExpireTime,
dmetrics.DbSecretInfo,
dmetrics.SecretInfo,
dmetrics.VaultError,
)
//+kubebuilder:scaffold:scheme
}
Expand Down
64 changes: 64 additions & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
Copyright 2023 Digitalis.IO.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics

import "github.com/prometheus/client_golang/prometheus"

// Prometheus collectors shared by the operator's packages. Metric names and
// label sets are kept stable on purpose: renaming them would break any
// existing alerts or dashboards that reference them.
var (
	// SecretFailures counts errors while generating secrets.
	SecretFailures = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "vals_operator_secret_failures",
			Help: "Number of errors generating secrets",
		},
	)

	// DbSecretFailures counts errors while generating DB secrets.
	DbSecretFailures = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "vals_operator_dbsecret_failures",
			Help: "Number of errors generating DB secrets",
		},
	)

	// SecretError records, per secret and namespace, the time at which a
	// secret last failed to be updated.
	SecretError = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "vals_operator_secret_error",
			Help: "Reports timestamp from when a secret last failed to be updated",
		}, []string{"secret", "namespace"})

	// DbSecretError records, per secret and namespace, the time at which a
	// DB secret last failed to be updated.
	DbSecretError = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "vals_operator_dbsecret_error",
			Help: "Reports timestamp from when a DB secret last failed to be updated",
		}, []string{"secret", "namespace"})

	// SecretInfo records, per secret and namespace, the time at which a
	// secret was last updated.
	SecretInfo = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "vals_operator_secret_info",
			Help: "Tracks secret, timestamp is when it was last updated",
		}, []string{"secret", "namespace"})

	// DbSecretInfo records, per secret and namespace, the time at which a
	// database secret was last updated.
	DbSecretInfo = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "vals_operator_dbsecret_info",
			Help: "Tracks database secret, timestamp is when it was last updated",
		}, []string{"secret", "namespace"})

	// DbSecretExpireTime holds, per secret and namespace, an expiry time
	// for a DB secret.
	// NOTE(review): a visible caller falls back to time.Now().UnixNano()
	// for this gauge while the other timestamp gauges are written with
	// SetToCurrentTime - confirm the intended unit of the stored value.
	DbSecretExpireTime = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "vals_operator_dbsecret_expire_time",
			// Help text repaired: it previously read
			// "Reports if the when the secret expired last".
			Help: "Reports when the secret expired last",
		}, []string{"secret", "namespace"})

	// VaultError records, per Vault address, the time of a failure when
	// the Vault backend is in use.
	VaultError = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "vals_operator_vault_error",
			Help: "Timestamp if Vault backend is used and fails",
		}, []string{"addr"})
)
14 changes: 9 additions & 5 deletions vault/vault.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"os"
"strings"

"digitalis.io/vals-operator/controllers"
dmetrics "digitalis.io/vals-operator/metrics"
"github.com/hashicorp/vault/api"
vault "github.com/hashicorp/vault/api"
vaultApprole "github.com/hashicorp/vault/api/auth/approle"
Expand Down Expand Up @@ -70,20 +70,23 @@ func tokenRenewer(client *vault.Client) {
for {
vaultLoginResp, err := login(client)
if err != nil {
controllers.VaultError.WithLabelValues(vaultURL).SetToCurrentTime()
dmetrics.VaultError.WithLabelValues(vaultURL).SetToCurrentTime()
dmetrics.DbSecretFailures.Inc()
log.Error(err, "unable to authenticate to Vault")
return
}
err = os.Setenv("VAULT_TOKEN", vaultLoginResp.Auth.ClientToken)
if err != nil {
controllers.VaultError.WithLabelValues(vaultURL).SetToCurrentTime()
dmetrics.VaultError.WithLabelValues(vaultURL).SetToCurrentTime()
dmetrics.DbSecretFailures.Inc()
log.Error(err, "Cannot set VAULT_TOKEN env variable")
return
}

tokenErr := manageTokenLifecycle(client, vaultLoginResp)
if tokenErr != nil {
controllers.VaultError.WithLabelValues(vaultURL).SetToCurrentTime()
dmetrics.VaultError.WithLabelValues(vaultURL).SetToCurrentTime()
dmetrics.DbSecretFailures.Inc()
log.Error(err, "unable to start managing token lifecycle")
return
}
Expand Down Expand Up @@ -324,7 +327,8 @@ func Start() error {

client, err = vaultClient()
if err != nil {
controllers.VaultError.WithLabelValues(vaultURL).SetToCurrentTime()
dmetrics.VaultError.WithLabelValues(vaultURL).SetToCurrentTime()
dmetrics.DbSecretFailures.Inc()
log.Error(err, "Error setting up vault client")
return err
}
Expand Down

0 comments on commit 958707a

Please sign in to comment.