Skip to content

Commit

Permalink
Merge pull request #687 from zeeke/metrics-exporter-prometheus
Browse files Browse the repository at this point in the history
[metrics 2/x] Configure Prometheus Operator
  • Loading branch information
adrianchiris authored Jul 31, 2024
2 parents ee40683 + 3dff029 commit 57e1e90
Show file tree
Hide file tree
Showing 15 changed files with 1,122 additions and 46 deletions.
56 changes: 56 additions & 0 deletions bindata/manifests/metrics-exporter/metrics-prometheus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Prometheus Operator integration for the SR-IOV network metrics exporter.
# Everything in this file is rendered only when .IsPrometheusOperatorInstalled is true.
{{ if .IsPrometheusOperatorInstalled }}
---
# ServiceMonitor selecting the Services labeled
# name=sriov-network-metrics-exporter-service in the operator namespace.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: sriov-network-metrics-exporter
namespace: {{.Namespace}}
spec:
endpoints:
# Scrape the "sriov-network-metrics" port every 30s over HTTPS, sending the
# bearer token read from the file below.
- interval: 30s
port: sriov-network-metrics
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token"
scheme: "https"
honorLabels: true
# The server certificate is verified (insecureSkipVerify: false) against the
# CA bundle file, using the in-cluster service DNS name as the server name.
tlsConfig:
serverName: sriov-network-metrics-exporter-service.{{.Namespace}}.svc
caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt
insecureSkipVerify: false
namespaceSelector:
matchNames:
- {{.Namespace}}
selector:
matchLabels:
name: sriov-network-metrics-exporter-service
---
# Binds the "prometheus-k8s" Role defined below to the service account given by
# .PrometheusOperatorServiceAccount in namespace .PrometheusOperatorNamespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: {{.Namespace}}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: {{.PrometheusOperatorServiceAccount}}
namespace: {{.PrometheusOperatorNamespace}}
---
# Read-only access (get/list/watch) to services, endpoints and pods in the
# operator namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: {{.Namespace}}
rules:
- apiGroups:
- ""
resources:
- services
- endpoints
- pods
verbs:
- get
- list
- watch
{{ end }}
2 changes: 1 addition & 1 deletion bindata/manifests/metrics-exporter/metrics-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
namespace: {{.Namespace}}
annotations:
prometheus.io/target: "true"
{{- if eq .ClusterType "openshift" }}
{{ if .IsOpenshift }}
service.beta.openshift.io/serving-cert-secret-name: {{ .MetricsExporterSecretName }}
{{- end }}
labels:
Expand Down
29 changes: 21 additions & 8 deletions controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,12 @@ func (r *SriovOperatorConfigReconciler) syncMetricsExporter(ctx context.Context,
data.Data["MetricsExporterSecretName"] = os.Getenv("METRICS_EXPORTER_SECRET_NAME")
data.Data["MetricsExporterPort"] = os.Getenv("METRICS_EXPORTER_PORT")
data.Data["MetricsExporterKubeRbacProxyImage"] = os.Getenv("METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE")
data.Data["ClusterType"] = vars.ClusterType
data.Data["IsOpenshift"] = r.PlatformHelper.IsOpenshiftCluster()

data.Data["IsPrometheusOperatorInstalled"] = strings.ToLower(os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED")) == trueString
data.Data["PrometheusOperatorServiceAccount"] = os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT")
data.Data["PrometheusOperatorNamespace"] = os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE")

data.Data["NodeSelectorField"] = GetDefaultNodeSelector()
if dc.Spec.ConfigDaemonNodeSelector != nil {
data.Data["NodeSelectorField"] = dc.Spec.ConfigDaemonNodeSelector
Expand All @@ -250,23 +255,21 @@ func (r *SriovOperatorConfigReconciler) syncMetricsExporter(ctx context.Context,
return err
}

deployMetricsExporter, ok := dc.Spec.FeatureGates[consts.MetricsExporterFeatureGate]
if ok && deployMetricsExporter {
if r.FeatureGate.IsEnabled(consts.MetricsExporterFeatureGate) {
for _, obj := range objs {
err = r.syncK8sResource(ctx, dc, obj)
if err != nil {
logger.Error(err, "Couldn't sync metrics exporter objects")
return err
}
}

return nil
}

for _, obj := range objs {
err = r.deleteK8sResource(ctx, obj)
if err != nil {
return err
}
err = r.deleteK8sResources(ctx, objs)
if err != nil {
return err
}

return nil
Expand Down Expand Up @@ -362,6 +365,16 @@ func (r *SriovOperatorConfigReconciler) deleteK8sResource(ctx context.Context, i
return nil
}

// deleteK8sResources deletes every object in objs, in order, by delegating to
// deleteK8sResource. It stops at the first failure and returns that error;
// it returns nil when all deletions succeed (or objs is empty).
func (r *SriovOperatorConfigReconciler) deleteK8sResources(ctx context.Context, objs []*uns.Unstructured) error {
	for i := range objs {
		if err := r.deleteK8sResource(ctx, objs[i]); err != nil {
			return err
		}
	}
	return nil
}

func (r *SriovOperatorConfigReconciler) syncK8sResource(ctx context.Context, cr *sriovnetworkv1.SriovOperatorConfig, in *uns.Unstructured) error {
switch in.GetKind() {
case clusterRoleResourceName, clusterRoleBindingResourceName, mutatingWebhookConfigurationCRDName, validatingWebhookConfigurationCRDName, machineConfigCRDName:
Expand Down
96 changes: 60 additions & 36 deletions controllers/sriovoperatorconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,21 @@ import (
admv1 "k8s.io/api/admissionregistration/v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/golang/mock/gomock"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate"
mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift"
Expand All @@ -37,7 +40,7 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
By("Create SriovOperatorConfig controller k8s objs")
config := &sriovnetworkv1.SriovOperatorConfig{}
config.SetNamespace(testNamespace)
config.SetName(constants.DefaultConfigName)
config.SetName(consts.DefaultConfigName)
config.Spec = sriovnetworkv1.SriovOperatorConfigSpec{
EnableInjector: true,
EnableOperatorWebhook: true,
Expand Down Expand Up @@ -105,7 +108,7 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
})

Context("When is up", func() {
JustBeforeEach(func() {
BeforeEach(func() {
config := &sriovnetworkv1.SriovOperatorConfig{}
err := util.WaitForNamespacedObject(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())
Expand Down Expand Up @@ -333,41 +336,54 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
Expect(err).ToNot(HaveOccurred())
})

It("should deploy the metrics-exporter when the feature gate is enabled", func() {
config := &sriovnetworkv1.SriovOperatorConfig{}
Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "default"}, config)).NotTo(HaveOccurred())

daemonSet := &appsv1.DaemonSet{}
err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-metrics-exporter", Namespace: testNamespace}, daemonSet)
Expect(err).To(HaveOccurred())
Expect(errors.IsNotFound(err)).To(BeTrue())

By("Turn `metricsExporter` feature gate on")
config.Spec.FeatureGates = map[string]bool{constants.MetricsExporterFeatureGate: true}
err = k8sClient.Update(ctx, config)
Expect(err).NotTo(HaveOccurred())

DeferCleanup(func() {
config.Spec.FeatureGates = map[string]bool{}
err = k8sClient.Update(ctx, config)
Expect(err).NotTo(HaveOccurred())
Context("metricsExporter feature gate", func() {
When("is disabled", func() {
It("should not deploy the daemonset", func() {
daemonSet := &appsv1.DaemonSet{}
err := k8sClient.Get(ctx, types.NamespacedName{Name: "sriov-metrics-exporter", Namespace: testNamespace}, daemonSet)
Expect(err).To(HaveOccurred())
Expect(errors.IsNotFound(err)).To(BeTrue())
})
})

err = util.WaitForNamespacedObject(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())

err = util.WaitForNamespacedObject(&corev1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())

By("Turn `metricsExporter` feature gate off")
config.Spec.FeatureGates = map[string]bool{}
err = k8sClient.Update(ctx, config)

err = util.WaitForNamespacedObjectDeleted(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())

err = util.WaitForNamespacedObjectDeleted(&corev1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())
When("is enabled", func() {
BeforeEach(func() {
config := &sriovnetworkv1.SriovOperatorConfig{}
Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "default"}, config)).NotTo(HaveOccurred())

By("Turn `metricsExporter` feature gate on")
config.Spec.FeatureGates = map[string]bool{consts.MetricsExporterFeatureGate: true}
err := k8sClient.Update(ctx, config)
Expect(err).NotTo(HaveOccurred())
})

It("should deploy the sriov-network-metrics-exporter DaemonSet", func() {
err := util.WaitForNamespacedObject(&appsv1.DaemonSet{}, k8sClient, testNamespace, "sriov-network-metrics-exporter", util.RetryInterval, util.APITimeout)
Expect(err).NotTo(HaveOccurred())

err = util.WaitForNamespacedObject(&corev1.Service{}, k8sClient, testNamespace, "sriov-network-metrics-exporter-service", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())
})

It("should deploy extra configuration when the Prometheus operator is installed", func() {
DeferCleanup(os.Setenv, "METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED", os.Getenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED"))
os.Setenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED", "true")

err := util.WaitForNamespacedObject(&rbacv1.Role{}, k8sClient, testNamespace, "prometheus-k8s", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())

err = util.WaitForNamespacedObject(&rbacv1.RoleBinding{}, k8sClient, testNamespace, "prometheus-k8s", util.RetryInterval, util.APITimeout)
Expect(err).ToNot(HaveOccurred())

assertResourceExists(
schema.GroupVersionKind{
Group: "monitoring.coreos.com",
Kind: "ServiceMonitor",
Version: "v1",
},
client.ObjectKey{Namespace: testNamespace, Name: "sriov-network-metrics-exporter"})
})
})
})

// This test verifies that the CABundle field in the webhook configuration added by third party components is not
Expand Down Expand Up @@ -429,6 +445,7 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
g.Expect(injectorCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-2\n")))
}, "1s").Should(Succeed())
})

It("should reconcile to a converging state when multiple node policies are set", func() {
By("Creating a consistent number of node policies")
for i := 0; i < 30; i++ {
Expand Down Expand Up @@ -477,3 +494,10 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
})
})
})

// assertResourceExists fails the running spec unless an object of the given
// GroupVersionKind can be fetched at key through the suite's k8sClient.
// The lookup is a single Get with a background context; it does not retry.
func assertResourceExists(gvk schema.GroupVersionKind, key client.ObjectKey) {
	obj := &unstructured.Unstructured{}
	obj.SetGroupVersionKind(gvk)

	Expect(k8sClient.Get(context.Background(), key, obj)).NotTo(HaveOccurred())
}
7 changes: 7 additions & 0 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
openshiftconfigv1 "github.com/openshift/api/config/v1"
mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"

//+kubebuilder:scaffold:imports
sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
Expand Down Expand Up @@ -137,6 +138,10 @@ var _ = BeforeSuite(func() {
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE", "mock-image")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT", "k8s-prometheus")
Expect(err).NotTo(HaveOccurred())
err = os.Setenv("METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE", "default")
Expect(err).NotTo(HaveOccurred())

By("bootstrapping test environment")
testEnv = &envtest.Environment{
Expand All @@ -159,6 +164,8 @@ var _ = BeforeSuite(func() {
Expect(err).NotTo(HaveOccurred())
err = openshiftconfigv1.AddToScheme(scheme.Scheme)
Expect(err).NotTo(HaveOccurred())
err = monitoringv1.AddToScheme(scheme.Scheme)
Expect(err).NotTo(HaveOccurred())

vars.Config = cfg
vars.Scheme = scheme.Scheme
Expand Down
6 changes: 6 additions & 0 deletions deploy/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ spec:
value: $METRICS_EXPORTER_IMAGE
- name: METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE
value: $METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE
- name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED
value: "$METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED"
- name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT
value: $METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT
- name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE
value: $METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE
- name: RESOURCE_PREFIX
value: $RESOURCE_PREFIX
- name: DEV_MODE
Expand Down
3 changes: 3 additions & 0 deletions deployment/sriov-network-operator-chart/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ We have introduced the following Chart parameters.
| `operator.clustertype` | string | `kubernetes` | Cluster environment type |
| `operator.metricsExporter.port` | string | `9110` | Port where the Network Metrics Exporter listen |
| `operator.metricsExporter.certificates.secretName` | string | `metrics-exporter-cert` | Secret name to serve metrics via TLS. The secret must have the same fields as `operator.admissionControllers.certificates.secretNames` |
| `operator.metricsExporter.prometheusOperator.enabled` | bool | `false` | Whether the operator should configure Prometheus resources or not (e.g. `ServiceMonitors`). |
| `operator.metricsExporter.prometheusOperator.serviceAccount` | string | `prometheus-k8s` | The service account used by the Prometheus Operator. This is used to give Prometheus the permission to list resources in the SR-IOV operator namespace |
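| `operator.metricsExporter.prometheusOperator.namespace` | string | `monitoring` | The namespace where the Prometheus Operator is installed. It is used, together with the service account, as the subject of the `RoleBinding` created in the SR-IOV operator namespace. The `monitoring.coreos.com` resources are deployed only when `operator.metricsExporter.prometheusOperator.enabled` is `true`. |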

#### Admission Controllers parameters

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ spec:
value: {{ .Values.operator.metricsExporter.certificates.secretName }}
- name: METRICS_EXPORTER_KUBE_RBAC_PROXY_IMAGE
value: {{ .Values.images.metricsExporterKubeRbacProxy }}
{{- if .Values.operator.metricsExporter.prometheusOperator.enabled }}
- name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED
value: {{ .Values.operator.metricsExporter.prometheusOperator.enabled | quote}}
- name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT
value: {{ .Values.operator.metricsExporter.prometheusOperator.serviceAccount }}
- name: METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE
value: {{ .Values.operator.metricsExporter.prometheusOperator.namespace }}
{{- end }}
- name: RESOURCE_PREFIX
value: {{ .Values.operator.resourcePrefix }}
- name: IMAGE_PULL_SECRETS
Expand Down
4 changes: 4 additions & 0 deletions deployment/sriov-network-operator-chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ operator:
port: "9110"
certificates:
secretName: "metrics-exporter-cert"
prometheusOperator:
enabled: false
serviceAccount: "prometheus-k8s"
namespace: "monitoring"
admissionControllers:
enabled: false
certificates:
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ require (
github.com/openshift/machine-config-operator v0.0.1-0.20231024085435-7e1fb719c1ba
github.com/ovn-org/libovsdb v0.6.1-0.20240125124854-03f787b1a892
github.com/pkg/errors v0.9.1
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.68.0
github.com/prometheus-operator/prometheus-operator/pkg/client v0.68.0
github.com/safchain/ethtool v0.3.0
github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.8.4
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,10 @@ github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZ
github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.68.0 h1:yl9ceUSUBo9woQIO+8eoWpcxZkdZgm89g+rVvu37TUw=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.68.0/go.mod h1:9Uuu3pEU2jB8PwuqkHvegQ0HV/BlZRJUyfTYAqfdVF8=
github.com/prometheus-operator/prometheus-operator/pkg/client v0.68.0 h1:8FS0sXpFkFPxp2gfkxyEMnhZV9yhf7xPbpsIeUZHlzM=
github.com/prometheus-operator/prometheus-operator/pkg/client v0.68.0/go.mod h1:ul4ND0BMCcOX1OSZvbJA1/lh7yQ2ILHNKuZIojGISe4=
github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q=
github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
Expand Down
3 changes: 3 additions & 0 deletions hack/run-e2e-conformance-virtual-ocp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,9 @@ export CLUSTER_TYPE=openshift
export DEV_MODE=TRUE
export CLUSTER_HAS_EMULATED_PF=TRUE
export OPERATOR_LEADER_ELECTION_ENABLE=true
# Have the operator create the Prometheus Operator resources (ServiceMonitor and RBAC).
export METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED=true
export METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT:-"prometheus-k8s"}
# Namespace of the cluster monitoring stack on OpenShift; can be overridden from the environment.
export METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE:-"openshift-monitoring"}

export SRIOV_NETWORK_OPERATOR_IMAGE="$registry/$NAMESPACE/sriov-network-operator:latest"
export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="$registry/$NAMESPACE/sriov-network-config-daemon:latest"
Expand Down
Loading

0 comments on commit 57e1e90

Please sign in to comment.