Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(controller): Add health checks for CustomResourceDefinitions (CRDs) #21323

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
61 changes: 53 additions & 8 deletions controller/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
kubeutil "github.com/argoproj/gitops-engine/pkg/utils/kube"
log "github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"

"github.com/argoproj/argo-cd/v2/common"
Expand Down Expand Up @@ -39,18 +40,56 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
var err error
healthOverrides := lua.ResourceHealthOverrides(resourceOverrides)
gvk := schema.GroupVersionKind{Group: res.Group, Version: res.Version, Kind: res.Kind}
if res.Live == nil {

if res.Kind == "CustomResourceDefinition" && res.Group == "apiextensions.k8s.io" {
log.Infof("Processing CRD %s/%s", res.Live.GetNamespace(), res.Live.GetName())
// Custom logic for CRD health
conditions, found, err := unstructured.NestedSlice(res.Live.Object, "status", "conditions")
if err != nil {
log.WithError(err).Warnf("Failed to retrieve conditions for CRD %s/%s", res.Live.GetNamespace(), res.Live.GetName())
}

if found {
log.Infof("Conditions found for CRD %s/%s: %+v", res.Live.GetNamespace(), res.Live.GetName(), conditions)
for _, condition := range conditions {
condMap, ok := condition.(map[string]interface{})
if ok {
condType, condTypeExists := condMap["type"].(string)
log.Infof("Processing condition: %+v", condType)
condStatus, condStatusExists := condMap["status"].(string)
condMessage, _ := condMap["message"].(string)
log.Infof("Condition type: %s, status: %s, message: %s", condType, condStatus, condMessage)
if condTypeExists && condStatusExists && condType == "NonStructuralSchema" && condStatus == "True" {
healthStatus = &health.HealthStatus{
Status: health.HealthStatusDegraded,
Message: condMessage,
}
log.Infof("Health status set to Degraded with message: %s", healthStatus.Message)
break
}
} else {
log.Warnf("Unexpected condition format for CRD %s/%s", res.Live.GetNamespace(), res.Live.GetName())
}
}
}
if healthStatus == nil {
log.Infof("Health status set to Healthy for CRD %s/%s", res.Live.GetNamespace(), res.Live.GetName())
healthStatus = &health.HealthStatus{Status: health.HealthStatusHealthy}
}
} else if res.Live == nil {
healthStatus = &health.HealthStatus{Status: health.HealthStatusMissing}
} else {
// App the manages itself should not affect own health
// App that manages itself should not affect its own health
if isSelfReferencedApp(app, kubeutil.GetObjectRef(res.Live)) {
continue
}
healthStatus, err = health.GetResourceHealth(res.Live, healthOverrides)
if err != nil && savedErr == nil {
if err != nil {
errCount++
savedErr = fmt.Errorf("failed to get resource health for %q with name %q in namespace %q: %w", res.Live.GetKind(), res.Live.GetName(), res.Live.GetNamespace(), err)
// also log so we don't lose the message
if savedErr == nil {
savedErr = fmt.Errorf("failed to get resource health for %q with name %q in namespace %q: %w", res.Live.GetKind(), res.Live.GetName(), res.Live.GetNamespace(), err)
}
// Log the error for debugging
log.WithField("application", app.QualifiedName()).Warn(savedErr)
}
}
Expand All @@ -61,17 +100,18 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource

if persistResourceHealth {
resHealth := appv1.HealthStatus{Status: healthStatus.Status, Message: healthStatus.Message}
log.Infof("Persisting health status: %+v", resHealth)
statuses[i].Health = &resHealth
} else {
statuses[i].Health = nil
}

// If the health status is missing and the resource has no built-in/custom health check, it should not affect the parent app health
// Health status checks
if _, hasOverride := healthOverrides[lua.GetConfigMapKey(gvk)]; healthStatus.Status == health.HealthStatusMissing && !hasOverride && health.GetHealthCheckFunc(gvk) == nil {
continue
}

// Missing or Unknown health status of child Argo CD app should not affect parent
// Ignore certain health statuses for child apps
if res.Kind == application.ApplicationKind && res.Group == application.Group && (healthStatus.Status == health.HealthStatusMissing || healthStatus.Status == health.HealthStatusUnknown) {
continue
}
Expand All @@ -80,9 +120,10 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
appHealth.Status = healthStatus.Status
}
}

if persistResourceHealth {
app.Status.ResourceHealthSource = appv1.ResourceHealthLocationInline
// if the status didn't change, don't update the timestamp
// Update timestamp only if health status changes
if app.Status.Health.Status == appHealth.Status && app.Status.Health.LastTransitionTime != nil {
appHealth.LastTransitionTime = app.Status.Health.LastTransitionTime
} else {
Expand All @@ -92,8 +133,12 @@ func setApplicationHealth(resources []managedResource, statuses []appv1.Resource
} else {
app.Status.ResourceHealthSource = appv1.ResourceHealthLocationAppTree
}

if savedErr != nil && errCount > 1 {
savedErr = fmt.Errorf("see application-controller logs for %d other errors; most recent error was: %w", errCount-1, savedErr)
}

log.Infof("Application %s health: %s", app.Name, appHealth)

return &appHealth, savedErr
}
41 changes: 41 additions & 0 deletions controller/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/argoproj/gitops-engine/pkg/health"
synccommon "github.com/argoproj/gitops-engine/pkg/sync/common"
"github.com/argoproj/gitops-engine/pkg/utils/kube"
log "github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -250,3 +251,43 @@ return hs`,
assert.Equal(t, health.HealthStatusHealthy, healthStatus.Status)
})
}

// TestSetApplicationHealth_CRDHealthCheck checks that a live
// CustomResourceDefinition whose status carries a NonStructuralSchema
// condition with status "True" is persisted as Degraded, and that the
// condition's message is carried over into the resource health.
func TestSetApplicationHealth_CRDHealthCheck(t *testing.T) {
	// The message placed on the condition is the one expected back on the resource health.
	const wantMessage = "CRD has non-structural schema issues"

	liveCRD := resourceFromFile("./testdata/customresourcedefinition.yaml")

	// SetNestedSlice takes a []interface{}, so the single condition is built
	// in that shape directly instead of being converted afterwards.
	conditions := []interface{}{
		map[string]interface{}{
			"type":    "NonStructuralSchema",
			"status":  "True",
			"reason":  "Violations",
			"message": wantMessage,
		},
	}
	setErr := unstructured.SetNestedSlice(liveCRD.Object, conditions, "status", "conditions")
	require.NoError(t, setErr)

	managed := []managedResource{{
		Group:   "apiextensions.k8s.io",
		Version: "v1",
		Kind:    "CustomResourceDefinition",
		Live:    &liveCRD,
	}}
	statuses := initStatuses(managed)

	// persistResourceHealth is true so the per-resource health lands in statuses.
	appHealth, err := setApplicationHealth(managed, statuses, lua.ResourceHealthOverrides{}, app, true)
	require.NoError(t, err)

	// Debug output to help diagnose a failing run.
	log.Infof("Overall health status: %+v", appHealth)
	log.Infof("Resource statuses after health check: %+v", statuses)

	crdHealth := statuses[0].Health
	require.NotNil(t, crdHealth, "Health should not be nil")
	assert.Equal(t, health.HealthStatusDegraded, crdHealth.Status)
	assert.Equal(t, wantMessage, crdHealth.Message)
}
18 changes: 18 additions & 0 deletions controller/testdata/customresourcedefinition.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: examples.example.io
spec:
  group: example.io
  names:
    kind: Example
    listKind: ExampleList
    plural: examples
    singular: example
  scope: Namespaced
  versions:
    - name: v1alpha1
      served: true
      storage: true
status:
  conditions: []
Loading