From ca9ae2e1be9a49a8b64e9b4f4c43323db6991899 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Tue, 28 Oct 2025 13:54:09 +0000 Subject: [PATCH] [VC-46370] CyberArk: Skip deleted resources when converting data readings to snapshot - Skip resources marked deleted when extracting from DataReadings - Append only non-deleted runtime.Objects to avoid nil entries - Document that extractor functions exclude deleted resources - Update tests and example input to assert deleted resources are ignored - Document the motivation for the caching mechanism and its faults Signed-off-by: Richard Wall --- examples/machinehub/input.json | 22 ++++++++++++ pkg/client/client_cyberark.go | 16 +++++++-- ...lient_cyberark_convertdatareadings_test.go | 23 ++++++++++++ pkg/datagatherer/k8s/dynamic.go | 36 +++++++++++++++++++ 4 files changed, 95 insertions(+), 2 deletions(-) diff --git a/examples/machinehub/input.json b/examples/machinehub/input.json index b70a965b..2cdba65c 100644 --- a/examples/machinehub/input.json +++ b/examples/machinehub/input.json @@ -21,6 +21,17 @@ "namespace": "team-1" } } + }, + { + "deleted_at": "2024-06-10T12:00:00Z", + "resource": { + "kind": "Secret", + "apiVersion": "v1", + "metadata": { + "name": "deleted-secret-1", + "namespace": "team-2" + } + } } ] } @@ -38,6 +49,17 @@ "namespace": "team-1" } } + }, + { + "deleted_at": "2024-06-10T12:00:00Z", + "resource": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "name": "deleted-pod-1", + "namespace": "team-2" + } + } } ] } diff --git a/pkg/client/client_cyberark.go b/pkg/client/client_cyberark.go index a979868e..3e0659fc 100644 --- a/pkg/client/client_cyberark.go +++ b/pkg/client/client_cyberark.go @@ -48,6 +48,7 @@ func NewCyberArk(httpClient *http.Client) (*CyberArkClient, error) { // PostDataReadingsWithOptions uploads data readings to CyberArk. // It converts the supplied data readings into a snapshot format expected by CyberArk. +// Deleted resources are excluded from the snapshot because they are not needed by CyberArk. // It then minimizes the snapshot to avoid uploading unnecessary data. // It initializes a data upload client with the configured HTTP client and credentials, // then uploads a snapshot. @@ -112,6 +113,8 @@ func extractClusterIDAndServerVersionFromReading(reading *api.DataReading, targe // extractResourceListFromReading converts the opaque data from a DynamicData // data reading to runtime.Object resources, to allow access to the metadata and // other kubernetes API fields. +// Deleted resources are skipped because the CyberArk Discovery and Context service +// does not need to see resources that no longer exist. func extractResourceListFromReading(reading *api.DataReading, target *[]runtime.Object) error { if reading == nil { return fmt.Errorf("programmer mistake: the DataReading must not be nil") @@ -122,10 +125,13 @@ func extractResourceListFromReading(reading *api.DataReading, target *[]runtime. "programmer mistake: the DataReading must have data type *api.DynamicData. "+ "This DataReading (%s) has data type %T", reading.DataGatherer, reading.Data) } - resources := make([]runtime.Object, len(data.Items)) + resources := make([]runtime.Object, 0, len(data.Items)) for i, item := range data.Items { + if !item.DeletedAt.IsZero() { + continue + } if resource, ok := item.Resource.(runtime.Object); ok { - resources[i] = resource + resources = append(resources, resource) } else { return fmt.Errorf( "programmer mistake: the DynamicData items must have Resource type runtime.Object. "+ @@ -136,6 +142,11 @@ func extractResourceListFromReading(reading *api.DataReading, target *[]runtime. return nil } +// defaultExtractorFunctions maps data gatherer names to functions that extract +// their data from DataReadings into the appropriate fields of a Snapshot. +// Each function takes a DataReading and a pointer to a Snapshot, +// and populates the relevant field(s) of the Snapshot based on the DataReading's data. +// Deleted resources are excluded from the snapshot because they are not needed by CyberArk. var defaultExtractorFunctions = map[string]func(*api.DataReading, *dataupload.Snapshot) error{ "ark/discovery": extractClusterIDAndServerVersionFromReading, "ark/secrets": func(r *api.DataReading, s *dataupload.Snapshot) error { @@ -184,6 +195,7 @@ var defaultExtractorFunctions = map[string]func(*api.DataReading, *dataupload.Sn // The extractorFunctions map should contain functions for each expected // DataGatherer name, which will be called with the corresponding DataReading // and the target snapshot to populate the relevant fields. +// Deleted resources are excluded from the snapshot because they are not needed by CyberArk. func convertDataReadings( extractorFunctions map[string]func(*api.DataReading, *dataupload.Snapshot) error, readings []*api.DataReading, diff --git a/pkg/client/client_cyberark_convertdatareadings_test.go b/pkg/client/client_cyberark_convertdatareadings_test.go index 5f98f64d..20ae60d9 100644 --- a/pkg/client/client_cyberark_convertdatareadings_test.go +++ b/pkg/client/client_cyberark_convertdatareadings_test.go @@ -218,6 +218,19 @@ func TestExtractResourceListFromReading(t *testing.T) { }, }, }, + // Deleted resource should be ignored + { + DeletedAt: api.Time{Time: time.Now()}, + Resource: &unstructured.Unstructured{ + Object: map[string]interface{}{ + "kind": "Namespace", + "metadata": map[string]interface{}{ + "name": "kube-system", + "uid": "uid-kube-system", + }, + }, + }, + }, }, }, }, @@ -270,6 +283,16 @@ func TestConvertDataReadings(t *testing.T) { }, }, }, + // Deleted secret should be ignored + { + DeletedAt: api.Time{Time: time.Now()}, + Resource: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "deleted-1", + Namespace: "team-1", + }, + }, + }, }, }, }, diff --git a/pkg/datagatherer/k8s/dynamic.go b/pkg/datagatherer/k8s/dynamic.go index 7cb131b5..4a979aea 100644 --- a/pkg/datagatherer/k8s/dynamic.go +++ b/pkg/datagatherer/k8s/dynamic.go @@ -1,5 +1,37 @@ package k8s +// The venafi-kubernetes-agent has a requirement that **all** resources should +// be uploaded, even short-lived secrets, which are created and deleted +// in-between data uploads. A cache was added to the datagatherer code, to +// satisfy this requirement. The cache stores all resources for 5 minutes. And +// the informer event handlers (onAdd, onUpdate, onDelete) update the cache +// accordingly. The onDelete handler does not remove the object from the cache, +// but instead marks the object as deleted by setting the DeletedAt field on the +// GatheredResource. This ensures that deleted resources are still present in +// the cache for the duration of the cache expiry time. +// +// The cache expiry is hard coded to 5 minutes, which is longer than the +// venafi-kubernetes-agent default upload interval of 1 minute. This means that +// even if a resource is created and deleted in-between data gatherer runs, it +// will still be present in the cache when the data gatherer runs. +// +// TODO(wallrj): When the agent is deployed as CyberArk disco-agent, the deleted +// items are currently discarded before upload. If this remains the case, then the cache is unnecessary +// and should be disabled to save memory. +// If, in the future, the CyberArk Discovery and Context service does want to +// see deleted items, the "deleted resource reporting mechanism" will need to be +// redesigned, so that deleted items are retained for the duration of the upload +// interval. +// +// TODO(wallrj): When the agent is deployed as CyberArk disco-agent, the upload +// interval is 12 hours by default, so the 5 minute cache expiry is not +// sufficient. +// +// TODO(wallrj): The shared informer is configured to refresh all relist all +// resources every 1 minute, which will cause unnecessary load on the apiserver. +// We need to look back at the Git history and understand whether this was done +// for good reason or due to some misunderstanding. + import ( "context" "errors" @@ -197,6 +229,8 @@ func (c *ConfigDynamic) newDataGathererWithClient(ctx context.Context, cl dynami if informerFunc, ok := kubernetesNativeResources[c.GroupVersionResource]; ok { factory := informers.NewSharedInformerFactoryWithOptions(clientset, + // TODO(wallrj): This causes all resources to be relisted every 1 + // minute which will cause unnecessary load on the apiserver. 60*time.Second, informers.WithNamespace(metav1.NamespaceAll), informers.WithTweakListOptions(func(options *metav1.ListOptions) { @@ -207,6 +241,8 @@ func (c *ConfigDynamic) newDataGathererWithClient(ctx context.Context, cl dynami } else { factory := dynamicinformer.NewFilteredDynamicSharedInformerFactory( cl, + // TODO(wallrj): This causes all resources to be relisted every 1 + // minute which will cause unnecessary load on the apiserver. 60*time.Second, metav1.NamespaceAll, func(options *metav1.ListOptions) {