From 28f6735c1c07bd59fc74629ac779f5b6e2f9abcf Mon Sep 17 00:00:00 2001 From: Uday Bhaskar Date: Tue, 17 Feb 2026 22:44:46 +0530 Subject: [PATCH 1/3] update docs and configmap field name (#1132) (cherry picked from commit 26c5fda738e201becb4de6b46b4aabf0fa892391) --- api/v1alpha1/deviceconfig_types.go | 4 +- api/v1alpha1/zz_generated.deepcopy.go | 4 +- ...md-gpu-operator.clusterserviceversion.yaml | 6 +- bundle/manifests/amd.com_deviceconfigs.yaml | 2 +- config/crd/bases/amd.com_deviceconfigs.yaml | 2 +- ...md-gpu-operator.clusterserviceversion.yaml | 6 +- docs/autoremediation/auto-remediation.md | 88 +++++++++++++++---- .../template-patch/default-deviceconfig.yaml | 4 +- helm-charts-k8s/crds/deviceconfig-crd.yaml | 2 +- .../templates/default-deviceconfig.yaml | 4 +- .../configs/default-configmap.yaml | 8 +- internal/controllers/remediation_handler.go | 10 +-- tests/helm-e2e/helm_e2e_test.go | 4 +- 13 files changed, 97 insertions(+), 47 deletions(-) diff --git a/api/v1alpha1/deviceconfig_types.go b/api/v1alpha1/deviceconfig_types.go index 5a23c38b1..5b8b969c0 100644 --- a/api/v1alpha1/deviceconfig_types.go +++ b/api/v1alpha1/deviceconfig_types.go @@ -90,8 +90,8 @@ type RemediationWorkflowSpec struct { Enable *bool `json:"enable,omitempty"` // Name of the ConfigMap that holds condition-to-workflow mappings. - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="ConditionalWorkflows",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:conditionalWorkflows"} - ConditionalWorkflows *v1.LocalObjectReference `json:"conditionalWorkflows,omitempty"` + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Config",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:config"} + Config *v1.LocalObjectReference `json:"config,omitempty"` // Time to live for argo workflow object and its pods for a failed workflow. Accepts duration strings like "30s", "4h", "24h". 
By default, it is set to 24h //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="TtlForFailedWorkflows",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:ttlForFailedWorkflows"} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 091e81e93..6b09de073 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -743,8 +743,8 @@ func (in *RemediationWorkflowSpec) DeepCopyInto(out *RemediationWorkflowSpec) { *out = new(bool) **out = **in } - if in.ConditionalWorkflows != nil { - in, out := &in.ConditionalWorkflows, &out.ConditionalWorkflows + if in.Config != nil { + in, out := &in.Config, &out.Config *out = new(v1.LocalObjectReference) **out = **in } diff --git a/bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml b/bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml index fb2946bd4..1cf7d7684 100644 --- a/bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml +++ b/bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml @@ -708,10 +708,10 @@ spec: x-descriptors: - urn:alm:descriptor:com.amd.deviceconfigs:remediationWorkflow - description: Name of the ConfigMap that holds condition-to-workflow mappings. - displayName: ConditionalWorkflows - path: remediationWorkflow.conditionalWorkflows + displayName: Config + path: remediationWorkflow.config x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:conditionalWorkflows + - urn:alm:descriptor:com.amd.deviceconfigs:config - description: enable remediation workflows. 
disabled by default enable if operator should automatically handle remediation of node incase of gpu issues displayName: Enable diff --git a/bundle/manifests/amd.com_deviceconfigs.yaml b/bundle/manifests/amd.com_deviceconfigs.yaml index 576007bee..a60d2d1b4 100644 --- a/bundle/manifests/amd.com_deviceconfigs.yaml +++ b/bundle/manifests/amd.com_deviceconfigs.yaml @@ -1386,7 +1386,7 @@ spec: remediationWorkflow: description: remediation workflow properties: - conditionalWorkflows: + config: description: Name of the ConfigMap that holds condition-to-workflow mappings. properties: diff --git a/config/crd/bases/amd.com_deviceconfigs.yaml b/config/crd/bases/amd.com_deviceconfigs.yaml index af37025f0..b09bcdc8d 100644 --- a/config/crd/bases/amd.com_deviceconfigs.yaml +++ b/config/crd/bases/amd.com_deviceconfigs.yaml @@ -1382,7 +1382,7 @@ spec: remediationWorkflow: description: remediation workflow properties: - conditionalWorkflows: + config: description: Name of the ConfigMap that holds condition-to-workflow mappings. properties: diff --git a/config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml b/config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml index 8ad50a8d1..feeb09807 100644 --- a/config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml @@ -679,10 +679,10 @@ spec: x-descriptors: - urn:alm:descriptor:com.amd.deviceconfigs:remediationWorkflow - description: Name of the ConfigMap that holds condition-to-workflow mappings. - displayName: ConditionalWorkflows - path: remediationWorkflow.conditionalWorkflows + displayName: Config + path: remediationWorkflow.config x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:conditionalWorkflows + - urn:alm:descriptor:com.amd.deviceconfigs:config - description: enable remediation workflows. 
disabled by default enable if operator should automatically handle remediation of node incase of gpu issues displayName: Enable diff --git a/docs/autoremediation/auto-remediation.md b/docs/autoremediation/auto-remediation.md index a056fa2c5..2afb3749b 100644 --- a/docs/autoremediation/auto-remediation.md +++ b/docs/autoremediation/auto-remediation.md @@ -80,29 +80,79 @@ The GPU Operator installs Argo Workflows v3.6.5, using a [customized installatio The DeviceConfig Custom Resource includes a `RemediationWorkflowSpec` section for configuring and customizing the auto-remediation feature: -```golang -type RemediationWorkflowSpec struct { - Enable *bool - - ConditionalWorkflows *v1.LocalObjectReference - - TtlForFailedWorkflows int - - TesterImage string - - MaxParallelWorkflows int - - NodeRemediationLabels map[string]string - - NodeRemediationTaints []v1.Taint - - NodeDrainPolicy *DrainSpec -} +```yaml +remediationWorkflow: + # Enable auto node remediation feature for AMD GPU Operator. Disabled by default. + # Set to true to activate automatic remediation workflows when GPU issues are detected. + enable: true + + # ConfigMap containing mappings between node conditions and remediation workflows. + # If not specified, the operator uses the default 'default-conditional-workflow-mappings' ConfigMap. + # The ConfigMap defines which workflow template to execute for each specific error condition. + config: + name: configmapName + + # Time-to-live duration for retaining failed workflow objects and pods before cleanup. + # Accepts duration strings like "5h", "24h", "30m", "1h30m". Default is 24 hours. + # Retaining failed workflows allows for post-mortem analysis and troubleshooting. + ttlForFailedWorkflows: 5h + + # Container image used for executing GPU validation tests during remediation workflows. + # This image runs test suites to verify GPU health after remediation completes. + # Default image supports only RVS tests. Contact AMD for AGFHC-enabled test runner. 
+ testerImage: docker.io/rocm/test-runner:v1.4.1 + + # Maximum number of remediation workflows that can execute concurrently across the cluster. + # Helps maintain minimum node availability by preventing excessive simultaneous remediations. + # A value of 0 (default) means no limit is enforced. Excess workflows are queued as Pending. + maxParallelWorkflows: 0 + + # Custom taints to apply to nodes during the remediation process. + # If not specified, the operator applies the default taint 'amd-gpu-unhealthy:NoSchedule'. + # Taints prevent new workload scheduling on affected nodes during remediation. + nodeRemediationTaints: + - key: # Taint key (e.g., 'amd-gpu-unhealthy') + value: # Taint value (e.g., specific error condition) + effect: # Taint effect (e.g., 'NoSchedule', 'NoExecute', 'PreferNoSchedule') + + # Custom labels to apply to nodes during automatic remediation workflows. + # These labels persist throughout the remediation process and can be used for + # monitoring, tracking, or applying custom policies. + nodeRemediationLabels: + label-one-key: label-one-val + label-two-key: label-two-val + + # Configuration for pod eviction behavior when draining workloads from nodes. + # Controls how pods are removed during remediation, including timeouts, grace periods, + # and namespace exclusions to protect critical infrastructure. + nodeDrainPolicy: + # Enable forced draining of pods that do not respond to standard termination signals. + # When true, pods that cannot be evicted gracefully will be forcibly removed. + force: false + + # Maximum time in seconds to wait for the drain operation to complete. + # A value of 0 means infinite timeout. Default is 300 seconds (5 minutes). + timeoutSeconds: 300 + + # Grace period in seconds for pods to shut down gracefully after termination signal. + # Overrides each pod's terminationGracePeriodSeconds. Use -1 to respect pod settings. 
+ gracePeriodSeconds: 60 + + # When true, DaemonSet-managed pods are excluded from the drain operation. + # DaemonSets are designed to run on all nodes and will automatically reschedule. + ignoreDaemonSets: true + + # List of namespaces to exclude from pod eviction during drain operation. + # Pods in these namespaces remain on the node, allowing critical infrastructure + # components to continue operating throughout the remediation process. + ignoreNamespaces: + - kube-system + - cert-manager ``` **Enable** - Controls whether automatic node remediation is enabled. Set this field to `true` to activate the auto-remediation feature in the cluster. -**ConditionalWorkflows** - References a ConfigMap that contains mappings between node conditions and their corresponding remediation workflows. The GPU Operator automatically creates a `default-conditional-workflow-mappings` ConfigMap with predefined mappings. Users can either modify this default ConfigMap or create their own custom ConfigMap. If left empty, the default ConfigMap will be used automatically. More about the ConfigMap in [below section](auto-remediation.md#remediation-workflow-configmap). +**Config** - References a ConfigMap that contains mappings between node conditions and their corresponding remediation workflows. The GPU Operator automatically creates a `default-conditional-workflow-mappings` ConfigMap with predefined mappings. Users can either modify this default ConfigMap or create their own custom ConfigMap. If left empty, the default ConfigMap will be used automatically. More about the ConfigMap in [below section](auto-remediation.md#remediation-workflow-configmap). > **Note:** The `default-conditional-workflow-mappings` ConfigMap is created automatically by the GPU Operator. 
diff --git a/hack/k8s-patch/template-patch/default-deviceconfig.yaml b/hack/k8s-patch/template-patch/default-deviceconfig.yaml index 32bdbc5a0..f76e21a29 100644 --- a/hack/k8s-patch/template-patch/default-deviceconfig.yaml +++ b/hack/k8s-patch/template-patch/default-deviceconfig.yaml @@ -425,8 +425,8 @@ spec: enable: {{ .enable }} {{- end }} - {{- with .conditionalWorkflows }} - conditionalWorkflows: + {{- with .config }} + config: {{- toYaml . | nindent 6 }} {{- end }} diff --git a/helm-charts-k8s/crds/deviceconfig-crd.yaml b/helm-charts-k8s/crds/deviceconfig-crd.yaml index 0edb86212..b5ebffb75 100644 --- a/helm-charts-k8s/crds/deviceconfig-crd.yaml +++ b/helm-charts-k8s/crds/deviceconfig-crd.yaml @@ -1388,7 +1388,7 @@ spec: remediationWorkflow: description: remediation workflow properties: - conditionalWorkflows: + config: description: Name of the ConfigMap that holds condition-to-workflow mappings. properties: diff --git a/helm-charts-k8s/templates/default-deviceconfig.yaml b/helm-charts-k8s/templates/default-deviceconfig.yaml index 32bdbc5a0..f76e21a29 100644 --- a/helm-charts-k8s/templates/default-deviceconfig.yaml +++ b/helm-charts-k8s/templates/default-deviceconfig.yaml @@ -425,8 +425,8 @@ spec: enable: {{ .enable }} {{- end }} - {{- with .conditionalWorkflows }} - conditionalWorkflows: + {{- with .config }} + config: {{- toYaml . 
| nindent 6 }} {{- end }} diff --git a/internal/controllers/remediation/configs/default-configmap.yaml b/internal/controllers/remediation/configs/default-configmap.yaml index 4435fffe4..e47501840 100644 --- a/internal/controllers/remediation/configs/default-configmap.yaml +++ b/internal/controllers/remediation/configs/default-configmap.yaml @@ -320,11 +320,11 @@ recoveryPolicy: maxAllowedRunsPerWindow: 3 windowSize: 15m -- nodeCondition: "AMDGPUUnhealthy" - workflowTemplate: "default-template" +- nodeCondition: AMDGPUUnhealthy + workflowTemplate: default-template validationTestsProfile: - framework: "AGFHC" - recipe: "all_lvl4" + framework: AGFHC + recipe: all_lvl4 iterations: 1 stopOnFailure: true timeoutSeconds: 4800 diff --git a/internal/controllers/remediation_handler.go b/internal/controllers/remediation_handler.go index 3a3e0b7c2..69a4507b2 100644 --- a/internal/controllers/remediation_handler.go +++ b/internal/controllers/remediation_handler.go @@ -254,8 +254,8 @@ func (n *remediationMgr) HandleDelete(ctx context.Context, deviceConfig *amdv1al } var cfgMapName string - if deviceConfig.Spec.RemediationWorkflow.ConditionalWorkflows != nil { - cfgMapName = deviceConfig.Spec.RemediationWorkflow.ConditionalWorkflows.Name + if deviceConfig.Spec.RemediationWorkflow.Config != nil { + cfgMapName = deviceConfig.Spec.RemediationWorkflow.Config.Name } else { cfgMapName = deviceConfig.Name + "-" + DefaultConfigMapSuffix } @@ -856,15 +856,15 @@ func (h *remediationMgrHelper) createDefaultObjects(ctx context.Context, devConf logger := log.FromContext(ctx) var cfgMapName string - if devConfig.Spec.RemediationWorkflow.ConditionalWorkflows != nil { - cfgMapName = devConfig.Spec.RemediationWorkflow.ConditionalWorkflows.Name + if devConfig.Spec.RemediationWorkflow.Config != nil { + cfgMapName = devConfig.Spec.RemediationWorkflow.Config.Name } else { cfgMapName = devConfig.Name + "-" + DefaultConfigMapSuffix } // Create default configmap if required cm, err := 
h.getConfigMap(ctx, cfgMapName, devConfig.Namespace) if err != nil { - if devConfig.Spec.RemediationWorkflow.ConditionalWorkflows == nil { + if devConfig.Spec.RemediationWorkflow.Config == nil { cm, err = h.createDefaultConfigMap(ctx, cfgMapName, devConfig.Namespace) if err != nil { logger.Error(err, "Failed to create default configmap") diff --git a/tests/helm-e2e/helm_e2e_test.go b/tests/helm-e2e/helm_e2e_test.go index 13f05c6ac..a8c0be426 100644 --- a/tests/helm-e2e/helm_e2e_test.go +++ b/tests/helm-e2e/helm_e2e_test.go @@ -969,7 +969,7 @@ deviceConfig: devicePluginImagePullPolicy: Always remediationWorkflow: enable: true - conditionalWorkflows: + config: name: "conditional-workflows-configmap" ttlForFailedWorkflows: 36h testerImage: "test.io/test/remediation-workflow-tester:v1.3.0" @@ -981,7 +981,7 @@ deviceConfig: expectSpec: &v1alpha1.DeviceConfigSpec{ RemediationWorkflow: v1alpha1.RemediationWorkflowSpec{ Enable: &boolTrue, - ConditionalWorkflows: &corev1.LocalObjectReference{ + Config: &corev1.LocalObjectReference{ Name: "conditional-workflows-configmap", }, TtlForFailedWorkflows: "36h", From fa7178c509c203bd40425a0158b55241154834be Mon Sep 17 00:00:00 2001 From: Uday Bhaskar Date: Thu, 19 Feb 2026 10:46:54 +0530 Subject: [PATCH 2/3] auto-node-remediation - provide auto start option to user (#1134) * auto-node-remediation - provide auto start option to user * make new fields configurable * update documentation (cherry picked from commit 23e8b496d579f74b98de6bc0db0856b967e23d8e) --- .wordlist.txt | 914 ++++++++++++++++++ api/v1alpha1/deviceconfig_types.go | 9 + api/v1alpha1/zz_generated.deepcopy.go | 5 + ...md-gpu-operator.clusterserviceversion.yaml | 12 + bundle/manifests/amd.com_deviceconfigs.yaml | 9 + config/crd/bases/amd.com_deviceconfigs.yaml | 9 + ...md-gpu-operator.clusterserviceversion.yaml | 12 + docs/autoremediation/auto-remediation.md | 9 + .../template-patch/default-deviceconfig.yaml | 19 + helm-charts-k8s/crds/deviceconfig-crd.yaml | 9 + 
.../templates/default-deviceconfig.yaml | 19 + .../controllers/mock_remediation_handler.go | 16 +- internal/controllers/remediation_handler.go | 19 +- tests/helm-e2e/helm_e2e_test.go | 2 + 14 files changed, 1048 insertions(+), 15 deletions(-) diff --git a/.wordlist.txt b/.wordlist.txt index d109f261e..784c6fb25 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -202,3 +202,917 @@ VMs webhook xgmi YAML +AAC +ABI +ALU +AMD +AMDGPU +AMDGPUs +AMDMIGraphX +AMI +AOCC +AOMP +APIC +APIs +ASIC +ASICs +ASan +ASm +ATI +AWQ +AdaLoRA +AddressSanitizer +AlexNet +Arb +AutoAWQ +AutoGPTQ +BLAS +BMC +BitCode +Blit +Bluefield +CCD +CDNA +CIFAR +CLI +CLion +CMake +CMakeLists +CMakePackage +CP +CPC +CPF +CPP +CPU +CPUs +CSC +CSE +CSV +CSn +CTests +CU +CUDA +CUs +CXX +Cavium +CentOS +ChatGPT +CoRR +Codespaces +Commitizen +CommonMark +Concretized +Conda +ConnectX +DDP +DGEMM +DKMS +DL +DLM +DMA +DNN +DNNL +DPM +DRI +DW +DWORD +Dask +DataFrame +DataLoader +DataParallel +DeepSpeed +Dependabot +DevCap +Diffusers +Dockerfile +Dockerfiles +Doxygen +ELMo +ENDPGM +EPEL +EPYC +ESXi +EU +ExLlama +FFT +FFTs +FFmpeg +FHS +FMA +FP +FSDP +Filesystem +Flang +Fortran +Fuyu +GALB +GCD +GCDs +GCN +GDB +GDDR +GDR +GDS +GEMM +GEMMs +GFortran +GIM +GL +GLXT +GMI +GPG +GPR +GPT +GPTQ +GPU +GPU's +GPUs +GQA +GRBM +GenAI +GenZ +GitHub +Gitpod +HBM +HCA +HIPCC +HIPExtension +HIPIFY +HPC +HPCG +HPE +HPL +HSA +HWE +Haswell +Higgs +Hyperparameters +ICV +IDE +IDEs +IMDb +IOMMU +IOP +IOPM +IOV +IRQ +ISA +ISV +ISVs +ImageNet +InfiniBand +Inlines +IntelliSense +Intersphinx +Intra +Ioffe +JAX +JIT +JSON +Jupyter +KFD +KVM +Keras +Khronos +Kubernetes +LAPACK +LCLK +LDS +LLM +LLMs +LLVM +LM +LSAN +LSTM +LTS +LinearReLU +LoRA +MEM +MERCHANTABILITY +MFMA +MHA +MIGraphX +MIOpen +MIOpenGEMM +MIVisionX +MLIR +MLM +MLP +MMA +MMIO +MMIOH +MNIST +MPI +MQA +MSVC +MVAPICH +MVFFR +Makefile +Makefiles +Matplotlib +Megatron +Mellanox +Mellanox's +Meta's +MirroredStrategy +MoE +Multicore +Multithreaded +MyEnvironment +MyST +NBIO +NBIOs 
+NHWC +NIC +NICs +NLI +NLP +NPS +NSP +NUMA +NVCC +NVIDIA +NVPTX +Nano +Navi +Noncoherently +NousResearch's +NumPy +OAM +OAMs +OCP +OEM +OFED +OMP +OMPI +OMPT +OMPX +ONNX +OSS +OSU +Omniperf +Omnitrace +OpenAI +OpenCL +OpenCV +OpenFabrics +OpenGL +OpenMP +OpenSSL +OpenVX +PCI +PCIe +PEFT +PIL +PILImage +PPO +PRNG +PRs +PaLM +Pageable +PeerDirect +Perfetto +PipelineParallel +PnP +PowerShell +PyPi +PyTorch +QLoRA +Qcycles +RAII +RCCL +RDC +RDMA +RDNA +RHEL +RNN +ROC +ROCProfiler +ROCTracer +ROCclr +ROCdbgapi +ROCgdb +ROCk +ROCm +ROCmCC +ROCmSoftwarePlatform +ROCmValidationSuite +ROCr +RPC +RST +RW +Radeon +ReLU +RelWithDebInfo +Req +Rickle +RoCE +Roofline +Ryzen +SALU +SBIOS +SCA +SDK +SDMA +SDRAM +SENDMSG +SFT +SGPR +SGPRs +SHA +SIGQUIT +SIMD +SIMDs +SKU +SKUs +SLES +SMEM +SMI +SMT +SPI +SQs +SRAM +SRAMECC +SVD +SWE +SciPy +SerDes +Shlens +Skylake +SmoothQuant +Softmax +Spack +StarCoder +Supermicro +Szegedy +TCA +TCC +TCI +TCIU +TCP +TCR +TFLOPS +TGI +TPOT +TPU +TPUs +TRL +TTFT +TTGIR +TTIR +Templated +TensorBoard +TensorFlow +TensorParallel +ToC +TorchAudio +TorchInductor +TorchMIGraphX +TorchScript +TorchServe +TorchVision +TransferBench +TrapStatus +Tunable +TunableOp +UAC +UC +UCC +UCX +UIF +URI +USM +UTCL +UTIL +Uncached +Unhandled +VALU +VBIOS +VGPR +VGPRs +VGPU +VM +VMEM +VMWare +VRAM +VSIX +VSkipped +Vanhoucke +Vulkan +WGP +WX +WikiText +Wojna +Workgroups +Writebacks +XDL +XGBoost +XGBoost's +XGMI +XLA +XT +XTX +Xeon +Xilinx +Xnack +Xteam +YAML +YML +YModel +ZeRO +ZenDNN +accuracies +activations +addr +alloc +allocator +allocators +amdgpu +api +atmi +atomics +autogenerated +autoregression +autoregressive +avx +awk +backend +backends +backpropagation +backtick +benchmarking +bilinear +bitsandbytes +blit +boson +bosons +buildable +bursty +bzip +cacheable +cd +centos +centric +changelog +chiplet +ckProfiler +cmake +cmd +coalescable +codebase +codebases +codename +collater +comgr +completers +composability +composable +concretization +config +conformant 
+convolutional +convolves +cpp +csn +cuBLAS +cuFFT +cuLIB +cuRAND +cuSOLVER +cuSPARSE +customizations +dataset +dataset's +datasets +dataspace +datatype +datatypes +dbgapi +de +deallocation +denoise +denoised +denoises +denormalize +deserializers +detections +dev +devicelibs +devsel +dimensionality +disambiguates +distro +doxysphinx +dropdown +el +embeddings +enablement +endpgm +env +epilog +etcetera +ethernet +exascale +executables +ffmpeg +filesystem +fortran +galb +gcc +gdb +gfortran +gfx +githooks +github +gnupg +grayscale +gzip +heterogenous +hipBLAS +hipBLASLt +hipCUB +hipFFT +hipLIB +hipRAND +hipSOLVER +hipSPARSE +hipSPARSELt +hipTensor +hipamd +hipblas +hipcub +hipfft +hipfort +hipify +hipsolver +hipsparse +hpp +hsa +hsakmt +html +hyperparameter +ib_core +inband +incrementing +inferencing +inflight +init +initializer +inlining +installable +instantiation +interprocedural +intersphinx +intra +invariants +invocating +invoker +ipo +kdb +libfabric +libjpeg +libs +linearized +linter +linux +llvm +localscratch +logits +lossy +macOS +matchers +microarchitecture +migraphx +miopen +miopengemm +mivisionx +mkdir +mlirmiopen +mtypes +mvffr +myst +namespace +namespaces +natively +numref +ocl +opencl +opencv +openmp +openssl +optimizers +os +pageable +parallelization +parallelize +parameterization +passthrough +perfcounter +performant +perl +pragma +pre +prebuilt +precisions +precompiled +prefetch +prefetchable +preprocess +preprocessed +preprocessing +prequantized +prerequisites +profiler +protobuf +pseudorandom +py +quantized +quantizing +quasirandom +queueing +rccl +rdc +reStructuredText +reformats +repos +representativeness +req +resampling +rescaling +reusability +roadmap +roc +rocAL +rocALUTION +rocBLAS +rocFFT +rocLIB +rocMLIR +rocPRIM +rocRAND +rocSOLVER +rocSPARSE +rocThrust +rocWMMA +rocalution +rocblas +rocclr +rocfft +rocm +rocminfo +rocprim +rocprof +rocprofiler +rocr +rocrand +rocsolver +rocsparse +rocthrust +roctracer +runtime +runtimes +sL +scalability 
+scalable +sendmsg +serializers +shader +sharded +sharding +sigmoid +sm +smi +softmax +spack +src +stochastically +strided +struct +subdirectories +subdirectory +subexpression +subfolder +subfolders +suboptimal +supercomputing +templated +th +tokenization +tokenize +tokenized +tokenizer +tokenizes +toolchain +toolchains +toolset +toolsets +torchtune +torchvision +tqdm +tracebacks +tunable +tunings +txt +uarch +unallocated +uncached +uncorrectable +uninstallation +unsqueeze +unstacking +unswitching +untrusted +untuned +upstreamed +upvote +utils +vL +vLLM +variational +vdi +vectorizable +vectorization +vectorize +vectorized +vectorizer +vectorizes +vjxb +walkthrough +walkthroughs +wavefront +wavefronts +whitespaces +workgroup +workgroups +writeback +writebacks +wrreq +wzo +xFormers +xargs +xz +yaml +ysvmadyb +zyppe +CommonConfig +Kube +OnDelete +OpenShift's +RollingUpdate +Techsupport +UpgradeStrategy +UtilsContainer +AutoStartWorkflow +allocatable +allowPrivilegeEscalation +amdgpuhealth +apiserver +apiVersion +args +attachMetadata +autobuild +aws +bd +bearerTokenFile +bool +caFile +certFile +checkmark +clientCAConfigMap +clientName +clusterIP +configmap +configs +containerSecurityContext +controllerConfigYaml +controllerManager +controllerMetricsService +cpu +cpx +crds +cron +cryptographic +devel +devicePlugin +devicePluginImage +devicePluginSpec +deviceconfig +deviceconfigs +disableHttps +discoverable +dkms +dmesg +emptyDir +enableNodeLabeller +gapped +generationID +gpu +gpu's +gpus +grafana +grpc +honorLabels +honorTimestamps +hsio +imagePullPolicy +imagePullSecrets +initramfs +insecureSkipVerify +installdefaultNFDRule +io +isigned +kfd +keyFile +keySecret +kmm +kube +kubernetesClusterDomain +kubelet +kubernetes +labeller's +mTLS +managerConfig +maxParallel +metricsclient +metricsExporter +misconfigurations +mkdocs +modprobe +mortem +namesapace +namespaced +nano +nmc +nodeAffinity +nodeCondition +nodeLabellerImage +nodePort +nodeSelector +nodeSelectorTerms 
+nodelabeller +notifyRemediationMessage +notifyTestFailureMessage +numa +observability +onwards +openshift +osImage +oyaml +pci +physicalActionNeeded +podman +programmatically +pytest +ras +rbac +readded +rebootRequired +recoveryPolicy +relatedImageBuild +relatedImageBuildPullSecret +relatedImageSign +relatedImageSignPullSecret +relatedImageWorker +relatedImageWorkerPullSecret +repo +retorquing +rocHPL +rollout +searchability +serverName +serviceAccount +serviceAccountNamespaceSelector +serviceAccountSelector +serviceType +signimage +simd +skipRebootStep +slurm +spx +staticAuthorization +svc +symlinks +sysmon +targetPort +techsupport +testRunner +tlsConfig +un +uncordoned +unpartitioned +upgradeCRD +upgradePolicy +upgradeStrategy +url +validationTestsProfile +vf +vfio +virtfn +virtualized +vram +webhook's +webhookServer +webhookService +workflowTemplate +xGMI +yamls diff --git a/api/v1alpha1/deviceconfig_types.go b/api/v1alpha1/deviceconfig_types.go index 5b8b969c0..3ac33acf2 100644 --- a/api/v1alpha1/deviceconfig_types.go +++ b/api/v1alpha1/deviceconfig_types.go @@ -125,6 +125,15 @@ type RemediationWorkflowSpec struct { //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="NodeDrainPolicy",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:nodeDrainPolicy"} // +optional NodeDrainPolicy *DrainSpec `json:"nodeDrainPolicy,omitempty"` + + // AutoStartWorkflow specifies the behavior of the remediation workflow. Default value is true. + // If true, remediation workflow will be automatically started when the node condition matches. + // If false, remediation workflow will be in suspended state when the node condition matches and needs to be manually started by the user. + // This field gives users more control and flexibility on when to start the remediation workflow. + // Default value is set to true if not specified and the remediation workflow automatically starts when the node condition matches. 
+ //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="AutoStartWorkflow",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:autoStartWorkflow"} + // +kubebuilder:default:=true + AutoStartWorkflow *bool `json:"autoStartWorkflow,omitempty"` } type RegistryTLS struct { diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 6b09de073..c4738ddf9 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -767,6 +767,11 @@ func (in *RemediationWorkflowSpec) DeepCopyInto(out *RemediationWorkflowSpec) { *out = new(DrainSpec) (*in).DeepCopyInto(*out) } + if in.AutoStartWorkflow != nil { + in, out := &in.AutoStartWorkflow, &out.AutoStartWorkflow + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemediationWorkflowSpec. diff --git a/bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml b/bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml index 1cf7d7684..0440f943f 100644 --- a/bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml +++ b/bundle/manifests/amd-gpu-operator.clusterserviceversion.yaml @@ -707,6 +707,18 @@ spec: path: remediationWorkflow x-descriptors: - urn:alm:descriptor:com.amd.deviceconfigs:remediationWorkflow + - description: AutoStartWorkflow specifies the behavior of the remediation workflow. + Default value is true. If true, remediation workflow will be automatically + started when the node condition matches. If false, remediation workflow + will be in suspended state when the node condition matches and needs to + be manually started by the user. This field gives users more control and + flexibility on when to start the remediation workflow. Default value is + set to true if not specified and the remediation workflow automatically + starts when the node condition matches. 
+ displayName: AutoStartWorkflow + path: remediationWorkflow.autoStartWorkflow + x-descriptors: + - urn:alm:descriptor:com.amd.deviceconfigs:autoStartWorkflow - description: Name of the ConfigMap that holds condition-to-workflow mappings. displayName: Config path: remediationWorkflow.config diff --git a/bundle/manifests/amd.com_deviceconfigs.yaml b/bundle/manifests/amd.com_deviceconfigs.yaml index a60d2d1b4..a37cd52bf 100644 --- a/bundle/manifests/amd.com_deviceconfigs.yaml +++ b/bundle/manifests/amd.com_deviceconfigs.yaml @@ -1386,6 +1386,15 @@ spec: remediationWorkflow: description: remediation workflow properties: + autoStartWorkflow: + default: true + description: |- + AutoStartWorkflow specifies the behavior of the remediation workflow. Default value is true. + If true, remediation workflow will be automatically started when the node condition matches. + If false, remediation workflow will be in suspended state when the node condition matches and needs to be manually started by the user. + This field gives users more control and flexibility on when to start the remediation workflow. + Default value is set to true if not specified and the remediation workflow automatically starts when the node condition matches. + type: boolean config: description: Name of the ConfigMap that holds condition-to-workflow mappings. diff --git a/config/crd/bases/amd.com_deviceconfigs.yaml b/config/crd/bases/amd.com_deviceconfigs.yaml index b09bcdc8d..5ed414faf 100644 --- a/config/crd/bases/amd.com_deviceconfigs.yaml +++ b/config/crd/bases/amd.com_deviceconfigs.yaml @@ -1382,6 +1382,15 @@ spec: remediationWorkflow: description: remediation workflow properties: + autoStartWorkflow: + default: true + description: |- + AutoStartWorkflow specifies the behavior of the remediation workflow. Default value is true. + If true, remediation workflow will be automatically started when the node condition matches. 
+ If false, remediation workflow will be in suspended state when the node condition matches and needs to be manually started by the user. + This field gives users more control and flexibility on when to start the remediation workflow. + Default value is set to true if not specified and the remediation workflow automatically starts when the node condition matches. + type: boolean config: description: Name of the ConfigMap that holds condition-to-workflow mappings. diff --git a/config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml b/config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml index feeb09807..d956bd91b 100644 --- a/config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/amd-gpu-operator.clusterserviceversion.yaml @@ -678,6 +678,18 @@ spec: path: remediationWorkflow x-descriptors: - urn:alm:descriptor:com.amd.deviceconfigs:remediationWorkflow + - description: AutoStartWorkflow specifies the behavior of the remediation workflow. + Default value is true. If true, remediation workflow will be automatically + started when the node condition matches. If false, remediation workflow + will be in suspended state when the node condition matches and needs to + be manually started by the user. This field gives users more control and + flexibility on when to start the remediation workflow. Default value is + set to true if not specified and the remediation workflow automatically + starts when the node condition matches. + displayName: AutoStartWorkflow + path: remediationWorkflow.autoStartWorkflow + x-descriptors: + - urn:alm:descriptor:com.amd.deviceconfigs:autoStartWorkflow - description: Name of the ConfigMap that holds condition-to-workflow mappings. 
displayName: Config path: remediationWorkflow.config diff --git a/docs/autoremediation/auto-remediation.md b/docs/autoremediation/auto-remediation.md index 2afb3749b..a4bdc6069 100644 --- a/docs/autoremediation/auto-remediation.md +++ b/docs/autoremediation/auto-remediation.md @@ -148,6 +148,13 @@ remediationWorkflow: ignoreNamespaces: - kube-system - cert-manager + + # AutoStartWorkflow specifies the behavior of the remediation workflow. Default value is true. + # If true, remediation workflow will be automatically started when the node condition matches. + # If false, remediation workflow will be in suspended state when the node condition matches and needs to be manually started by the user. + # This field gives users more control and flexibility on when to start the remediation workflow. + # Default value is set to true if not specified and the remediation workflow automatically starts when the node condition matches. + autoStartWorkflow: true ``` **Enable** - Controls whether automatic node remediation is enabled. Set this field to `true` to activate the auto-remediation feature in the cluster. @@ -172,6 +179,8 @@ When the number of triggered workflows exceeds this limit, additional workflows **NodeDrainPolicy** - Configures the pod eviction behavior when draining workloads from nodes during the remediation process. This policy controls how pods are removed, including timeout settings, grace periods, and namespace exclusions. See the [Node Drain Policy Configuration](#node-drain-policy-configuration) section below for detailed field descriptions. +**AutoStartWorkflow** - Specifies the behavior of the remediation workflow. Default value is `true`. If `true`, the remediation workflow is automatically started when the node condition matches. If `false`, the remediation workflow remains in a suspended state when the node condition matches and must be manually started by the user. 
To resume the workflow at a later point, refer to the [resume workflow section](#resuming-a-paused-workflow). + **Spec.CommonConfig.UtilsContainer** - Remediation workflow uses a utility image for executing the steps. Specify the utility image in `Spec.CommonConfig.UtilsContainer` section of Device Config. If the UtilsContainer section is not specified, default image used is `docker.io/rocm/gpu-operator-utils:latest` #### Node Drain Policy Configuration diff --git a/hack/k8s-patch/template-patch/default-deviceconfig.yaml b/hack/k8s-patch/template-patch/default-deviceconfig.yaml index f76e21a29..cb7df5ee3 100644 --- a/hack/k8s-patch/template-patch/default-deviceconfig.yaml +++ b/hack/k8s-patch/template-patch/default-deviceconfig.yaml @@ -441,6 +441,25 @@ spec: {{- with .maxParallelWorkflows }} maxParallelWorkflows: {{ . }} {{- end }} + + {{- with .nodeRemediationLabels }} + nodeRemediationLabels: + {{- toYaml . | nindent 6 }} + {{- end }} + + {{- with .nodeRemediationTaints }} + nodeRemediationTaints: + {{- toYaml . | nindent 6 }} + {{- end }} + + {{- with .nodeDrainPolicy }} + nodeDrainPolicy: + {{- toYaml . | nindent 6 }} + {{- end }} + + {{- if kindIs "bool" .autoStartWorkflow }}{{/* not "with": it would skip an explicit false */}} + autoStartWorkflow: {{ .autoStartWorkflow }} + {{- end }} {{- end }} {{- end }} diff --git a/helm-charts-k8s/crds/deviceconfig-crd.yaml b/helm-charts-k8s/crds/deviceconfig-crd.yaml index b5ebffb75..9afbb90b0 100644 --- a/helm-charts-k8s/crds/deviceconfig-crd.yaml +++ b/helm-charts-k8s/crds/deviceconfig-crd.yaml @@ -1388,6 +1388,15 @@ spec: remediationWorkflow: description: remediation workflow properties: + autoStartWorkflow: + default: true + description: |- + AutoStartWorkflow specifies the behavior of the remediation workflow. Default value is true. + If true, remediation workflow will be automatically started when the node condition matches. + If false, remediation workflow will be in suspended state when the node condition matches and needs to be manually started by the user.
+ This field gives users more control and flexibility on when to start the remediation workflow. + Default value is set to true if not specified and the remediation workflow automatically starts when the node condition matches. + type: boolean config: description: Name of the ConfigMap that holds condition-to-workflow mappings. diff --git a/helm-charts-k8s/templates/default-deviceconfig.yaml b/helm-charts-k8s/templates/default-deviceconfig.yaml index f76e21a29..cb7df5ee3 100644 --- a/helm-charts-k8s/templates/default-deviceconfig.yaml +++ b/helm-charts-k8s/templates/default-deviceconfig.yaml @@ -441,6 +441,25 @@ spec: {{- with .maxParallelWorkflows }} maxParallelWorkflows: {{ . }} {{- end }} + + {{- with .nodeRemediationLabels }} + nodeRemediationLabels: + {{- toYaml . | nindent 6 }} + {{- end }} + + {{- with .nodeRemediationTaints }} + nodeRemediationTaints: + {{- toYaml . | nindent 6 }} + {{- end }} + + {{- with .nodeDrainPolicy }} + nodeDrainPolicy: + {{- toYaml . | nindent 6 }} + {{- end }} + + {{- if kindIs "bool" .autoStartWorkflow }}{{/* not "with": it would skip an explicit false */}} + autoStartWorkflow: {{ .autoStartWorkflow }} + {{- end }} {{- end }} {{- end }} diff --git a/internal/controllers/mock_remediation_handler.go b/internal/controllers/mock_remediation_handler.go index 42fe85334..de0fca3b5 100644 --- a/internal/controllers/mock_remediation_handler.go +++ b/internal/controllers/mock_remediation_handler.go @@ -154,29 +154,29 @@ func (mr *MockremediationMgrHelperAPIMockRecorder) attemptAbortWorkflowOnNode(ct } // attemptResumeWorkflowOnNode mocks base method.
-func (m *MockremediationMgrHelperAPI) attemptResumeWorkflowOnNode(ctx context.Context, node *v1.Node, mapping ConditionWorkflowMapping, wf *v1alpha10.Workflow) { +func (m *MockremediationMgrHelperAPI) attemptResumeWorkflowOnNode(ctx context.Context, node *v1.Node, mapping ConditionWorkflowMapping, wf *v1alpha10.Workflow, stageName string) { m.ctrl.T.Helper() - m.ctrl.Call(m, "attemptResumeWorkflowOnNode", ctx, node, mapping, wf) + m.ctrl.Call(m, "attemptResumeWorkflowOnNode", ctx, node, mapping, wf, stageName) } // attemptResumeWorkflowOnNode indicates an expected call of attemptResumeWorkflowOnNode. -func (mr *MockremediationMgrHelperAPIMockRecorder) attemptResumeWorkflowOnNode(ctx, node, mapping, wf any) *gomock.Call { +func (mr *MockremediationMgrHelperAPIMockRecorder) attemptResumeWorkflowOnNode(ctx, node, mapping, wf, stageName any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "attemptResumeWorkflowOnNode", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).attemptResumeWorkflowOnNode), ctx, node, mapping, wf) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "attemptResumeWorkflowOnNode", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).attemptResumeWorkflowOnNode), ctx, node, mapping, wf, stageName) } // canResumeWorkflowOnNode mocks base method. -func (m *MockremediationMgrHelperAPI) canResumeWorkflowOnNode(ctx context.Context, node *v1.Node, mapping *ConditionWorkflowMapping) bool { +func (m *MockremediationMgrHelperAPI) canResumeWorkflowOnNode(ctx context.Context, node *v1.Node, mapping *ConditionWorkflowMapping, stageName string) bool { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "canResumeWorkflowOnNode", ctx, node, mapping) + ret := m.ctrl.Call(m, "canResumeWorkflowOnNode", ctx, node, mapping, stageName) ret0, _ := ret[0].(bool) return ret0 } // canResumeWorkflowOnNode indicates an expected call of canResumeWorkflowOnNode. 
-func (mr *MockremediationMgrHelperAPIMockRecorder) canResumeWorkflowOnNode(ctx, node, mapping any) *gomock.Call { +func (mr *MockremediationMgrHelperAPIMockRecorder) canResumeWorkflowOnNode(ctx, node, mapping, stageName any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "canResumeWorkflowOnNode", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).canResumeWorkflowOnNode), ctx, node, mapping) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "canResumeWorkflowOnNode", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).canResumeWorkflowOnNode), ctx, node, mapping, stageName) } // checkIfTaintExists mocks base method. diff --git a/internal/controllers/remediation_handler.go b/internal/controllers/remediation_handler.go index 69a4507b2..fc423c11d 100644 --- a/internal/controllers/remediation_handler.go +++ b/internal/controllers/remediation_handler.go @@ -301,7 +301,7 @@ type remediationMgrHelperAPI interface { getMaxAllowedRunsPerWindow(recoveryPolicy *RecoveryPolicyConfig) int getWindowSize(recoveryPolicy *RecoveryPolicyConfig) string isRecoveryPolicyViolated(ctx context.Context, nodeName string, mapping *ConditionWorkflowMapping) bool - canResumeWorkflowOnNode(ctx context.Context, node *v1.Node, mapping *ConditionWorkflowMapping) bool + canResumeWorkflowOnNode(ctx context.Context, node *v1.Node, mapping *ConditionWorkflowMapping, stageName string) bool syncInternalMapFromStatusCR(ctx context.Context, namespace string) error isNodeLabelledForForceResume(ctx context.Context, node *v1.Node) bool removeForceResumeWorkflowLabelFromNode(ctx context.Context, node *v1.Node) error @@ -309,7 +309,7 @@ type remediationMgrHelperAPI interface { removeAbortWorkflowLabelFromNode(ctx context.Context, node *v1.Node) error abortWorkflow(ctx context.Context, workflow *workflowv1alpha1.Workflow) error attemptAbortWorkflowOnNode(ctx context.Context, node *v1.Node, wf *workflowv1alpha1.Workflow) (bool, error) - 
attemptResumeWorkflowOnNode(ctx context.Context, node *v1.Node, mapping ConditionWorkflowMapping, wf *workflowv1alpha1.Workflow) + attemptResumeWorkflowOnNode(ctx context.Context, node *v1.Node, mapping ConditionWorkflowMapping, wf *workflowv1alpha1.Workflow, stageName string) handleSuspendedWorkflowsOnNode(ctx context.Context, devConfig *amdv1alpha1.DeviceConfig, node *v1.Node, mapping ConditionWorkflowMapping, wf *workflowv1alpha1.Workflow) bool getWorkflowTaskScriptSource(scriptFileName string) (string, error) updateMaxParallelWorkflows(ctx context.Context, devConfig *amdv1alpha1.DeviceConfig) error @@ -608,6 +608,7 @@ func (h *remediationMgrHelper) createDefaultWorkflowTemplate(ctx context.Context { Name: "inbuilt", Steps: []workflowv1alpha1.ParallelSteps{ + {Steps: []workflowv1alpha1.WorkflowStep{{Name: "autostart", Template: "suspend", When: "{{workflow.parameters.auto_start}} == 'false'"}}}, // If auto start is disabled, workflow will be created in suspended state and needs to be manually resumed by user {Steps: []workflowv1alpha1.WorkflowStep{{Name: "applylabels", Template: "applylabels"}}}, {Steps: []workflowv1alpha1.WorkflowStep{{Name: "taint", Template: "taint"}}}, {Steps: []workflowv1alpha1.WorkflowStep{{Name: "drain", Template: "drain"}}}, @@ -1075,6 +1076,10 @@ func (h *remediationMgrHelper) populateWorkflow(ctx context.Context, wfTemplate Name: "skipRebootStep", Value: workflowv1alpha1.AnyStringPtr(mapping.SkipRebootStep), }, + { + Name: "auto_start", + Value: workflowv1alpha1.AnyStringPtr(strconv.FormatBool(*devConfig.Spec.RemediationWorkflow.AutoStartWorkflow)), + }, }, } @@ -1203,7 +1208,7 @@ func (h *remediationMgrHelper) handleSuspendedWorkflowsOnNode(ctx context.Contex } // Check if the workflow can be resumed, and attempt resume - h.attemptResumeWorkflowOnNode(ctx, node, mapping, wf) + h.attemptResumeWorkflowOnNode(ctx, node, mapping, wf, wfStage.DisplayName) // irrespective of whether it was resumed or not, return false to avoid creating a 
new workflow return false } @@ -1229,10 +1234,10 @@ func (h *remediationMgrHelper) attemptAbortWorkflowOnNode(ctx context.Context, n return canAbort, nil } -func (h *remediationMgrHelper) attemptResumeWorkflowOnNode(ctx context.Context, node *v1.Node, mapping ConditionWorkflowMapping, wf *workflowv1alpha1.Workflow) { +func (h *remediationMgrHelper) attemptResumeWorkflowOnNode(ctx context.Context, node *v1.Node, mapping ConditionWorkflowMapping, wf *workflowv1alpha1.Workflow, stageName string) { logger := log.FromContext(ctx) // Check if the workflow can be resumed - canResume := h.canResumeWorkflowOnNode(ctx, node, &mapping) + canResume := h.canResumeWorkflowOnNode(ctx, node, &mapping, stageName) if canResume { logger.Info(fmt.Sprintf("Attempting to resume suspended workflow %q on node %q.", wf.Name, node.Name)) if err := h.resumeSuspendedWorkflow(ctx, wf.Name, wf.Namespace); err != nil { @@ -1529,7 +1534,7 @@ func (h *remediationMgrHelper) removeForceResumeWorkflowLabelFromNode(ctx contex return nil } -func (h *remediationMgrHelper) canResumeWorkflowOnNode(ctx context.Context, node *v1.Node, mapping *ConditionWorkflowMapping) bool { +func (h *remediationMgrHelper) canResumeWorkflowOnNode(ctx context.Context, node *v1.Node, mapping *ConditionWorkflowMapping, stageName string) bool { logger := log.FromContext(ctx) // Check if the recovery policy is violated, if so, do not allow resumption @@ -1540,7 +1545,7 @@ func (h *remediationMgrHelper) canResumeWorkflowOnNode(ctx context.Context, node } // if no physical action is needed, allow resumption of workflow - if !mapping.PhysicalActionNeeded { + if !mapping.PhysicalActionNeeded && stageName != "autostart" { return true } diff --git a/tests/helm-e2e/helm_e2e_test.go b/tests/helm-e2e/helm_e2e_test.go index a8c0be426..aee6d12e4 100644 --- a/tests/helm-e2e/helm_e2e_test.go +++ b/tests/helm-e2e/helm_e2e_test.go @@ -973,6 +973,7 @@ deviceConfig: name: "conditional-workflows-configmap" ttlForFailedWorkflows: 36h testerImage: 
"test.io/test/remediation-workflow-tester:v1.3.0" + autoStartWorkflow: true `, extraArgs: []string{"-f", tmpValuesYamlPath, "--set", "crds.defaultCR.upgrade=true"}, helmFunc: s.upgradeHelmChart, @@ -986,6 +987,7 @@ deviceConfig: }, TtlForFailedWorkflows: "36h", TesterImage: "test.io/test/remediation-workflow-tester:v1.3.0", + AutoStartWorkflow: &boolTrue, }, }, verifyFunc: s.verifyRemediationWorkflow, From 3d22c445c69f81c700e00bf6b7eac2bda16b27ec Mon Sep 17 00:00:00 2001 From: Uday Bhaskar Date: Sat, 21 Feb 2026 00:08:06 +0530 Subject: [PATCH 3/3] handle argo crds in helm charts (#1120) * handle argo crds with helm * argo workflow controller to handle operator workflows alone (cherry picked from commit 7cff47c6e3d24e471cd0ed8134668b14f59b577e) --- Dockerfile | 8 + Makefile | 25 +- .../argoproj.io_clusterworkflowtemplates.yaml | 36 + .../crds/argoproj.io_cronworkflows.yaml | 40 + .../argoproj.io_workflowartifactgctasks.yaml | 1137 ++++++ .../argoproj.io_workfloweventbindings.yaml | 681 ++++ .../crds/argoproj.io_workflows.yaml | 55 + .../crds/argoproj.io_workflowtaskresults.yaml | 662 ++++ .../crds/argoproj.io_workflowtasksets.yaml | 41 + .../crds/argoproj.io_workflowtemplates.yaml | 35 + .../k8s-remediation-patch/kustomization.yaml | 18 + .../metadata-patch/Chart.yaml | 4 +- .../metadata-patch/values.yaml | 11 - .../template-patch/deployment.yaml | 3034 ----------------- hack/k8s-patch/metadata-patch/Chart.yaml | 6 +- hack/k8s-patch/metadata-patch/values.yaml | 5 + .../template-patch/post-delete-hook.yaml | 28 + .../template-patch/pre-upgrade-hook.yaml | 12 + .../remediation-deployment.yaml | 344 ++ helm-charts-k8s/Chart.lock | 8 +- helm-charts-k8s/Chart.yaml | 6 +- helm-charts-k8s/README.md | 9 +- .../charts/remediation-crds/.helmignore | 23 + .../charts/remediation-crds/Chart.yaml | 5 + .../crds/clusterworkflowtemplate-crd.yaml | 52 + .../crds/cronworkflow-crd.yaml | 56 + .../remediation-crds/crds/workflow-crd.yaml | 71 + .../crds/workflowartifactgctask-crd.yaml | 
1153 +++++++ .../crds/workfloweventbinding-crd.yaml | 697 ++++ .../crds/workflowtaskresult-crd.yaml | 678 ++++ .../crds/workflowtaskset-crd.yaml | 57 + .../crds/workflowtemplate-crd.yaml | 51 + .../remediation-crds/templates/_helpers.tpl | 62 + .../charts/remediation-crds/values.yaml | 0 helm-charts-k8s/charts/remediation/Chart.yaml | 5 - .../remediation/templates/deployment.yaml | 3034 ----------------- .../charts/remediation/values.yaml | 11 - .../templates/post-delete-hook.yaml | 28 + .../templates/pre-upgrade-hook.yaml | 12 + .../templates/remediation-deployment.yaml | 344 ++ helm-charts-k8s/values.yaml | 5 + .../controllers/mock_remediation_handler.go | 81 + internal/controllers/remediation_handler.go | 230 +- 43 files changed, 6723 insertions(+), 6137 deletions(-) create mode 100644 hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_clusterworkflowtemplates.yaml create mode 100644 hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_cronworkflows.yaml create mode 100644 hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowartifactgctasks.yaml create mode 100644 hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workfloweventbindings.yaml create mode 100644 hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflows.yaml create mode 100644 hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowtaskresults.yaml create mode 100644 hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowtasksets.yaml create mode 100644 hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowtemplates.yaml create mode 100644 hack/k8s-patch/k8s-remediation-patch/kustomization.yaml delete mode 100644 hack/k8s-patch/k8s-remediation-patch/template-patch/deployment.yaml create mode 100644 hack/k8s-patch/template-patch/remediation-deployment.yaml create mode 100644 helm-charts-k8s/charts/remediation-crds/.helmignore create mode 100644 helm-charts-k8s/charts/remediation-crds/Chart.yaml create mode 100644 
helm-charts-k8s/charts/remediation-crds/crds/clusterworkflowtemplate-crd.yaml create mode 100644 helm-charts-k8s/charts/remediation-crds/crds/cronworkflow-crd.yaml create mode 100644 helm-charts-k8s/charts/remediation-crds/crds/workflow-crd.yaml create mode 100644 helm-charts-k8s/charts/remediation-crds/crds/workflowartifactgctask-crd.yaml create mode 100644 helm-charts-k8s/charts/remediation-crds/crds/workfloweventbinding-crd.yaml create mode 100644 helm-charts-k8s/charts/remediation-crds/crds/workflowtaskresult-crd.yaml create mode 100644 helm-charts-k8s/charts/remediation-crds/crds/workflowtaskset-crd.yaml create mode 100644 helm-charts-k8s/charts/remediation-crds/crds/workflowtemplate-crd.yaml create mode 100644 helm-charts-k8s/charts/remediation-crds/templates/_helpers.tpl create mode 100644 helm-charts-k8s/charts/remediation-crds/values.yaml delete mode 100644 helm-charts-k8s/charts/remediation/Chart.yaml delete mode 100644 helm-charts-k8s/charts/remediation/templates/deployment.yaml delete mode 100644 helm-charts-k8s/charts/remediation/values.yaml create mode 100644 helm-charts-k8s/templates/remediation-deployment.yaml diff --git a/Dockerfile b/Dockerfile index fce87d424..69c101b21 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,6 +54,14 @@ COPY --from=builder /opt/app-root/src/helm-charts-k8s/crds/deviceconfig-crd.yaml /opt/app-root/src/helm-charts-k8s/charts/node-feature-discovery/crds/nfd-api-crds.yaml \ /opt/app-root/src/helm-charts-k8s/charts/kmm/crds/module-crd.yaml \ /opt/app-root/src/helm-charts-k8s/charts/kmm/crds/nodemodulesconfig-crd.yaml \ + /opt/app-root/src/helm-charts-k8s/charts/remediation-crds/crds/clusterworkflowtemplate-crd.yaml \ + /opt/app-root/src/helm-charts-k8s/charts/remediation-crds/crds/cronworkflow-crd.yaml \ + /opt/app-root/src/helm-charts-k8s/charts/remediation-crds/crds/workflowartifactgctask-crd.yaml \ + /opt/app-root/src/helm-charts-k8s/charts/remediation-crds/crds/workflow-crd.yaml \ + 
/opt/app-root/src/helm-charts-k8s/charts/remediation-crds/crds/workfloweventbinding-crd.yaml \ + /opt/app-root/src/helm-charts-k8s/charts/remediation-crds/crds/workflowtaskresult-crd.yaml \ + /opt/app-root/src/helm-charts-k8s/charts/remediation-crds/crds/workflowtaskset-crd.yaml \ + /opt/app-root/src/helm-charts-k8s/charts/remediation-crds/crds/workflowtemplate-crd.yaml \ /opt/helm-charts-crds-k8s/ RUN mkdir -p /remediation diff --git a/Makefile b/Makefile index 868bc22f3..8b5a6f089 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,7 @@ YAML_FILES=bundle/manifests/amd-gpu-operator-node-metrics_rbac.authorization.k8s CRD_YAML_FILES = deviceconfig-crd.yaml remediationworkflowstatus-crd.yaml K8S_KMM_CRD_YAML_FILES=module-crd.yaml nodemodulesconfig-crd.yaml DEFAULT_VALUES_FILES=helm-charts-k8s/values.yaml hack/k8s-patch/metadata-patch/values.yaml +REMEDIATION_CRD_YAML_FILES=clusterworkflowtemplate-crd.yaml cronworkflow-crd.yaml workflowartifactgctask-crd.yaml workflow-crd.yaml workfloweventbinding-crd.yaml workflowtaskresult-crd.yaml workflowtaskset-crd.yaml workflowtemplate-crd.yaml GPU_OPERATOR_CHART ?= $(shell pwd)/helm-charts-k8s/gpu-operator-helm-k8s-$(PROJECT_VERSION).tgz KUBECTL_CMD ?= kubectl @@ -68,8 +69,12 @@ ifdef SKIP_INSTALL_DEFAULT_CR SKIP_INSTALL_DEFAULT_CR_CMD=--set crds.defaultCR.install=false endif -ifdef SKIP_REMEDIATION_CONTROLLER - SKIP_REMEDIATION_CONTROLLER_CMD=--set remediation.enabled=false +ifdef SKIP_REMEDIATION + SKIP_REMEDIATION_CMD=--set remediation.enabled=false +endif + +ifdef SKIP_REMEDIATION_CRDS + SKIP_REMEDIATION_CRDS_CMD=--set remediation.installCRDs=false endif ################################# @@ -332,7 +337,7 @@ helm: ## Build helm charts for Kubernetes. 
$(MAKE) helm-k8s .PHONY: helm-k8s -helm-k8s: helmify manifests kustomize clean-helm gen-kmm-charts +helm-k8s: helmify manifests kustomize clean-helm gen-kmm-charts gen-remediation-charts $(KUSTOMIZE) build config/default | $(HELMIFY) helm-charts-k8s # Patching k8s helm chart metadata cp $(shell pwd)/hack/k8s-patch/metadata-patch/*.yaml $(shell pwd)/helm-charts-k8s/ @@ -345,9 +350,7 @@ helm-k8s: helmify manifests kustomize clean-helm gen-kmm-charts # Patching k8s helm chart kmm subchart cp $(shell pwd)/hack/k8s-patch/k8s-kmm-patch/metadata-patch/*.yaml $(shell pwd)/helm-charts-k8s/charts/kmm/ cp $(shell pwd)/hack/k8s-patch/k8s-kmm-patch/template-patch/*.yaml $(shell pwd)/helm-charts-k8s/charts/kmm/templates/ - mkdir -p $(shell pwd)/helm-charts-k8s/charts/remediation/templates - cp $(shell pwd)/hack/k8s-patch/k8s-remediation-patch/metadata-patch/*.yaml $(shell pwd)/helm-charts-k8s/charts/remediation/ - cp $(shell pwd)/hack/k8s-patch/k8s-remediation-patch/template-patch/*.yaml $(shell pwd)/helm-charts-k8s/charts/remediation/templates/ + cp $(shell pwd)/hack/k8s-patch/k8s-remediation-patch/metadata-patch/*.yaml $(shell pwd)/helm-charts-k8s/charts/remediation-crds/ cd $(shell pwd)/helm-charts-k8s; helm dependency update; helm lint .; cd ..; mkdir $(shell pwd)/helm-charts-k8s/crds echo "moving crd yaml files to crds folder" @@ -585,7 +588,15 @@ endif rm helm-charts-k8s/charts/kmm/templates/$$file; \ done -cert-manager-install: ## Deploy cert-manager. 
+gen-remediation-charts: + $(KUSTOMIZE) build $(shell pwd)/hack/k8s-patch/k8s-remediation-patch | $(HELMIFY) helm-charts-k8s/charts/remediation-crds + mkdir -p helm-charts-k8s/charts/remediation-crds/crds + @for file in $(REMEDIATION_CRD_YAML_FILES); do \ + helm template amd-gpu helm-charts-k8s/charts/remediation-crds -s templates/$$file > helm-charts-k8s/charts/remediation-crds/crds/$$file; \ + rm helm-charts-k8s/charts/remediation-crds/templates/$$file; \ + done + +cert-manager-install: helm repo add jetstack https://charts.jetstack.io --force-update helm install cert-manager jetstack/cert-manager --namespace cert-manager --create-namespace --version v1.15.1 --set crds.enabled=true diff --git a/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_clusterworkflowtemplates.yaml b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_clusterworkflowtemplates.yaml new file mode 100644 index 000000000..125fc2fa6 --- /dev/null +++ b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_clusterworkflowtemplates.yaml @@ -0,0 +1,36 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clusterworkflowtemplates.argoproj.io +spec: + group: argoproj.io + names: + kind: ClusterWorkflowTemplate + listKind: ClusterWorkflowTemplateList + plural: clusterworkflowtemplates + shortNames: + - clusterwftmpl + - cwft + singular: clusterworkflowtemplate + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + required: + - metadata + - spec + type: object + served: true + storage: true \ No newline at end of file diff --git a/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_cronworkflows.yaml b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_cronworkflows.yaml new file mode 100644 index 000000000..3821e88a0 --- /dev/null +++ 
b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_cronworkflows.yaml @@ -0,0 +1,40 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: cronworkflows.argoproj.io +spec: + group: argoproj.io + names: + kind: CronWorkflow + listKind: CronWorkflowList + plural: cronworkflows + shortNames: + - cwf + - cronwf + singular: cronworkflow + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + status: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + required: + - metadata + - spec + type: object + served: true + storage: true \ No newline at end of file diff --git a/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowartifactgctasks.yaml b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowartifactgctasks.yaml new file mode 100644 index 000000000..21c4821e7 --- /dev/null +++ b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowartifactgctasks.yaml @@ -0,0 +1,1137 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: workflowartifactgctasks.argoproj.io +spec: + group: argoproj.io + names: + kind: WorkflowArtifactGCTask + listKind: WorkflowArtifactGCTaskList + plural: workflowartifactgctasks + shortNames: + - wfat + singular: workflowartifactgctask + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + properties: + artifactsByNode: + additionalProperties: + properties: + archiveLocation: + properties: + archiveLogs: + type: boolean + artifactory: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + 
required: + - key + type: object + x-kubernetes-map-type: atomic + url: + type: string + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - url + type: object + azure: + properties: + accountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + blob: + type: string + container: + type: string + endpoint: + type: string + useSDKCreds: + type: boolean + required: + - blob + - container + - endpoint + type: object + gcs: + properties: + bucket: + type: string + key: + type: string + serviceAccountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - key + type: object + git: + properties: + branch: + type: string + depth: + format: int64 + type: integer + disableSubmodules: + type: boolean + fetch: + items: + type: string + type: array + insecureIgnoreHostKey: + type: boolean + insecureSkipTLS: + type: boolean + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + repo: + type: string + revision: + type: string + singleBranch: + type: boolean + sshPrivateKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - repo + type: object + hdfs: + properties: + addresses: + items: + type: string + type: array + 
dataTransferProtection: + type: string + force: + type: boolean + hdfsUser: + type: string + krbCCacheSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbConfigConfigMap: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbKeytabSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbRealm: + type: string + krbServicePrincipalName: + type: string + krbUsername: + type: string + path: + type: string + required: + - path + type: object + http: + properties: + auth: + properties: + basicAuth: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientCert: + properties: + clientCertSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + oauth2: + properties: + clientIDSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientSecretSecret: + properties: + key: + type: string + name: + default: "" + type: string + 
optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + items: + properties: + key: + type: string + value: + type: string + required: + - key + type: object + type: array + scopes: + items: + type: string + type: array + tokenURLSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + headers: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + url: + type: string + required: + - url + type: object + oss: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + createBucketIfNotPresent: + type: boolean + endpoint: + type: string + key: + type: string + lifecycleRule: + properties: + markDeletionAfterDays: + format: int32 + type: integer + markInfrequentAccessAfterDays: + format: int32 + type: integer + type: object + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + securityToken: + type: string + useSDKCreds: + type: boolean + required: + - key + type: object + raw: + properties: + data: + type: string + required: + - data + type: object + s3: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + caSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + createBucketIfNotPresent: + 
properties: + objectLocking: + type: boolean + type: object + encryptionOptions: + properties: + enableEncryption: + type: boolean + kmsEncryptionContext: + type: string + kmsKeyId: + type: string + serverSideCustomerKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + endpoint: + type: string + insecure: + type: boolean + key: + type: string + region: + type: string + roleARN: + type: string + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + sessionTokenSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + useSDKCreds: + type: boolean + type: object + type: object + artifacts: + additionalProperties: + properties: + archive: + properties: + none: + type: object + tar: + properties: + compressionLevel: + format: int32 + type: integer + type: object + zip: + type: object + type: object + archiveLogs: + type: boolean + artifactGC: + properties: + podMetadata: + properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + type: object + serviceAccountName: + type: string + strategy: + enum: + - "" + - OnWorkflowCompletion + - OnWorkflowDeletion + - Never + type: string + type: object + artifactory: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + url: + type: string + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + 
x-kubernetes-map-type: atomic + required: + - url + type: object + azure: + properties: + accountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + blob: + type: string + container: + type: string + endpoint: + type: string + useSDKCreds: + type: boolean + required: + - blob + - container + - endpoint + type: object + deleted: + type: boolean + from: + type: string + fromExpression: + type: string + gcs: + properties: + bucket: + type: string + key: + type: string + serviceAccountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - key + type: object + git: + properties: + branch: + type: string + depth: + format: int64 + type: integer + disableSubmodules: + type: boolean + fetch: + items: + type: string + type: array + insecureIgnoreHostKey: + type: boolean + insecureSkipTLS: + type: boolean + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + repo: + type: string + revision: + type: string + singleBranch: + type: boolean + sshPrivateKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - repo + type: object + globalName: + type: string + hdfs: + properties: + addresses: + items: + type: string + type: array + dataTransferProtection: + type: string + force: + type: boolean + hdfsUser: + type: string + krbCCacheSecret: + properties: + key: + type: 
string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbConfigConfigMap: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbKeytabSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbRealm: + type: string + krbServicePrincipalName: + type: string + krbUsername: + type: string + path: + type: string + required: + - path + type: object + http: + properties: + auth: + properties: + basicAuth: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientCert: + properties: + clientCertSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + oauth2: + properties: + clientIDSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientSecretSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + items: + properties: + key: + 
type: string + value: + type: string + required: + - key + type: object + type: array + scopes: + items: + type: string + type: array + tokenURLSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + headers: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + url: + type: string + required: + - url + type: object + mode: + format: int32 + type: integer + name: + type: string + optional: + type: boolean + oss: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + createBucketIfNotPresent: + type: boolean + endpoint: + type: string + key: + type: string + lifecycleRule: + properties: + markDeletionAfterDays: + format: int32 + type: integer + markInfrequentAccessAfterDays: + format: int32 + type: integer + type: object + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + securityToken: + type: string + useSDKCreds: + type: boolean + required: + - key + type: object + path: + type: string + raw: + properties: + data: + type: string + required: + - data + type: object + recurseMode: + type: boolean + s3: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + caSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + createBucketIfNotPresent: + 
properties: + objectLocking: + type: boolean + type: object + encryptionOptions: + properties: + enableEncryption: + type: boolean + kmsEncryptionContext: + type: string + kmsKeyId: + type: string + serverSideCustomerKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + endpoint: + type: string + insecure: + type: boolean + key: + type: string + region: + type: string + roleARN: + type: string + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + sessionTokenSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + useSDKCreds: + type: boolean + type: object + subPath: + type: string + required: + - name + type: object + type: object + type: object + type: object + type: object + status: + properties: + artifactResultsByNode: + additionalProperties: + properties: + artifactResults: + additionalProperties: + properties: + error: + type: string + name: + type: string + success: + type: boolean + required: + - name + type: object + type: object + type: object + type: object + type: object + required: + - metadata + - spec + type: object + served: true + storage: true + subresources: + status: {} \ No newline at end of file diff --git a/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workfloweventbindings.yaml b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workfloweventbindings.yaml new file mode 100644 index 000000000..d77f46c07 --- /dev/null +++ b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workfloweventbindings.yaml @@ -0,0 +1,681 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: 
workfloweventbindings.argoproj.io +spec: + group: argoproj.io + names: + kind: WorkflowEventBinding + listKind: WorkflowEventBindingList + plural: workfloweventbindings + shortNames: + - wfeb + singular: workfloweventbinding + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + properties: + event: + properties: + selector: + type: string + required: + - selector + type: object + submit: + properties: + arguments: + properties: + artifacts: + items: + properties: + archive: + properties: + none: + type: object + tar: + properties: + compressionLevel: + format: int32 + type: integer + type: object + zip: + type: object + type: object + archiveLogs: + type: boolean + artifactGC: + properties: + podMetadata: + properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + type: object + serviceAccountName: + type: string + strategy: + enum: + - "" + - OnWorkflowCompletion + - OnWorkflowDeletion + - Never + type: string + type: object + artifactory: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + url: + type: string + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - url + type: object + azure: + properties: + accountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + blob: + type: string + container: + type: string + endpoint: + type: string + useSDKCreds: + type: boolean + required: + - blob + - container + - endpoint + type: object 
+ deleted: + type: boolean + from: + type: string + fromExpression: + type: string + gcs: + properties: + bucket: + type: string + key: + type: string + serviceAccountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - key + type: object + git: + properties: + branch: + type: string + depth: + format: int64 + type: integer + disableSubmodules: + type: boolean + fetch: + items: + type: string + type: array + insecureIgnoreHostKey: + type: boolean + insecureSkipTLS: + type: boolean + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + repo: + type: string + revision: + type: string + singleBranch: + type: boolean + sshPrivateKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - repo + type: object + globalName: + type: string + hdfs: + properties: + addresses: + items: + type: string + type: array + dataTransferProtection: + type: string + force: + type: boolean + hdfsUser: + type: string + krbCCacheSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbConfigConfigMap: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbKeytabSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: 
boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbRealm: + type: string + krbServicePrincipalName: + type: string + krbUsername: + type: string + path: + type: string + required: + - path + type: object + http: + properties: + auth: + properties: + basicAuth: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientCert: + properties: + clientCertSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + oauth2: + properties: + clientIDSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientSecretSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + items: + properties: + key: + type: string + value: + type: string + required: + - key + type: object + type: array + scopes: + items: + type: string + type: array + tokenURLSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + headers: + items: + properties: + name: + type: string + value: + type: string + 
required: + - name + - value + type: object + type: array + url: + type: string + required: + - url + type: object + mode: + format: int32 + type: integer + name: + type: string + optional: + type: boolean + oss: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + createBucketIfNotPresent: + type: boolean + endpoint: + type: string + key: + type: string + lifecycleRule: + properties: + markDeletionAfterDays: + format: int32 + type: integer + markInfrequentAccessAfterDays: + format: int32 + type: integer + type: object + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + securityToken: + type: string + useSDKCreds: + type: boolean + required: + - key + type: object + path: + type: string + raw: + properties: + data: + type: string + required: + - data + type: object + recurseMode: + type: boolean + s3: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + caSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + createBucketIfNotPresent: + properties: + objectLocking: + type: boolean + type: object + encryptionOptions: + properties: + enableEncryption: + type: boolean + kmsEncryptionContext: + type: string + kmsKeyId: + type: string + serverSideCustomerKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + endpoint: + type: string 
+ insecure: + type: boolean + key: + type: string + region: + type: string + roleARN: + type: string + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + sessionTokenSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + useSDKCreds: + type: boolean + type: object + subPath: + type: string + required: + - name + type: object + type: array + parameters: + items: + properties: + default: + type: string + description: + type: string + enum: + items: + type: string + type: array + globalName: + type: string + name: + type: string + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + default: + type: string + event: + type: string + expression: + type: string + jqFilter: + type: string + jsonPath: + type: string + parameter: + type: string + path: + type: string + supplied: + type: object + type: object + required: + - name + type: object + type: array + type: object + metadata: + type: object + workflowTemplateRef: + properties: + clusterScope: + type: boolean + name: + type: string + type: object + required: + - workflowTemplateRef + type: object + required: + - event + type: object + required: + - metadata + - spec + type: object + served: true + storage: true \ No newline at end of file diff --git a/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflows.yaml b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflows.yaml new file mode 100644 index 000000000..41744ad63 --- /dev/null +++ b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflows.yaml @@ -0,0 +1,55 @@ +apiVersion: apiextensions.k8s.io/v1 
+kind: CustomResourceDefinition +metadata: + name: workflows.argoproj.io +spec: + group: argoproj.io + names: + kind: Workflow + listKind: WorkflowList + plural: workflows + shortNames: + - wf + singular: workflow + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Status of the workflow + jsonPath: .status.phase + name: Status + type: string + - description: When the workflow was started + format: date-time + jsonPath: .status.startedAt + name: Age + type: date + - description: Human readable message indicating details about why the workflow + is in this condition. + jsonPath: .status.message + name: Message + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + status: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + required: + - metadata + - spec + type: object + served: true + storage: true + subresources: {} \ No newline at end of file diff --git a/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowtaskresults.yaml b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowtaskresults.yaml new file mode 100644 index 000000000..e69875f8c --- /dev/null +++ b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowtaskresults.yaml @@ -0,0 +1,662 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: workflowtaskresults.argoproj.io +spec: + group: argoproj.io + names: + kind: WorkflowTaskResult + listKind: WorkflowTaskResultList + plural: workflowtaskresults + singular: workflowtaskresult + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + message: + type: string + metadata: + type: object + outputs: + properties: + artifacts: + items: + 
properties: + archive: + properties: + none: + type: object + tar: + properties: + compressionLevel: + format: int32 + type: integer + type: object + zip: + type: object + type: object + archiveLogs: + type: boolean + artifactGC: + properties: + podMetadata: + properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + type: object + serviceAccountName: + type: string + strategy: + enum: + - "" + - OnWorkflowCompletion + - OnWorkflowDeletion + - Never + type: string + type: object + artifactory: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + url: + type: string + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - url + type: object + azure: + properties: + accountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + blob: + type: string + container: + type: string + endpoint: + type: string + useSDKCreds: + type: boolean + required: + - blob + - container + - endpoint + type: object + deleted: + type: boolean + from: + type: string + fromExpression: + type: string + gcs: + properties: + bucket: + type: string + key: + type: string + serviceAccountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - key + type: object + git: + properties: + branch: + type: string + depth: + format: int64 + type: integer + disableSubmodules: + type: boolean + fetch: + items: + type: string + type: array + insecureIgnoreHostKey: + type: 
boolean + insecureSkipTLS: + type: boolean + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + repo: + type: string + revision: + type: string + singleBranch: + type: boolean + sshPrivateKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - repo + type: object + globalName: + type: string + hdfs: + properties: + addresses: + items: + type: string + type: array + dataTransferProtection: + type: string + force: + type: boolean + hdfsUser: + type: string + krbCCacheSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbConfigConfigMap: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbKeytabSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbRealm: + type: string + krbServicePrincipalName: + type: string + krbUsername: + type: string + path: + type: string + required: + - path + type: object + http: + properties: + auth: + properties: + basicAuth: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + 
optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientCert: + properties: + clientCertSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + oauth2: + properties: + clientIDSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientSecretSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + items: + properties: + key: + type: string + value: + type: string + required: + - key + type: object + type: array + scopes: + items: + type: string + type: array + tokenURLSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + headers: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + url: + type: string + required: + - url + type: object + mode: + format: int32 + type: integer + name: + type: string + optional: + type: boolean + oss: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + createBucketIfNotPresent: + type: boolean + endpoint: + type: string + key: + type: string + lifecycleRule: + properties: + markDeletionAfterDays: + 
format: int32 + type: integer + markInfrequentAccessAfterDays: + format: int32 + type: integer + type: object + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + securityToken: + type: string + useSDKCreds: + type: boolean + required: + - key + type: object + path: + type: string + raw: + properties: + data: + type: string + required: + - data + type: object + recurseMode: + type: boolean + s3: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + caSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + createBucketIfNotPresent: + properties: + objectLocking: + type: boolean + type: object + encryptionOptions: + properties: + enableEncryption: + type: boolean + kmsEncryptionContext: + type: string + kmsKeyId: + type: string + serverSideCustomerKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + endpoint: + type: string + insecure: + type: boolean + key: + type: string + region: + type: string + roleARN: + type: string + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + sessionTokenSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + useSDKCreds: + type: boolean + type: object + subPath: + type: string + required: + - name + type: object + 
type: array + exitCode: + type: string + parameters: + items: + properties: + default: + type: string + description: + type: string + enum: + items: + type: string + type: array + globalName: + type: string + name: + type: string + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + default: + type: string + event: + type: string + expression: + type: string + jqFilter: + type: string + jsonPath: + type: string + parameter: + type: string + path: + type: string + supplied: + type: object + type: object + required: + - name + type: object + type: array + result: + type: string + type: object + phase: + type: string + progress: + type: string + required: + - metadata + type: object + served: true + storage: true \ No newline at end of file diff --git a/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowtasksets.yaml b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowtasksets.yaml new file mode 100644 index 000000000..e7a5c5120 --- /dev/null +++ b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowtasksets.yaml @@ -0,0 +1,41 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: workflowtasksets.argoproj.io +spec: + group: argoproj.io + names: + kind: WorkflowTaskSet + listKind: WorkflowTaskSetList + plural: workflowtasksets + shortNames: + - wfts + singular: workflowtaskset + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + status: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + required: + - metadata + - spec + type: object + served: true + storage: 
true + subresources: + status: {} \ No newline at end of file diff --git a/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowtemplates.yaml b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowtemplates.yaml new file mode 100644 index 000000000..91aacaced --- /dev/null +++ b/hack/k8s-patch/k8s-remediation-patch/crds/argoproj.io_workflowtemplates.yaml @@ -0,0 +1,35 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: workflowtemplates.argoproj.io +spec: + group: argoproj.io + names: + kind: WorkflowTemplate + listKind: WorkflowTemplateList + plural: workflowtemplates + shortNames: + - wftmpl + singular: workflowtemplate + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + required: + - metadata + - spec + type: object + served: true + storage: true \ No newline at end of file diff --git a/hack/k8s-patch/k8s-remediation-patch/kustomization.yaml b/hack/k8s-patch/k8s-remediation-patch/kustomization.yaml new file mode 100644 index 000000000..d6792b2c6 --- /dev/null +++ b/hack/k8s-patch/k8s-remediation-patch/kustomization.yaml @@ -0,0 +1,18 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - crds/argoproj.io_clusterworkflowtemplates.yaml + - crds/argoproj.io_cronworkflows.yaml + - crds/argoproj.io_workflowartifactgctasks.yaml + - crds/argoproj.io_workfloweventbindings.yaml + - crds/argoproj.io_workflows.yaml + - crds/argoproj.io_workflowtaskresults.yaml + - crds/argoproj.io_workflowtasksets.yaml + - crds/argoproj.io_workflowtemplates.yaml + +labels: +- pairs: + app.kubernetes.io/component: remediation + app.kubernetes.io/name: remediation + app.kubernetes.io/part-of: remediation \ No newline at end of file diff --git 
a/hack/k8s-patch/k8s-remediation-patch/metadata-patch/Chart.yaml b/hack/k8s-patch/k8s-remediation-patch/metadata-patch/Chart.yaml index caf35c8e6..79d9d04fe 100644 --- a/hack/k8s-patch/k8s-remediation-patch/metadata-patch/Chart.yaml +++ b/hack/k8s-patch/k8s-remediation-patch/metadata-patch/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v1 -name: remediation-controller -description: A Helm chart for remediation workflow controller for AMD GPU Operator +name: remediation-crds +description: A Helm chart for installing Argo Workflows CRDs for remediation in AMD GPU Operator type: application version: v1.0.0 \ No newline at end of file diff --git a/hack/k8s-patch/k8s-remediation-patch/metadata-patch/values.yaml b/hack/k8s-patch/k8s-remediation-patch/metadata-patch/values.yaml index 83339b213..e69de29bb 100644 --- a/hack/k8s-patch/k8s-remediation-patch/metadata-patch/values.yaml +++ b/hack/k8s-patch/k8s-remediation-patch/metadata-patch/values.yaml @@ -1,11 +0,0 @@ -controller: - image: "quay.io/argoproj/workflow-controller:v3.6.5" - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 1 - preference: - matchExpressions: - - key: node-role.kubernetes.io/control-plane - operator: Exists - nodeSelector: {} \ No newline at end of file diff --git a/hack/k8s-patch/k8s-remediation-patch/template-patch/deployment.yaml b/hack/k8s-patch/k8s-remediation-patch/template-patch/deployment.yaml deleted file mode 100644 index a9bfc08a5..000000000 --- a/hack/k8s-patch/k8s-remediation-patch/template-patch/deployment.yaml +++ /dev/null @@ -1,3034 +0,0 @@ -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: clusterworkflowtemplates.argoproj.io -spec: - group: argoproj.io - names: - kind: ClusterWorkflowTemplate - listKind: ClusterWorkflowTemplateList - plural: clusterworkflowtemplates - shortNames: - - clusterwftmpl - - cwft - singular: clusterworkflowtemplate - scope: Cluster - versions: - - name: v1alpha1 - schema: - 
openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - required: - - metadata - - spec - type: object - served: true - storage: true ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: cronworkflows.argoproj.io -spec: - group: argoproj.io - names: - kind: CronWorkflow - listKind: CronWorkflowList - plural: cronworkflows - shortNames: - - cwf - - cronwf - singular: cronworkflow - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - status: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - required: - - metadata - - spec - type: object - served: true - storage: true ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: workflowartifactgctasks.argoproj.io -spec: - group: argoproj.io - names: - kind: WorkflowArtifactGCTask - listKind: WorkflowArtifactGCTaskList - plural: workflowartifactgctasks - shortNames: - - wfat - singular: workflowartifactgctask - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - properties: - artifactsByNode: - additionalProperties: - properties: - archiveLocation: - properties: - archiveLogs: - type: boolean - artifactory: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - url: - type: string - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: 
string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - url - type: object - azure: - properties: - accountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - blob: - type: string - container: - type: string - endpoint: - type: string - useSDKCreds: - type: boolean - required: - - blob - - container - - endpoint - type: object - gcs: - properties: - bucket: - type: string - key: - type: string - serviceAccountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - key - type: object - git: - properties: - branch: - type: string - depth: - format: int64 - type: integer - disableSubmodules: - type: boolean - fetch: - items: - type: string - type: array - insecureIgnoreHostKey: - type: boolean - insecureSkipTLS: - type: boolean - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - repo: - type: string - revision: - type: string - singleBranch: - type: boolean - sshPrivateKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - repo - type: object - hdfs: - properties: - addresses: - items: - type: string - type: array - dataTransferProtection: - type: string - force: - type: boolean - hdfsUser: - type: string - krbCCacheSecret: - properties: - key: - type: string - name: - default: "" - type: 
string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbConfigConfigMap: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbKeytabSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbRealm: - type: string - krbServicePrincipalName: - type: string - krbUsername: - type: string - path: - type: string - required: - - path - type: object - http: - properties: - auth: - properties: - basicAuth: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - clientCert: - properties: - clientCertSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - oauth2: - properties: - clientIDSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientSecretSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - endpointParams: - items: - properties: - key: - type: string - value: - type: string 
- required: - - key - type: object - type: array - scopes: - items: - type: string - type: array - tokenURLSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - type: object - headers: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - url: - type: string - required: - - url - type: object - oss: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - createBucketIfNotPresent: - type: boolean - endpoint: - type: string - key: - type: string - lifecycleRule: - properties: - markDeletionAfterDays: - format: int32 - type: integer - markInfrequentAccessAfterDays: - format: int32 - type: integer - type: object - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - securityToken: - type: string - useSDKCreds: - type: boolean - required: - - key - type: object - raw: - properties: - data: - type: string - required: - - data - type: object - s3: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - caSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - createBucketIfNotPresent: - properties: - objectLocking: - type: boolean - type: object - encryptionOptions: - properties: - enableEncryption: - type: boolean - kmsEncryptionContext: - type: string - kmsKeyId: - 
type: string - serverSideCustomerKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - endpoint: - type: string - insecure: - type: boolean - key: - type: string - region: - type: string - roleARN: - type: string - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - sessionTokenSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - useSDKCreds: - type: boolean - type: object - type: object - artifacts: - additionalProperties: - properties: - archive: - properties: - none: - type: object - tar: - properties: - compressionLevel: - format: int32 - type: integer - type: object - zip: - type: object - type: object - archiveLogs: - type: boolean - artifactGC: - properties: - podMetadata: - properties: - annotations: - additionalProperties: - type: string - type: object - labels: - additionalProperties: - type: string - type: object - type: object - serviceAccountName: - type: string - strategy: - enum: - - "" - - OnWorkflowCompletion - - OnWorkflowDeletion - - Never - type: string - type: object - artifactory: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - url: - type: string - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - url - type: object - azure: - properties: - accountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: 
boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - blob: - type: string - container: - type: string - endpoint: - type: string - useSDKCreds: - type: boolean - required: - - blob - - container - - endpoint - type: object - deleted: - type: boolean - from: - type: string - fromExpression: - type: string - gcs: - properties: - bucket: - type: string - key: - type: string - serviceAccountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - key - type: object - git: - properties: - branch: - type: string - depth: - format: int64 - type: integer - disableSubmodules: - type: boolean - fetch: - items: - type: string - type: array - insecureIgnoreHostKey: - type: boolean - insecureSkipTLS: - type: boolean - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - repo: - type: string - revision: - type: string - singleBranch: - type: boolean - sshPrivateKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - repo - type: object - globalName: - type: string - hdfs: - properties: - addresses: - items: - type: string - type: array - dataTransferProtection: - type: string - force: - type: boolean - hdfsUser: - type: string - krbCCacheSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbConfigConfigMap: - properties: - key: - type: string - 
name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbKeytabSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbRealm: - type: string - krbServicePrincipalName: - type: string - krbUsername: - type: string - path: - type: string - required: - - path - type: object - http: - properties: - auth: - properties: - basicAuth: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - clientCert: - properties: - clientCertSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - oauth2: - properties: - clientIDSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientSecretSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - endpointParams: - items: - properties: - key: - type: string - value: - type: string - required: - - key - type: object - type: array - scopes: - items: - type: string - type: array - tokenURLSecret: - properties: - key: - type: string - name: - 
default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - type: object - headers: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - url: - type: string - required: - - url - type: object - mode: - format: int32 - type: integer - name: - type: string - optional: - type: boolean - oss: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - createBucketIfNotPresent: - type: boolean - endpoint: - type: string - key: - type: string - lifecycleRule: - properties: - markDeletionAfterDays: - format: int32 - type: integer - markInfrequentAccessAfterDays: - format: int32 - type: integer - type: object - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - securityToken: - type: string - useSDKCreds: - type: boolean - required: - - key - type: object - path: - type: string - raw: - properties: - data: - type: string - required: - - data - type: object - recurseMode: - type: boolean - s3: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - caSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - createBucketIfNotPresent: - properties: - objectLocking: - type: boolean - type: object - encryptionOptions: - properties: - enableEncryption: - type: boolean - kmsEncryptionContext: - type: string - kmsKeyId: - type: string - 
serverSideCustomerKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - endpoint: - type: string - insecure: - type: boolean - key: - type: string - region: - type: string - roleARN: - type: string - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - sessionTokenSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - useSDKCreds: - type: boolean - type: object - subPath: - type: string - required: - - name - type: object - type: object - type: object - type: object - type: object - status: - properties: - artifactResultsByNode: - additionalProperties: - properties: - artifactResults: - additionalProperties: - properties: - error: - type: string - name: - type: string - success: - type: boolean - required: - - name - type: object - type: object - type: object - type: object - type: object - required: - - metadata - - spec - type: object - served: true - storage: true - subresources: - status: {} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: workfloweventbindings.argoproj.io -spec: - group: argoproj.io - names: - kind: WorkflowEventBinding - listKind: WorkflowEventBindingList - plural: workfloweventbindings - shortNames: - - wfeb - singular: workfloweventbinding - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - properties: - event: - properties: - selector: - type: string - required: - - selector - type: object - submit: - properties: - arguments: - properties: - artifacts: - items: - properties: - 
archive: - properties: - none: - type: object - tar: - properties: - compressionLevel: - format: int32 - type: integer - type: object - zip: - type: object - type: object - archiveLogs: - type: boolean - artifactGC: - properties: - podMetadata: - properties: - annotations: - additionalProperties: - type: string - type: object - labels: - additionalProperties: - type: string - type: object - type: object - serviceAccountName: - type: string - strategy: - enum: - - "" - - OnWorkflowCompletion - - OnWorkflowDeletion - - Never - type: string - type: object - artifactory: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - url: - type: string - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - url - type: object - azure: - properties: - accountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - blob: - type: string - container: - type: string - endpoint: - type: string - useSDKCreds: - type: boolean - required: - - blob - - container - - endpoint - type: object - deleted: - type: boolean - from: - type: string - fromExpression: - type: string - gcs: - properties: - bucket: - type: string - key: - type: string - serviceAccountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - key - type: object - git: - properties: - branch: - type: string - depth: - format: int64 - type: integer - disableSubmodules: - type: boolean - fetch: - items: - type: string - type: array - insecureIgnoreHostKey: - type: boolean - 
insecureSkipTLS: - type: boolean - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - repo: - type: string - revision: - type: string - singleBranch: - type: boolean - sshPrivateKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - repo - type: object - globalName: - type: string - hdfs: - properties: - addresses: - items: - type: string - type: array - dataTransferProtection: - type: string - force: - type: boolean - hdfsUser: - type: string - krbCCacheSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbConfigConfigMap: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbKeytabSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbRealm: - type: string - krbServicePrincipalName: - type: string - krbUsername: - type: string - path: - type: string - required: - - path - type: object - http: - properties: - auth: - properties: - basicAuth: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: 
boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - clientCert: - properties: - clientCertSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - oauth2: - properties: - clientIDSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientSecretSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - endpointParams: - items: - properties: - key: - type: string - value: - type: string - required: - - key - type: object - type: array - scopes: - items: - type: string - type: array - tokenURLSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - type: object - headers: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - url: - type: string - required: - - url - type: object - mode: - format: int32 - type: integer - name: - type: string - optional: - type: boolean - oss: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - createBucketIfNotPresent: - type: boolean - endpoint: - type: string - key: - type: string - lifecycleRule: - properties: - markDeletionAfterDays: - format: int32 - 
type: integer - markInfrequentAccessAfterDays: - format: int32 - type: integer - type: object - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - securityToken: - type: string - useSDKCreds: - type: boolean - required: - - key - type: object - path: - type: string - raw: - properties: - data: - type: string - required: - - data - type: object - recurseMode: - type: boolean - s3: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - caSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - createBucketIfNotPresent: - properties: - objectLocking: - type: boolean - type: object - encryptionOptions: - properties: - enableEncryption: - type: boolean - kmsEncryptionContext: - type: string - kmsKeyId: - type: string - serverSideCustomerKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - endpoint: - type: string - insecure: - type: boolean - key: - type: string - region: - type: string - roleARN: - type: string - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - sessionTokenSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - useSDKCreds: - type: boolean - type: object - subPath: - type: string - required: - - name - type: object - type: array - 
parameters: - items: - properties: - default: - type: string - description: - type: string - enum: - items: - type: string - type: array - globalName: - type: string - name: - type: string - value: - type: string - valueFrom: - properties: - configMapKeyRef: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - default: - type: string - event: - type: string - expression: - type: string - jqFilter: - type: string - jsonPath: - type: string - parameter: - type: string - path: - type: string - supplied: - type: object - type: object - required: - - name - type: object - type: array - type: object - metadata: - type: object - workflowTemplateRef: - properties: - clusterScope: - type: boolean - name: - type: string - type: object - required: - - workflowTemplateRef - type: object - required: - - event - type: object - required: - - metadata - - spec - type: object - served: true - storage: true ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: workflows.argoproj.io -spec: - group: argoproj.io - names: - kind: Workflow - listKind: WorkflowList - plural: workflows - shortNames: - - wf - singular: workflow - scope: Namespaced - versions: - - additionalPrinterColumns: - - description: Status of the workflow - jsonPath: .status.phase - name: Status - type: string - - description: When the workflow was started - format: date-time - jsonPath: .status.startedAt - name: Age - type: date - - description: Human readable message indicating details about why the workflow - is in this condition. 
- jsonPath: .status.message - name: Message - type: string - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - status: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - required: - - metadata - - spec - type: object - served: true - storage: true - subresources: {} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: workflowtaskresults.argoproj.io -spec: - group: argoproj.io - names: - kind: WorkflowTaskResult - listKind: WorkflowTaskResultList - plural: workflowtaskresults - singular: workflowtaskresult - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - message: - type: string - metadata: - type: object - outputs: - properties: - artifacts: - items: - properties: - archive: - properties: - none: - type: object - tar: - properties: - compressionLevel: - format: int32 - type: integer - type: object - zip: - type: object - type: object - archiveLogs: - type: boolean - artifactGC: - properties: - podMetadata: - properties: - annotations: - additionalProperties: - type: string - type: object - labels: - additionalProperties: - type: string - type: object - type: object - serviceAccountName: - type: string - strategy: - enum: - - "" - - OnWorkflowCompletion - - OnWorkflowDeletion - - Never - type: string - type: object - artifactory: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - url: - type: string - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - 
x-kubernetes-map-type: atomic - required: - - url - type: object - azure: - properties: - accountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - blob: - type: string - container: - type: string - endpoint: - type: string - useSDKCreds: - type: boolean - required: - - blob - - container - - endpoint - type: object - deleted: - type: boolean - from: - type: string - fromExpression: - type: string - gcs: - properties: - bucket: - type: string - key: - type: string - serviceAccountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - key - type: object - git: - properties: - branch: - type: string - depth: - format: int64 - type: integer - disableSubmodules: - type: boolean - fetch: - items: - type: string - type: array - insecureIgnoreHostKey: - type: boolean - insecureSkipTLS: - type: boolean - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - repo: - type: string - revision: - type: string - singleBranch: - type: boolean - sshPrivateKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - repo - type: object - globalName: - type: string - hdfs: - properties: - addresses: - items: - type: string - type: array - dataTransferProtection: - type: string - force: - type: boolean - hdfsUser: - type: string - krbCCacheSecret: - properties: - key: - type: 
string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbConfigConfigMap: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbKeytabSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbRealm: - type: string - krbServicePrincipalName: - type: string - krbUsername: - type: string - path: - type: string - required: - - path - type: object - http: - properties: - auth: - properties: - basicAuth: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - clientCert: - properties: - clientCertSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - oauth2: - properties: - clientIDSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientSecretSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - endpointParams: - items: - properties: - key: - 
type: string - value: - type: string - required: - - key - type: object - type: array - scopes: - items: - type: string - type: array - tokenURLSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - type: object - headers: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - url: - type: string - required: - - url - type: object - mode: - format: int32 - type: integer - name: - type: string - optional: - type: boolean - oss: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - createBucketIfNotPresent: - type: boolean - endpoint: - type: string - key: - type: string - lifecycleRule: - properties: - markDeletionAfterDays: - format: int32 - type: integer - markInfrequentAccessAfterDays: - format: int32 - type: integer - type: object - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - securityToken: - type: string - useSDKCreds: - type: boolean - required: - - key - type: object - path: - type: string - raw: - properties: - data: - type: string - required: - - data - type: object - recurseMode: - type: boolean - s3: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - caSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - createBucketIfNotPresent: - 
properties: - objectLocking: - type: boolean - type: object - encryptionOptions: - properties: - enableEncryption: - type: boolean - kmsEncryptionContext: - type: string - kmsKeyId: - type: string - serverSideCustomerKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - endpoint: - type: string - insecure: - type: boolean - key: - type: string - region: - type: string - roleARN: - type: string - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - sessionTokenSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - useSDKCreds: - type: boolean - type: object - subPath: - type: string - required: - - name - type: object - type: array - exitCode: - type: string - parameters: - items: - properties: - default: - type: string - description: - type: string - enum: - items: - type: string - type: array - globalName: - type: string - name: - type: string - value: - type: string - valueFrom: - properties: - configMapKeyRef: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - default: - type: string - event: - type: string - expression: - type: string - jqFilter: - type: string - jsonPath: - type: string - parameter: - type: string - path: - type: string - supplied: - type: object - type: object - required: - - name - type: object - type: array - result: - type: string - type: object - phase: - type: string - progress: - type: string - required: - - metadata - type: object - served: true - storage: true ---- -apiVersion: apiextensions.k8s.io/v1 -kind: 
CustomResourceDefinition -metadata: - name: workflowtasksets.argoproj.io -spec: - group: argoproj.io - names: - kind: WorkflowTaskSet - listKind: WorkflowTaskSetList - plural: workflowtasksets - shortNames: - - wfts - singular: workflowtaskset - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - status: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - required: - - metadata - - spec - type: object - served: true - storage: true - subresources: - status: {} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: workflowtemplates.argoproj.io -spec: - group: argoproj.io - names: - kind: WorkflowTemplate - listKind: WorkflowTemplateList - plural: workflowtemplates - shortNames: - - wftmpl - singular: workflowtemplate - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - required: - - metadata - - spec - type: object - served: true - storage: true ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: argo - namespace: '{{ .Release.Namespace }}' ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: argo-role - namespace: '{{ .Release.Namespace }}' -rules: -- apiGroups: - - coordination.k8s.io - resources: - - leases - verbs: - - create - - get - - update -- apiGroups: - - "" - resources: - - secrets - verbs: - - get ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - rbac.authorization.k8s.io/aggregate-to-admin: "true" - name: argo-aggregate-to-admin -rules: -- apiGroups: - - argoproj.io 
- resources: - - workflows - - workflows/finalizers - - workfloweventbindings - - workfloweventbindings/finalizers - - workflowtemplates - - workflowtemplates/finalizers - - cronworkflows - - cronworkflows/finalizers - - clusterworkflowtemplates - - clusterworkflowtemplates/finalizers - - workflowtasksets - - workflowtasksets/finalizers - - workflowtaskresults - - workflowtaskresults/finalizers - verbs: - - create - - delete - - deletecollection - - get - - list - - patch - - update - - watch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - rbac.authorization.k8s.io/aggregate-to-edit: "true" - name: argo-aggregate-to-edit -rules: -- apiGroups: - - argoproj.io - resources: - - workflows - - workflows/finalizers - - workfloweventbindings - - workfloweventbindings/finalizers - - workflowtemplates - - workflowtemplates/finalizers - - cronworkflows - - cronworkflows/finalizers - - clusterworkflowtemplates - - clusterworkflowtemplates/finalizers - - workflowtaskresults - - workflowtaskresults/finalizers - verbs: - - create - - delete - - deletecollection - - get - - list - - patch - - update - - watch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - rbac.authorization.k8s.io/aggregate-to-view: "true" - name: argo-aggregate-to-view -rules: -- apiGroups: - - argoproj.io - resources: - - workflows - - workflows/finalizers - - workfloweventbindings - - workfloweventbindings/finalizers - - workflowtemplates - - workflowtemplates/finalizers - - cronworkflows - - cronworkflows/finalizers - - clusterworkflowtemplates - - clusterworkflowtemplates/finalizers - - workflowtaskresults - - workflowtaskresults/finalizers - verbs: - - get - - list - - watch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: argo-cluster-role -rules: -- apiGroups: - - "" - resources: - - pods - - pods/exec - verbs: - - create - - get - - list - - watch - - update - - patch - - delete -- 
apiGroups: - - "" - resources: - - configmaps - - nodes - verbs: - - get - - watch - - list -- apiGroups: - - "" - resources: - - persistentvolumeclaims - - persistentvolumeclaims/finalizers - verbs: - - create - - update - - delete - - get -- apiGroups: - - argoproj.io - resources: - - workflows - - workflows/finalizers - - workflowtasksets - - workflowtasksets/finalizers - - workflowartifactgctasks - verbs: - - get - - list - - watch - - update - - patch - - delete - - create -- apiGroups: - - argoproj.io - resources: - - workflowtemplates - - workflowtemplates/finalizers - - clusterworkflowtemplates - - clusterworkflowtemplates/finalizers - verbs: - - get - - list - - watch -- apiGroups: - - argoproj.io - resources: - - workflowtaskresults - verbs: - - get - - list - - watch - - create - - update - - patch - - delete - - deletecollection -- apiGroups: - - "" - resources: - - serviceaccounts - verbs: - - get - - list -- apiGroups: - - argoproj.io - resources: - - cronworkflows - - cronworkflows/finalizers - verbs: - - get - - list - - watch - - update - - patch - - delete -- apiGroups: - - "" - resources: - - events - verbs: - - create - - patch - - get - - list -- apiGroups: - - policy - resources: - - poddisruptionbudgets - verbs: - - create - - get - - delete -- apiGroups: - - "" - resourceNames: - - argo-workflows-agent-ca-certificates - resources: - - secrets - verbs: - - get ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: argo-binding - namespace: '{{ .Release.Namespace }}' -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: argo-role -subjects: -- kind: ServiceAccount - name: argo - namespace: '{{ .Release.Namespace }}' ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: argo-binding -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: argo-cluster-role -subjects: -- kind: ServiceAccount - name: argo - namespace: '{{ .Release.Namespace }}' 
---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: workflow-controller-configmap - namespace: '{{ .Release.Namespace }}' ---- -apiVersion: scheduling.k8s.io/v1 -kind: PriorityClass -metadata: - name: workflow-controller -value: 1000000 ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: amd-gpu-operator-workflow-controller - namespace: '{{ .Release.Namespace }}' -spec: - selector: - matchLabels: - app: amd-gpu-operator-workflow-controller - template: - metadata: - labels: - app: amd-gpu-operator-workflow-controller - spec: - {{- with .Values.controller.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - nodeSelector: {{- toYaml .Values.controller.nodeSelector | nindent 8 }} - containers: - - args: [] - command: - - workflow-controller - env: - - name: LEADER_ELECTION_IDENTITY - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.name - image: {{ .Values.controller.image }} - livenessProbe: - failureThreshold: 3 - httpGet: - path: /healthz - port: 6060 - initialDelaySeconds: 90 - periodSeconds: 60 - timeoutSeconds: 30 - name: workflow-controller - ports: - - containerPort: 9090 - name: metrics - - containerPort: 6060 - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - priorityClassName: workflow-controller - securityContext: - runAsNonRoot: true - serviceAccountName: argo - tolerations: - - key: "amd-gpu-unhealthy" - operator: "Exists" - effect: "NoSchedule" \ No newline at end of file diff --git a/hack/k8s-patch/metadata-patch/Chart.yaml b/hack/k8s-patch/metadata-patch/Chart.yaml index 6b9f0f177..41afb478b 100644 --- a/hack/k8s-patch/metadata-patch/Chart.yaml +++ b/hack/k8s-patch/metadata-patch/Chart.yaml @@ -31,7 +31,7 @@ dependencies: version: v1.0.0 repository: "file://./charts/kmm" condition: kmm.enabled -- name: remediation-controller +- name: remediation-crds version: v1.0.0 - repository: "file://./charts/remediation" - condition: 
remediation.enabled \ No newline at end of file + repository: "file://./charts/remediation-crds" + condition: remediation.installCRDs \ No newline at end of file diff --git a/hack/k8s-patch/metadata-patch/values.yaml b/hack/k8s-patch/metadata-patch/values.yaml index 6b14cebe6..943a3949d 100644 --- a/hack/k8s-patch/metadata-patch/values.yaml +++ b/hack/k8s-patch/metadata-patch/values.yaml @@ -23,6 +23,11 @@ kmm: remediation: # -- Set to true/false to enable/disable the installation of remediation workflow controller enabled: true + # -- Set to true/false to enable/disable the installation of Argo CRDs used by the remediation workflow controller + installCRDs: true + # -- Set the controller image for remediation workflow controller deployment + controller: + image: "quay.io/argoproj/workflow-controller:v3.6.5" # -- Default NFD rule will detect amd gpu based on pci vendor ID installdefaultNFDRule: true # -- CRD will be patched as pre-upgrade/pre-rollback hook when doing helm upgrade/rollback to current helm chart diff --git a/hack/k8s-patch/template-patch/post-delete-hook.yaml b/hack/k8s-patch/template-patch/post-delete-hook.yaml index da7c287d4..05fa5306d 100644 --- a/hack/k8s-patch/template-patch/post-delete-hook.yaml +++ b/hack/k8s-patch/template-patch/post-delete-hook.yaml @@ -83,9 +83,37 @@ spec: if kubectl get crds deviceconfigs.amd.com > /dev/null 2>&1; then kubectl delete crds deviceconfigs.amd.com fi + {{- if .Values.remediation.enabled }} if kubectl get crds remediationworkflowstatuses.amd.com > /dev/null 2>&1; then kubectl delete crds remediationworkflowstatuses.amd.com fi + {{- end }} + {{- if and .Values.remediation.enabled .Values.remediation.installCRDs }} + if kubectl get crds clusterworkflowtemplates.argoproj.io > /dev/null 2>&1; then + kubectl delete crds clusterworkflowtemplates.argoproj.io + fi + if kubectl get crds cronworkflows.argoproj.io > /dev/null 2>&1; then + kubectl delete crds cronworkflows.argoproj.io + fi + if kubectl get crds 
workflows.argoproj.io > /dev/null 2>&1; then + kubectl delete crds workflows.argoproj.io + fi + if kubectl get crds workflowartifactgctasks.argoproj.io > /dev/null 2>&1; then + kubectl delete crds workflowartifactgctasks.argoproj.io + fi + if kubectl get crds workfloweventbindings.argoproj.io > /dev/null 2>&1; then + kubectl delete crds workfloweventbindings.argoproj.io + fi + if kubectl get crds workflowtaskresults.argoproj.io > /dev/null 2>&1; then + kubectl delete crds workflowtaskresults.argoproj.io + fi + if kubectl get crds workflowtasksets.argoproj.io > /dev/null 2>&1; then + kubectl delete crds workflowtasksets.argoproj.io + fi + if kubectl get crds workflowtemplates.argoproj.io > /dev/null 2>&1; then + kubectl delete crds workflowtemplates.argoproj.io + fi + {{- end }} {{- if index .Values "node-feature-discovery" "enabled" }} if kubectl get crds nodefeaturegroups.nfd.k8s-sigs.io > /dev/null 2>&1; then kubectl delete crds nodefeaturegroups.nfd.k8s-sigs.io diff --git a/hack/k8s-patch/template-patch/pre-upgrade-hook.yaml b/hack/k8s-patch/template-patch/pre-upgrade-hook.yaml index 600edc47b..2fb5e37db 100644 --- a/hack/k8s-patch/template-patch/pre-upgrade-hook.yaml +++ b/hack/k8s-patch/template-patch/pre-upgrade-hook.yaml @@ -224,7 +224,19 @@ spec: kubectl apply -f /opt/helm-charts-crds-k8s/module-crd.yaml kubectl apply -f /opt/helm-charts-crds-k8s/nodemodulesconfig-crd.yaml {{- end }} + {{- if .Values.remediation.enabled }} kubectl apply -f /opt/helm-charts-crds-k8s/remediationworkflowstatus-crd.yaml + {{- if .Values.remediation.installCRDs }} + kubectl apply -f /opt/helm-charts-crds-k8s/clusterworkflowtemplate-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/cronworkflow-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/workflowartifactgctask-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/workflow-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/workfloweventbinding-crd.yaml + kubectl apply -f 
/opt/helm-charts-crds-k8s/workflowtaskresult-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/workflowtaskset-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/workflowtemplate-crd.yaml + {{- end }} + {{- end }} restartPolicy: OnFailure {{- end }} # Run helm upgrade with --no-hooks to bypass the pre-upgrade hook \ No newline at end of file diff --git a/hack/k8s-patch/template-patch/remediation-deployment.yaml b/hack/k8s-patch/template-patch/remediation-deployment.yaml new file mode 100644 index 000000000..6aaca100b --- /dev/null +++ b/hack/k8s-patch/template-patch/remediation-deployment.yaml @@ -0,0 +1,344 @@ +{{- if .Values.remediation.enabled }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: argo + namespace: '{{ .Release.Namespace }}' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: argo-role + namespace: '{{ .Release.Namespace }}' +rules: +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +- apiGroups: + - "" + resources: + - secrets + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + rbac.authorization.k8s.io/aggregate-to-admin: "true" + name: argo-aggregate-to-admin +rules: +- apiGroups: + - argoproj.io + resources: + - workflows + - workflows/finalizers + - workfloweventbindings + - workfloweventbindings/finalizers + - workflowtemplates + - workflowtemplates/finalizers + - cronworkflows + - cronworkflows/finalizers + - clusterworkflowtemplates + - clusterworkflowtemplates/finalizers + - workflowtasksets + - workflowtasksets/finalizers + - workflowtaskresults + - workflowtaskresults/finalizers + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + rbac.authorization.k8s.io/aggregate-to-edit: "true" + name: argo-aggregate-to-edit +rules: +- apiGroups: + - argoproj.io + resources: 
+ - workflows + - workflows/finalizers + - workfloweventbindings + - workfloweventbindings/finalizers + - workflowtemplates + - workflowtemplates/finalizers + - cronworkflows + - cronworkflows/finalizers + - clusterworkflowtemplates + - clusterworkflowtemplates/finalizers + - workflowtaskresults + - workflowtaskresults/finalizers + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + rbac.authorization.k8s.io/aggregate-to-view: "true" + name: argo-aggregate-to-view +rules: +- apiGroups: + - argoproj.io + resources: + - workflows + - workflows/finalizers + - workfloweventbindings + - workfloweventbindings/finalizers + - workflowtemplates + - workflowtemplates/finalizers + - cronworkflows + - cronworkflows/finalizers + - clusterworkflowtemplates + - clusterworkflowtemplates/finalizers + - workflowtaskresults + - workflowtaskresults/finalizers + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: argo-cluster-role +rules: +- apiGroups: + - "" + resources: + - pods + - pods/exec + verbs: + - create + - get + - list + - watch + - update + - patch + - delete +- apiGroups: + - "" + resources: + - configmaps + - nodes + verbs: + - get + - watch + - list +- apiGroups: + - "" + resources: + - persistentvolumeclaims + - persistentvolumeclaims/finalizers + verbs: + - create + - update + - delete + - get +- apiGroups: + - argoproj.io + resources: + - workflows + - workflows/finalizers + - workflowtasksets + - workflowtasksets/finalizers + - workflowartifactgctasks + verbs: + - get + - list + - watch + - update + - patch + - delete + - create +- apiGroups: + - argoproj.io + resources: + - workflowtemplates + - workflowtemplates/finalizers + - clusterworkflowtemplates + - clusterworkflowtemplates/finalizers + verbs: + - get + - list + - watch +- apiGroups: + - argoproj.io + 
resources: + - workflowtaskresults + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + - deletecollection +- apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - get + - list +- apiGroups: + - argoproj.io + resources: + - cronworkflows + - cronworkflows/finalizers + verbs: + - get + - list + - watch + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - get + - list +- apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - create + - get + - delete +- apiGroups: + - "" + resourceNames: + - argo-workflows-agent-ca-certificates + resources: + - secrets + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: argo-binding + namespace: '{{ .Release.Namespace }}' +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: argo-role +subjects: +- kind: ServiceAccount + name: argo + namespace: '{{ .Release.Namespace }}' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: argo-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: argo-cluster-role +subjects: +- kind: ServiceAccount + name: argo + namespace: '{{ .Release.Namespace }}' +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: amd-gpu-operator-workflow-controller-config + namespace: '{{ .Release.Namespace }}' +data: + instanceID: amd-gpu-operator-remediation-workflow +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: amd-gpu-operator-workflow-controller-pc +value: 1000000 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: amd-gpu-operator-workflow-controller + namespace: '{{ .Release.Namespace }}' +spec: + selector: + matchLabels: + app: amd-gpu-operator-workflow-controller + template: + metadata: + labels: + app: amd-gpu-operator-workflow-controller + spec: + {{- with .Values.controllerManager.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + nodeSelector: {{- toYaml .Values.controllerManager.nodeSelector | nindent 8 }} + containers: + - name: workflow-controller + command: [ "workflow-controller" ] + args: + - "--configmap" + - "amd-gpu-operator-workflow-controller-config" + env: + - name: LEADER_ELECTION_IDENTITY + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + image: {{ .Values.remediation.controller.image }} + livenessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: 6060 + initialDelaySeconds: 90 + periodSeconds: 60 + timeoutSeconds: 30 + ports: + - containerPort: 9090 + name: metrics + - containerPort: 6060 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + priorityClassName: amd-gpu-operator-workflow-controller-pc + securityContext: + runAsNonRoot: true + serviceAccountName: argo + tolerations: + - key: "amd-gpu-unhealthy" + operator: "Exists" + effect: "NoSchedule" +{{- end }} \ No newline at end of file diff --git a/helm-charts-k8s/Chart.lock b/helm-charts-k8s/Chart.lock index 21700a740..257836ff4 100644 --- a/helm-charts-k8s/Chart.lock +++ b/helm-charts-k8s/Chart.lock @@ -5,8 +5,8 @@ dependencies: - name: kmm repository: file://./charts/kmm version: v1.0.0 -- name: remediation-controller - repository: file://./charts/remediation +- name: remediation-crds + repository: file://./charts/remediation-crds version: v1.0.0 -digest: sha256:41fa6a6232514acebf6abdcb1bccaf087e134b9f413b8fa33a7fec1f58a99e07 -generated: "2026-02-01T12:58:13.380331409Z" +digest: sha256:4c6b1f3224839e54d1523759be597d20ca2fc6508eb17fda2992a95a00e1fd70 +generated: "2026-02-23T09:27:25.914887646Z" diff --git a/helm-charts-k8s/Chart.yaml b/helm-charts-k8s/Chart.yaml index 6b9f0f177..41afb478b 100644 --- a/helm-charts-k8s/Chart.yaml +++ b/helm-charts-k8s/Chart.yaml @@ -31,7 +31,7 @@ dependencies: version: v1.0.0 repository: "file://./charts/kmm" condition: kmm.enabled -- name: 
remediation-controller +- name: remediation-crds version: v1.0.0 - repository: "file://./charts/remediation" - condition: remediation.enabled \ No newline at end of file + repository: "file://./charts/remediation-crds" + condition: remediation.installCRDs \ No newline at end of file diff --git a/helm-charts-k8s/README.md b/helm-charts-k8s/README.md index cfe8d1dc2..cf13923a2 100644 --- a/helm-charts-k8s/README.md +++ b/helm-charts-k8s/README.md @@ -143,7 +143,7 @@ Kubernetes: `>= 1.29.0-0` | Repository | Name | Version | |------------|------|---------| | file://./charts/kmm | kmm | v1.0.0 | -| file://./charts/remediation | remediation-controller | v1.0.0 | +| file://./charts/remediation-crds | remediation-crds | v1.0.0 | | https://kubernetes-sigs.github.io/node-feature-discovery/charts | node-feature-discovery | v0.16.1 | ## Values @@ -252,7 +252,9 @@ Kubernetes: `>= 1.29.0-0` | node-feature-discovery.enabled | bool | `true` | Set to true/false to enable/disable the installation of node feature discovery (NFD) operator | | node-feature-discovery.worker.nodeSelector | object | `{}` | Set nodeSelector for NFD worker daemonset | | node-feature-discovery.worker.tolerations | list | `[{"effect":"NoExecute","key":"amd-dcm","operator":"Equal","value":"up"},{"effect":"NoSchedule","key":"amd-gpu-unhealthy","operator":"Exists"}]` | Set tolerations for NFD worker daemonset | +| remediation.controller | object | `{"image":"quay.io/argoproj/workflow-controller:v3.6.5"}` | Set the controller image for remediation workflow controller deployment | | remediation.enabled | bool | `true` | Set to true/false to enable/disable the installation of remediation workflow controller | +| remediation.installCRDs | bool | `true` | Set to true/false to enable/disable the installation of Argo CRDs used by the remediation workflow controller | | upgradeCRD | bool | `true` | CRD will be patched as pre-upgrade/pre-rollback hook when doing helm upgrade/rollback to current helm chart | | 
kmm.controller.affinity | object | `{"nodeAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"preference":{"matchExpressions":[{"key":"node-role.kubernetes.io/control-plane","operator":"Exists"}]},"weight":1}]}}` | Affinity for the KMM controller manager deployment | | kmm.controller.manager.args[0] | string | `"--config=controller_config.yaml"` | | @@ -317,8 +319,3 @@ Kubernetes: `>= 1.29.0-0` | kmm.webhookService.ports[0].protocol | string | `"TCP"` | | | kmm.webhookService.ports[0].targetPort | int | `9443` | | | kmm.webhookService.type | string | `"ClusterIP"` | | -| remediation-controller.controller.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].preference.matchExpressions[0].key | string | `"node-role.kubernetes.io/control-plane"` | | -| remediation-controller.controller.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].preference.matchExpressions[0].operator | string | `"Exists"` | | -| remediation-controller.controller.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution[0].weight | int | `1` | | -| remediation-controller.controller.image | string | `"quay.io/argoproj/workflow-controller:v3.6.5"` | | -| remediation-controller.controller.nodeSelector | object | `{}` | | diff --git a/helm-charts-k8s/charts/remediation-crds/.helmignore b/helm-charts-k8s/charts/remediation-crds/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts-k8s/charts/remediation-crds/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts-k8s/charts/remediation-crds/Chart.yaml b/helm-charts-k8s/charts/remediation-crds/Chart.yaml new file mode 100644 index 000000000..79d9d04fe --- /dev/null +++ b/helm-charts-k8s/charts/remediation-crds/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +name: remediation-crds +description: A Helm chart for installing Argo Workflows CRDs for remediation in AMD GPU Operator +type: application +version: v1.0.0 \ No newline at end of file diff --git a/helm-charts-k8s/charts/remediation-crds/crds/clusterworkflowtemplate-crd.yaml b/helm-charts-k8s/charts/remediation-crds/crds/clusterworkflowtemplate-crd.yaml new file mode 100644 index 000000000..d3b0d000f --- /dev/null +++ b/helm-charts-k8s/charts/remediation-crds/crds/clusterworkflowtemplate-crd.yaml @@ -0,0 +1,52 @@ +--- +# Source: remediation-crds/templates/clusterworkflowtemplate-crd.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clusterworkflowtemplates.argoproj.io + labels: + app.kubernetes.io/component: remediation + app.kubernetes.io/part-of: remediation + helm.sh/chart: remediation-crds-0.1.0 + app.kubernetes.io/name: remediation-crds + app.kubernetes.io/instance: amd-gpu + app.kubernetes.io/version: "0.1.0" + app.kubernetes.io/managed-by: Helm +spec: + group: argoproj.io + names: + kind: ClusterWorkflowTemplate + listKind: ClusterWorkflowTemplateList + plural: clusterworkflowtemplates + shortNames: + - clusterwftmpl + - cwft + singular: clusterworkflowtemplate + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + required: + - metadata + - 
spec + type: object + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/helm-charts-k8s/charts/remediation-crds/crds/cronworkflow-crd.yaml b/helm-charts-k8s/charts/remediation-crds/crds/cronworkflow-crd.yaml new file mode 100644 index 000000000..19e4fb02f --- /dev/null +++ b/helm-charts-k8s/charts/remediation-crds/crds/cronworkflow-crd.yaml @@ -0,0 +1,56 @@ +--- +# Source: remediation-crds/templates/cronworkflow-crd.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: cronworkflows.argoproj.io + labels: + app.kubernetes.io/component: remediation + app.kubernetes.io/part-of: remediation + helm.sh/chart: remediation-crds-0.1.0 + app.kubernetes.io/name: remediation-crds + app.kubernetes.io/instance: amd-gpu + app.kubernetes.io/version: "0.1.0" + app.kubernetes.io/managed-by: Helm +spec: + group: argoproj.io + names: + kind: CronWorkflow + listKind: CronWorkflowList + plural: cronworkflows + shortNames: + - cwf + - cronwf + singular: cronworkflow + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + status: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + required: + - metadata + - spec + type: object + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/helm-charts-k8s/charts/remediation-crds/crds/workflow-crd.yaml b/helm-charts-k8s/charts/remediation-crds/crds/workflow-crd.yaml new file mode 100644 index 000000000..f6b55ddf5 --- /dev/null +++ b/helm-charts-k8s/charts/remediation-crds/crds/workflow-crd.yaml @@ -0,0 +1,71 @@ +--- +# Source: remediation-crds/templates/workflow-crd.yaml +apiVersion: 
apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: workflows.argoproj.io + labels: + app.kubernetes.io/component: remediation + app.kubernetes.io/part-of: remediation + helm.sh/chart: remediation-crds-0.1.0 + app.kubernetes.io/name: remediation-crds + app.kubernetes.io/instance: amd-gpu + app.kubernetes.io/version: "0.1.0" + app.kubernetes.io/managed-by: Helm +spec: + group: argoproj.io + names: + kind: Workflow + listKind: WorkflowList + plural: workflows + shortNames: + - wf + singular: workflow + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Status of the workflow + jsonPath: .status.phase + name: Status + type: string + - description: When the workflow was started + format: date-time + jsonPath: .status.startedAt + name: Age + type: date + - description: Human readable message indicating details about why the workflow + is in this condition. + jsonPath: .status.message + name: Message + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + status: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + required: + - metadata + - spec + type: object + served: true + storage: true + subresources: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/helm-charts-k8s/charts/remediation-crds/crds/workflowartifactgctask-crd.yaml b/helm-charts-k8s/charts/remediation-crds/crds/workflowartifactgctask-crd.yaml new file mode 100644 index 000000000..c22eb3baf --- /dev/null +++ b/helm-charts-k8s/charts/remediation-crds/crds/workflowartifactgctask-crd.yaml @@ -0,0 +1,1153 @@ +--- +# Source: remediation-crds/templates/workflowartifactgctask-crd.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: 
workflowartifactgctasks.argoproj.io + labels: + app.kubernetes.io/component: remediation + app.kubernetes.io/part-of: remediation + helm.sh/chart: remediation-crds-0.1.0 + app.kubernetes.io/name: remediation-crds + app.kubernetes.io/instance: amd-gpu + app.kubernetes.io/version: "0.1.0" + app.kubernetes.io/managed-by: Helm +spec: + group: argoproj.io + names: + kind: WorkflowArtifactGCTask + listKind: WorkflowArtifactGCTaskList + plural: workflowartifactgctasks + shortNames: + - wfat + singular: workflowartifactgctask + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + properties: + artifactsByNode: + additionalProperties: + properties: + archiveLocation: + properties: + archiveLogs: + type: boolean + artifactory: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + url: + type: string + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - url + type: object + azure: + properties: + accountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + blob: + type: string + container: + type: string + endpoint: + type: string + useSDKCreds: + type: boolean + required: + - blob + - container + - endpoint + type: object + gcs: + properties: + bucket: + type: string + key: + type: string + serviceAccountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - key + type: object + git: + properties: 
+ branch: + type: string + depth: + format: int64 + type: integer + disableSubmodules: + type: boolean + fetch: + items: + type: string + type: array + insecureIgnoreHostKey: + type: boolean + insecureSkipTLS: + type: boolean + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + repo: + type: string + revision: + type: string + singleBranch: + type: boolean + sshPrivateKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - repo + type: object + hdfs: + properties: + addresses: + items: + type: string + type: array + dataTransferProtection: + type: string + force: + type: boolean + hdfsUser: + type: string + krbCCacheSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbConfigConfigMap: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbKeytabSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbRealm: + type: string + krbServicePrincipalName: + type: string + krbUsername: + type: string + path: + type: string + required: + - path + type: object + http: + properties: + auth: + properties: + basicAuth: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + 
- key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientCert: + properties: + clientCertSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + oauth2: + properties: + clientIDSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientSecretSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + items: + properties: + key: + type: string + value: + type: string + required: + - key + type: object + type: array + scopes: + items: + type: string + type: array + tokenURLSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + headers: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + url: + type: string + required: + - url + type: object + oss: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + createBucketIfNotPresent: + type: boolean + endpoint: + type: string + key: + type: string + 
lifecycleRule: + properties: + markDeletionAfterDays: + format: int32 + type: integer + markInfrequentAccessAfterDays: + format: int32 + type: integer + type: object + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + securityToken: + type: string + useSDKCreds: + type: boolean + required: + - key + type: object + raw: + properties: + data: + type: string + required: + - data + type: object + s3: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + caSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + createBucketIfNotPresent: + properties: + objectLocking: + type: boolean + type: object + encryptionOptions: + properties: + enableEncryption: + type: boolean + kmsEncryptionContext: + type: string + kmsKeyId: + type: string + serverSideCustomerKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + endpoint: + type: string + insecure: + type: boolean + key: + type: string + region: + type: string + roleARN: + type: string + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + sessionTokenSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + useSDKCreds: + type: boolean + type: object + type: object + artifacts: + additionalProperties: + 
properties: + archive: + properties: + none: + type: object + tar: + properties: + compressionLevel: + format: int32 + type: integer + type: object + zip: + type: object + type: object + archiveLogs: + type: boolean + artifactGC: + properties: + podMetadata: + properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + type: object + serviceAccountName: + type: string + strategy: + enum: + - "" + - OnWorkflowCompletion + - OnWorkflowDeletion + - Never + type: string + type: object + artifactory: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + url: + type: string + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - url + type: object + azure: + properties: + accountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + blob: + type: string + container: + type: string + endpoint: + type: string + useSDKCreds: + type: boolean + required: + - blob + - container + - endpoint + type: object + deleted: + type: boolean + from: + type: string + fromExpression: + type: string + gcs: + properties: + bucket: + type: string + key: + type: string + serviceAccountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - key + type: object + git: + properties: + branch: + type: string + depth: + format: int64 + type: integer + disableSubmodules: + type: boolean + fetch: + items: + type: string + type: array + insecureIgnoreHostKey: + type: 
boolean + insecureSkipTLS: + type: boolean + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + repo: + type: string + revision: + type: string + singleBranch: + type: boolean + sshPrivateKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - repo + type: object + globalName: + type: string + hdfs: + properties: + addresses: + items: + type: string + type: array + dataTransferProtection: + type: string + force: + type: boolean + hdfsUser: + type: string + krbCCacheSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbConfigConfigMap: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbKeytabSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbRealm: + type: string + krbServicePrincipalName: + type: string + krbUsername: + type: string + path: + type: string + required: + - path + type: object + http: + properties: + auth: + properties: + basicAuth: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + 
optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientCert: + properties: + clientCertSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + oauth2: + properties: + clientIDSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientSecretSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + items: + properties: + key: + type: string + value: + type: string + required: + - key + type: object + type: array + scopes: + items: + type: string + type: array + tokenURLSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + headers: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + url: + type: string + required: + - url + type: object + mode: + format: int32 + type: integer + name: + type: string + optional: + type: boolean + oss: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + createBucketIfNotPresent: + type: boolean + endpoint: + type: string + key: + type: string + lifecycleRule: + properties: + markDeletionAfterDays: + 
format: int32 + type: integer + markInfrequentAccessAfterDays: + format: int32 + type: integer + type: object + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + securityToken: + type: string + useSDKCreds: + type: boolean + required: + - key + type: object + path: + type: string + raw: + properties: + data: + type: string + required: + - data + type: object + recurseMode: + type: boolean + s3: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + caSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + createBucketIfNotPresent: + properties: + objectLocking: + type: boolean + type: object + encryptionOptions: + properties: + enableEncryption: + type: boolean + kmsEncryptionContext: + type: string + kmsKeyId: + type: string + serverSideCustomerKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + endpoint: + type: string + insecure: + type: boolean + key: + type: string + region: + type: string + roleARN: + type: string + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + sessionTokenSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + useSDKCreds: + type: boolean + type: object + subPath: + type: string + required: + - name + type: object + 
type: object + type: object + type: object + type: object + status: + properties: + artifactResultsByNode: + additionalProperties: + properties: + artifactResults: + additionalProperties: + properties: + error: + type: string + name: + type: string + success: + type: boolean + required: + - name + type: object + type: object + type: object + type: object + type: object + required: + - metadata + - spec + type: object + served: true + storage: true + subresources: + status: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/helm-charts-k8s/charts/remediation-crds/crds/workfloweventbinding-crd.yaml b/helm-charts-k8s/charts/remediation-crds/crds/workfloweventbinding-crd.yaml new file mode 100644 index 000000000..81d60fb47 --- /dev/null +++ b/helm-charts-k8s/charts/remediation-crds/crds/workfloweventbinding-crd.yaml @@ -0,0 +1,697 @@ +--- +# Source: remediation-crds/templates/workfloweventbinding-crd.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: workfloweventbindings.argoproj.io + labels: + app.kubernetes.io/component: remediation + app.kubernetes.io/part-of: remediation + helm.sh/chart: remediation-crds-0.1.0 + app.kubernetes.io/name: remediation-crds + app.kubernetes.io/instance: amd-gpu + app.kubernetes.io/version: "0.1.0" + app.kubernetes.io/managed-by: Helm +spec: + group: argoproj.io + names: + kind: WorkflowEventBinding + listKind: WorkflowEventBindingList + plural: workfloweventbindings + shortNames: + - wfeb + singular: workfloweventbinding + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + properties: + event: + properties: + selector: + type: string + required: + - selector + type: object + submit: + properties: + arguments: + properties: + artifacts: + items: + properties: + archive: + properties: + none: + type: object + tar: + 
properties: + compressionLevel: + format: int32 + type: integer + type: object + zip: + type: object + type: object + archiveLogs: + type: boolean + artifactGC: + properties: + podMetadata: + properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + type: object + serviceAccountName: + type: string + strategy: + enum: + - "" + - OnWorkflowCompletion + - OnWorkflowDeletion + - Never + type: string + type: object + artifactory: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + url: + type: string + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - url + type: object + azure: + properties: + accountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + blob: + type: string + container: + type: string + endpoint: + type: string + useSDKCreds: + type: boolean + required: + - blob + - container + - endpoint + type: object + deleted: + type: boolean + from: + type: string + fromExpression: + type: string + gcs: + properties: + bucket: + type: string + key: + type: string + serviceAccountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - key + type: object + git: + properties: + branch: + type: string + depth: + format: int64 + type: integer + disableSubmodules: + type: boolean + fetch: + items: + type: string + type: array + insecureIgnoreHostKey: + type: boolean + insecureSkipTLS: + type: boolean + passwordSecret: + 
properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + repo: + type: string + revision: + type: string + singleBranch: + type: boolean + sshPrivateKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - repo + type: object + globalName: + type: string + hdfs: + properties: + addresses: + items: + type: string + type: array + dataTransferProtection: + type: string + force: + type: boolean + hdfsUser: + type: string + krbCCacheSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbConfigConfigMap: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbKeytabSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbRealm: + type: string + krbServicePrincipalName: + type: string + krbUsername: + type: string + path: + type: string + required: + - path + type: object + http: + properties: + auth: + properties: + basicAuth: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + 
x-kubernetes-map-type: atomic + type: object + clientCert: + properties: + clientCertSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + oauth2: + properties: + clientIDSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientSecretSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + items: + properties: + key: + type: string + value: + type: string + required: + - key + type: object + type: array + scopes: + items: + type: string + type: array + tokenURLSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + headers: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + url: + type: string + required: + - url + type: object + mode: + format: int32 + type: integer + name: + type: string + optional: + type: boolean + oss: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + createBucketIfNotPresent: + type: boolean + endpoint: + type: string + key: + type: string + lifecycleRule: + properties: + markDeletionAfterDays: + format: int32 + type: integer + markInfrequentAccessAfterDays: 
+ format: int32 + type: integer + type: object + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + securityToken: + type: string + useSDKCreds: + type: boolean + required: + - key + type: object + path: + type: string + raw: + properties: + data: + type: string + required: + - data + type: object + recurseMode: + type: boolean + s3: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + caSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + createBucketIfNotPresent: + properties: + objectLocking: + type: boolean + type: object + encryptionOptions: + properties: + enableEncryption: + type: boolean + kmsEncryptionContext: + type: string + kmsKeyId: + type: string + serverSideCustomerKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + endpoint: + type: string + insecure: + type: boolean + key: + type: string + region: + type: string + roleARN: + type: string + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + sessionTokenSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + useSDKCreds: + type: boolean + type: object + subPath: + type: string + required: + - name + type: object + type: array + parameters: + items: + properties: + default: + 
type: string + description: + type: string + enum: + items: + type: string + type: array + globalName: + type: string + name: + type: string + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + default: + type: string + event: + type: string + expression: + type: string + jqFilter: + type: string + jsonPath: + type: string + parameter: + type: string + path: + type: string + supplied: + type: object + type: object + required: + - name + type: object + type: array + type: object + metadata: + type: object + workflowTemplateRef: + properties: + clusterScope: + type: boolean + name: + type: string + type: object + required: + - workflowTemplateRef + type: object + required: + - event + type: object + required: + - metadata + - spec + type: object + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/helm-charts-k8s/charts/remediation-crds/crds/workflowtaskresult-crd.yaml b/helm-charts-k8s/charts/remediation-crds/crds/workflowtaskresult-crd.yaml new file mode 100644 index 000000000..994bd2266 --- /dev/null +++ b/helm-charts-k8s/charts/remediation-crds/crds/workflowtaskresult-crd.yaml @@ -0,0 +1,678 @@ +--- +# Source: remediation-crds/templates/workflowtaskresult-crd.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: workflowtaskresults.argoproj.io + labels: + app.kubernetes.io/component: remediation + app.kubernetes.io/part-of: remediation + helm.sh/chart: remediation-crds-0.1.0 + app.kubernetes.io/name: remediation-crds + app.kubernetes.io/instance: amd-gpu + app.kubernetes.io/version: "0.1.0" + app.kubernetes.io/managed-by: Helm +spec: + group: argoproj.io + names: + kind: WorkflowTaskResult + listKind: WorkflowTaskResultList + plural: workflowtaskresults + 
singular: workflowtaskresult + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + message: + type: string + metadata: + type: object + outputs: + properties: + artifacts: + items: + properties: + archive: + properties: + none: + type: object + tar: + properties: + compressionLevel: + format: int32 + type: integer + type: object + zip: + type: object + type: object + archiveLogs: + type: boolean + artifactGC: + properties: + podMetadata: + properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + type: object + serviceAccountName: + type: string + strategy: + enum: + - "" + - OnWorkflowCompletion + - OnWorkflowDeletion + - Never + type: string + type: object + artifactory: + properties: + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + url: + type: string + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - url + type: object + azure: + properties: + accountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + blob: + type: string + container: + type: string + endpoint: + type: string + useSDKCreds: + type: boolean + required: + - blob + - container + - endpoint + type: object + deleted: + type: boolean + from: + type: string + fromExpression: + type: string + gcs: + properties: + bucket: + type: string + key: + type: string + serviceAccountKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: 
object + x-kubernetes-map-type: atomic + required: + - key + type: object + git: + properties: + branch: + type: string + depth: + format: int64 + type: integer + disableSubmodules: + type: boolean + fetch: + items: + type: string + type: array + insecureIgnoreHostKey: + type: boolean + insecureSkipTLS: + type: boolean + passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + repo: + type: string + revision: + type: string + singleBranch: + type: boolean + sshPrivateKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - repo + type: object + globalName: + type: string + hdfs: + properties: + addresses: + items: + type: string + type: array + dataTransferProtection: + type: string + force: + type: boolean + hdfsUser: + type: string + krbCCacheSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbConfigConfigMap: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbKeytabSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + krbRealm: + type: string + krbServicePrincipalName: + type: string + krbUsername: + type: string + path: + type: string + required: + - path + type: object + http: + properties: + auth: + properties: + basicAuth: + properties: + 
passwordSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + usernameSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientCert: + properties: + clientCertSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + oauth2: + properties: + clientIDSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + clientSecretSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + items: + properties: + key: + type: string + value: + type: string + required: + - key + type: object + type: array + scopes: + items: + type: string + type: array + tokenURLSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + headers: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + url: + type: string + required: + - url + type: object + mode: + format: int32 + type: integer + name: + type: string + optional: + type: boolean + oss: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: 
string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + createBucketIfNotPresent: + type: boolean + endpoint: + type: string + key: + type: string + lifecycleRule: + properties: + markDeletionAfterDays: + format: int32 + type: integer + markInfrequentAccessAfterDays: + format: int32 + type: integer + type: object + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + securityToken: + type: string + useSDKCreds: + type: boolean + required: + - key + type: object + path: + type: string + raw: + properties: + data: + type: string + required: + - data + type: object + recurseMode: + type: boolean + s3: + properties: + accessKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + bucket: + type: string + caSecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + createBucketIfNotPresent: + properties: + objectLocking: + type: boolean + type: object + encryptionOptions: + properties: + enableEncryption: + type: boolean + kmsEncryptionContext: + type: string + kmsKeyId: + type: string + serverSideCustomerKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + endpoint: + type: string + insecure: + type: boolean + key: + type: string + region: + type: string + roleARN: + type: string + secretKeySecret: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + sessionTokenSecret: + 
properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + useSDKCreds: + type: boolean + type: object + subPath: + type: string + required: + - name + type: object + type: array + exitCode: + type: string + parameters: + items: + properties: + default: + type: string + description: + type: string + enum: + items: + type: string + type: array + globalName: + type: string + name: + type: string + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + default: + type: string + event: + type: string + expression: + type: string + jqFilter: + type: string + jsonPath: + type: string + parameter: + type: string + path: + type: string + supplied: + type: object + type: object + required: + - name + type: object + type: array + result: + type: string + type: object + phase: + type: string + progress: + type: string + required: + - metadata + type: object + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/helm-charts-k8s/charts/remediation-crds/crds/workflowtaskset-crd.yaml b/helm-charts-k8s/charts/remediation-crds/crds/workflowtaskset-crd.yaml new file mode 100644 index 000000000..3c1c8933b --- /dev/null +++ b/helm-charts-k8s/charts/remediation-crds/crds/workflowtaskset-crd.yaml @@ -0,0 +1,57 @@ +--- +# Source: remediation-crds/templates/workflowtaskset-crd.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: workflowtasksets.argoproj.io + labels: + app.kubernetes.io/component: remediation + app.kubernetes.io/part-of: remediation + helm.sh/chart: remediation-crds-0.1.0 + app.kubernetes.io/name: remediation-crds + app.kubernetes.io/instance: amd-gpu + 
app.kubernetes.io/version: "0.1.0" + app.kubernetes.io/managed-by: Helm +spec: + group: argoproj.io + names: + kind: WorkflowTaskSet + listKind: WorkflowTaskSetList + plural: workflowtasksets + shortNames: + - wfts + singular: workflowtaskset + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + status: + type: object + x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + required: + - metadata + - spec + type: object + served: true + storage: true + subresources: + status: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/helm-charts-k8s/charts/remediation-crds/crds/workflowtemplate-crd.yaml b/helm-charts-k8s/charts/remediation-crds/crds/workflowtemplate-crd.yaml new file mode 100644 index 000000000..7af05742e --- /dev/null +++ b/helm-charts-k8s/charts/remediation-crds/crds/workflowtemplate-crd.yaml @@ -0,0 +1,51 @@ +--- +# Source: remediation-crds/templates/workflowtemplate-crd.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: workflowtemplates.argoproj.io + labels: + app.kubernetes.io/component: remediation + app.kubernetes.io/part-of: remediation + helm.sh/chart: remediation-crds-0.1.0 + app.kubernetes.io/name: remediation-crds + app.kubernetes.io/instance: amd-gpu + app.kubernetes.io/version: "0.1.0" + app.kubernetes.io/managed-by: Helm +spec: + group: argoproj.io + names: + kind: WorkflowTemplate + listKind: WorkflowTemplateList + plural: workflowtemplates + shortNames: + - wftmpl + singular: workflowtemplate + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + 
x-kubernetes-map-type: atomic + x-kubernetes-preserve-unknown-fields: true + required: + - metadata + - spec + type: object + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/helm-charts-k8s/charts/remediation-crds/templates/_helpers.tpl b/helm-charts-k8s/charts/remediation-crds/templates/_helpers.tpl new file mode 100644 index 000000000..9c133ea3e --- /dev/null +++ b/helm-charts-k8s/charts/remediation-crds/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "remediation-crds.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "remediation-crds.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "remediation-crds.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "remediation-crds.labels" -}} +helm.sh/chart: {{ include "remediation-crds.chart" . }} +{{ include "remediation-crds.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "remediation-crds.selectorLabels" -}} +app.kubernetes.io/name: {{ include "remediation-crds.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "remediation-crds.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "remediation-crds.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts-k8s/charts/remediation-crds/values.yaml b/helm-charts-k8s/charts/remediation-crds/values.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/helm-charts-k8s/charts/remediation/Chart.yaml b/helm-charts-k8s/charts/remediation/Chart.yaml deleted file mode 100644 index caf35c8e6..000000000 --- a/helm-charts-k8s/charts/remediation/Chart.yaml +++ /dev/null @@ -1,5 +0,0 @@ -apiVersion: v1 -name: remediation-controller -description: A Helm chart for remediation workflow controller for AMD GPU Operator -type: application -version: v1.0.0 \ No newline at end of file diff --git a/helm-charts-k8s/charts/remediation/templates/deployment.yaml b/helm-charts-k8s/charts/remediation/templates/deployment.yaml deleted file mode 100644 index a9bfc08a5..000000000 --- a/helm-charts-k8s/charts/remediation/templates/deployment.yaml +++ /dev/null @@ -1,3034 +0,0 @@ -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: clusterworkflowtemplates.argoproj.io -spec: - group: argoproj.io - names: - kind: ClusterWorkflowTemplate - listKind: ClusterWorkflowTemplateList - plural: clusterworkflowtemplates - shortNames: - - clusterwftmpl - - cwft - singular: clusterworkflowtemplate - scope: Cluster - versions: - - name: v1alpha1 - 
schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - required: - - metadata - - spec - type: object - served: true - storage: true ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: cronworkflows.argoproj.io -spec: - group: argoproj.io - names: - kind: CronWorkflow - listKind: CronWorkflowList - plural: cronworkflows - shortNames: - - cwf - - cronwf - singular: cronworkflow - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - status: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - required: - - metadata - - spec - type: object - served: true - storage: true ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: workflowartifactgctasks.argoproj.io -spec: - group: argoproj.io - names: - kind: WorkflowArtifactGCTask - listKind: WorkflowArtifactGCTaskList - plural: workflowartifactgctasks - shortNames: - - wfat - singular: workflowartifactgctask - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - properties: - artifactsByNode: - additionalProperties: - properties: - archiveLocation: - properties: - archiveLogs: - type: boolean - artifactory: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - url: - type: string - usernameSecret: - properties: - key: - type: string - name: - default: "" 
- type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - url - type: object - azure: - properties: - accountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - blob: - type: string - container: - type: string - endpoint: - type: string - useSDKCreds: - type: boolean - required: - - blob - - container - - endpoint - type: object - gcs: - properties: - bucket: - type: string - key: - type: string - serviceAccountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - key - type: object - git: - properties: - branch: - type: string - depth: - format: int64 - type: integer - disableSubmodules: - type: boolean - fetch: - items: - type: string - type: array - insecureIgnoreHostKey: - type: boolean - insecureSkipTLS: - type: boolean - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - repo: - type: string - revision: - type: string - singleBranch: - type: boolean - sshPrivateKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - repo - type: object - hdfs: - properties: - addresses: - items: - type: string - type: array - dataTransferProtection: - type: string - force: - type: boolean - hdfsUser: - type: string - krbCCacheSecret: - properties: - key: - type: string - name: - default: "" - 
type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbConfigConfigMap: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbKeytabSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbRealm: - type: string - krbServicePrincipalName: - type: string - krbUsername: - type: string - path: - type: string - required: - - path - type: object - http: - properties: - auth: - properties: - basicAuth: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - clientCert: - properties: - clientCertSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - oauth2: - properties: - clientIDSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientSecretSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - endpointParams: - items: - properties: - key: - type: string - value: - type: 
string - required: - - key - type: object - type: array - scopes: - items: - type: string - type: array - tokenURLSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - type: object - headers: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - url: - type: string - required: - - url - type: object - oss: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - createBucketIfNotPresent: - type: boolean - endpoint: - type: string - key: - type: string - lifecycleRule: - properties: - markDeletionAfterDays: - format: int32 - type: integer - markInfrequentAccessAfterDays: - format: int32 - type: integer - type: object - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - securityToken: - type: string - useSDKCreds: - type: boolean - required: - - key - type: object - raw: - properties: - data: - type: string - required: - - data - type: object - s3: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - caSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - createBucketIfNotPresent: - properties: - objectLocking: - type: boolean - type: object - encryptionOptions: - properties: - enableEncryption: - type: boolean - kmsEncryptionContext: - type: string - 
kmsKeyId: - type: string - serverSideCustomerKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - endpoint: - type: string - insecure: - type: boolean - key: - type: string - region: - type: string - roleARN: - type: string - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - sessionTokenSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - useSDKCreds: - type: boolean - type: object - type: object - artifacts: - additionalProperties: - properties: - archive: - properties: - none: - type: object - tar: - properties: - compressionLevel: - format: int32 - type: integer - type: object - zip: - type: object - type: object - archiveLogs: - type: boolean - artifactGC: - properties: - podMetadata: - properties: - annotations: - additionalProperties: - type: string - type: object - labels: - additionalProperties: - type: string - type: object - type: object - serviceAccountName: - type: string - strategy: - enum: - - "" - - OnWorkflowCompletion - - OnWorkflowDeletion - - Never - type: string - type: object - artifactory: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - url: - type: string - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - url - type: object - azure: - properties: - accountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - 
optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - blob: - type: string - container: - type: string - endpoint: - type: string - useSDKCreds: - type: boolean - required: - - blob - - container - - endpoint - type: object - deleted: - type: boolean - from: - type: string - fromExpression: - type: string - gcs: - properties: - bucket: - type: string - key: - type: string - serviceAccountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - key - type: object - git: - properties: - branch: - type: string - depth: - format: int64 - type: integer - disableSubmodules: - type: boolean - fetch: - items: - type: string - type: array - insecureIgnoreHostKey: - type: boolean - insecureSkipTLS: - type: boolean - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - repo: - type: string - revision: - type: string - singleBranch: - type: boolean - sshPrivateKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - repo - type: object - globalName: - type: string - hdfs: - properties: - addresses: - items: - type: string - type: array - dataTransferProtection: - type: string - force: - type: boolean - hdfsUser: - type: string - krbCCacheSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbConfigConfigMap: - properties: - key: 
- type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbKeytabSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbRealm: - type: string - krbServicePrincipalName: - type: string - krbUsername: - type: string - path: - type: string - required: - - path - type: object - http: - properties: - auth: - properties: - basicAuth: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - clientCert: - properties: - clientCertSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - oauth2: - properties: - clientIDSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientSecretSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - endpointParams: - items: - properties: - key: - type: string - value: - type: string - required: - - key - type: object - type: array - scopes: - items: - type: string - type: array - tokenURLSecret: - properties: - key: - type: 
string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - type: object - headers: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - url: - type: string - required: - - url - type: object - mode: - format: int32 - type: integer - name: - type: string - optional: - type: boolean - oss: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - createBucketIfNotPresent: - type: boolean - endpoint: - type: string - key: - type: string - lifecycleRule: - properties: - markDeletionAfterDays: - format: int32 - type: integer - markInfrequentAccessAfterDays: - format: int32 - type: integer - type: object - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - securityToken: - type: string - useSDKCreds: - type: boolean - required: - - key - type: object - path: - type: string - raw: - properties: - data: - type: string - required: - - data - type: object - recurseMode: - type: boolean - s3: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - caSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - createBucketIfNotPresent: - properties: - objectLocking: - type: boolean - type: object - encryptionOptions: - properties: - enableEncryption: - type: boolean - kmsEncryptionContext: - type: string - kmsKeyId: - 
type: string - serverSideCustomerKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - endpoint: - type: string - insecure: - type: boolean - key: - type: string - region: - type: string - roleARN: - type: string - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - sessionTokenSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - useSDKCreds: - type: boolean - type: object - subPath: - type: string - required: - - name - type: object - type: object - type: object - type: object - type: object - status: - properties: - artifactResultsByNode: - additionalProperties: - properties: - artifactResults: - additionalProperties: - properties: - error: - type: string - name: - type: string - success: - type: boolean - required: - - name - type: object - type: object - type: object - type: object - type: object - required: - - metadata - - spec - type: object - served: true - storage: true - subresources: - status: {} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: workfloweventbindings.argoproj.io -spec: - group: argoproj.io - names: - kind: WorkflowEventBinding - listKind: WorkflowEventBindingList - plural: workfloweventbindings - shortNames: - - wfeb - singular: workfloweventbinding - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - properties: - event: - properties: - selector: - type: string - required: - - selector - type: object - submit: - properties: - arguments: - properties: - artifacts: - items: 
- properties: - archive: - properties: - none: - type: object - tar: - properties: - compressionLevel: - format: int32 - type: integer - type: object - zip: - type: object - type: object - archiveLogs: - type: boolean - artifactGC: - properties: - podMetadata: - properties: - annotations: - additionalProperties: - type: string - type: object - labels: - additionalProperties: - type: string - type: object - type: object - serviceAccountName: - type: string - strategy: - enum: - - "" - - OnWorkflowCompletion - - OnWorkflowDeletion - - Never - type: string - type: object - artifactory: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - url: - type: string - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - url - type: object - azure: - properties: - accountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - blob: - type: string - container: - type: string - endpoint: - type: string - useSDKCreds: - type: boolean - required: - - blob - - container - - endpoint - type: object - deleted: - type: boolean - from: - type: string - fromExpression: - type: string - gcs: - properties: - bucket: - type: string - key: - type: string - serviceAccountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - key - type: object - git: - properties: - branch: - type: string - depth: - format: int64 - type: integer - disableSubmodules: - type: boolean - fetch: - items: - type: string - type: array - insecureIgnoreHostKey: - 
type: boolean - insecureSkipTLS: - type: boolean - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - repo: - type: string - revision: - type: string - singleBranch: - type: boolean - sshPrivateKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - repo - type: object - globalName: - type: string - hdfs: - properties: - addresses: - items: - type: string - type: array - dataTransferProtection: - type: string - force: - type: boolean - hdfsUser: - type: string - krbCCacheSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbConfigConfigMap: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbKeytabSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbRealm: - type: string - krbServicePrincipalName: - type: string - krbUsername: - type: string - path: - type: string - required: - - path - type: object - http: - properties: - auth: - properties: - basicAuth: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - 
optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - clientCert: - properties: - clientCertSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - oauth2: - properties: - clientIDSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientSecretSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - endpointParams: - items: - properties: - key: - type: string - value: - type: string - required: - - key - type: object - type: array - scopes: - items: - type: string - type: array - tokenURLSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - type: object - headers: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - url: - type: string - required: - - url - type: object - mode: - format: int32 - type: integer - name: - type: string - optional: - type: boolean - oss: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - createBucketIfNotPresent: - type: boolean - endpoint: - type: string - key: - type: string - lifecycleRule: - properties: - markDeletionAfterDays: - 
format: int32 - type: integer - markInfrequentAccessAfterDays: - format: int32 - type: integer - type: object - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - securityToken: - type: string - useSDKCreds: - type: boolean - required: - - key - type: object - path: - type: string - raw: - properties: - data: - type: string - required: - - data - type: object - recurseMode: - type: boolean - s3: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - caSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - createBucketIfNotPresent: - properties: - objectLocking: - type: boolean - type: object - encryptionOptions: - properties: - enableEncryption: - type: boolean - kmsEncryptionContext: - type: string - kmsKeyId: - type: string - serverSideCustomerKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - endpoint: - type: string - insecure: - type: boolean - key: - type: string - region: - type: string - roleARN: - type: string - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - sessionTokenSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - useSDKCreds: - type: boolean - type: object - subPath: - type: string - required: - - name - type: object - 
type: array - parameters: - items: - properties: - default: - type: string - description: - type: string - enum: - items: - type: string - type: array - globalName: - type: string - name: - type: string - value: - type: string - valueFrom: - properties: - configMapKeyRef: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - default: - type: string - event: - type: string - expression: - type: string - jqFilter: - type: string - jsonPath: - type: string - parameter: - type: string - path: - type: string - supplied: - type: object - type: object - required: - - name - type: object - type: array - type: object - metadata: - type: object - workflowTemplateRef: - properties: - clusterScope: - type: boolean - name: - type: string - type: object - required: - - workflowTemplateRef - type: object - required: - - event - type: object - required: - - metadata - - spec - type: object - served: true - storage: true ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: workflows.argoproj.io -spec: - group: argoproj.io - names: - kind: Workflow - listKind: WorkflowList - plural: workflows - shortNames: - - wf - singular: workflow - scope: Namespaced - versions: - - additionalPrinterColumns: - - description: Status of the workflow - jsonPath: .status.phase - name: Status - type: string - - description: When the workflow was started - format: date-time - jsonPath: .status.startedAt - name: Age - type: date - - description: Human readable message indicating details about why the workflow - is in this condition. 
- jsonPath: .status.message - name: Message - type: string - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - status: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - required: - - metadata - - spec - type: object - served: true - storage: true - subresources: {} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: workflowtaskresults.argoproj.io -spec: - group: argoproj.io - names: - kind: WorkflowTaskResult - listKind: WorkflowTaskResultList - plural: workflowtaskresults - singular: workflowtaskresult - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - message: - type: string - metadata: - type: object - outputs: - properties: - artifacts: - items: - properties: - archive: - properties: - none: - type: object - tar: - properties: - compressionLevel: - format: int32 - type: integer - type: object - zip: - type: object - type: object - archiveLogs: - type: boolean - artifactGC: - properties: - podMetadata: - properties: - annotations: - additionalProperties: - type: string - type: object - labels: - additionalProperties: - type: string - type: object - type: object - serviceAccountName: - type: string - strategy: - enum: - - "" - - OnWorkflowCompletion - - OnWorkflowDeletion - - Never - type: string - type: object - artifactory: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - url: - type: string - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - 
x-kubernetes-map-type: atomic - required: - - url - type: object - azure: - properties: - accountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - blob: - type: string - container: - type: string - endpoint: - type: string - useSDKCreds: - type: boolean - required: - - blob - - container - - endpoint - type: object - deleted: - type: boolean - from: - type: string - fromExpression: - type: string - gcs: - properties: - bucket: - type: string - key: - type: string - serviceAccountKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - key - type: object - git: - properties: - branch: - type: string - depth: - format: int64 - type: integer - disableSubmodules: - type: boolean - fetch: - items: - type: string - type: array - insecureIgnoreHostKey: - type: boolean - insecureSkipTLS: - type: boolean - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - repo: - type: string - revision: - type: string - singleBranch: - type: boolean - sshPrivateKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - required: - - repo - type: object - globalName: - type: string - hdfs: - properties: - addresses: - items: - type: string - type: array - dataTransferProtection: - type: string - force: - type: boolean - hdfsUser: - type: string - krbCCacheSecret: - properties: - key: - type: 
string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbConfigConfigMap: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbKeytabSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - krbRealm: - type: string - krbServicePrincipalName: - type: string - krbUsername: - type: string - path: - type: string - required: - - path - type: object - http: - properties: - auth: - properties: - basicAuth: - properties: - passwordSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - usernameSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - clientCert: - properties: - clientCertSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - oauth2: - properties: - clientIDSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - clientSecretSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - endpointParams: - items: - properties: - key: - 
type: string - value: - type: string - required: - - key - type: object - type: array - scopes: - items: - type: string - type: array - tokenURLSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - type: object - headers: - items: - properties: - name: - type: string - value: - type: string - required: - - name - - value - type: object - type: array - url: - type: string - required: - - url - type: object - mode: - format: int32 - type: integer - name: - type: string - optional: - type: boolean - oss: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - createBucketIfNotPresent: - type: boolean - endpoint: - type: string - key: - type: string - lifecycleRule: - properties: - markDeletionAfterDays: - format: int32 - type: integer - markInfrequentAccessAfterDays: - format: int32 - type: integer - type: object - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - securityToken: - type: string - useSDKCreds: - type: boolean - required: - - key - type: object - path: - type: string - raw: - properties: - data: - type: string - required: - - data - type: object - recurseMode: - type: boolean - s3: - properties: - accessKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - bucket: - type: string - caSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - createBucketIfNotPresent: - 
properties: - objectLocking: - type: boolean - type: object - encryptionOptions: - properties: - enableEncryption: - type: boolean - kmsEncryptionContext: - type: string - kmsKeyId: - type: string - serverSideCustomerKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - endpoint: - type: string - insecure: - type: boolean - key: - type: string - region: - type: string - roleARN: - type: string - secretKeySecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - sessionTokenSecret: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - useSDKCreds: - type: boolean - type: object - subPath: - type: string - required: - - name - type: object - type: array - exitCode: - type: string - parameters: - items: - properties: - default: - type: string - description: - type: string - enum: - items: - type: string - type: array - globalName: - type: string - name: - type: string - value: - type: string - valueFrom: - properties: - configMapKeyRef: - properties: - key: - type: string - name: - default: "" - type: string - optional: - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - default: - type: string - event: - type: string - expression: - type: string - jqFilter: - type: string - jsonPath: - type: string - parameter: - type: string - path: - type: string - supplied: - type: object - type: object - required: - - name - type: object - type: array - result: - type: string - type: object - phase: - type: string - progress: - type: string - required: - - metadata - type: object - served: true - storage: true ---- -apiVersion: apiextensions.k8s.io/v1 -kind: 
CustomResourceDefinition -metadata: - name: workflowtasksets.argoproj.io -spec: - group: argoproj.io - names: - kind: WorkflowTaskSet - listKind: WorkflowTaskSetList - plural: workflowtasksets - shortNames: - - wfts - singular: workflowtaskset - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - status: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - required: - - metadata - - spec - type: object - served: true - storage: true - subresources: - status: {} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: workflowtemplates.argoproj.io -spec: - group: argoproj.io - names: - kind: WorkflowTemplate - listKind: WorkflowTemplateList - plural: workflowtemplates - shortNames: - - wftmpl - singular: workflowtemplate - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - type: object - x-kubernetes-map-type: atomic - x-kubernetes-preserve-unknown-fields: true - required: - - metadata - - spec - type: object - served: true - storage: true ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: argo - namespace: '{{ .Release.Namespace }}' ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: argo-role - namespace: '{{ .Release.Namespace }}' -rules: -- apiGroups: - - coordination.k8s.io - resources: - - leases - verbs: - - create - - get - - update -- apiGroups: - - "" - resources: - - secrets - verbs: - - get ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - rbac.authorization.k8s.io/aggregate-to-admin: "true" - name: argo-aggregate-to-admin -rules: -- apiGroups: - - argoproj.io 
- resources: - - workflows - - workflows/finalizers - - workfloweventbindings - - workfloweventbindings/finalizers - - workflowtemplates - - workflowtemplates/finalizers - - cronworkflows - - cronworkflows/finalizers - - clusterworkflowtemplates - - clusterworkflowtemplates/finalizers - - workflowtasksets - - workflowtasksets/finalizers - - workflowtaskresults - - workflowtaskresults/finalizers - verbs: - - create - - delete - - deletecollection - - get - - list - - patch - - update - - watch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - rbac.authorization.k8s.io/aggregate-to-edit: "true" - name: argo-aggregate-to-edit -rules: -- apiGroups: - - argoproj.io - resources: - - workflows - - workflows/finalizers - - workfloweventbindings - - workfloweventbindings/finalizers - - workflowtemplates - - workflowtemplates/finalizers - - cronworkflows - - cronworkflows/finalizers - - clusterworkflowtemplates - - clusterworkflowtemplates/finalizers - - workflowtaskresults - - workflowtaskresults/finalizers - verbs: - - create - - delete - - deletecollection - - get - - list - - patch - - update - - watch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - rbac.authorization.k8s.io/aggregate-to-view: "true" - name: argo-aggregate-to-view -rules: -- apiGroups: - - argoproj.io - resources: - - workflows - - workflows/finalizers - - workfloweventbindings - - workfloweventbindings/finalizers - - workflowtemplates - - workflowtemplates/finalizers - - cronworkflows - - cronworkflows/finalizers - - clusterworkflowtemplates - - clusterworkflowtemplates/finalizers - - workflowtaskresults - - workflowtaskresults/finalizers - verbs: - - get - - list - - watch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: argo-cluster-role -rules: -- apiGroups: - - "" - resources: - - pods - - pods/exec - verbs: - - create - - get - - list - - watch - - update - - patch - - delete -- 
apiGroups: - - "" - resources: - - configmaps - - nodes - verbs: - - get - - watch - - list -- apiGroups: - - "" - resources: - - persistentvolumeclaims - - persistentvolumeclaims/finalizers - verbs: - - create - - update - - delete - - get -- apiGroups: - - argoproj.io - resources: - - workflows - - workflows/finalizers - - workflowtasksets - - workflowtasksets/finalizers - - workflowartifactgctasks - verbs: - - get - - list - - watch - - update - - patch - - delete - - create -- apiGroups: - - argoproj.io - resources: - - workflowtemplates - - workflowtemplates/finalizers - - clusterworkflowtemplates - - clusterworkflowtemplates/finalizers - verbs: - - get - - list - - watch -- apiGroups: - - argoproj.io - resources: - - workflowtaskresults - verbs: - - get - - list - - watch - - create - - update - - patch - - delete - - deletecollection -- apiGroups: - - "" - resources: - - serviceaccounts - verbs: - - get - - list -- apiGroups: - - argoproj.io - resources: - - cronworkflows - - cronworkflows/finalizers - verbs: - - get - - list - - watch - - update - - patch - - delete -- apiGroups: - - "" - resources: - - events - verbs: - - create - - patch - - get - - list -- apiGroups: - - policy - resources: - - poddisruptionbudgets - verbs: - - create - - get - - delete -- apiGroups: - - "" - resourceNames: - - argo-workflows-agent-ca-certificates - resources: - - secrets - verbs: - - get ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: argo-binding - namespace: '{{ .Release.Namespace }}' -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: argo-role -subjects: -- kind: ServiceAccount - name: argo - namespace: '{{ .Release.Namespace }}' ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: argo-binding -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: argo-cluster-role -subjects: -- kind: ServiceAccount - name: argo - namespace: '{{ .Release.Namespace }}' 
---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: workflow-controller-configmap - namespace: '{{ .Release.Namespace }}' ---- -apiVersion: scheduling.k8s.io/v1 -kind: PriorityClass -metadata: - name: workflow-controller -value: 1000000 ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: amd-gpu-operator-workflow-controller - namespace: '{{ .Release.Namespace }}' -spec: - selector: - matchLabels: - app: amd-gpu-operator-workflow-controller - template: - metadata: - labels: - app: amd-gpu-operator-workflow-controller - spec: - {{- with .Values.controller.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - nodeSelector: {{- toYaml .Values.controller.nodeSelector | nindent 8 }} - containers: - - args: [] - command: - - workflow-controller - env: - - name: LEADER_ELECTION_IDENTITY - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.name - image: {{ .Values.controller.image }} - livenessProbe: - failureThreshold: 3 - httpGet: - path: /healthz - port: 6060 - initialDelaySeconds: 90 - periodSeconds: 60 - timeoutSeconds: 30 - name: workflow-controller - ports: - - containerPort: 9090 - name: metrics - - containerPort: 6060 - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - priorityClassName: workflow-controller - securityContext: - runAsNonRoot: true - serviceAccountName: argo - tolerations: - - key: "amd-gpu-unhealthy" - operator: "Exists" - effect: "NoSchedule" \ No newline at end of file diff --git a/helm-charts-k8s/charts/remediation/values.yaml b/helm-charts-k8s/charts/remediation/values.yaml deleted file mode 100644 index 83339b213..000000000 --- a/helm-charts-k8s/charts/remediation/values.yaml +++ /dev/null @@ -1,11 +0,0 @@ -controller: - image: "quay.io/argoproj/workflow-controller:v3.6.5" - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 1 - preference: - matchExpressions: - - key: 
node-role.kubernetes.io/control-plane - operator: Exists - nodeSelector: {} \ No newline at end of file diff --git a/helm-charts-k8s/templates/post-delete-hook.yaml b/helm-charts-k8s/templates/post-delete-hook.yaml index da7c287d4..05fa5306d 100644 --- a/helm-charts-k8s/templates/post-delete-hook.yaml +++ b/helm-charts-k8s/templates/post-delete-hook.yaml @@ -83,9 +83,37 @@ spec: if kubectl get crds deviceconfigs.amd.com > /dev/null 2>&1; then kubectl delete crds deviceconfigs.amd.com fi + {{- if .Values.remediation.enabled }} if kubectl get crds remediationworkflowstatuses.amd.com > /dev/null 2>&1; then kubectl delete crds remediationworkflowstatuses.amd.com fi + {{- end }} + {{- if and .Values.remediation.enabled .Values.remediation.installCRDs }} + if kubectl get crds clusterworkflowtemplates.argoproj.io > /dev/null 2>&1; then + kubectl delete crds clusterworkflowtemplates.argoproj.io + fi + if kubectl get crds cronworkflows.argoproj.io > /dev/null 2>&1; then + kubectl delete crds cronworkflows.argoproj.io + fi + if kubectl get crds workflows.argoproj.io > /dev/null 2>&1; then + kubectl delete crds workflows.argoproj.io + fi + if kubectl get crds workflowartifactgctasks.argoproj.io > /dev/null 2>&1; then + kubectl delete crds workflowartifactgctasks.argoproj.io + fi + if kubectl get crds workfloweventbindings.argoproj.io > /dev/null 2>&1; then + kubectl delete crds workfloweventbindings.argoproj.io + fi + if kubectl get crds workflowtaskresults.argoproj.io > /dev/null 2>&1; then + kubectl delete crds workflowtaskresults.argoproj.io + fi + if kubectl get crds workflowtasksets.argoproj.io > /dev/null 2>&1; then + kubectl delete crds workflowtasksets.argoproj.io + fi + if kubectl get crds workflowtemplates.argoproj.io > /dev/null 2>&1; then + kubectl delete crds workflowtemplates.argoproj.io + fi + {{- end }} {{- if index .Values "node-feature-discovery" "enabled" }} if kubectl get crds nodefeaturegroups.nfd.k8s-sigs.io > /dev/null 2>&1; then kubectl delete crds 
nodefeaturegroups.nfd.k8s-sigs.io diff --git a/helm-charts-k8s/templates/pre-upgrade-hook.yaml b/helm-charts-k8s/templates/pre-upgrade-hook.yaml index 600edc47b..2fb5e37db 100644 --- a/helm-charts-k8s/templates/pre-upgrade-hook.yaml +++ b/helm-charts-k8s/templates/pre-upgrade-hook.yaml @@ -224,7 +224,19 @@ spec: kubectl apply -f /opt/helm-charts-crds-k8s/module-crd.yaml kubectl apply -f /opt/helm-charts-crds-k8s/nodemodulesconfig-crd.yaml {{- end }} + {{- if .Values.remediation.enabled }} kubectl apply -f /opt/helm-charts-crds-k8s/remediationworkflowstatus-crd.yaml + {{- if .Values.remediation.installCRDs }} + kubectl apply -f /opt/helm-charts-crds-k8s/clusterworkflowtemplate-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/cronworkflow-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/workflowartifactgctask-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/workflow-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/workfloweventbinding-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/workflowtaskresult-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/workflowtaskset-crd.yaml + kubectl apply -f /opt/helm-charts-crds-k8s/workflowtemplate-crd.yaml + {{- end }} + {{- end }} restartPolicy: OnFailure {{- end }} # Run helm upgrade with --no-hooks to bypass the pre-upgrade hook \ No newline at end of file diff --git a/helm-charts-k8s/templates/remediation-deployment.yaml b/helm-charts-k8s/templates/remediation-deployment.yaml new file mode 100644 index 000000000..6aaca100b --- /dev/null +++ b/helm-charts-k8s/templates/remediation-deployment.yaml @@ -0,0 +1,344 @@ +{{- if .Values.remediation.enabled }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: argo + namespace: '{{ .Release.Namespace }}' +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: argo-role + namespace: '{{ .Release.Namespace }}' +rules: +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +- 
apiGroups: + - "" + resources: + - secrets + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + rbac.authorization.k8s.io/aggregate-to-admin: "true" + name: argo-aggregate-to-admin +rules: +- apiGroups: + - argoproj.io + resources: + - workflows + - workflows/finalizers + - workfloweventbindings + - workfloweventbindings/finalizers + - workflowtemplates + - workflowtemplates/finalizers + - cronworkflows + - cronworkflows/finalizers + - clusterworkflowtemplates + - clusterworkflowtemplates/finalizers + - workflowtasksets + - workflowtasksets/finalizers + - workflowtaskresults + - workflowtaskresults/finalizers + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + rbac.authorization.k8s.io/aggregate-to-edit: "true" + name: argo-aggregate-to-edit +rules: +- apiGroups: + - argoproj.io + resources: + - workflows + - workflows/finalizers + - workfloweventbindings + - workfloweventbindings/finalizers + - workflowtemplates + - workflowtemplates/finalizers + - cronworkflows + - cronworkflows/finalizers + - clusterworkflowtemplates + - clusterworkflowtemplates/finalizers + - workflowtaskresults + - workflowtaskresults/finalizers + verbs: + - create + - delete + - deletecollection + - get + - list + - patch + - update + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + rbac.authorization.k8s.io/aggregate-to-view: "true" + name: argo-aggregate-to-view +rules: +- apiGroups: + - argoproj.io + resources: + - workflows + - workflows/finalizers + - workfloweventbindings + - workfloweventbindings/finalizers + - workflowtemplates + - workflowtemplates/finalizers + - cronworkflows + - cronworkflows/finalizers + - clusterworkflowtemplates + - clusterworkflowtemplates/finalizers + - workflowtaskresults + - workflowtaskresults/finalizers + verbs: + - get + 
- list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: argo-cluster-role +rules: +- apiGroups: + - "" + resources: + - pods + - pods/exec + verbs: + - create + - get + - list + - watch + - update + - patch + - delete +- apiGroups: + - "" + resources: + - configmaps + - nodes + verbs: + - get + - watch + - list +- apiGroups: + - "" + resources: + - persistentvolumeclaims + - persistentvolumeclaims/finalizers + verbs: + - create + - update + - delete + - get +- apiGroups: + - argoproj.io + resources: + - workflows + - workflows/finalizers + - workflowtasksets + - workflowtasksets/finalizers + - workflowartifactgctasks + verbs: + - get + - list + - watch + - update + - patch + - delete + - create +- apiGroups: + - argoproj.io + resources: + - workflowtemplates + - workflowtemplates/finalizers + - clusterworkflowtemplates + - clusterworkflowtemplates/finalizers + verbs: + - get + - list + - watch +- apiGroups: + - argoproj.io + resources: + - workflowtaskresults + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + - deletecollection +- apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - get + - list +- apiGroups: + - argoproj.io + resources: + - cronworkflows + - cronworkflows/finalizers + verbs: + - get + - list + - watch + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - get + - list +- apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - create + - get + - delete +- apiGroups: + - "" + resourceNames: + - argo-workflows-agent-ca-certificates + resources: + - secrets + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: argo-binding + namespace: '{{ .Release.Namespace }}' +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: argo-role +subjects: +- kind: ServiceAccount + name: argo + namespace: '{{ .Release.Namespace }}' +--- +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: argo-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: argo-cluster-role +subjects: +- kind: ServiceAccount + name: argo + namespace: '{{ .Release.Namespace }}' +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: amd-gpu-operator-workflow-controller-config + namespace: '{{ .Release.Namespace }}' +data: + instanceID: amd-gpu-operator-remediation-workflow +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: amd-gpu-operator-workflow-controller-pc +value: 1000000 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: amd-gpu-operator-workflow-controller + namespace: '{{ .Release.Namespace }}' +spec: + selector: + matchLabels: + app: amd-gpu-operator-workflow-controller + template: + metadata: + labels: + app: amd-gpu-operator-workflow-controller + spec: + {{- with .Values.controllerManager.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + nodeSelector: {{- toYaml .Values.controllerManager.nodeSelector | nindent 8 }} + containers: + - name: workflow-controller + command: [ "workflow-controller" ] + args: + - "--configmap" + - "amd-gpu-operator-workflow-controller-config" + env: + - name: LEADER_ELECTION_IDENTITY + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + image: {{ .Values.remediation.controller.image }} + livenessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: 6060 + initialDelaySeconds: 90 + periodSeconds: 60 + timeoutSeconds: 30 + ports: + - containerPort: 9090 + name: metrics + - containerPort: 6060 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + priorityClassName: amd-gpu-operator-workflow-controller-pc + securityContext: + runAsNonRoot: true + serviceAccountName: argo + tolerations: + - key: "amd-gpu-unhealthy" + operator: "Exists" + effect: "NoSchedule" +{{- end }} \ 
No newline at end of file diff --git a/helm-charts-k8s/values.yaml b/helm-charts-k8s/values.yaml index 6b14cebe6..943a3949d 100644 --- a/helm-charts-k8s/values.yaml +++ b/helm-charts-k8s/values.yaml @@ -23,6 +23,11 @@ kmm: remediation: # -- Set to true/false to enable/disable the installation of remediation workflow controller enabled: true + # -- Set to true/false to enable/disable the installation of Argo CRDs used by the remediation workflow controller + installCRDs: true + # -- Set the controller image for remediation workflow controller deployment + controller: + image: "quay.io/argoproj/workflow-controller:v3.6.5" # -- Default NFD rule will detect amd gpu based on pci vendor ID installdefaultNFDRule: true # -- CRD will be patched as pre-upgrade/pre-rollback hook when doing helm upgrade/rollback to current helm chart diff --git a/internal/controllers/mock_remediation_handler.go b/internal/controllers/mock_remediation_handler.go index de0fca3b5..4afadfddf 100644 --- a/internal/controllers/mock_remediation_handler.go +++ b/internal/controllers/mock_remediation_handler.go @@ -33,6 +33,7 @@ import ( v1alpha10 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" gomock "go.uber.org/mock/gomock" v1 "k8s.io/api/core/v1" + labels "k8s.io/apimachinery/pkg/labels" controllerruntime "sigs.k8s.io/controller-runtime" ) @@ -267,6 +268,20 @@ func (mr *MockremediationMgrHelperAPIMockRecorder) createWorkflow(ctx, workflow return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "createWorkflow", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).createWorkflow), ctx, workflow) } +// customTaintsChanged mocks base method. +func (m *MockremediationMgrHelperAPI) customTaintsChanged(devConfig *v1alpha1.DeviceConfig) bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "customTaintsChanged", devConfig) + ret0, _ := ret[0].(bool) + return ret0 +} + +// customTaintsChanged indicates an expected call of customTaintsChanged. 
+func (mr *MockremediationMgrHelperAPIMockRecorder) customTaintsChanged(devConfig any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "customTaintsChanged", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).customTaintsChanged), devConfig) +} + // deleteConfigMap mocks base method. func (m *MockremediationMgrHelperAPI) deleteConfigMap(ctx context.Context, name, namespace string) error { m.ctrl.T.Helper() @@ -510,6 +525,18 @@ func (mr *MockremediationMgrHelperAPIMockRecorder) getWorkflowUtilityImage(devCo return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "getWorkflowUtilityImage", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).getWorkflowUtilityImage), devConfig) } +// handleDeviceConfigChanges mocks base method. +func (m *MockremediationMgrHelperAPI) handleDeviceConfigChanges(ctx context.Context, devConfig *v1alpha1.DeviceConfig) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "handleDeviceConfigChanges", ctx, devConfig) +} + +// handleDeviceConfigChanges indicates an expected call of handleDeviceConfigChanges. +func (mr *MockremediationMgrHelperAPIMockRecorder) handleDeviceConfigChanges(ctx, devConfig any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "handleDeviceConfigChanges", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).handleDeviceConfigChanges), ctx, devConfig) +} + // handleExistingWorkflowsOnNode mocks base method. func (m *MockremediationMgrHelperAPI) handleExistingWorkflowsOnNode(ctx context.Context, devConfig *v1alpha1.DeviceConfig, node *v1.Node, mapping ConditionWorkflowMapping) bool { m.ctrl.T.Helper() @@ -735,6 +762,60 @@ func (mr *MockremediationMgrHelperAPIMockRecorder) syncInternalMapFromStatusCR(c return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "syncInternalMapFromStatusCR", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).syncInternalMapFromStatusCR), ctx, namespace) } +// updateCustomTolerations mocks base method. 
+func (m *MockremediationMgrHelperAPI) updateCustomTolerations(ctx context.Context, devConfig *v1alpha1.DeviceConfig) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "updateCustomTolerations", ctx, devConfig) + ret0, _ := ret[0].(error) + return ret0 +} + +// updateCustomTolerations indicates an expected call of updateCustomTolerations. +func (mr *MockremediationMgrHelperAPIMockRecorder) updateCustomTolerations(ctx, devConfig any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "updateCustomTolerations", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).updateCustomTolerations), ctx, devConfig) +} + +// updateCustomTolerationsCache mocks base method. +func (m *MockremediationMgrHelperAPI) updateCustomTolerationsCache(devConfig *v1alpha1.DeviceConfig) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "updateCustomTolerationsCache", devConfig) +} + +// updateCustomTolerationsCache indicates an expected call of updateCustomTolerationsCache. +func (mr *MockremediationMgrHelperAPIMockRecorder) updateCustomTolerationsCache(devConfig any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "updateCustomTolerationsCache", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).updateCustomTolerationsCache), devConfig) +} + +// updateCustomTolerationsOnDaemonset mocks base method. +func (m *MockremediationMgrHelperAPI) updateCustomTolerationsOnDaemonset(ctx context.Context, devConfig *v1alpha1.DeviceConfig, daemonsetLabelSelector labels.Selector, tolerations []v1.Toleration) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "updateCustomTolerationsOnDaemonset", ctx, devConfig, daemonsetLabelSelector, tolerations) + ret0, _ := ret[0].(error) + return ret0 +} + +// updateCustomTolerationsOnDaemonset indicates an expected call of updateCustomTolerationsOnDaemonset. 
+func (mr *MockremediationMgrHelperAPIMockRecorder) updateCustomTolerationsOnDaemonset(ctx, devConfig, daemonsetLabelSelector, tolerations any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "updateCustomTolerationsOnDaemonset", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).updateCustomTolerationsOnDaemonset), ctx, devConfig, daemonsetLabelSelector, tolerations) +} + +// updateCustomTolerationsOnDeployment mocks base method. +func (m *MockremediationMgrHelperAPI) updateCustomTolerationsOnDeployment(ctx context.Context, devConfig *v1alpha1.DeviceConfig, deploymentSelector labels.Selector, tolerations []v1.Toleration) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "updateCustomTolerationsOnDeployment", ctx, devConfig, deploymentSelector, tolerations) + ret0, _ := ret[0].(error) + return ret0 +} + +// updateCustomTolerationsOnDeployment indicates an expected call of updateCustomTolerationsOnDeployment. +func (mr *MockremediationMgrHelperAPIMockRecorder) updateCustomTolerationsOnDeployment(ctx, devConfig, deploymentSelector, tolerations any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "updateCustomTolerationsOnDeployment", reflect.TypeOf((*MockremediationMgrHelperAPI)(nil).updateCustomTolerationsOnDeployment), ctx, devConfig, deploymentSelector, tolerations) +} + // updateMaxParallelWorkflows mocks base method. 
func (m *MockremediationMgrHelperAPI) updateMaxParallelWorkflows(ctx context.Context, devConfig *v1alpha1.DeviceConfig) error { m.ctrl.T.Helper() diff --git a/internal/controllers/remediation_handler.go b/internal/controllers/remediation_handler.go index fc423c11d..f9922d46b 100644 --- a/internal/controllers/remediation_handler.go +++ b/internal/controllers/remediation_handler.go @@ -49,8 +49,10 @@ import ( amdv1alpha1 "github.com/ROCm/gpu-operator/api/v1alpha1" workflowv1alpha1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1" + appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/util/retry" @@ -85,7 +87,9 @@ const ( AbortWorkflowLabelValue = "true" RemediationFilesPath = "/remediation" DefaultInitContainerImage = "busybox:1.36" - ArgoWorkflowControllerConfigMap = "workflow-controller-configmap" + ArgoWorkflowControllerConfigMap = "amd-gpu-operator-workflow-controller-config" + ArgoWorkflowInstaceIDLabelKey = "workflows.argoproj.io/controller-instanceid" + ArgoWorkflowInstaceIDLabelValue = "amd-gpu-operator-remediation-workflow" ) type RecoveryPolicyConfig struct { @@ -212,6 +216,10 @@ func (n *remediationMgr) HandleRemediation(ctx context.Context, devConfig *amdv1 // Populate Workflow Object wf := n.helper.populateWorkflow(ctx, wfTemplate, &mapping, node.Name, devConfig) + // Handle custom taints present in Device config. 
+ // this needs to be done before creating the workflow as taints are applied as part of workflow execution and if there are custom taints defined, those need to be added to workflow tolerations as well + n.helper.handleDeviceConfigChanges(ctx, devConfig) + // Create Workflow if err := n.helper.createWorkflow(ctx, wf); err != nil { logger.Error(err, fmt.Sprintf("Failed to create remediation workflow %s on node %s", mapping.WorkflowTemplate, node.Name)) @@ -316,6 +324,12 @@ type remediationMgrHelperAPI interface { getNodeLabelsFromCR(ctx context.Context, devConfig *amdv1alpha1.DeviceConfig) []string getNodeTaints(ctx context.Context, devConfig *amdv1alpha1.DeviceConfig, nodeCondition string) []string applyTolerationsToWorkflow(wf *workflowv1alpha1.Workflow, devConfig *amdv1alpha1.DeviceConfig, nodeCondition string) + handleDeviceConfigChanges(ctx context.Context, devConfig *amdv1alpha1.DeviceConfig) + updateCustomTolerations(ctx context.Context, devConfig *amdv1alpha1.DeviceConfig) error + updateCustomTolerationsCache(devConfig *amdv1alpha1.DeviceConfig) + updateCustomTolerationsOnDeployment(ctx context.Context, devConfig *amdv1alpha1.DeviceConfig, deploymentSelector labels.Selector, tolerations []v1.Toleration) error + updateCustomTolerationsOnDaemonset(ctx context.Context, devConfig *amdv1alpha1.DeviceConfig, daemonsetLabelSelector labels.Selector, tolerations []v1.Toleration) error + customTaintsChanged(devConfig *amdv1alpha1.DeviceConfig) bool } type remediationMgrHelper struct { @@ -324,14 +338,16 @@ type remediationMgrHelper struct { recoveryTracker *sync.Map serviceAccountName string maxParallelWorkflows int + tolerationsCache *sync.Map } // Initialize remediation manager helper interface func newRemediationMgrHelperHandler(client client.Client, k8sInterface kubernetes.Interface) remediationMgrHelperAPI { return &remediationMgrHelper{ - client: client, - k8sInterface: k8sInterface, - recoveryTracker: new(sync.Map), + client: client, + k8sInterface: 
k8sInterface, + recoveryTracker: new(sync.Map), + tolerationsCache: new(sync.Map), } } @@ -533,6 +549,12 @@ func (h *remediationMgrHelper) createDefaultWorkflowTemplate(ctx context.Context return nil, err } + instanceIDMeta := workflowv1alpha1.Metadata{ + Labels: map[string]string{ + ArgoWorkflowInstaceIDLabelKey: ArgoWorkflowInstaceIDLabelValue, + }, + } + notifyTemplate := &workflowv1alpha1.WorkflowTemplate{ ObjectMeta: metav1.ObjectMeta{ Name: "event-notify-template", @@ -542,7 +564,8 @@ func (h *remediationMgrHelper) createDefaultWorkflowTemplate(ctx context.Context Entrypoint: "notify", Templates: []workflowv1alpha1.Template{ { - Name: "notify", + Name: "notify", + Metadata: instanceIDMeta, Inputs: workflowv1alpha1.Inputs{ Parameters: []workflowv1alpha1.Parameter{ { @@ -606,7 +629,8 @@ func (h *remediationMgrHelper) createDefaultWorkflowTemplate(ctx context.Context Entrypoint: "inbuilt", Templates: []workflowv1alpha1.Template{ { - Name: "inbuilt", + Name: "inbuilt", + Metadata: instanceIDMeta, Steps: []workflowv1alpha1.ParallelSteps{ {Steps: []workflowv1alpha1.WorkflowStep{{Name: "autostart", Template: "suspend", When: "{{workflow.parameters.auto_start}} == 'false'"}}}, // If auto start is disabled, workflow will be created in suspended state and needs to be manually resumed by user {Steps: []workflowv1alpha1.WorkflowStep{{Name: "applylabels", Template: "applylabels"}}}, @@ -667,7 +691,8 @@ func (h *remediationMgrHelper) createDefaultWorkflowTemplate(ctx context.Context }, }, { - Name: "taint", + Name: "taint", + Metadata: instanceIDMeta, Inputs: workflowv1alpha1.Inputs{ Parameters: []workflowv1alpha1.Parameter{ { @@ -686,11 +711,13 @@ func (h *remediationMgrHelper) createDefaultWorkflowTemplate(ctx context.Context }, }, { - Name: "suspend", - Suspend: &workflowv1alpha1.SuspendTemplate{}, + Name: "suspend", + Metadata: instanceIDMeta, + Suspend: &workflowv1alpha1.SuspendTemplate{}, }, { - Name: "drain", + Name: "drain", + Metadata: instanceIDMeta, Inputs: 
workflowv1alpha1.Inputs{ Parameters: []workflowv1alpha1.Parameter{ { @@ -706,6 +733,7 @@ func (h *remediationMgrHelper) createDefaultWorkflowTemplate(ctx context.Context }, { Name: "reboot", + Metadata: instanceIDMeta, Container: &rebootContainer, PodSpecPatch: ` hostPID: true @@ -717,7 +745,8 @@ containers: `, }, { - Name: "test", + Name: "test", + Metadata: instanceIDMeta, Inputs: workflowv1alpha1.Inputs{ Parameters: []workflowv1alpha1.Parameter{ { @@ -768,7 +797,8 @@ containers: }, }, { - Name: "wait", + Name: "wait", + Metadata: instanceIDMeta, Inputs: workflowv1alpha1.Inputs{ Parameters: []workflowv1alpha1.Parameter{ { @@ -787,7 +817,8 @@ containers: }, }, { - Name: "untaint", + Name: "untaint", + Metadata: instanceIDMeta, Inputs: workflowv1alpha1.Inputs{ Parameters: []workflowv1alpha1.Parameter{ { @@ -802,14 +833,16 @@ containers: }, }, { - Name: "failworkflow", + Name: "failworkflow", + Metadata: instanceIDMeta, Script: &workflowv1alpha1.ScriptTemplate{ Source: `echo "Failing workflow" && exit 1`, Container: utilityContainer, }, }, { - Name: "applylabels", + Name: "applylabels", + Metadata: instanceIDMeta, Inputs: workflowv1alpha1.Inputs{ Parameters: []workflowv1alpha1.Parameter{ { @@ -828,7 +861,8 @@ containers: }, }, { - Name: "removelabels", + Name: "removelabels", + Metadata: instanceIDMeta, Inputs: workflowv1alpha1.Inputs{ Parameters: []workflowv1alpha1.Parameter{ { @@ -918,7 +952,7 @@ func (h *remediationMgrHelper) updateMaxParallelWorkflows(ctx context.Context, d } // Update parallelism in Argo workflow controller configmap. 
// https://github.com/argoproj/argo-workflows/blob/main/config/config.go#L69 - acm.Data["parallelism"] = strconv.Itoa(devConfig.Spec.RemediationWorkflow.MaxParallelWorkflows) + acm.Data["namespaceParallelism"] = strconv.Itoa(devConfig.Spec.RemediationWorkflow.MaxParallelWorkflows) return h.client.Update(ctx, acm) }) if err != nil { @@ -936,6 +970,9 @@ func (h *remediationMgrHelper) populateWorkflow(ctx context.Context, wfTemplate ObjectMeta: metav1.ObjectMeta{ GenerateName: fmt.Sprintf("%s-%s-", nodeName, mapping.WorkflowTemplate), Namespace: devConfig.Namespace, + Labels: map[string]string{ + ArgoWorkflowInstaceIDLabelKey: ArgoWorkflowInstaceIDLabelValue, + }, }, Spec: *wfTemplate.Spec.DeepCopy(), }
@@ -1640,3 +1677,212 @@ func (h *remediationMgrHelper) applyTolerationsToWorkflow(wf *workflowv1alpha1.W
 		}
 	}
 }
+
+// customTaintsChanged reports whether the taints configured in devConfig differ
+// from the set recorded in tolerationsCache by the last successful update.
+// A taint counts as changed when its key is missing from the cache, when its
+// effect differs from the cached effect, or when the cache still holds a custom
+// key that is no longer configured.
+func (h *remediationMgrHelper) customTaintsChanged(devConfig *amdv1alpha1.DeviceConfig) bool {
+	taints := devConfig.Spec.RemediationWorkflow.NodeRemediationTaints
+	configured := make(map[string]struct{}, len(taints))
+	for _, taint := range taints {
+		configured[taint.Key] = struct{}{}
+		// The cache stores the taint effect (see updateCustomTolerationsCache),
+		// so the effect, not the taint value, is what must be compared.
+		cached, ok := h.tolerationsCache.Load(taint.Key)
+		if !ok || cached.(string) != string(taint.Effect) {
+			return true
+		}
+	}
+
+	// Detect custom taints that were removed from the DeviceConfig.
+	stale := false
+	h.tolerationsCache.Range(func(key, _ any) bool {
+		name, _ := key.(string)
+		if _, ok := configured[name]; !ok && name != RemediationTaintKey {
+			stale = true
+		}
+		return !stale
+	})
+	return stale
+}
+
+// getTolerations builds the tolerations applied to operator-managed workloads:
+// the default remediation taint, one toleration per user-configured remediation
+// taint and, when controller is true, the two extra tolerations appended for
+// the controller-manager deployment.
+func getTolerations(devConfig *amdv1alpha1.DeviceConfig, controller bool) []v1.Toleration {
+	customTaints := devConfig.Spec.RemediationWorkflow.NodeRemediationTaints
+	tolerations := make([]v1.Toleration, 0, len(customTaints)+3)
+	// Default remediation taint.
+	tolerations = append(tolerations, v1.Toleration{
+		Key:      RemediationTaintKey,
+		Operator: v1.TolerationOpExists,
+		Effect:   v1.TaintEffectNoSchedule,
+	})
+	// User-configured taints are tolerated by key and effect, whatever their value.
+	for _, taint := range customTaints {
+		tolerations = append(tolerations, v1.Toleration{
+			Key:      taint.Key,
+			Operator: v1.TolerationOpExists,
+			Effect:   taint.Effect,
+		})
+	}
+	if controller {
+		tolerations = append(tolerations,
+			v1.Toleration{
+				Key:      "amd-gpu-driver-upgrade",
+				Value:    "true",
+				Operator: v1.TolerationOpEqual,
+				Effect:   v1.TaintEffectNoSchedule,
+			},
+			v1.Toleration{
+				Key:      "amd-dcm",
+				Value:    "up",
+				Operator: v1.TolerationOpEqual,
+			},
+		)
+	}
+	return tolerations
+}
+
+// updateCustomTolerationsOnDeployment replaces the pod-template tolerations of
+// every deployment in devConfig's namespace that matches deploymentSelector.
+// The list-and-update sequence is retried on update conflicts; any other error
+// aborts the operation and is returned.
+func (h *remediationMgrHelper) updateCustomTolerationsOnDeployment(ctx context.Context, devConfig *amdv1alpha1.DeviceConfig, deploymentSelector labels.Selector, tolerations []v1.Toleration) error {
+	logger := log.FromContext(ctx)
+
+	err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
+		deploymentList := &appsv1.DeploymentList{}
+		listOpts := &client.ListOptions{Namespace: devConfig.Namespace, LabelSelector: deploymentSelector}
+		if err := h.client.List(ctx, deploymentList, listOpts); err != nil {
+			logger.Error(err, fmt.Sprintf("Failed to list deployments for given selector: %v", deploymentSelector))
+			return err
+		}
+		for i := range deploymentList.Items {
+			// Existing tolerations on the pod template are overwritten, not merged.
+			deploymentList.Items[i].Spec.Template.Spec.Tolerations = tolerations
+			if err := h.client.Update(ctx, &deploymentList.Items[i]); err != nil {
+				return err
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		logger.Error(err, fmt.Sprintf("Failed to update tolerations in deployment with selector: %v", deploymentSelector))
+		return err
+	}
+	logger.Info(fmt.Sprintf("Updated tolerations in deployment with selector: %v successfully", deploymentSelector))
+	return nil
+}
+
+// updateCustomTolerationsOnDaemonset replaces the pod-template tolerations of
+// every daemonset in devConfig's namespace that matches daemonsetLabelSelector.
+// The list-and-update sequence is retried on update conflicts; any other error
+// aborts the operation and is returned.
+func (h *remediationMgrHelper) updateCustomTolerationsOnDaemonset(ctx context.Context, devConfig *amdv1alpha1.DeviceConfig, daemonsetLabelSelector labels.Selector, tolerations []v1.Toleration) error {
+	logger := log.FromContext(ctx)
+
+	err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
+		daemonsetList := &appsv1.DaemonSetList{}
+		listOpts := &client.ListOptions{Namespace: devConfig.Namespace, LabelSelector: daemonsetLabelSelector}
+		if err := h.client.List(ctx, daemonsetList, listOpts); err != nil {
+			logger.Error(err, fmt.Sprintf("Failed to list daemonsets for given selector: %v", daemonsetLabelSelector))
+			return err
+		}
+		for i := range daemonsetList.Items {
+			// Existing tolerations on the pod template are overwritten, not merged.
+			daemonsetList.Items[i].Spec.Template.Spec.Tolerations = tolerations
+			if err := h.client.Update(ctx, &daemonsetList.Items[i]); err != nil {
+				return err
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		logger.Error(err, fmt.Sprintf("Failed to update tolerations in daemonset with selector: %v", daemonsetLabelSelector))
+		return err
+	}
+	logger.Info(fmt.Sprintf("Updated tolerations in daemonset with selector: %v successfully", daemonsetLabelSelector))
+	return nil
+}
+
+// updateCustomTolerationsCache records the taints that have just been applied:
+// the default remediation taint plus every taint configured in devConfig, each
+// keyed by taint key with the effect as value. Keys that are no longer
+// configured are dropped. The map is updated in place, because swapping the
+// sync.Map pointer would be an unsynchronized write to a shared field.
+func (h *remediationMgrHelper) updateCustomTolerationsCache(devConfig *amdv1alpha1.DeviceConfig) {
+	taints := devConfig.Spec.RemediationWorkflow.NodeRemediationTaints
+	desired := make(map[string]string, len(taints)+1)
+	desired[RemediationTaintKey] = string(v1.TaintEffectNoSchedule)
+	for _, taint := range taints {
+		desired[taint.Key] = string(taint.Effect)
+	}
+
+	h.tolerationsCache.Range(func(key, _ any) bool {
+		name, _ := key.(string)
+		if _, keep := desired[name]; !keep {
+			h.tolerationsCache.Delete(key)
+		}
+		return true
+	})
+	for key, effect := range desired {
+		h.tolerationsCache.Store(key, effect)
+	}
+}
+
+// updateCustomTolerations applies the tolerations derived from devConfig to the
+// workflow controller deployment, the controller-manager deployment and the
+// node-feature-discovery worker daemonset. All three updates are attempted even
+// if one fails. The cache is refreshed only when every update succeeded;
+// otherwise it is left untouched and the first error is returned.
+func (h *remediationMgrHelper) updateCustomTolerations(ctx context.Context, devConfig *amdv1alpha1.DeviceConfig) error {
+	logger := log.FromContext(ctx)
+	tolerations := getTolerations(devConfig, false)
+	controllerTolerations := getTolerations(devConfig, true)
+
+	wfControllerSelector := labels.SelectorFromSet(map[string]string{
+		"app": "amd-gpu-operator-workflow-controller",
+	})
+	controllerSelector := labels.SelectorFromSet(map[string]string{
+		"control-plane": "controller-manager",
+	})
+	daemonsetLabelSelector := labels.SelectorFromSet(map[string]string{
+		"app.kubernetes.io/name": "node-feature-discovery",
+		"role":                   "worker",
+	})
+
+	// The helpers log their own failures; only the first error is kept here.
+	var firstErr error
+	for _, err := range []error{
+		h.updateCustomTolerationsOnDeployment(ctx, devConfig, wfControllerSelector, tolerations),
+		h.updateCustomTolerationsOnDeployment(ctx, devConfig, controllerSelector, controllerTolerations),
+		h.updateCustomTolerationsOnDaemonset(ctx, devConfig, daemonsetLabelSelector, tolerations),
+	} {
+		if err != nil && firstErr == nil {
+			firstErr = err
+		}
+	}
+	if firstErr != nil {
+		return fmt.Errorf("updating custom tolerations: %w", firstErr)
+	}
+
+	h.updateCustomTolerationsCache(devConfig)
+	logger.Info("Updated custom tolerations successfully")
+	return nil
+}
+
+// handleDeviceConfigChanges updates workload tolerations from devConfig when
+// customTaintsChanged reports a difference from the cached taints. Failures are
+// logged and not propagated.
+func (h *remediationMgrHelper) handleDeviceConfigChanges(ctx context.Context, devConfig *amdv1alpha1.DeviceConfig) {
+	if !h.customTaintsChanged(devConfig) {
+		return
+	}
+	if err := h.updateCustomTolerations(ctx, devConfig); err != nil {
+		log.FromContext(ctx).Error(err, "Failed to update custom tolerations")
+	}
+}