From bd01ca2dce002e66098030d9e0d02b21435cc746 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Thu, 26 Mar 2026 18:28:37 +0100 Subject: [PATCH 01/16] Add smoke test infrastructure for end-to-end testing Implements comprehensive smoke testing infrastructure that deploys StackRox Central in a Kind cluster and runs integration tests against real APIs. ## Key Components - Smoke test suite (`smoke/smoke_test.go`): - Tests cluster listing, CVE queries, and deployment detection - Runs against real StackRox deployment in CI - Uses build tag `smoke` to separate from unit tests - Authentication helpers (`smoke/token_helper.go`): - API token generation using HTTP basic auth - Health check polling with exponential backoff - Proper context handling and HTTP method constants - GitHub Actions workflow (`.github/workflows/smoke.yml`): - Deploys StackRox Central and vulnerable workload in Kind - Optimized for CI resources (reduced replicas, disabled features) - Waits for cluster health before running tests - Uploads test results and coverage to Codecov - Test utilities (`internal/testutil/test_helpers.go`): - Moved from integration_helpers.go for better organization - Port allocation for tests - Server readiness polling ## CI Optimizations - Kind cluster configured to maximize available CPU - Minimal StackRox deployment (no admission controller, no collector) - Resource constraints removed from sensor and scanner pods - Scanner image scanning skipped in CI to save resources - Port-forwarding to Central for API access ## Testing - Smoke tests run in dedicated workflow - Excluded from standard test target (uses build tags) - Test artifacts and logs collected on failure - Integration with Codecov for coverage tracking Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/smoke.yml | 202 ++++++++++++++++++++++ .gitignore | 6 + Makefile | 2 +- internal/testutil/integration_helpers.go | 36 ---- internal/testutil/test_helpers.go | 48 +++++ smoke/smoke_test.go | 198 
+++++++++++++++++++++ smoke/testdata/vulnerable-deployment.yaml | 31 ++++ smoke/token_helper.go | 131 ++++++++++++++ 8 files changed, 617 insertions(+), 37 deletions(-) create mode 100644 .github/workflows/smoke.yml create mode 100644 internal/testutil/test_helpers.go create mode 100644 smoke/smoke_test.go create mode 100644 smoke/testdata/vulnerable-deployment.yaml create mode 100644 smoke/token_helper.go diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml new file mode 100644 index 0000000..d5a03c4 --- /dev/null +++ b/.github/workflows/smoke.yml @@ -0,0 +1,202 @@ +name: Smoke Tests + +on: + push: + branches: + - main + pull_request: + types: + - opened + - reopened + - synchronize + +jobs: + smoke: + name: Run Smoke Tests + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Download dependencies + run: go mod download + + - name: Create kind config + run: | + cat <<'EOF' > kind-config.yaml + kind: Cluster + apiVersion: kind.x-k8s.io/v1alpha4 + nodes: + - role: control-plane + kubeadmConfigPatches: + - | + kind: InitConfiguration + nodeRegistration: + kubeletExtraArgs: + # Set reserved to 0 to maximize allocatable CPU for pods + system-reserved: "cpu=0m,memory=256Mi" + kube-reserved: "cpu=0m,memory=256Mi" + EOF + + - name: Create kind cluster + uses: helm/kind-action@v1 + with: + cluster_name: stackrox-mcp-smoke + config: kind-config.yaml + + - name: Show node resources + run: kubectl describe node | grep -A 5 "Allocated resources" + + - name: Clone StackRox repository + run: git clone --depth 1 https://github.com/stackrox/stackrox.git stackrox-repo + + - name: Deploy StackRox Central + env: + MAIN_IMAGE_TAG: latest + SENSOR_HELM_DEPLOY: "true" + ROX_SCANNER_V4: "false" + ADMISSION_CONTROLLER: "false" + SCANNER_REPLICAS: "1" + COLLECTION_METHOD: "no_collection" + run: | + cd stackrox-repo + 
./deploy/k8s/deploy-local.sh + + - name: Remove resource requests to allow scheduling + run: | + # Remove resource constraints from sensor + kubectl set resources deployment/sensor -n stackrox \ + --requests=cpu=0,memory=0 \ + --limits=cpu=0,memory=0 + # Remove resource constraints from scanner for faster image scanning + kubectl set resources deployment/scanner -n stackrox \ + --requests=cpu=0,memory=0 \ + --limits=cpu=0,memory=0 + # Delete pods to force recreation + kubectl delete pods -n stackrox -l app=sensor + kubectl delete pods -n stackrox -l app=scanner + sleep 10 + + - name: Deploy vulnerable workload + run: kubectl apply -f smoke/testdata/vulnerable-deployment.yaml + + - name: Wait for vulnerable deployment + run: kubectl wait --for=condition=available --timeout=120s deployment/vulnerable-app -n vulnerable-apps + + - name: Wait for Central pods ready + run: kubectl wait --for=condition=ready --timeout=180s pod -l app=central -n stackrox + + - name: Wait for Sensor pods ready + run: kubectl wait --for=condition=ready --timeout=300s pod -l app=sensor -n stackrox || echo "Sensor pods not ready yet, will check cluster health" + + - name: Extract Central password + id: extract-password + run: | + PASSWORD="$(cat stackrox-repo/deploy/k8s/central-deploy/password)" + echo "::add-mask::${PASSWORD}" + echo "password=${PASSWORD}" >> "$GITHUB_OUTPUT" + + - name: Setup port-forward to Central + run: | + # Kill any existing port-forward on port 8000 + pkill -f "port-forward.*8000" || true + sleep 2 + # Start port-forward in background + kubectl port-forward -n stackrox svc/central 8000:443 > /tmp/port-forward.log 2>&1 & + sleep 5 + # Verify port-forward is working + if ! curl -k -s https://localhost:8000/v1/ping > /dev/null 2>&1; then + echo "Port-forward failed to start. 
Log:" + cat /tmp/port-forward.log || true + exit 1 + fi + echo "Port-forward established successfully" + + - name: Wait for cluster to be healthy + run: | + echo "Waiting for cluster to register and become healthy..." + PASSWORD="$(cat stackrox-repo/deploy/k8s/central-deploy/password)" + + for i in {1..180}; do + CLUSTER_HEALTH=$(curl -k -s -u "admin:${PASSWORD}" \ + https://localhost:8000/v1/clusters 2>/dev/null | \ + jq -r '.clusters[0].healthStatus.overallHealthStatus // "NOT_FOUND"') + + echo "Attempt $i/180: Cluster health status: $CLUSTER_HEALTH" + + if [ "$CLUSTER_HEALTH" = "HEALTHY" ]; then + echo "Cluster is healthy and ready for testing" + break + fi + + if [ "$i" -eq 180 ]; then + echo "ERROR: Cluster did not become healthy after 180 attempts (6 minutes)" + echo "Current status: $CLUSTER_HEALTH" + exit 1 + fi + + sleep 2 + done + + - name: Install go-junit-report + run: go install github.com/jstemmer/go-junit-report/v2@v2.1.0 + + - name: Run smoke tests with JUnit output + env: + ROX_ENDPOINT: localhost:8000 + ROX_PASSWORD: ${{ steps.extract-password.outputs.password }} + run: | + go test -v -tags=smoke -cover -race -coverprofile=coverage-smoke.out -timeout=20m ./smoke 2>&1 | \ + tee /dev/stderr | \ + go-junit-report -set-exit-code -out junit-smoke.xml + + - name: Upload JUnit test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: junit-smoke-results + path: junit-smoke.xml + if-no-files-found: error + + - name: Upload test results to Codecov + if: always() + uses: codecov/test-results-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: junit-smoke.xml + + - name: Upload coverage to Codecov + if: always() + uses: codecov/codecov-action@v5 + with: + files: ./coverage-smoke.out + token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: false + flags: smoke + name: smoke-tests + + - name: Collect logs + if: always() + run: | + mkdir -p logs + kubectl get pods -A > logs/pods.txt || true + kubectl get events -A 
--sort-by='.lastTimestamp' > logs/events.txt || true + kubectl logs -n vulnerable-apps deployment/vulnerable-app --all-containers=true > logs/vulnerable-app.log || true + kubectl logs -n stackrox deployment/central > logs/central.log || true + kubectl logs -n stackrox deployment/scanner > logs/scanner.log || true + kubectl describe pod -n vulnerable-apps > logs/vulnerable-app-describe.txt || true + + - name: Upload logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: smoke-test-logs + path: logs/ + if-no-files-found: ignore diff --git a/.gitignore b/.gitignore index 0e487cd..cfd074a 100644 --- a/.gitignore +++ b/.gitignore @@ -10,9 +10,14 @@ # Test output /*.out /*junit.xml +/coverage-report.html # Build output /stackrox-mcp +/bin/ + +# Virtual environments +/ENV_DIR/ # Lint output /report.xml @@ -24,6 +29,7 @@ /e2e-tests/.env /e2e-tests/mcp-reports/ /e2e-tests/bin/ +/e2e-tests/**/mcpchecker /e2e-tests/**/*-out.json # WireMock diff --git a/Makefile b/Makefile index 6226b4a..011f318 100644 --- a/Makefile +++ b/Makefile @@ -78,7 +78,7 @@ e2e-test: ## Run E2E tests (uses WireMock) .PHONY: test-coverage-and-junit test-coverage-and-junit: ## Run unit tests with coverage and junit output go install github.com/jstemmer/go-junit-report/v2@v2.1.0 - $(GOTEST) -v -cover -race -coverprofile=$(COVERAGE_OUT) ./... 2>&1 | go-junit-report -set-exit-code -iocopy -out $(JUNIT_OUT) + $(GOTEST) -v -cover -race -coverprofile=$(COVERAGE_OUT) $(shell go list ./... 
| grep -v '/smoke$$') 2>&1 | go-junit-report -set-exit-code -iocopy -out $(JUNIT_OUT) .PHONY: test-integration-coverage test-integration-coverage: ## Run integration tests with coverage diff --git a/internal/testutil/integration_helpers.go b/internal/testutil/integration_helpers.go index 21b48e7..260f146 100644 --- a/internal/testutil/integration_helpers.go +++ b/internal/testutil/integration_helpers.go @@ -8,10 +8,8 @@ import ( "testing" "time" - "github.com/modelcontextprotocol/go-sdk/mcp" "github.com/stackrox/stackrox-mcp/internal/app" "github.com/stackrox/stackrox-mcp/internal/config" - "github.com/stretchr/testify/require" ) // CreateIntegrationTestConfig creates a test configuration for integration tests. @@ -55,37 +53,3 @@ func CreateIntegrationMCPClient(t *testing.T) (*MCPTestClient, error) { return NewMCPTestClient(t, runFunc) } - -// SetupInitializedClient creates an initialized MCP client for testing with automatic cleanup. -func SetupInitializedClient(t *testing.T, createClient func(*testing.T) (*MCPTestClient, error)) *MCPTestClient { - t.Helper() - - client, err := createClient(t) - require.NoError(t, err, "Failed to create MCP client") - t.Cleanup(func() { client.Close() }) - - return client -} - -// CallToolAndGetResult calls a tool and verifies it succeeds. -func CallToolAndGetResult(t *testing.T, client *MCPTestClient, toolName string, args map[string]any) *mcp.CallToolResult { - t.Helper() - - ctx := context.Background() - result, err := client.CallTool(ctx, toolName, args) - require.NoError(t, err) - RequireNoError(t, result) - - return result -} - -// GetTextContent extracts text from the first content item. 
-func GetTextContent(t *testing.T, result *mcp.CallToolResult) string { - t.Helper() - require.NotEmpty(t, result.Content, "should have content in response") - - textContent, ok := result.Content[0].(*mcp.TextContent) - require.True(t, ok, "expected TextContent, got %T", result.Content[0]) - - return textContent.Text -} diff --git a/internal/testutil/test_helpers.go b/internal/testutil/test_helpers.go new file mode 100644 index 0000000..5f995f5 --- /dev/null +++ b/internal/testutil/test_helpers.go @@ -0,0 +1,48 @@ +package testutil + +import ( + "context" + "testing" + + "github.com/modelcontextprotocol/go-sdk/mcp" + "github.com/stretchr/testify/require" +) + +// SetupInitializedClient creates an initialized MCP client with automatic cleanup. +func SetupInitializedClient(t *testing.T, createClient func(*testing.T) (*MCPTestClient, error)) *MCPTestClient { + t.Helper() + + client, err := createClient(t) + require.NoError(t, err, "Failed to create MCP client") + t.Cleanup(func() { _ = client.Close() }) + + return client +} + +// CallToolAndGetResult calls a tool and verifies it succeeds. +func CallToolAndGetResult( + t *testing.T, + client *MCPTestClient, + toolName string, + args map[string]any, +) *mcp.CallToolResult { + t.Helper() + + ctx := context.Background() + result, err := client.CallTool(ctx, toolName, args) + require.NoError(t, err) + RequireNoError(t, result) + + return result +} + +// GetTextContent extracts text from the first content item. 
+func GetTextContent(t *testing.T, result *mcp.CallToolResult) string { + t.Helper() + require.NotEmpty(t, result.Content, "should have content in response") + + textContent, ok := result.Content[0].(*mcp.TextContent) + require.True(t, ok, "expected TextContent, got %T", result.Content[0]) + + return textContent.Text +} diff --git a/smoke/smoke_test.go b/smoke/smoke_test.go new file mode 100644 index 0000000..71c1341 --- /dev/null +++ b/smoke/smoke_test.go @@ -0,0 +1,198 @@ +//go:build smoke + +package smoke + +import ( + "context" + "encoding/json" + "io" + "os" + "testing" + "time" + + "github.com/stackrox/stackrox-mcp/internal/app" + "github.com/stackrox/stackrox-mcp/internal/config" + "github.com/stackrox/stackrox-mcp/internal/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func waitForImageScan(t *testing.T, client *testutil.MCPTestClient, cveName string) { + t.Helper() + + // Skip image scan wait in CI - scanner is too resource-intensive for GitHub Actions + if os.Getenv("CI") == "true" { + t.Log("Skipping image scan wait in CI environment") + return + } + + assert.Eventually(t, func() bool { + ctx := context.Background() + result, err := client.CallTool(ctx, "get_deployments_for_cve", map[string]any{ + "cveName": cveName, + }) + + if err != nil || result.IsError { + return false + } + + responseText := testutil.GetTextContent(t, result) + var data struct { + Deployments []any `json:"deployments"` + } + + if err := json.Unmarshal([]byte(responseText), &data); err != nil { + return false + } + + if len(data.Deployments) > 0 { + t.Logf("Image scan completed, found %d deployment(s) with CVE %s", len(data.Deployments), cveName) + return true + } + + t.Logf("Waiting for image scan (CVE: %s)...", cveName) + return false + }, 10*time.Minute, 5*time.Second, "Image scan did not complete for CVE %s", cveName) +} + +func TestSmoke_RealCluster(t *testing.T) { + if testing.Short() { + t.Skip("Skipping smoke test in short 
mode") + } + + endpoint := os.Getenv("ROX_ENDPOINT") + apiToken := os.Getenv("ROX_API_TOKEN") + password := os.Getenv("ROX_PASSWORD") + + if endpoint == "" { + t.Fatal("ROX_ENDPOINT environment variable must be set") + } + + // Generate token if password provided but no token + if apiToken == "" && password != "" { + t.Log("No API token provided, generating one using password...") + + // Wait for Central to be ready + if err := WaitForCentralReady(endpoint, password, 12); err != nil { + t.Fatalf("Failed waiting for Central: %v", err) + } + t.Log("Central API is ready") + + // Generate token + token, err := GenerateAPIToken(endpoint, password) + if err != nil { + t.Fatalf("Failed to generate API token: %v", err) + } + apiToken = token + t.Log("Successfully generated API token") + } + + if apiToken == "" { + t.Fatal("Either ROX_API_TOKEN or ROX_PASSWORD must be set") + } + + client := createSmokeTestClient(t, endpoint, apiToken) + + // nginx:1.14 has CVE-2019-9511 (HTTP/2 vulnerabilities) + waitForImageScan(t, client, "CVE-2019-9511") + + tests := map[string]struct { + toolName string + args map[string]any + validateFunc func(*testing.T, string) + }{ + "list_clusters": { + toolName: "list_clusters", + args: map[string]any{}, + validateFunc: func(t *testing.T, result string) { + t.Helper() + var data struct { + Clusters []struct { + Name string `json:"name"` + } `json:"clusters"` + } + require.NoError(t, json.Unmarshal([]byte(result), &data)) + assert.NotEmpty(t, data.Clusters, "should have at least one cluster") + t.Logf("Found %d cluster(s)", len(data.Clusters)) + }, + }, + "get_deployments_for_cve with known CVE": { + toolName: "get_deployments_for_cve", + args: map[string]any{"cveName": "CVE-2019-11043"}, + validateFunc: func(t *testing.T, result string) { + t.Helper() + var data struct { + Deployments []struct { + Name string `json:"name"` + Namespace string `json:"namespace"` + } `json:"deployments"` + } + require.NoError(t, json.Unmarshal([]byte(result), 
&data)) + + if len(data.Deployments) == 0 { + t.Log("Warning: No deployments found with CVE. Deployment may not be scanned yet.") + } else { + t.Logf("Found %d deployment(s) with CVE", len(data.Deployments)) + } + }, + }, + "get_deployments_for_cve with non-existent CVE": { + toolName: "get_deployments_for_cve", + args: map[string]any{"cveName": "CVE-9999-99999"}, + validateFunc: func(t *testing.T, result string) { + t.Helper() + var data struct { + Deployments []any `json:"deployments"` + } + require.NoError(t, json.Unmarshal([]byte(result), &data)) + assert.Empty(t, data.Deployments, "should have no deployments for non-existent CVE") + }, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + result := testutil.CallToolAndGetResult(t, client, tt.toolName, tt.args) + responseText := testutil.GetTextContent(t, result) + tt.validateFunc(t, responseText) + }) + } +} + +func createSmokeTestClient(t *testing.T, endpoint, apiToken string) *testutil.MCPTestClient { + t.Helper() + + cfg := &config.Config{ + Central: config.CentralConfig{ + URL: endpoint, + AuthType: "static", + APIToken: apiToken, + InsecureSkipTLSVerify: true, + RequestTimeout: 30 * time.Second, + MaxRetries: 3, + InitialBackoff: time.Second, + MaxBackoff: 10 * time.Second, + }, + Server: config.ServerConfig{ + Type: "stdio", + }, + Tools: config.ToolsConfig{ + Vulnerability: config.ToolsetVulnerabilityConfig{ + Enabled: true, + }, + ConfigManager: config.ToolConfigManagerConfig{ + Enabled: true, + }, + }, + } + + runFunc := func(ctx context.Context, stdin io.ReadCloser, stdout io.WriteCloser) error { + return app.Run(ctx, cfg, stdin, stdout) + } + + client, err := testutil.NewMCPTestClient(t, runFunc) + require.NoError(t, err, "Failed to create MCP client") + t.Cleanup(func() { client.Close() }) + + return client +} diff --git a/smoke/testdata/vulnerable-deployment.yaml b/smoke/testdata/vulnerable-deployment.yaml new file mode 100644 index 0000000..bc7bdf4 --- /dev/null +++ 
b/smoke/testdata/vulnerable-deployment.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: vulnerable-apps +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vulnerable-app + namespace: vulnerable-apps + labels: + app: vulnerable-app +spec: + replicas: 1 + selector: + matchLabels: + app: vulnerable-app + template: + metadata: + labels: + app: vulnerable-app + spec: + containers: + - name: vulnerable-app + image: nginx:1.14 + ports: + - containerPort: 80 + resources: + requests: + cpu: "0" + memory: "0" diff --git a/smoke/token_helper.go b/smoke/token_helper.go new file mode 100644 index 0000000..bafefd4 --- /dev/null +++ b/smoke/token_helper.go @@ -0,0 +1,131 @@ +// Package smoke provides smoke test utilities for testing StackRox MCP server. +package smoke + +import ( + "bytes" + "context" + "crypto/tls" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "time" +) + +const ( + // Timeouts for HTTP requests. + tokenGenerationTimeout = 30 * time.Second + pingTimeout = 5 * time.Second + maxSleepTime = 30 +) + +// GenerateTokenRequest represents the request body for API token generation. +type GenerateTokenRequest struct { + Name string `json:"name"` + Role string `json:"role,omitempty"` +} + +// GenerateTokenResponse represents the response from API token generation. +type GenerateTokenResponse struct { + Token string `json:"token"` +} + +// GenerateAPIToken generates an API token using basic authentication. +// It calls the /v1/apitokens/generate endpoint with admin credentials. 
+func GenerateAPIToken(endpoint, password string) (string, error) { + tokenReq := GenerateTokenRequest{ + Name: "smoke-test-token", + Role: "Admin", + } + + reqBody, err := json.Marshal(tokenReq) + if err != nil { + return "", fmt.Errorf("failed to marshal request: %w", err) + } + + url := fmt.Sprintf("https://%s/v1/apitokens/generate", endpoint) + + req, err := http.NewRequestWithContext(context.Background(), http.MethodPost, url, bytes.NewReader(reqBody)) + if err != nil { + return "", fmt.Errorf("failed to create request: %w", err) + } + + req.SetBasicAuth("admin", password) + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{ + Timeout: tokenGenerationTimeout, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec // Testing with self-signed certificates + }, + } + + resp, err := client.Do(req) + if err != nil { + return "", fmt.Errorf("failed to make request: %w", err) + } + + defer func() { + _ = resp.Body.Close() + }() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("failed to read response: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("token generation failed (status %d): %s", resp.StatusCode, string(body)) + } + + var tokenResp GenerateTokenResponse + if err := json.Unmarshal(body, &tokenResp); err != nil { + return "", fmt.Errorf("failed to parse response: %w", err) + } + + if tokenResp.Token == "" { + return "", errors.New("received empty token in response") + } + + return tokenResp.Token, nil +} + +// WaitForCentralReady polls the /v1/ping endpoint until Central is ready. 
+func WaitForCentralReady(endpoint, password string, maxAttempts int) error { + url := fmt.Sprintf("https://%s/v1/ping", endpoint) + + client := &http.Client{ + Timeout: pingTimeout, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec // Testing with self-signed certificates + }, + } + + for attempt := range maxAttempts { + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, url, nil) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + req.SetBasicAuth("admin", password) + + resp, err := client.Do(req) + if err == nil && resp.StatusCode == http.StatusOK { + _ = resp.Body.Close() + + return nil + } + + if resp != nil { + _ = resp.Body.Close() + } + + // Exponential backoff: 2, 4, 8, 16... seconds (max 30) + sleepTime := min(1< Date: Thu, 26 Mar 2026 18:43:05 +0100 Subject: [PATCH 02/16] Simplify smoke tests by removing CVE testing Remove unnecessary CVE vulnerability testing and scanner deployment to simplify smoke tests and reduce CI resource usage. 
Changes: - Remove CVE test cases (get_deployments_for_cve tests) - Remove waitForImageScan function - Delete vulnerable workload deployment (nginx:1.14) - Disable scanner deployment (SCANNER_REPLICAS: 0) - Replace bash cluster health check with Go code using testify Eventually - Add IsClusterHealthy function for cleaner cluster status checking - Remove scanner resource constraints and log collection - Keep only list_clusters test for basic connectivity verification Benefits: - Faster test execution (no image scanning wait) - Lower CI resource usage (no scanner pod) - Simpler test code (1 test case instead of 3) - Better error handling with testify Eventually - Less bash scripting, more Go code Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/smoke.yml | 44 +----------- smoke/smoke_test.go | 83 +++-------------------- smoke/testdata/vulnerable-deployment.yaml | 31 --------- smoke/token_helper.go | 55 +++++++++++++++ 4 files changed, 67 insertions(+), 146 deletions(-) delete mode 100644 smoke/testdata/vulnerable-deployment.yaml diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index d5a03c4..4f372c8 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -63,33 +63,22 @@ jobs: SENSOR_HELM_DEPLOY: "true" ROX_SCANNER_V4: "false" ADMISSION_CONTROLLER: "false" - SCANNER_REPLICAS: "1" + SCANNER_REPLICAS: "0" COLLECTION_METHOD: "no_collection" run: | cd stackrox-repo ./deploy/k8s/deploy-local.sh - - name: Remove resource requests to allow scheduling + - name: Remove resource requests from sensor run: | # Remove resource constraints from sensor kubectl set resources deployment/sensor -n stackrox \ --requests=cpu=0,memory=0 \ --limits=cpu=0,memory=0 - # Remove resource constraints from scanner for faster image scanning - kubectl set resources deployment/scanner -n stackrox \ - --requests=cpu=0,memory=0 \ - --limits=cpu=0,memory=0 # Delete pods to force recreation kubectl delete pods -n stackrox -l app=sensor - kubectl delete 
pods -n stackrox -l app=scanner sleep 10 - - name: Deploy vulnerable workload - run: kubectl apply -f smoke/testdata/vulnerable-deployment.yaml - - - name: Wait for vulnerable deployment - run: kubectl wait --for=condition=available --timeout=120s deployment/vulnerable-app -n vulnerable-apps - - name: Wait for Central pods ready run: kubectl wait --for=condition=ready --timeout=180s pod -l app=central -n stackrox @@ -119,32 +108,6 @@ jobs: fi echo "Port-forward established successfully" - - name: Wait for cluster to be healthy - run: | - echo "Waiting for cluster to register and become healthy..." - PASSWORD="$(cat stackrox-repo/deploy/k8s/central-deploy/password)" - - for i in {1..180}; do - CLUSTER_HEALTH=$(curl -k -s -u "admin:${PASSWORD}" \ - https://localhost:8000/v1/clusters 2>/dev/null | \ - jq -r '.clusters[0].healthStatus.overallHealthStatus // "NOT_FOUND"') - - echo "Attempt $i/180: Cluster health status: $CLUSTER_HEALTH" - - if [ "$CLUSTER_HEALTH" = "HEALTHY" ]; then - echo "Cluster is healthy and ready for testing" - break - fi - - if [ "$i" -eq 180 ]; then - echo "ERROR: Cluster did not become healthy after 180 attempts (6 minutes)" - echo "Current status: $CLUSTER_HEALTH" - exit 1 - fi - - sleep 2 - done - - name: Install go-junit-report run: go install github.com/jstemmer/go-junit-report/v2@v2.1.0 @@ -188,10 +151,7 @@ jobs: mkdir -p logs kubectl get pods -A > logs/pods.txt || true kubectl get events -A --sort-by='.lastTimestamp' > logs/events.txt || true - kubectl logs -n vulnerable-apps deployment/vulnerable-app --all-containers=true > logs/vulnerable-app.log || true kubectl logs -n stackrox deployment/central > logs/central.log || true - kubectl logs -n stackrox deployment/scanner > logs/scanner.log || true - kubectl describe pod -n vulnerable-apps > logs/vulnerable-app-describe.txt || true - name: Upload logs if: always() diff --git a/smoke/smoke_test.go b/smoke/smoke_test.go index 71c1341..b7d9310 100644 --- a/smoke/smoke_test.go +++ 
b/smoke/smoke_test.go @@ -17,44 +17,6 @@ import ( "github.com/stretchr/testify/require" ) -func waitForImageScan(t *testing.T, client *testutil.MCPTestClient, cveName string) { - t.Helper() - - // Skip image scan wait in CI - scanner is too resource-intensive for GitHub Actions - if os.Getenv("CI") == "true" { - t.Log("Skipping image scan wait in CI environment") - return - } - - assert.Eventually(t, func() bool { - ctx := context.Background() - result, err := client.CallTool(ctx, "get_deployments_for_cve", map[string]any{ - "cveName": cveName, - }) - - if err != nil || result.IsError { - return false - } - - responseText := testutil.GetTextContent(t, result) - var data struct { - Deployments []any `json:"deployments"` - } - - if err := json.Unmarshal([]byte(responseText), &data); err != nil { - return false - } - - if len(data.Deployments) > 0 { - t.Logf("Image scan completed, found %d deployment(s) with CVE %s", len(data.Deployments), cveName) - return true - } - - t.Logf("Waiting for image scan (CVE: %s)...", cveName) - return false - }, 10*time.Minute, 5*time.Second, "Image scan did not complete for CVE %s", cveName) -} - func TestSmoke_RealCluster(t *testing.T) { if testing.Short() { t.Skip("Skipping smoke test in short mode") @@ -91,10 +53,17 @@ func TestSmoke_RealCluster(t *testing.T) { t.Fatal("Either ROX_API_TOKEN or ROX_PASSWORD must be set") } - client := createSmokeTestClient(t, endpoint, apiToken) + // Wait for cluster to be registered and healthy + assert.Eventually(t, func() bool { + healthy := IsClusterHealthy(endpoint, password) + if !healthy { + t.Log("Waiting for cluster to be registered and healthy...") + } + return healthy + }, 6*time.Minute, 2*time.Second, "Cluster did not become healthy") + t.Log("Cluster is healthy and ready for testing") - // nginx:1.14 has CVE-2019-9511 (HTTP/2 vulnerabilities) - waitForImageScan(t, client, "CVE-2019-9511") + client := createSmokeTestClient(t, endpoint, apiToken) tests := map[string]struct { toolName 
string @@ -116,38 +85,6 @@ func TestSmoke_RealCluster(t *testing.T) { t.Logf("Found %d cluster(s)", len(data.Clusters)) }, }, - "get_deployments_for_cve with known CVE": { - toolName: "get_deployments_for_cve", - args: map[string]any{"cveName": "CVE-2019-11043"}, - validateFunc: func(t *testing.T, result string) { - t.Helper() - var data struct { - Deployments []struct { - Name string `json:"name"` - Namespace string `json:"namespace"` - } `json:"deployments"` - } - require.NoError(t, json.Unmarshal([]byte(result), &data)) - - if len(data.Deployments) == 0 { - t.Log("Warning: No deployments found with CVE. Deployment may not be scanned yet.") - } else { - t.Logf("Found %d deployment(s) with CVE", len(data.Deployments)) - } - }, - }, - "get_deployments_for_cve with non-existent CVE": { - toolName: "get_deployments_for_cve", - args: map[string]any{"cveName": "CVE-9999-99999"}, - validateFunc: func(t *testing.T, result string) { - t.Helper() - var data struct { - Deployments []any `json:"deployments"` - } - require.NoError(t, json.Unmarshal([]byte(result), &data)) - assert.Empty(t, data.Deployments, "should have no deployments for non-existent CVE") - }, - }, } for name, tt := range tests { diff --git a/smoke/testdata/vulnerable-deployment.yaml b/smoke/testdata/vulnerable-deployment.yaml deleted file mode 100644 index bc7bdf4..0000000 --- a/smoke/testdata/vulnerable-deployment.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: vulnerable-apps ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vulnerable-app - namespace: vulnerable-apps - labels: - app: vulnerable-app -spec: - replicas: 1 - selector: - matchLabels: - app: vulnerable-app - template: - metadata: - labels: - app: vulnerable-app - spec: - containers: - - name: vulnerable-app - image: nginx:1.14 - ports: - - containerPort: 80 - resources: - requests: - cpu: "0" - memory: "0" diff --git a/smoke/token_helper.go b/smoke/token_helper.go index bafefd4..570c6a8 
100644 --- a/smoke/token_helper.go +++ b/smoke/token_helper.go @@ -31,6 +31,15 @@ type GenerateTokenResponse struct { Token string `json:"token"` } +// ClusterHealthResponse represents the response from /v1/clusters endpoint. +type ClusterHealthResponse struct { + Clusters []struct { + HealthStatus struct { + OverallHealthStatus string `json:"overallHealthStatus"` + } `json:"healthStatus"` + } `json:"clusters"` +} + // GenerateAPIToken generates an API token using basic authentication. // It calls the /v1/apitokens/generate endpoint with admin credentials. func GenerateAPIToken(endpoint, password string) (string, error) { @@ -129,3 +138,49 @@ func WaitForCentralReady(endpoint, password string, maxAttempts int) error { return fmt.Errorf("central did not become ready after %d attempts", maxAttempts) } + +// IsClusterHealthy checks if the first cluster registered with Central is in HEALTHY status. +// Returns true if healthy, false otherwise. +func IsClusterHealthy(endpoint, password string) bool { + url := fmt.Sprintf("https://%s/v1/clusters", endpoint) + + client := &http.Client{ + Timeout: pingTimeout, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec // Testing with self-signed certificates + }, + } + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, url, nil) + if err != nil { + return false + } + + req.SetBasicAuth("admin", password) + + resp, err := client.Do(req) + if err != nil { + return false + } + + defer func() { + _ = resp.Body.Close() + }() + + if resp.StatusCode != http.StatusOK { + return false + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return false + } + + var healthResp ClusterHealthResponse + if err := json.Unmarshal(body, &healthResp); err != nil { + return false + } + + return len(healthResp.Clusters) > 0 && + healthResp.Clusters[0].HealthStatus.OverallHealthStatus == "HEALTHY" +} From dc2090fb1e97c6a9ba0db518bd057b897e144697 Mon Sep 17 00:00:00 
2001 From: Tomasz Janiszewski Date: Thu, 26 Mar 2026 18:46:26 +0100 Subject: [PATCH 03/16] Remove custom kind config since sensor resources are removed anyway The custom kind config was setting CPU reservations to 0 to maximize allocatable resources, but since we remove sensor resource constraints in a later step anyway, the custom config is unnecessary. Using default kind cluster configuration simplifies the workflow. Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/smoke.yml | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index 4f372c8..2bfd70e 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -28,31 +28,10 @@ jobs: - name: Download dependencies run: go mod download - - name: Create kind config - run: | - cat <<'EOF' > kind-config.yaml - kind: Cluster - apiVersion: kind.x-k8s.io/v1alpha4 - nodes: - - role: control-plane - kubeadmConfigPatches: - - | - kind: InitConfiguration - nodeRegistration: - kubeletExtraArgs: - # Set reserved to 0 to maximize allocatable CPU for pods - system-reserved: "cpu=0m,memory=256Mi" - kube-reserved: "cpu=0m,memory=256Mi" - EOF - - name: Create kind cluster uses: helm/kind-action@v1 with: cluster_name: stackrox-mcp-smoke - config: kind-config.yaml - - - name: Show node resources - run: kubectl describe node | grep -A 5 "Allocated resources" - name: Clone StackRox repository run: git clone --depth 1 https://github.com/stackrox/stackrox.git stackrox-repo From b66e3f458b996fcf0a2ba3059af337ea222eeef0 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Thu, 26 Mar 2026 18:55:31 +0100 Subject: [PATCH 04/16] Use checkout action and remove sensor resource constraints Replace git clone with actions/checkout@v4 for better GitHub Actions integration. Remove manual sensor resource constraint removal since simplified deployment (no scanner, no vulnerable workload) should allow sensor to schedule without intervention. 
Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/smoke.yml | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index 2bfd70e..a518acb 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -33,8 +33,11 @@ jobs: with: cluster_name: stackrox-mcp-smoke - - name: Clone StackRox repository - run: git clone --depth 1 https://github.com/stackrox/stackrox.git stackrox-repo + - name: Checkout StackRox repository + uses: actions/checkout@v4 + with: + repository: stackrox/stackrox + path: stackrox-repo - name: Deploy StackRox Central env: @@ -48,16 +51,6 @@ jobs: cd stackrox-repo ./deploy/k8s/deploy-local.sh - - name: Remove resource requests from sensor - run: | - # Remove resource constraints from sensor - kubectl set resources deployment/sensor -n stackrox \ - --requests=cpu=0,memory=0 \ - --limits=cpu=0,memory=0 - # Delete pods to force recreation - kubectl delete pods -n stackrox -l app=sensor - sleep 10 - - name: Wait for Central pods ready run: kubectl wait --for=condition=ready --timeout=180s pod -l app=central -n stackrox From 272bef7b8ef8c0935df1bc22ec416780ae34964e Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Thu, 26 Mar 2026 18:57:03 +0100 Subject: [PATCH 05/16] Remove redundant testing.Short check from smoke test The smoke tests already use build tags (//go:build smoke) to separate them from regular unit tests, so the testing.Short() check is redundant and unnecessary. 
Co-Authored-By: Claude Sonnet 4.5 --- smoke/smoke_test.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/smoke/smoke_test.go b/smoke/smoke_test.go index b7d9310..02f0578 100644 --- a/smoke/smoke_test.go +++ b/smoke/smoke_test.go @@ -18,10 +18,6 @@ import ( ) func TestSmoke_RealCluster(t *testing.T) { - if testing.Short() { - t.Skip("Skipping smoke test in short mode") - } - endpoint := os.Getenv("ROX_ENDPOINT") apiToken := os.Getenv("ROX_API_TOKEN") password := os.Getenv("ROX_PASSWORD") From a2edccf645dbcd507f303b595f7a4cee3ac91c82 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Thu, 26 Mar 2026 18:57:44 +0100 Subject: [PATCH 06/16] Use require.NotEmpty for environment variable checks Replace manual if checks with require.NotEmpty from testify for cleaner and more idiomatic test code. Co-Authored-By: Claude Sonnet 4.5 --- smoke/smoke_test.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/smoke/smoke_test.go b/smoke/smoke_test.go index 02f0578..030fea4 100644 --- a/smoke/smoke_test.go +++ b/smoke/smoke_test.go @@ -22,9 +22,7 @@ func TestSmoke_RealCluster(t *testing.T) { apiToken := os.Getenv("ROX_API_TOKEN") password := os.Getenv("ROX_PASSWORD") - if endpoint == "" { - t.Fatal("ROX_ENDPOINT environment variable must be set") - } + require.NotEmpty(t, endpoint, "ROX_ENDPOINT environment variable must be set") // Generate token if password provided but no token if apiToken == "" && password != "" { @@ -45,9 +43,7 @@ func TestSmoke_RealCluster(t *testing.T) { t.Log("Successfully generated API token") } - if apiToken == "" { - t.Fatal("Either ROX_API_TOKEN or ROX_PASSWORD must be set") - } + require.NotEmpty(t, apiToken, "Either ROX_API_TOKEN or ROX_PASSWORD must be set") // Wait for cluster to be registered and healthy assert.Eventually(t, func() bool { From a3149aaaae53b4d5dadd34b7672d8c7c10d74ec0 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Thu, 26 Mar 2026 18:58:33 +0100 Subject: [PATCH 07/16] Use require.NoError 
for error checks in smoke test Replace manual error checks with require.NoError from testify for consistent error handling throughout the test. Co-Authored-By: Claude Sonnet 4.5 --- smoke/smoke_test.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/smoke/smoke_test.go b/smoke/smoke_test.go index 030fea4..1e80560 100644 --- a/smoke/smoke_test.go +++ b/smoke/smoke_test.go @@ -29,16 +29,13 @@ func TestSmoke_RealCluster(t *testing.T) { t.Log("No API token provided, generating one using password...") // Wait for Central to be ready - if err := WaitForCentralReady(endpoint, password, 12); err != nil { - t.Fatalf("Failed waiting for Central: %v", err) - } + err := WaitForCentralReady(endpoint, password, 12) + require.NoError(t, err, "Failed waiting for Central") t.Log("Central API is ready") // Generate token token, err := GenerateAPIToken(endpoint, password) - if err != nil { - t.Fatalf("Failed to generate API token: %v", err) - } + require.NoError(t, err, "Failed to generate API token") apiToken = token t.Log("Successfully generated API token") } From 266be46cc9f4addd36df0ab5349658bb9b48b988 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Thu, 26 Mar 2026 19:01:33 +0100 Subject: [PATCH 08/16] Simplify smoke test and add build tag to token_helper - Add //go:build smoke tag to token_helper.go - Remove parameterized test structure since there's only one test case - Directly call list_clusters test without map iteration Co-Authored-By: Claude Sonnet 4.5 --- smoke/smoke_test.go | 36 +++++++++--------------------------- smoke/token_helper.go | 2 ++ 2 files changed, 11 insertions(+), 27 deletions(-) diff --git a/smoke/smoke_test.go b/smoke/smoke_test.go index 1e80560..9f87abf 100644 --- a/smoke/smoke_test.go +++ b/smoke/smoke_test.go @@ -54,35 +54,17 @@ func TestSmoke_RealCluster(t *testing.T) { client := createSmokeTestClient(t, endpoint, apiToken) - tests := map[string]struct { - toolName string - args map[string]any - validateFunc 
func(*testing.T, string) - }{ - "list_clusters": { - toolName: "list_clusters", - args: map[string]any{}, - validateFunc: func(t *testing.T, result string) { - t.Helper() - var data struct { - Clusters []struct { - Name string `json:"name"` - } `json:"clusters"` - } - require.NoError(t, json.Unmarshal([]byte(result), &data)) - assert.NotEmpty(t, data.Clusters, "should have at least one cluster") - t.Logf("Found %d cluster(s)", len(data.Clusters)) - }, - }, - } + result := testutil.CallToolAndGetResult(t, client, "list_clusters", map[string]any{}) + responseText := testutil.GetTextContent(t, result) - for name, tt := range tests { - t.Run(name, func(t *testing.T) { - result := testutil.CallToolAndGetResult(t, client, tt.toolName, tt.args) - responseText := testutil.GetTextContent(t, result) - tt.validateFunc(t, responseText) - }) + var data struct { + Clusters []struct { + Name string `json:"name"` + } `json:"clusters"` } + require.NoError(t, json.Unmarshal([]byte(responseText), &data)) + assert.NotEmpty(t, data.Clusters, "should have at least one cluster") + t.Logf("Found %d cluster(s)", len(data.Clusters)) } func createSmokeTestClient(t *testing.T, endpoint, apiToken string) *testutil.MCPTestClient { diff --git a/smoke/token_helper.go b/smoke/token_helper.go index 570c6a8..422d085 100644 --- a/smoke/token_helper.go +++ b/smoke/token_helper.go @@ -1,3 +1,5 @@ +//go:build smoke + // Package smoke provides smoke test utilities for testing StackRox MCP server. 
package smoke From 42944d3764b407a166caf81ac0f998318f348685 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Thu, 26 Mar 2026 19:06:46 +0100 Subject: [PATCH 09/16] Refactor smoke test helpers to use testify assertions - Rename token_helper.go to central_helper_test.go - Change helper functions to accept testing.T and use require.NoError - Replace custom backoff loop in WaitForCentralReady with assert.Eventually - Add t.Helper() to all helper functions - Simplify error handling by using require instead of returning errors Co-Authored-By: Claude Sonnet 4.5 --- ...token_helper.go => central_helper_test.go} | 107 +++++++----------- smoke/smoke_test.go | 9 +- 2 files changed, 42 insertions(+), 74 deletions(-) rename smoke/{token_helper.go => central_helper_test.go} (52%) diff --git a/smoke/token_helper.go b/smoke/central_helper_test.go similarity index 52% rename from smoke/token_helper.go rename to smoke/central_helper_test.go index 422d085..b09c95a 100644 --- a/smoke/token_helper.go +++ b/smoke/central_helper_test.go @@ -1,6 +1,5 @@ //go:build smoke -// Package smoke provides smoke test utilities for testing StackRox MCP server. package smoke import ( @@ -8,32 +7,30 @@ import ( "context" "crypto/tls" "encoding/json" - "errors" "fmt" "io" "net/http" + "testing" "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) const ( - // Timeouts for HTTP requests. tokenGenerationTimeout = 30 * time.Second pingTimeout = 5 * time.Second - maxSleepTime = 30 ) -// GenerateTokenRequest represents the request body for API token generation. type GenerateTokenRequest struct { Name string `json:"name"` Role string `json:"role,omitempty"` } -// GenerateTokenResponse represents the response from API token generation. type GenerateTokenResponse struct { Token string `json:"token"` } -// ClusterHealthResponse represents the response from /v1/clusters endpoint. 
type ClusterHealthResponse struct { Clusters []struct { HealthStatus struct { @@ -43,24 +40,21 @@ type ClusterHealthResponse struct { } // GenerateAPIToken generates an API token using basic authentication. -// It calls the /v1/apitokens/generate endpoint with admin credentials. -func GenerateAPIToken(endpoint, password string) (string, error) { +func GenerateAPIToken(t *testing.T, endpoint, password string) string { + t.Helper() + tokenReq := GenerateTokenRequest{ Name: "smoke-test-token", Role: "Admin", } reqBody, err := json.Marshal(tokenReq) - if err != nil { - return "", fmt.Errorf("failed to marshal request: %w", err) - } + require.NoError(t, err, "Failed to marshal token request") url := fmt.Sprintf("https://%s/v1/apitokens/generate", endpoint) req, err := http.NewRequestWithContext(context.Background(), http.MethodPost, url, bytes.NewReader(reqBody)) - if err != nil { - return "", fmt.Errorf("failed to create request: %w", err) - } + require.NoError(t, err, "Failed to create request") req.SetBasicAuth("admin", password) req.Header.Set("Content-Type", "application/json") @@ -68,88 +62,70 @@ func GenerateAPIToken(endpoint, password string) (string, error) { client := &http.Client{ Timeout: tokenGenerationTimeout, Transport: &http.Transport{ - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec // Testing with self-signed certificates + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec }, } resp, err := client.Do(req) - if err != nil { - return "", fmt.Errorf("failed to make request: %w", err) - } - - defer func() { - _ = resp.Body.Close() - }() + require.NoError(t, err, "Failed to make token generation request") + defer func() { _ = resp.Body.Close() }() body, err := io.ReadAll(resp.Body) - if err != nil { - return "", fmt.Errorf("failed to read response: %w", err) - } + require.NoError(t, err, "Failed to read response body") - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("token generation failed 
(status %d): %s", resp.StatusCode, string(body)) - } + require.Equal(t, http.StatusOK, resp.StatusCode, "Token generation failed: %s", string(body)) var tokenResp GenerateTokenResponse - if err := json.Unmarshal(body, &tokenResp); err != nil { - return "", fmt.Errorf("failed to parse response: %w", err) - } + require.NoError(t, json.Unmarshal(body, &tokenResp), "Failed to parse token response") + require.NotEmpty(t, tokenResp.Token, "Received empty token in response") - if tokenResp.Token == "" { - return "", errors.New("received empty token in response") - } + return tokenResp.Token +} + +// WaitForCentralReady waits for Central API to be ready by polling /v1/ping. +func WaitForCentralReady(t *testing.T, endpoint, password string) { + t.Helper() - return tokenResp.Token, nil + assert.Eventually(t, func() bool { + return isCentralReady(endpoint, password) + }, 2*time.Minute, 2*time.Second, "Central API did not become ready") } -// WaitForCentralReady polls the /v1/ping endpoint until Central is ready. -func WaitForCentralReady(endpoint, password string, maxAttempts int) error { +// isCentralReady checks if Central API responds to /v1/ping. 
+func isCentralReady(endpoint, password string) bool { url := fmt.Sprintf("https://%s/v1/ping", endpoint) client := &http.Client{ Timeout: pingTimeout, Transport: &http.Transport{ - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec // Testing with self-signed certificates + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec }, } - for attempt := range maxAttempts { - req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, url, nil) - if err != nil { - return fmt.Errorf("failed to create request: %w", err) - } - - req.SetBasicAuth("admin", password) - - resp, err := client.Do(req) - if err == nil && resp.StatusCode == http.StatusOK { - _ = resp.Body.Close() - - return nil - } - - if resp != nil { - _ = resp.Body.Close() - } + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, url, nil) + if err != nil { + return false + } - // Exponential backoff: 2, 4, 8, 16... seconds (max 30) - sleepTime := min(1< Date: Thu, 26 Mar 2026 19:15:53 +0100 Subject: [PATCH 10/16] Use require.Eventually instead of assert.Eventually in smoke tests The failed smoke tests showed that assert.Eventually doesn't stop the test when the condition times out - it just logs an error and continues. This caused confusing secondary failures when the cluster health check timed out but the test continued to run list_clusters. Changes: - Use require.Eventually for cluster health check (fail fast on timeout) - Use require.Eventually for Central ready check (fail fast on timeout) - Use require.NotEmpty for cluster list validation - Remove unused assert import from both test files This ensures the test fails immediately with a clear message when prerequisites aren't met, rather than continuing and generating misleading secondary failures. 
Co-Authored-By: Claude Sonnet 4.5 --- smoke/central_helper_test.go | 3 +-- smoke/smoke_test.go | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/smoke/central_helper_test.go b/smoke/central_helper_test.go index b09c95a..0abe328 100644 --- a/smoke/central_helper_test.go +++ b/smoke/central_helper_test.go @@ -13,7 +13,6 @@ import ( "testing" "time" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -86,7 +85,7 @@ func GenerateAPIToken(t *testing.T, endpoint, password string) string { func WaitForCentralReady(t *testing.T, endpoint, password string) { t.Helper() - assert.Eventually(t, func() bool { + require.Eventually(t, func() bool { return isCentralReady(endpoint, password) }, 2*time.Minute, 2*time.Second, "Central API did not become ready") } diff --git a/smoke/smoke_test.go b/smoke/smoke_test.go index 48371c3..fc3448a 100644 --- a/smoke/smoke_test.go +++ b/smoke/smoke_test.go @@ -13,7 +13,6 @@ import ( "github.com/stackrox/stackrox-mcp/internal/app" "github.com/stackrox/stackrox-mcp/internal/config" "github.com/stackrox/stackrox-mcp/internal/testutil" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -38,7 +37,7 @@ func TestSmoke_RealCluster(t *testing.T) { require.NotEmpty(t, apiToken, "Either ROX_API_TOKEN or ROX_PASSWORD must be set") // Wait for cluster to be registered and healthy - assert.Eventually(t, func() bool { + require.Eventually(t, func() bool { healthy := IsClusterHealthy(endpoint, password) if !healthy { t.Log("Waiting for cluster to be registered and healthy...") @@ -58,7 +57,7 @@ func TestSmoke_RealCluster(t *testing.T) { } `json:"clusters"` } require.NoError(t, json.Unmarshal([]byte(responseText), &data)) - assert.NotEmpty(t, data.Clusters, "should have at least one cluster") + require.NotEmpty(t, data.Clusters, "should have at least one cluster") t.Logf("Found %d cluster(s)", len(data.Clusters)) } From 82186d766abd9127d6b6c81cabca9157e6357b8c Mon Sep 17 
00:00:00 2001 From: Tomasz Janiszewski Date: Fri, 27 Mar 2026 12:42:38 +0100 Subject: [PATCH 11/16] Fix sensor pod scheduling by removing CPU resource constraints Root cause: Sensor pods were failing to schedule due to insufficient CPU on kind cluster nodes. The deployment had resource requests that exceeded available capacity. Error: "0/1 nodes are available: 1 Insufficient cpu" Fix: - Add step to remove resource requests from sensor deployment - This triggers a rollout with pods that can be scheduled - Wait for sensor pods to become ready after constraint removal - Remove fallback message from sensor wait (fail fast if not ready) This restores functionality that was accidentally removed when we eliminated the custom kind config. Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/smoke.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index a518acb..08694a6 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -54,8 +54,12 @@ jobs: - name: Wait for Central pods ready run: kubectl wait --for=condition=ready --timeout=180s pod -l app=central -n stackrox + - name: Remove resource constraints from Sensor + run: | + kubectl patch deployment sensor -n stackrox --type='json' -p='[{"op": "remove", "path": "/spec/template/spec/containers/0/resources/requests"}]' + - name: Wait for Sensor pods ready - run: kubectl wait --for=condition=ready --timeout=300s pod -l app=sensor -n stackrox || echo "Sensor pods not ready yet, will check cluster health" + run: kubectl wait --for=condition=ready --timeout=300s pod -l app=sensor -n stackrox - name: Extract Central password id: extract-password @@ -70,12 +74,12 @@ jobs: pkill -f "port-forward.*8000" || true sleep 2 # Start port-forward in background - kubectl port-forward -n stackrox svc/central 8000:443 > /tmp/port-forward.log 2>&1 & + kubectl port-forward -n stackrox svc/central 8000:443 > logs/port-forward.log 2>&1 
& sleep 5 # Verify port-forward is working if ! curl -k -s https://localhost:8000/v1/ping > /dev/null 2>&1; then echo "Port-forward failed to start. Log:" - cat /tmp/port-forward.log || true + cat logs/port-forward.log || true exit 1 fi echo "Port-forward established successfully" From 831a0b7b28f0d505e45575b38d2bb5e65e241f3e Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Fri, 27 Mar 2026 12:48:59 +0100 Subject: [PATCH 12/16] Fix sensor resource constraints removal to use replace instead of remove The previous approach tried to remove the requests field, but the pod still failed with insufficient CPU even after patching. This suggests either the limits field was still present, or the remove operation didn't work as expected with the Helm-deployed sensor. New approach: - Use JSON patch "replace" operation instead of "remove" - Set entire resources field to empty object {} - This removes both limits and requests in one operation - Use kubectl rollout status to wait for deployment to complete instead of kubectl wait on pods (which fails when pods don't exist) The replace operation is more reliable than remove when the structure might vary (Helm vs direct deployment). 
Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/smoke.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index 08694a6..e5c2b0a 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -56,10 +56,12 @@ jobs: - name: Remove resource constraints from Sensor run: | - kubectl patch deployment sensor -n stackrox --type='json' -p='[{"op": "remove", "path": "/spec/template/spec/containers/0/resources/requests"}]' + # Replace resources with empty object to remove all limits and requests + kubectl patch deployment sensor -n stackrox --type='json' \ + -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/resources", "value": {}}]' - - name: Wait for Sensor pods ready - run: kubectl wait --for=condition=ready --timeout=300s pod -l app=sensor -n stackrox + # Wait for the rollout to complete + kubectl rollout status deployment/sensor -n stackrox --timeout=300s - name: Extract Central password id: extract-password From 681bb2b78955d5cadc1a5a163a4fbfe493425990 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Fri, 27 Mar 2026 13:36:05 +0100 Subject: [PATCH 13/16] Add debugging and patch all containers in sensor deployment Previous attempts failed even after patching because: 1. Only patched container[0], but there may be multiple containers 2. Need to verify patch actually removed resources 3. May have LimitRange enforcing minimum resources Changes: - Add debugging to show containers and resources before/after patch - Check for LimitRange in stackrox namespace - Dynamically detect number of containers and patch all of them - Show resources before and after patching to verify it worked This will help diagnose why pods still fail with "Insufficient cpu" even after resources field is set to empty object. 
Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/smoke.yml | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index e5c2b0a..a01bc6b 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -56,9 +56,26 @@ jobs: - name: Remove resource constraints from Sensor run: | - # Replace resources with empty object to remove all limits and requests - kubectl patch deployment sensor -n stackrox --type='json' \ - -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/resources", "value": {}}]' + echo "Checking sensor deployment before patch..." + kubectl get deployment sensor -n stackrox -o jsonpath='{.spec.template.spec.containers[*].name}' && echo + kubectl get deployment sensor -n stackrox -o jsonpath='{.spec.template.spec.containers[*].resources}' && echo + + # Check for LimitRange in namespace + kubectl get limitrange -n stackrox || echo "No LimitRange found" + + # Replace resources with empty object to remove all limits and requests from all containers + num_containers=$(kubectl get deployment sensor -n stackrox -o jsonpath='{.spec.template.spec.containers[*].name}' | wc -w) + echo "Found $num_containers container(s) in sensor deployment" + + # Patch all containers + for i in $(seq 0 $((num_containers - 1))); do + echo "Patching container $i..." + kubectl patch deployment sensor -n stackrox --type='json' \ + -p='[{"op": "replace", "path": "/spec/template/spec/containers/'$i'/resources", "value": {}}]' || echo "Failed to patch container $i" + done + + echo "Checking sensor deployment after patch..." 
+ kubectl get deployment sensor -n stackrox -o jsonpath='{.spec.template.spec.containers[*].resources}' && echo # Wait for the rollout to complete kubectl rollout status deployment/sensor -n stackrox --timeout=300s From cfaba4795d1626f278465012ffad6594dbc37673 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Fri, 27 Mar 2026 13:45:46 +0100 Subject: [PATCH 14/16] Restore working sensor resource removal approach The previous debugging revealed that the patch was successful (resources were set to {}), but pods still couldn't schedule. The issue is that we need to force pod recreation after removing resources. Restore the approach that was working before commit b66e3f4: 1. Use `kubectl set resources` with cpu=0,memory=0 (cleaner than patch) 2. Delete sensor pods to force immediate recreation 3. Wait for new pods with empty resources to be created and ready This approach worked in earlier successful runs. The key insight is that just patching the deployment and waiting for rollout isn't enough - we need to delete and recreate the pods to ensure they schedule with the updated (empty) resource spec. Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/smoke.yml | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index a01bc6b..0502dbd 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -56,29 +56,19 @@ jobs: - name: Remove resource constraints from Sensor run: | - echo "Checking sensor deployment before patch..." 
- kubectl get deployment sensor -n stackrox -o jsonpath='{.spec.template.spec.containers[*].name}' && echo - kubectl get deployment sensor -n stackrox -o jsonpath='{.spec.template.spec.containers[*].resources}' && echo + # Use kubectl set resources to remove all resource constraints + kubectl set resources deployment/sensor -n stackrox \ + --requests=cpu=0,memory=0 \ + --limits=cpu=0,memory=0 - # Check for LimitRange in namespace - kubectl get limitrange -n stackrox || echo "No LimitRange found" + # Delete sensor pods to force recreation with new (empty) resources + kubectl delete pods -n stackrox -l app=sensor - # Replace resources with empty object to remove all limits and requests from all containers - num_containers=$(kubectl get deployment sensor -n stackrox -o jsonpath='{.spec.template.spec.containers[*].name}' | wc -w) - echo "Found $num_containers container(s) in sensor deployment" + # Wait a bit for pods to be deleted and recreated + sleep 10 - # Patch all containers - for i in $(seq 0 $((num_containers - 1))); do - echo "Patching container $i..." - kubectl patch deployment sensor -n stackrox --type='json' \ - -p='[{"op": "replace", "path": "/spec/template/spec/containers/'$i'/resources", "value": {}}]' || echo "Failed to patch container $i" - done - - echo "Checking sensor deployment after patch..." - kubectl get deployment sensor -n stackrox -o jsonpath='{.spec.template.spec.containers[*].resources}' && echo - - # Wait for the rollout to complete - kubectl rollout status deployment/sensor -n stackrox --timeout=300s + # Wait for new pods to be ready + kubectl wait --for=condition=ready --timeout=300s pod -l app=sensor -n stackrox - name: Extract Central password id: extract-password From c2e4c36e4b17488ac8d23ab8ddadb44e89087480 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Fri, 27 Mar 2026 13:50:01 +0100 Subject: [PATCH 15/16] Create logs directory before port-forward The sensor resource removal now works correctly - sensor pod became ready! 
But the port-forward step failed because it tried to redirect output to logs/port-forward.log before the logs directory existed. The "Collect logs" step creates the logs directory, but that happens later. The port-forward step runs earlier and needs the directory to exist. Simple fix: Create logs directory at the start of the port-forward step. Co-Authored-By: Claude Sonnet 4.5 --- .github/workflows/smoke.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index 0502dbd..a223544 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -79,12 +79,17 @@ jobs: - name: Setup port-forward to Central run: | + # Create logs directory + mkdir -p logs + # Kill any existing port-forward on port 8000 pkill -f "port-forward.*8000" || true sleep 2 + # Start port-forward in background kubectl port-forward -n stackrox svc/central 8000:443 > logs/port-forward.log 2>&1 & sleep 5 + # Verify port-forward is working if ! curl -k -s https://localhost:8000/v1/ping > /dev/null 2>&1; then echo "Port-forward failed to start. Log:" From 42cd1b6723d7fbd4c582e09b337f5c1a7da3df90 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Fri, 27 Mar 2026 17:34:54 +0100 Subject: [PATCH 16/16] Remove comment above cluster health wait in smoke test Signed-off-by: Tomasz Janiszewski --- smoke/smoke_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/smoke/smoke_test.go b/smoke/smoke_test.go index fc3448a..a2bbe44 100644 --- a/smoke/smoke_test.go +++ b/smoke/smoke_test.go @@ -36,7 +36,6 @@ func TestSmoke_RealCluster(t *testing.T) { require.NotEmpty(t, apiToken, "Either ROX_API_TOKEN or ROX_PASSWORD must be set") - // Wait for cluster to be registered and healthy require.Eventually(t, func() bool { healthy := IsClusterHealthy(endpoint, password) if !healthy {