diff --git a/.github/workflows/int-test-workflow.yml b/.github/workflows/int-test-workflow.yml index 001a34cee..8ffc0ec0c 100644 --- a/.github/workflows/int-test-workflow.yml +++ b/.github/workflows/int-test-workflow.yml @@ -194,9 +194,9 @@ jobs: mkdir -p ./bin cp /snap/bin/kustomize ./bin/kustomize - name: Run Integration test - timeout-minutes: 240 + timeout-minutes: 270 env: - TEST_TIMEOUT: 225m + TEST_TIMEOUT: 255m TEST_S3_ACCESS_KEY_ID: ${{ vars.TEST_S3_ACCESS_KEY_ID }} TEST_S3_SECRET_ACCESS_KEY: ${{ secrets.TEST_S3_SECRET_ACCESS_KEY }} run: | diff --git a/pkg/splunk/enterprise/afwscheduler.go b/pkg/splunk/enterprise/afwscheduler.go index 17cfb0ce4..699a59f3e 100644 --- a/pkg/splunk/enterprise/afwscheduler.go +++ b/pkg/splunk/enterprise/afwscheduler.go @@ -1688,13 +1688,45 @@ func (shcPlaybookContext *SHCPlaybookContext) isBundlePushComplete(ctx context.C return false, err } - // Check if we did not get the desired output in the status file. There can be 2 scenarios - + // Check if we did not get the desired output in the status file. There can be 3 scenarios - // 1. stdOut is empty, which means bundle push is still in progress - // 2. stdOut has some other string other than the bundle push success message + // 2. stdOut contains only informational lines (e.g. the FIPS provider banner written to + // stderr by the Splunk CLI on FIPS-enabled clusters, captured via the &> shell redirect + // in applySHCBundleCmdStr before the actual push output is written) + // 3. stdOut has some other string other than the bundle push success message if stdOut == "" { scopedLog.Info("SHC Bundle Push is still in progress") return false, nil } else if !strings.Contains(stdOut, shcBundlePushCompleteStr) { + // Check whether the file contains only known informational lines. On FIPS-enabled + // clusters the Splunk binary immediately writes the FIPS provider banner (and SSL + // warnings) to stderr at startup; because the bundle push command uses &> to + // redirect all output to the status file, these lines appear in the file before the + // actual push result. Treat such content as "still in progress" so we do not + // prematurely abort a running push and trigger a retry storm. + // + // IMPORTANT: SSL certificate warnings ("WARNING: Server Certificate ...") are only + // treated as informational when the FIPS provider banner is also present. On non-FIPS + // clusters the Splunk CLI can also emit SSL warnings (e.g. when hostname validation is + // disabled), but if those warnings are the only content in the status file it means the + // push failed silently — we must fall through to error/retry rather than waiting forever. + hasFIPSContent := strings.Contains(stdOut, splunkFIPSProviderBannerStr) + hasMeaningfulContent := false + for _, line := range strings.Split(stdOut, "\n") { + trimmed := strings.TrimSpace(line) + if trimmed == "" || + strings.HasPrefix(trimmed, splunkFIPSProviderBannerStr) || + (hasFIPSContent && strings.HasPrefix(trimmed, splunkSSLCertWarnStr)) { + continue + } + hasMeaningfulContent = true + break + } + if !hasMeaningfulContent { + scopedLog.Info("SHC Bundle Push is still in progress (status file contains only informational messages)") + return false, nil + } + // this means there was an error in bundle push command err = fmt.Errorf("there was an error in applying SHC Bundle, err=\"%v\"", stdOut) scopedLog.Error(err, "SHC Bundle push status file reported an error while applying bundle") @@ -2078,7 +2110,16 @@ func handleEsappPostinstall(rctx context.Context, preCtx *premiumAppScopePlayboo streamOptions := splutil.NewStreamOptionsObject(command) stdOut, stdErr, err := preCtx.localCtx.podExecClient.RunPodExecCommand(rctx, streamOptions, []string{"/bin/sh"}) - if stdErr != "" || err != nil { + + // Log stderr content for debugging but don't use it for error detection. + // On FIPS-enabled clusters the Splunk CLI always writes the FIPS provider + // banner and related informational messages to stderr on every invocation, + // so a non-empty stderr does not indicate failure. + if stdErr != "" { + scopedLog.Info("Post install command stderr output (informational only)", "stdout", stdOut, "stderr", stdErr, "post install command", command) + } + + if err != nil { phaseInfo.FailCount++ scopedLog.Error(err, "premium scoped app package install failed", "stdout", stdOut, "stderr", stdErr, "post install command", command, "failCount", phaseInfo.FailCount) return fmt.Errorf("premium scoped app package install failed. stdOut: %s, stdErr: %s, post install command: %s, failCount: %d", stdOut, stdErr, command, phaseInfo.FailCount) diff --git a/pkg/splunk/enterprise/afwscheduler_test.go b/pkg/splunk/enterprise/afwscheduler_test.go index 53d8eeef1..b055e5f59 100644 --- a/pkg/splunk/enterprise/afwscheduler_test.go +++ b/pkg/splunk/enterprise/afwscheduler_test.go @@ -4741,3 +4741,366 @@ func TestIsAppAlreadyInstalled(t *testing.T) { }) } } + +func TestSHCIsBundlePushComplete(t *testing.T) { + ctx := context.TODO() + cr := &enterpriseApi.SearchHeadCluster{ + TypeMeta: metav1.TypeMeta{ + Kind: "SearchHeadCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "stack1", + Namespace: "test", + }, + } + + c := spltest.NewMockClient() + + catCmd := fmt.Sprintf("cat %s", shcBundlePushStatusCheckFile) + rmCmd := fmt.Sprintf("rm %s", shcBundlePushStatusCheckFile) + + tests := []struct { + name string + catStdOut string + catStdErr string + catErr error + expectsRemoval bool + removalStdErr string + expectedResult bool + expectedError bool + description string + }{ + { + name: "empty stdOut - bundle push still in progress", + catStdOut: "", + catStdErr: "", + catErr: nil, + expectsRemoval: false, + expectedResult: false, + expectedError: false, + description: "Empty status file means push still in progress", + }, + { + name: "FIPS provider banner only - treated as still in progress", + catStdOut: splunkFIPSProviderBannerStr, + catStdErr: "", + catErr: nil, + expectsRemoval: false, + expectedResult: false, + expectedError: false, + description: "Status file with only FIPS banner should not be treated as an error", + }, + { + name: "FIPS banner and WARNING lines only - treated as still in progress", + catStdOut: splunkFIPSProviderBannerStr + "\n" + splunkSSLCertWarnStr + " Validation Disabled\n", + catStdErr: "", + catErr: nil, + expectsRemoval: false, + expectedResult: false, + expectedError: false, + description: "Status file with FIPS banner and SSL warnings should not be treated as an error", + }, + { + name: "FIPS banner and blank lines only - treated as still in progress", + catStdOut: "\n" + splunkFIPSProviderBannerStr + "\n\n", + catStdErr: "", + catErr: nil, + expectsRemoval: false, + expectedResult: false, + expectedError: false, + description: "Blank lines alongside FIPS banner should still be treated as informational", + }, + { + name: "FIPS banner followed by real error content - treated as error", + catStdOut: splunkFIPSProviderBannerStr + "\nError applying bundle: permission denied", + catStdErr: "", + catErr: nil, + expectsRemoval: true, + removalStdErr: "", + expectedResult: false, + expectedError: true, + description: "Meaningful error content after FIPS banner should cause an error", + }, + { + name: "SSL WARNING only without FIPS banner - treated as error", + catStdOut: splunkSSLCertWarnStr + " Hostname Validation is disabled.", + catStdErr: "", + catErr: nil, + expectsRemoval: true, + removalStdErr: "", + expectedResult: false, + expectedError: true, + description: "SSL warning without FIPS banner means a silent failure on non-FIPS clusters; must not hang waiting for a push that already exited", + }, + { + name: "SSL WARNING only without FIPS banner (multiple lines) - treated as error", + catStdOut: splunkSSLCertWarnStr + " Hostname Validation is disabled.\n" + splunkSSLCertWarnStr + " Validation Disabled\n", + catStdErr: "", + catErr: nil, + expectsRemoval: true, + removalStdErr: "", + expectedResult: false, + expectedError: true, + description: "Multiple SSL warnings without FIPS banner must not suppress error detection on non-FIPS clusters", + }, + { + name: "meaningful error in stdOut - treated as error", + catStdOut: "Error while deploying apps", + catStdErr: "", + catErr: nil, + expectsRemoval: true, + removalStdErr: "", + expectedResult: false, + expectedError: true, + description: "Non-success, non-FIPS content is a real bundle push error", + }, + { + name: "stdErr from cat command - error", + catStdOut: "", + catStdErr: "cat: no such file or directory", + catErr: nil, + expectsRemoval: true, + removalStdErr: "", + expectedResult: false, + expectedError: true, + description: "Stderr from status file read indicates a failure", + }, + { + name: "exec error from cat command - error", + catStdOut: "", + catStdErr: "", + catErr: fmt.Errorf("pod exec failed"), + expectsRemoval: true, + removalStdErr: "", + expectedResult: false, + expectedError: true, + description: "Exec error when reading status file should propagate", + }, + { + name: "bundle push complete success string - complete", + catStdOut: shcBundlePushCompleteStr, + catStdErr: "", + catErr: nil, + expectsRemoval: true, + removalStdErr: "", + expectedResult: true, + expectedError: false, + description: "Status file with success string means push complete", + }, + { + name: "FIPS banner preceding success string - complete", + catStdOut: splunkFIPSProviderBannerStr + "\n" + shcBundlePushCompleteStr, + catStdErr: "", + catErr: nil, + expectsRemoval: true, + removalStdErr: "", + expectedResult: true, + expectedError: false, + description: "FIPS banner before success string should still be recognized as complete", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + appDeployContext := &enterpriseApi.AppDeploymentContext{ + BundlePushStatus: enterpriseApi.BundlePushTracker{ + BundlePushStage: enterpriseApi.BundlePushInProgress, + }, + } + afwPipeline := &AppInstallPipeline{ + appDeployContext: appDeployContext, + } + + mockPodExecClient := &spltest.MockPodExecClient{Cr: cr} + + podExecCmds := []string{catCmd} + mockReturnCtxts := []*spltest.MockPodExecReturnContext{ + {StdOut: tt.catStdOut, StdErr: tt.catStdErr, Err: tt.catErr}, + } + + if tt.expectsRemoval { + podExecCmds = append(podExecCmds, rmCmd) + mockReturnCtxts = append(mockReturnCtxts, &spltest.MockPodExecReturnContext{ + StdOut: "", + StdErr: tt.removalStdErr, + }) + } + + mockPodExecClient.AddMockPodExecReturnContexts(ctx, podExecCmds, mockReturnCtxts...) + + shcCtx := &SHCPlaybookContext{ + client: c, + cr: cr, + afwPipeline: afwPipeline, + targetPodName: "splunk-stack1-searchheadcluster-0", + podExecClient: mockPodExecClient, + } + + result, err := shcCtx.isBundlePushComplete(ctx) + + if tt.expectedError { + if err == nil { + t.Errorf("Expected error for %q but got none", tt.description) + } + } else { + if err != nil { + t.Errorf("Unexpected error for %q: %v", tt.description, err) + } + } + + if result != tt.expectedResult { + t.Errorf("Expected result %v but got %v for %q", tt.expectedResult, result, tt.description) + } + }) + } +} + +func TestHandleEsappPostinstallFipsAware(t *testing.T) { + ctx := context.TODO() + + cr := enterpriseApi.Standalone{ + TypeMeta: metav1.TypeMeta{ + Kind: "Standalone", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "stack1", + Namespace: "test", + }, + Spec: enterpriseApi.StandaloneSpec{ + AppFrameworkConfig: enterpriseApi.AppFrameworkSpec{ + AppSources: []enterpriseApi.AppSourceSpec{ + { + Name: "appSrc1", + AppSourceDefaultSpec: enterpriseApi.AppSourceDefaultSpec{ + Scope: enterpriseApi.ScopePremiumApps, + PremiumAppsProps: enterpriseApi.PremiumAppsProps{ + Type: enterpriseApi.PremiumAppsTypeEs, + }, + }, + }, + }, + }, + }, + } + + appSrcSpec := &cr.Spec.AppFrameworkConfig.AppSources[0] + // The command registered in the mock only needs a prefix since matching uses strings.Contains. + esPostInstallCmdPrefix := "/opt/splunk/bin/splunk search" + + tests := []struct { + name string + stdOut string + stdErr string + execErr error + expectedError bool + description string + }{ + { + name: "success with no stderr - no error", + stdOut: "Successfully installed", + stdErr: "", + execErr: nil, + expectedError: false, + description: "Clean success should return nil", + }, + { + name: "success with FIPS stderr - no error", + stdOut: "Successfully installed", + stdErr: splunkFIPSProviderBannerStr, + execErr: nil, + expectedError: false, + description: "Stderr content alone should not cause failure on FIPS-enabled clusters", + }, + { + name: "success with WARNING stderr - no error", + stdOut: "Successfully installed", + stdErr: splunkSSLCertWarnStr + " Validation Disabled", + execErr: nil, + expectedError: false, + description: "SSL warning in stderr alone should not cause failure", + }, + { + name: "exec error with no stderr - error", + stdOut: "", + stdErr: "", + execErr: fmt.Errorf("command terminated with exit code 1"), + expectedError: true, + description: "A real exec error must be surfaced", + }, + { + name: "exec error with FIPS stderr - error", + stdOut: "", + stdErr: splunkFIPSProviderBannerStr, + execErr: fmt.Errorf("essinstall failed"), + expectedError: true, + description: "Exec error takes precedence even when stderr carries only FIPS banner", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockPodExecClient := &spltest.MockPodExecClient{} + + mockPodExecClient.AddMockPodExecReturnContext(ctx, esPostInstallCmdPrefix, &spltest.MockPodExecReturnContext{ + StdOut: tt.stdOut, + StdErr: tt.stdErr, + Err: tt.execErr, + }) + + var replicas int32 = 1 + sts := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "splunk-stack1", + Namespace: "test", + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: &replicas, + }, + } + + c := spltest.NewMockClient() + var client splcommon.ControllerClient = getConvertedClient(c) + var waiter sync.WaitGroup + + localInstallCtxt := &localScopePlaybookContext{ + worker: &PipelineWorker{ + appSrcName: appSrcSpec.Name, + targetPodName: "splunk-stack1-standalone-0", + sts: sts, + cr: &cr, + appDeployInfo: &enterpriseApi.AppDeploymentInfo{ + AppName: "app1.tgz", + ObjectHash: "abcdef12345", + AuxPhaseInfo: make([]enterpriseApi.PhaseInfo, 1), + }, + afwConfig: &cr.Spec.AppFrameworkConfig, + client: client, + waiter: &waiter, + }, + sem: make(chan struct{}, 1), + podExecClient: mockPodExecClient, + } + + pCtx := premiumAppScopePlaybookContext{ + localCtx: localInstallCtxt, + client: client, + appSrcSpec: appSrcSpec, + cr: &cr, + afwPipeline: &AppInstallPipeline{}, + } + + phaseInfo := &enterpriseApi.PhaseInfo{} + err := handleEsappPostinstall(ctx, &pCtx, phaseInfo) + + if tt.expectedError { + if err == nil { + t.Errorf("Expected error for %q but got none", tt.description) + } + } else { + if err != nil { + t.Errorf("Unexpected error for %q: %v", tt.description, err) + } + } + }) + } +} diff --git a/pkg/splunk/enterprise/names.go b/pkg/splunk/enterprise/names.go index e49782f59..dd232b821 100644 --- a/pkg/splunk/enterprise/names.go +++ b/pkg/splunk/enterprise/names.go @@ -112,6 +112,19 @@ const ( shcBundlePushStatusCheckFile = "/operator-staging/appframework/.shcluster_bundle_status.txt" + // splunkFIPSProviderBannerStr is the line written to stderr by the Splunk CLI at + // startup on FIPS-enabled clusters. Because the bundle push command redirects all + // output (&>) to the status file, this banner can appear in the file before the + // actual push result. + splunkFIPSProviderBannerStr = "FIPS provider enabled." + + // splunkSSLCertWarnStr is the prefix of SSL certificate-related warnings emitted + // by the Splunk CLI to stderr. On FIPS-enabled clusters these appear alongside the + // FIPS banner and must be treated as informational. On non-FIPS clusters an SSL + // warning without a FIPS banner indicates a silent failure and should not suppress + // error detection. + splunkSSLCertWarnStr = "WARNING: Server Certificate" + applyIdxcBundleCmdStr = "/opt/splunk/bin/splunk apply cluster-bundle -auth admin:`cat /mnt/splunk-secrets/password` --skip-validation --answer-yes" idxcShowClusterBundleStatusStr = "/opt/splunk/bin/splunk show cluster-bundle-status -auth admin:`cat /mnt/splunk-secrets/password`" diff --git a/test/appframework_aws/c3/appframework_aws_suite_test.go b/test/appframework_aws/c3/appframework_aws_suite_test.go index aa1dde42d..254ce3069 100644 --- a/test/appframework_aws/c3/appframework_aws_suite_test.go +++ b/test/appframework_aws/c3/appframework_aws_suite_test.go @@ -61,7 +61,7 @@ func TestBasic(t *testing.T) { RegisterFailHandler(Fail) sc, _ := GinkgoConfiguration() - sc.Timeout = 240 * time.Minute + sc.Timeout = 270 * time.Minute RunSpecs(t, "Running "+testSuiteName, sc) } diff --git a/test/appframework_aws/m4/appframework_aws_suite_test.go b/test/appframework_aws/m4/appframework_aws_suite_test.go index aa21c7084..5047478d2 100644 --- a/test/appframework_aws/m4/appframework_aws_suite_test.go +++ b/test/appframework_aws/m4/appframework_aws_suite_test.go @@ -56,7 +56,7 @@ func TestBasic(t *testing.T) { RegisterFailHandler(Fail) sc, _ := GinkgoConfiguration() - sc.Timeout = 240 * time.Minute + sc.Timeout = 270 * time.Minute RunSpecs(t, "Running "+testSuiteName, sc) } diff --git a/test/monitoring_console/monitoring_console_suite_test.go b/test/monitoring_console/monitoring_console_suite_test.go index 83bf2060d..6746766ab 100644 --- a/test/monitoring_console/monitoring_console_suite_test.go +++ b/test/monitoring_console/monitoring_console_suite_test.go @@ -45,7 +45,7 @@ func TestBasic(t *testing.T) { RegisterFailHandler(Fail) sc, _ := GinkgoConfiguration() - sc.Timeout = 240 * time.Minute + sc.Timeout = 270 * time.Minute RunSpecs(t, "Running "+testSuiteName, sc) }