diff --git a/pkg/splunk/client/enterprise.go b/pkg/splunk/client/enterprise.go index 9291484cb..82f90ee33 100644 --- a/pkg/splunk/client/enterprise.go +++ b/pkg/splunk/client/enterprise.go @@ -22,7 +22,9 @@ import ( "fmt" "io" "net/http" + "net/url" "regexp" + "sort" "strconv" "strings" "time" @@ -682,6 +684,287 @@ type MCServerRolesInfo struct { type MCDistributedPeers struct { ClusterLabel []string `json:"cluster_label"` ServerRoles []string `json:"server_roles"` + PeerType string `json:"peerType"` +} + +// MCDistributedPeerEntry is the struct for a monitoring console distributed peer entry. +type MCDistributedPeerEntry struct { + Name string `json:"name"` + Content MCDistributedPeers `json:"content"` +} + +// GetMonitoringConsoleDistributedPeers returns the configured peers for the monitoring console. +func (c *SplunkClient) GetMonitoringConsoleDistributedPeers() ([]MCDistributedPeerEntry, error) { + apiResponse := struct { + Entry []MCDistributedPeerEntry `json:"entry"` + }{} + path := "/services/search/distributed/peers" + err := c.Get(path, &apiResponse) + if err != nil { + return nil, err + } + return apiResponse.Entry, nil +} + +// AddMonitoringConsolePeer adds a peer to the monitoring console distributed search peers. +func (c *SplunkClient) AddMonitoringConsolePeer(peerName string) error { + peerName = normalizeMonitoringConsolePeerName(peerName) + if peerName == "" { + return nil + } + + reqBody := url.Values{} + reqBody.Set("name", peerName) + reqBody.Set("remoteUsername", c.Username) + reqBody.Set("remotePassword", c.Password) + + endpoint := fmt.Sprintf("%s/services/search/distributed/peers", c.ManagementURI) + request, err := http.NewRequest("POST", endpoint, strings.NewReader(reqBody.Encode())) + if err != nil { + return err + } + request.Header.Set("Content-Type", "application/x-www-form-urlencoded") + + expectedStatus := []int{200, 201, 409} + return c.Do(request, expectedStatus, nil) +} + +// RemoveMonitoringConsolePeer removes a peer from the monitoring console distributed search peers. +func (c *SplunkClient) RemoveMonitoringConsolePeer(peerName string) error { + peerName = normalizeMonitoringConsolePeerName(peerName) + if peerName == "" { + return nil + } + + endpoint := fmt.Sprintf("%s/services/search/distributed/peers/%s", c.ManagementURI, url.PathEscape(peerName)) + request, err := http.NewRequest("DELETE", endpoint, nil) + if err != nil { + return err + } + + expectedStatus := []int{200, 404} + return c.Do(request, expectedStatus, nil) +} + +// SyncMonitoringConsoleConfig updates distributed peers and DMC groups without requiring an MC restart. +// If desiredConfiguredPeers is nil, peer add/remove reconciliation is skipped and only the DMC refresh runs. +func (c *SplunkClient) SyncMonitoringConsoleConfig(desiredConfiguredPeers []string) error { + apiResponseServerRoles, err := c.GetMonitoringconsoleServerRoles() + if err != nil { + return err + } + + distributedPeers, err := c.GetMonitoringConsoleDistributedPeers() + if err != nil { + return err + } + + if desiredConfiguredPeers != nil { + err = c.reconcileMonitoringConsolePeers(distributedPeers, desiredConfiguredPeers) + if err != nil { + return err + } + + distributedPeers, err = c.GetMonitoringConsoleDistributedPeers() + if err != nil { + return err + } + } + + return c.refreshMonitoringConsoleState(apiResponseServerRoles.ServerRoles, distributedPeers) +} + +func (c *SplunkClient) reconcileMonitoringConsolePeers(distributedPeers []MCDistributedPeerEntry, desiredConfiguredPeers []string) error { + desiredSet := make(map[string]struct{}) + desiredPeers := make([]string, 0, len(desiredConfiguredPeers)) + for _, peer := range desiredConfiguredPeers { + normalizedPeer := normalizeMonitoringConsolePeerName(peer) + if normalizedPeer == "" { + continue + } + if _, ok := desiredSet[normalizedPeer]; ok { + continue + } + desiredSet[normalizedPeer] = struct{}{} + desiredPeers = append(desiredPeers, normalizedPeer) + } + sort.Strings(desiredPeers) + + currentPeers := make(map[string]struct{}) + configuredPeers := make([]string, 0, len(distributedPeers)) + for _, peer := range distributedPeers { + normalizedPeer := normalizeMonitoringConsolePeerName(peer.Name) + if normalizedPeer == "" { + continue + } + currentPeers[normalizedPeer] = struct{}{} + if peer.Content.PeerType == "configured" { + configuredPeers = append(configuredPeers, normalizedPeer) + } + } + sort.Strings(configuredPeers) + + for _, peer := range desiredPeers { + if _, ok := currentPeers[peer]; ok { + continue + } + if err := c.AddMonitoringConsolePeer(peer); err != nil { + return err + } + } + + for _, peer := range configuredPeers { + if _, ok := desiredSet[peer]; ok { + continue + } + if err := c.RemoveMonitoringConsolePeer(peer); err != nil { + return err + } + } + + return nil +} + +func (c *SplunkClient) refreshMonitoringConsoleState(localServerRoles []string, distributedPeers []MCDistributedPeerEntry) error { + groupSpecs := []struct { + name string + defaultValue bool + serverRoles []string + }{ + {name: "dmc_group_cluster_master", defaultValue: false, serverRoles: []string{"cluster_manager", "cluster_master"}}, + {name: "dmc_group_indexer", defaultValue: true, serverRoles: []string{"indexer"}}, + {name: "dmc_group_deployment_server", defaultValue: true, serverRoles: []string{"deployment_server"}}, + {name: splcommon.LicenseManagerDMCGroup, defaultValue: false, serverRoles: []string{"license_manager", "license_master"}}, + {name: "dmc_group_search_head", defaultValue: false, serverRoles: []string{"cluster_search_head", "search_head", "shc_member", "shc_captain"}}, + } + + for _, spec := range groupSpecs { + members := buildMonitoringConsoleMembers(localServerRoles, distributedPeers, spec.serverRoles...) + err := c.UpdateDMCGroups(spec.name, buildMonitoringConsoleGroupBody(members, spec.defaultValue)) + if err != nil { + return err + } + } + + for _, groupName := range []string{"dmc_group_kv_store", "dmc_group_shc_deployer"} { + err := c.UpdateDMCGroups(groupName, buildMonitoringConsoleGroupBody(nil, false)) + if err != nil { + return err + } + } + + clusterLabelGroups := make(map[string][]string) + for _, peer := range distributedPeers { + for _, clusterLabel := range peer.Content.ClusterLabel { + if clusterLabel == "" { + continue + } + clusterLabelGroups[clusterLabel] = append(clusterLabelGroups[clusterLabel], peer.Name) + } + } + + clusterLabels := make([]string, 0, len(clusterLabelGroups)) + for clusterLabel := range clusterLabelGroups { + clusterLabels = append(clusterLabels, clusterLabel) + } + sort.Strings(clusterLabels) + + for _, clusterLabel := range clusterLabels { + members := clusterLabelGroups[clusterLabel] + sort.Strings(members) + err := c.UpdateDMCClusteringLabelGroup(clusterLabel, buildMonitoringConsoleClusterLabelBody(members)) + if err != nil { + return err + } + } + + apiResponseMCAssetTableBuild, err := c.GetMonitoringconsoleAssetTable() + if err != nil { + return err + } + err = c.PostMonitoringConsoleAssetTable(apiResponseMCAssetTableBuild) + if err != nil { + return err + } + + configuredPeers := make([]string, 0, len(distributedPeers)) + for _, peer := range distributedPeers { + configuredPeers = append(configuredPeers, peer.Name) + } + sort.Strings(configuredPeers) + + UISettingsObject, err := c.GetMonitoringConsoleUISettings() + if err != nil { + return err + } + err = c.UpdateLookupUISettings(strings.Join(configuredPeers, ","), UISettingsObject) + if err != nil { + return err + } + + return c.UpdateMonitoringConsoleApp() +} + +func buildMonitoringConsoleMembers(localServerRoles []string, distributedPeers []MCDistributedPeerEntry, expectedRoles ...string) []string { + members := make([]string, 0, len(distributedPeers)+1) + if hasAnyMonitoringConsoleServerRole(localServerRoles, expectedRoles...) { + members = append(members, "localhost:localhost") + } + for _, peer := range distributedPeers { + if hasAnyMonitoringConsoleServerRole(peer.Content.ServerRoles, expectedRoles...) { + members = append(members, peer.Name) + } + } + sort.Strings(members) + return members +} + +func buildMonitoringConsoleGroupBody(members []string, defaultValue bool) string { + if len(members) == 0 { + defaultValue = false + } + + reqBody := url.Values{} + for _, member := range members { + reqBody.Add("member", member) + } + reqBody.Set("default", strconv.FormatBool(defaultValue)) + return reqBody.Encode() +} + +func buildMonitoringConsoleClusterLabelBody(members []string) string { + reqBody := url.Values{} + for _, member := range members { + reqBody.Add("member", member) + } + return reqBody.Encode() +} + +func hasAnyMonitoringConsoleServerRole(serverRoles []string, expectedRoles ...string) bool { + for _, serverRole := range serverRoles { + for _, expectedRole := range expectedRoles { + if serverRole == expectedRole { + return true + } + } + } + return false +} + +func normalizeMonitoringConsolePeerName(peerName string) string { + peerName = strings.TrimSpace(peerName) + peerName = strings.TrimPrefix(peerName, "https://") + peerName = strings.TrimPrefix(peerName, "http://") + if idx := strings.Index(peerName, "/"); idx >= 0 { + peerName = peerName[:idx] + } + if peerName == "" { + return "" + } + if !strings.Contains(peerName, ":") { + peerName = peerName + ":8089" + } + return peerName } // AutomateMCApplyChanges change the state of new indexers from "New" to "Configured" and add them in monitoring console asset table diff --git a/pkg/splunk/client/enterprise_test.go b/pkg/splunk/client/enterprise_test.go index 2c902d537..cc38d6ffb 100644 --- a/pkg/splunk/client/enterprise_test.go +++ b/pkg/splunk/client/enterprise_test.go @@ -19,6 +19,7 @@ import ( "bytes" "context" "fmt" + "io" "net/http" "net/url" "strings" @@ -497,6 +498,39 @@ func TestUpdateDMCClusteringLabelGroup(t *testing.T) { splunkClientTester(t, "TestUpdateDMCClusteringLabelGroup", 201, "", wantRequest, test) } +func TestAddMonitoringConsolePeer(t *testing.T) { + wantRequest, _ := http.NewRequest("POST", "https://localhost:8089/services/search/distributed/peers", nil) + mockSplunkClient := &spltest.MockHTTPClient{} + mockSplunkClient.AddHandler(wantRequest, 201, "", nil) + + c := NewSplunkClient("https://localhost:8089", "admin", "p@ssw0rd") + c.Client = mockSplunkClient + + err := c.AddMonitoringConsolePeer("https://splunk-example-search-head-service") + if err != nil { + t.Fatalf("AddMonitoringConsolePeer() returned err=%v", err) + } + + mockSplunkClient.CheckRequests(t, "TestAddMonitoringConsolePeer") + gotBody, err := io.ReadAll(mockSplunkClient.GotRequests[0].Body) + if err != nil { + t.Fatalf("unable to read request body: %v", err) + } + + wantBody := "name=splunk-example-search-head-service%3A8089&remotePassword=p%40ssw0rd&remoteUsername=admin" + if string(gotBody) != wantBody { + t.Fatalf("AddMonitoringConsolePeer() body=%q; want %q", string(gotBody), wantBody) + } +} + +func TestRemoveMonitoringConsolePeer(t *testing.T) { + wantRequest, _ := http.NewRequest("DELETE", "https://localhost:8089/services/search/distributed/peers/splunk-example-search-head-service:8089", nil) + test := func(c SplunkClient) error { + return c.RemoveMonitoringConsolePeer("splunk-example-search-head-service") + } + splunkClientTester(t, "TestRemoveMonitoringConsolePeer", 200, "", wantRequest, test) +} + func TestGetMonitoringconsoleAssetTable(t *testing.T) { wantRequest, _ := http.NewRequest("GET", "https://localhost:8089/servicesNS/nobody/splunk_monitoring_console/saved/searches/DMC%20Asset%20-%20Build%20Full?count=0&output_mode=json", nil) wantDispatchBuckets := int64(0) diff --git a/pkg/splunk/enterprise/monitoringconsole.go b/pkg/splunk/enterprise/monitoringconsole.go index 0bbc54047..d7f24fdf5 100644 --- a/pkg/splunk/enterprise/monitoringconsole.go +++ b/pkg/splunk/enterprise/monitoringconsole.go @@ -25,6 +25,7 @@ import ( enterpriseApi "github.com/splunk/splunk-operator/api/v4" + splclient "github.com/splunk/splunk-operator/pkg/splunk/client" splcommon "github.com/splunk/splunk-operator/pkg/splunk/common" splctrl "github.com/splunk/splunk-operator/pkg/splunk/splkcontroller" splutil "github.com/splunk/splunk-operator/pkg/splunk/util" @@ -161,6 +162,12 @@ func ApplyMonitoringConsole(ctx context.Context, client splcommon.ControllerClie // no need to requeue if everything is ready if cr.Status.Phase == enterpriseApi.PhaseReady { + err = syncMonitoringConsoleRuntimeState(ctx, client, cr) + if err != nil { + eventPublisher.Warning(ctx, "syncMonitoringConsoleRuntimeState", fmt.Sprintf("sync monitoring console runtime state failed %s", err.Error())) + return result, err + } + finalResult := handleAppFrameworkActivity(ctx, client, cr, &cr.Status.AppContext, &cr.Spec.AppFrameworkConfig) result = *finalResult @@ -176,7 +183,6 @@ func ApplyMonitoringConsole(ctx context.Context, client splcommon.ControllerClie // getMonitoringConsoleStatefulSet returns a Kubernetes StatefulSet object for Splunk Enterprise monitoring console instances. func getMonitoringConsoleStatefulSet(ctx context.Context, client splcommon.ControllerClient, cr *enterpriseApi.MonitoringConsole) (*appsv1.StatefulSet, error) { // get generic statefulset for Splunk Enterprise objects - var monitoringConsoleConfigMap *corev1.ConfigMap configMap := GetSplunkMonitoringconsoleConfigMapName(cr.GetName(), SplunkMonitoringConsole) ss, err := getSplunkStatefulSet(ctx, client, cr, &cr.Spec.CommonSplunkSpec, SplunkMonitoringConsole, 1, []corev1.EnvVar{}) if err != nil { @@ -195,17 +201,86 @@ func getMonitoringConsoleStatefulSet(ctx context.Context, client splcommon.Contr //update podTemplate annotation with configMap resource version namespacedName := types.NamespacedName{Namespace: cr.GetNamespace(), Name: configMap} - monitoringConsoleConfigMap, err = splctrl.GetMCConfigMap(ctx, client, cr, namespacedName) + _, err = splctrl.GetMCConfigMap(ctx, client, cr, namespacedName) if err != nil { return nil, err } - ss.Spec.Template.ObjectMeta.Annotations[monitoringConsoleConfigRev] = monitoringConsoleConfigMap.ResourceVersion + // Keep the pod template stable so MC membership changes are applied over REST instead of forcing a pod recycle. + ss.Spec.Template.ObjectMeta.Annotations[monitoringConsoleConfigRev] = "" // Setup App framework staging volume for apps setupAppsStagingVolume(ctx, client, cr, &ss.Spec.Template, &cr.Spec.AppFrameworkConfig) return ss, nil } +var newMonitoringConsoleSplunkClient = splclient.NewSplunkClient + +func syncMonitoringConsoleRuntimeState(ctx context.Context, client splcommon.ControllerClient, cr *enterpriseApi.MonitoringConsole) error { + configMapName := GetSplunkMonitoringconsoleConfigMapName(cr.GetName(), SplunkMonitoringConsole) + namespacedName := types.NamespacedName{Namespace: cr.GetNamespace(), Name: configMapName} + + monitoringConsoleConfigMap, err := splctrl.GetMCConfigMap(ctx, client, cr, namespacedName) + if err != nil { + return err + } + + mcClient, err := getMonitoringConsoleClient(ctx, client, cr) + if err != nil { + return err + } + + return mcClient.SyncMonitoringConsoleConfig(getMonitoringConsoleDesiredPeers(monitoringConsoleConfigMap)) +} + +func getMonitoringConsoleClient(ctx context.Context, client splcommon.ControllerClient, cr *enterpriseApi.MonitoringConsole) (*splclient.SplunkClient, error) { + defaultSecretObjName := splcommon.GetNamespaceScopedSecretName(cr.GetNamespace()) + defaultSecret, err := splutil.GetSecretByName(ctx, client, cr.GetNamespace(), cr.GetName(), defaultSecretObjName) + if err != nil { + return nil, fmt.Errorf("could not access default secret object to fetch admin password: %w", err) + } + + adminPwd, foundSecret := defaultSecret.Data["password"] + if !foundSecret { + return nil, fmt.Errorf("could not find admin password while trying to sync the monitoring console") + } + + fqdnName := splcommon.GetServiceFQDN(cr.GetNamespace(), GetSplunkServiceName(SplunkMonitoringConsole, cr.GetName(), false)) + return newMonitoringConsoleSplunkClient(fmt.Sprintf("https://%s:8089", fqdnName), "admin", string(adminPwd)), nil +} + +func getMonitoringConsoleDesiredPeers(configMap *corev1.ConfigMap) []string { + desiredPeerSet := make(map[string]struct{}) + desiredPeers := make([]string, 0) + + for _, key := range []string{ + "SPLUNK_INDEXER_URL", + splcommon.ClusterManagerURL, + splcommon.LicenseManagerURL, + "SPLUNK_STANDALONE_URL", + "SPLUNK_SEARCH_HEAD_URL", + "SPLUNK_DEPLOYER_URL", + } { + if configMap == nil || configMap.Data == nil { + continue + } + + for _, peer := range strings.Split(configMap.Data[key], ",") { + peer = strings.TrimSpace(peer) + if peer == "" { + continue + } + if _, ok := desiredPeerSet[peer]; ok { + continue + } + desiredPeerSet[peer] = struct{}{} + desiredPeers = append(desiredPeers, peer) + } + } + + sort.Strings(desiredPeers) + return desiredPeers +} + // helper function to get the list of MonitoringConsole types in the current namespace func getMonitoringConsoleList(ctx context.Context, c splcommon.ControllerClient, cr splcommon.MetaObject, listOpts []rclient.ListOption) (enterpriseApi.MonitoringConsoleList, error) { reqLogger := log.FromContext(ctx) diff --git a/pkg/splunk/enterprise/monitoringconsole_test.go b/pkg/splunk/enterprise/monitoringconsole_test.go index af3eac5b4..8cbabcd22 100644 --- a/pkg/splunk/enterprise/monitoringconsole_test.go +++ b/pkg/splunk/enterprise/monitoringconsole_test.go @@ -17,6 +17,7 @@ import ( "context" "os" "path/filepath" + "reflect" "runtime/debug" "testing" "time" @@ -135,6 +136,34 @@ func TestApplyMonitoringConsole(t *testing.T) { } splunkDeletionTester(t, revised, deleteFunc) } + +func TestGetMonitoringConsoleDesiredPeers(t *testing.T) { + configMap := &corev1.ConfigMap{ + Data: map[string]string{ + "SPLUNK_SEARCH_HEAD_URL": "splunk-sh-1,splunk-sh-0", + "SPLUNK_SEARCH_HEAD_CAPTAIN_URL": "splunk-sh-0", + "SPLUNK_STANDALONE_URL": "splunk-standalone-0", + splcommon.ClusterManagerURL: "splunk-cluster-manager-service", + splcommon.LicenseManagerURL: "splunk-license-manager-service", + "SPLUNK_DEPLOYER_URL": "splunk-deployer-service", + }, + } + + got := getMonitoringConsoleDesiredPeers(configMap) + want := []string{ + "splunk-cluster-manager-service", + "splunk-deployer-service", + "splunk-license-manager-service", + "splunk-sh-0", + "splunk-sh-1", + "splunk-standalone-0", + } + + if !reflect.DeepEqual(got, want) { + t.Fatalf("getMonitoringConsoleDesiredPeers()=%v; want %v", got, want) + } +} + func TestApplyMonitoringConsoleEnvConfigMap(t *testing.T) { os.Setenv("SPLUNK_GENERAL_TERMS", "--accept-sgt-current-at-splunk-com") ctx := context.TODO() diff --git a/test/monitoring_console/manager_monitoring_console_test.go b/test/monitoring_console/manager_monitoring_console_test.go index 7e50c58a6..b63c073b9 100644 --- a/test/monitoring_console/manager_monitoring_console_test.go +++ b/test/monitoring_console/manager_monitoring_console_test.go @@ -16,7 +16,6 @@ package monitoringconsoletest import ( "context" "fmt" - "time" enterpriseApi "github.com/splunk/splunk-operator/api/v4" splcommon "github.com/splunk/splunk-operator/pkg/splunk/common" @@ -446,6 +445,8 @@ var _ = Describe("Monitoring Console test", func() { // Verify Monitoring Console is Ready and stays in ready state testenv.VerifyMonitoringConsoleReady(ctx, deployment, deployment.GetName(), mc, testcaseEnvInst) + mcPodName := fmt.Sprintf(testenv.MonitoringConsolePod, mcName) + mcPodStartTime := testenv.GetPodStartTime(testcaseEnvInst.GetName(), mcPodName) // get revision number of the resource resourceVersion := testenv.GetResourceVersion(ctx, deployment, testcaseEnvInst, mc) @@ -468,8 +469,6 @@ var _ = Describe("Monitoring Console test", func() { // Verify Monitoring Console is Ready and stays in ready state testenv.VerifyMonitoringConsoleReady(ctx, deployment, deployment.GetName(), mc, testcaseEnvInst) - time.Sleep(60 * time.Second) - // Check Cluster Manager in Monitoring Console Config Map testenv.VerifyPodsInMCConfigMap(ctx, deployment, testcaseEnvInst, []string{fmt.Sprintf(testenv.ClusterManagerServiceName, deployment.GetName())}, splcommon.ClusterManagerURL, mcName, true) @@ -480,15 +479,13 @@ var _ = Describe("Monitoring Console test", func() { shPods := testenv.GeneratePodNameSlice(testenv.SearchHeadPod, deployment.GetName(), defaultSHReplicas, false, 0) testenv.VerifyPodsInMCConfigMap(ctx, deployment, testcaseEnvInst, shPods, "SPLUNK_SEARCH_HEAD_URL", mcName, true) - // Add a sleep here in case MC pod restarts to add peers - time.Sleep(300 * time.Second) - // Check Monitoring console Pod is configured with all search head testenv.VerifyPodsInMCConfigString(ctx, deployment, testcaseEnvInst, shPods, mcName, true, false) // Check Monitoring console is configured with all Indexer in Name Space indexerPods := testenv.GeneratePodNameSlice(testenv.IndexerPod, deployment.GetName(), defaultIndexerReplicas, false, 0) testenv.VerifyPodsInMCConfigString(ctx, deployment, testcaseEnvInst, indexerPods, mcName, true, true) + testenv.VerifyPodDidNotReset(deployment, testcaseEnvInst, testcaseEnvInst.GetName(), mcPodName, mcPodStartTime) // Scale Search Head Cluster scaledSHReplicas := defaultSHReplicas + 1 @@ -586,6 +583,7 @@ var _ = Describe("Monitoring Console test", func() { testcaseEnvInst.Log.Info("Checking for Indexer Pod on MC after Scale Up") indexerPods = testenv.GeneratePodNameSlice(testenv.IndexerPod, deployment.GetName(), scaledIndexerReplicas, false, 0) testenv.VerifyPodsInMCConfigString(ctx, deployment, testcaseEnvInst, indexerPods, mcName, true, true) + testenv.VerifyPodDidNotReset(deployment, testcaseEnvInst, testcaseEnvInst.GetName(), mcPodName, mcPodStartTime) }) }) @@ -626,6 +624,8 @@ var _ = Describe("Monitoring Console test", func() { // Verify Monitoring Console is Ready and stays in ready state testenv.VerifyMonitoringConsoleReady(ctx, deployment, deployment.GetName(), mc, testcaseEnvInst) + mcPodName := fmt.Sprintf(testenv.MonitoringConsolePod, mcName) + mcPodStartTime := testenv.GetPodStartTime(testcaseEnvInst.GetName(), mcPodName) // get revision number of the resource resourceVersion := testenv.GetResourceVersion(ctx, deployment, testcaseEnvInst, mc) @@ -661,15 +661,13 @@ var _ = Describe("Monitoring Console test", func() { // Verify Monitoring Console is Ready and stays in ready state testenv.VerifyMonitoringConsoleReady(ctx, deployment, deployment.GetName(), mc, testcaseEnvInst) - // Adding a sleep, in case MC restarts to update peers list - time.Sleep(300 * time.Second) - // Check Monitoring console Pod is configured with all search head testenv.VerifyPodsInMCConfigString(ctx, deployment, testcaseEnvInst, shPods, mcName, true, false) // Check Monitoring console is configured with all Indexer in Name Space indexerPods := testenv.GeneratePodNameSlice(testenv.IndexerPod, deployment.GetName(), defaultIndexerReplicas, false, 0) testenv.VerifyPodsInMCConfigString(ctx, deployment, testcaseEnvInst, indexerPods, mcName, true, true) + testenv.VerifyPodDidNotReset(deployment, testcaseEnvInst, testcaseEnvInst.GetName(), mcPodName, mcPodStartTime) // ################# Update Monitoring Console In Cluster Manager CR ################################## @@ -704,6 +702,8 @@ var _ = Describe("Monitoring Console test", func() { // Verify Monitoring Console TWO is Ready and stays in ready state testenv.VerifyMonitoringConsoleReady(ctx, deployment, mcTwoName, mcTwo, testcaseEnvInst) + mcTwoPodName := fmt.Sprintf(testenv.MonitoringConsolePod, mcTwoName) + mcTwoPodStartTime := testenv.GetPodStartTime(testcaseEnvInst.GetName(), mcTwoPodName) // ########### VERIFY MONITORING CONSOLE TWO AFTER CLUSTER MANAGER RECONFIG ################################### @@ -786,6 +786,7 @@ var _ = Describe("Monitoring Console test", func() { // Check Monitoring console Two is configured with all Indexer in Name Space testcaseEnvInst.Log.Info("Checking for Indexer Pod on MC TWO after SHC Reconfig") testenv.VerifyPodsInMCConfigString(ctx, deployment, testcaseEnvInst, indexerPods, mcTwoName, true, true) + testenv.VerifyPodDidNotReset(deployment, testcaseEnvInst, testcaseEnvInst.GetName(), mcTwoPodName, mcTwoPodStartTime) // ############################ VERIFICATION FOR MONITORING CONSOLE ONE POST SHC RECONFIG ############################### @@ -798,7 +799,7 @@ var _ = Describe("Monitoring Console test", func() { // Check DEPLOYER Not in Monitoring Console One Config Map testcaseEnvInst.Log.Info("Verify DEPLOYER NOT in Monitoring Console One Config Map after SHC Reconfig") - testenv.VerifyPodsInMCConfigMap(ctx, deployment, testcaseEnvInst, []string{fmt.Sprintf(testenv.ClusterManagerServiceName, deployment.GetName())}, "SPLUNK_DEPLOYER_URL", mcName, false) + testenv.VerifyPodsInMCConfigMap(ctx, deployment, testcaseEnvInst, []string{fmt.Sprintf(testenv.DeployerServiceName, deployment.GetName())}, "SPLUNK_DEPLOYER_URL", mcName, false) // Verify all Search Head Members are Not configured on Monitoring Console One testcaseEnvInst.Log.Info("Verify Search Head Pods NOT on Monitoring Console ONE Config Map after SHC Reconfig") @@ -806,6 +807,7 @@ var _ = Describe("Monitoring Console test", func() { testcaseEnvInst.Log.Info("Verify Search Head Pods NOT on Monitoring Console ONE Pod after Search Head Reconfig") testenv.VerifyPodsInMCConfigString(ctx, deployment, testcaseEnvInst, shPods, mcName, false, false) + testenv.VerifyPodDidNotReset(deployment, testcaseEnvInst, testcaseEnvInst.GetName(), mcPodName, mcPodStartTime) // Check Monitoring console One is Not configured with all Indexer in Name Space // CSPL-619 diff --git a/test/monitoring_console/monitoring_console_test.go b/test/monitoring_console/monitoring_console_test.go index 4189e9ff1..01d2e962b 100644 --- a/test/monitoring_console/monitoring_console_test.go +++ b/test/monitoring_console/monitoring_console_test.go @@ -16,7 +16,6 @@ package monitoringconsoletest import ( "context" "fmt" - "time" . "github.com/onsi/ginkgo/v2" "github.com/onsi/ginkgo/v2/types" @@ -85,6 +84,8 @@ var _ = Describe("Monitoring Console test", func() { // Verify Monitoring Console is Ready and stays in ready state testenv.VerifyMonitoringConsoleReady(ctx, deployment, deployment.GetName(), mc, testcaseEnvInst) + mcPodName := fmt.Sprintf(testenv.MonitoringConsolePod, mcName) + mcPodStartTime := testenv.GetPodStartTime(testcaseEnvInst.GetName(), mcPodName) // get revision number of the resource resourceVersion := testenv.GetResourceVersion(ctx, deployment, testcaseEnvInst, mc) @@ -107,8 +108,6 @@ var _ = Describe("Monitoring Console test", func() { // Verify Monitoring Console is Ready and stays in ready state testenv.VerifyMonitoringConsoleReady(ctx, deployment, deployment.GetName(), mc, testcaseEnvInst) - time.Sleep(60 * time.Second) - // Check Cluster Master in Monitoring Console Config Map testenv.VerifyPodsInMCConfigMap(ctx, deployment, testcaseEnvInst, []string{fmt.Sprintf(testenv.ClusterMasterServiceName, deployment.GetName())}, "SPLUNK_CLUSTER_MASTER_URL", mcName, true) @@ -124,6 +123,7 @@ var _ = Describe("Monitoring Console test", func() { // Check Monitoring console is configured with all Indexer in Name Space indexerPods := testenv.GeneratePodNameSlice(testenv.IndexerPod, deployment.GetName(), defaultIndexerReplicas, false, 0) testenv.VerifyPodsInMCConfigString(ctx, deployment, testcaseEnvInst, indexerPods, mcName, true, true) + testenv.VerifyPodDidNotReset(deployment, testcaseEnvInst, testcaseEnvInst.GetName(), mcPodName, mcPodStartTime) // Scale Search Head Cluster scaledSHReplicas := defaultSHReplicas + 1 @@ -221,6 +221,7 @@ var _ = Describe("Monitoring Console test", func() { testcaseEnvInst.Log.Info("Checking for Indexer Pod on MC after Scale Up") indexerPods = testenv.GeneratePodNameSlice(testenv.IndexerPod, deployment.GetName(), scaledIndexerReplicas, false, 0) testenv.VerifyPodsInMCConfigString(ctx, deployment, testcaseEnvInst, indexerPods, mcName, true, true) + testenv.VerifyPodDidNotReset(deployment, testcaseEnvInst, testcaseEnvInst.GetName(), mcPodName, mcPodStartTime) }) }) diff --git a/test/testenv/mcutil.go b/test/testenv/mcutil.go index 36250ae96..69d9a1c2f 100644 --- a/test/testenv/mcutil.go +++ b/test/testenv/mcutil.go @@ -75,8 +75,8 @@ func CheckMCPodReady(ns string) bool { return stsReady && podReady } -// GetConfiguredPeers get list of Peers Configured on Montioring Console -func GetConfiguredPeers(ns string, mcName string) []string { +// GetConfiguredPeers gets the peers configured on the monitoring console. +func GetConfiguredPeers(ns string, mcName string) ([]string, error) { podName := fmt.Sprintf(MonitoringConsolePod, mcName) var peerList []string if len(podName) > 0 { @@ -85,6 +85,7 @@ func GetConfiguredPeers(ns string, mcName string) []string { if err != nil { cmd := fmt.Sprintf("kubectl exec -n %s %s -- cat %s", ns, podName, peerFile) logf.Log.Error(err, "Failed to execute command", "command", cmd) + return nil, err } for _, line := range strings.Split(string(output), "\n") { // Check for empty lines to prevent an error in logic below @@ -101,7 +102,7 @@ func GetConfiguredPeers(ns string, mcName string) []string { } } logf.Log.Info("Peer List found on MC Pod", "MC POD", podName, "Configured Peers", peerList) - return peerList + return peerList, nil } // DeleteMCPod delete monitoring console deployment @@ -116,10 +117,13 @@ func DeleteMCPod(ns string) { } } -// CheckPodNameOnMC Check given pod is configured on Monitoring console pod -func CheckPodNameOnMC(ns string, mcName string, podName string) bool { +// CheckPodNameOnMC checks whether the given pod is configured on the monitoring console pod. +func CheckPodNameOnMC(ns string, mcName string, podName string) (bool, error) { // Get Peers configured on Monitoring Console - peerList := GetConfiguredPeers(ns, mcName) + peerList, err := GetConfiguredPeers(ns, mcName) + if err != nil { + return false, err + } logf.Log.Info("Peer List", "instance", peerList) found := false for _, peer := range peerList { @@ -129,7 +133,7 @@ func CheckPodNameOnMC(ns string, mcName string, podName string) bool { break } } - return found + return found, nil } // GetPodIP returns IP address of a POD as a string diff --git a/test/testenv/util.go b/test/testenv/util.go index dfa68db3b..b1a1605cd 100644 --- a/test/testenv/util.go +++ b/test/testenv/util.go @@ -1168,18 +1168,36 @@ func GetPodsStartTime(ns string) map[string]time.Time { splunkPods := DumpGetPods(ns) for _, podName := range splunkPods { - output, _ := exec.Command("kubectl", "get", "pods", "-n", ns, podName, "-o", "json").Output() - restResponse := PodDetailsStruct{} - err := json.Unmarshal([]byte(output), &restResponse) - if err != nil { - logf.Log.Error(err, "Failed to parse splunk pods") - } - podStartTime, _ := time.Parse("2006-01-02T15:04:05Z", restResponse.Status.StartTime) - splunkPodsStartTime[podName] = podStartTime + splunkPodsStartTime[podName] = GetPodStartTime(ns, podName) } return splunkPodsStartTime } +// GetPodStartTime returns start time of a single pod. +func GetPodStartTime(ns string, podName string) time.Time { + output, err := exec.Command("kubectl", "get", "pods", "-n", ns, podName, "-o", "json").Output() + if err != nil { + cmd := fmt.Sprintf("kubectl get pods -n %s %s -o json", ns, podName) + logf.Log.Error(err, "Failed to execute command", "command", cmd) + return time.Time{} + } + + restResponse := PodDetailsStruct{} + err = json.Unmarshal([]byte(output), &restResponse) + if err != nil { + logf.Log.Error(err, "Failed to parse splunk pod") + return time.Time{} + } + + podStartTime, err := time.Parse(time.RFC3339Nano, restResponse.Status.StartTime) + if err != nil { + logf.Log.Error(err, "Failed to parse pod start time", "podName", podName, "startTime", restResponse.Status.StartTime) + return time.Time{} + } + + return podStartTime +} + // DeletePod Delete pod in the namespace func DeletePod(ns string, podName string) error { _, err := exec.Command("kubectl", "delete", "pod", "-n", ns, podName).Output() diff --git a/test/testenv/verificationutils.go b/test/testenv/verificationutils.go index cb611254d..7fb73a793 100644 --- a/test/testenv/verificationutils.go +++ b/test/testenv/verificationutils.go @@ -1084,30 +1084,67 @@ func WaitForAppInstall(ctx context.Context, deployment *Deployment, testenvInsta // VerifyPodsInMCConfigMap checks if given pod names are present in given KEY of given MC's Config Map func VerifyPodsInMCConfigMap(ctx context.Context, deployment *Deployment, testenvInstance *TestCaseEnv, pods []string, key string, mcName string, expected bool) { - // Get contents of MC config map - mcConfigMap, err := GetMCConfigMap(ctx, deployment, testenvInstance.GetName(), mcName) - gomega.Expect(err).To(gomega.Succeed(), "Unable to get MC config map") for _, podName := range pods { - testenvInstance.Log.Info("Checking for POD on MC Config Map", "POD Name", podName, "DATA", mcConfigMap.Data) - gomega.Expect(expected).To(gomega.Equal(CheckPodNameInString(podName, mcConfigMap.Data[key])), "Verify Pod in MC Config Map. Pod Name %s.", podName) + gomega.Eventually(func() error { + mcConfigMap, err := GetMCConfigMap(ctx, deployment, testenvInstance.GetName(), mcName) + if err != nil { + return err + } + + testenvInstance.Log.Info("Checking for POD on MC Config Map", "POD Name", podName, "KEY", key, "EXPECTED", expected, "DATA", mcConfigMap.Data) + found := CheckPodNameInString(podName, mcConfigMap.Data[key]) + if found != expected { + return fmt.Errorf("expected MC config map key %s for pod %s to be %t, got %t", key, podName, expected, found) + } + + return nil + }, deployment.GetTimeout(), PollInterval).Should(gomega.Succeed(), "Verify Pod in MC Config Map. Pod Name %s.", podName) } } // VerifyPodsInMCConfigString checks if given pod names are present in given KEY of given MC's Config Map func VerifyPodsInMCConfigString(ctx context.Context, deployment *Deployment, testenvInstance *TestCaseEnv, pods []string, mcName string, expected bool, checkPodIP bool) { for _, podName := range pods { - testenvInstance.Log.Info("Checking pod configured in MC POD Peers String", "Pod Name", podName) - var found bool - if checkPodIP { - podIP := GetPodIP(testenvInstance.GetName(), podName) - found = CheckPodNameOnMC(testenvInstance.GetName(), mcName, podIP) - } else { - found = CheckPodNameOnMC(testenvInstance.GetName(), mcName, podName) - } - gomega.Expect(expected).To(gomega.Equal(found), "Verify Pod in MC Config String. Pod Name %s.", podName) + gomega.Eventually(func() error { + testenvInstance.Log.Info("Checking pod configured in MC POD peers string", "Pod Name", podName, "EXPECTED", expected) + + var ( + found bool + err error + ) + if checkPodIP { + podIP := GetPodIP(testenvInstance.GetName(), podName) + if podIP == "" { + return fmt.Errorf("empty pod IP for pod %s", podName) + } + found, err = CheckPodNameOnMC(testenvInstance.GetName(), mcName, podIP) + } else { + found, err = CheckPodNameOnMC(testenvInstance.GetName(), mcName, podName) + } + if err != nil { + return err + } + + if found != expected { + return fmt.Errorf("expected MC configured peers for pod %s to be %t, got %t", podName, expected, found) + } + + return nil + }, deployment.GetTimeout(), PollInterval).Should(gomega.Succeed(), "Verify Pod in MC Config String. Pod Name %s.", podName) } } +// VerifyPodDidNotReset verifies a pod kept the same start time across an operation. +func VerifyPodDidNotReset(deployment *Deployment, testenvInstance *TestCaseEnv, ns string, podName string, podStartTime time.Time) { + gomega.Expect(podStartTime.IsZero()).To(gomega.Equal(false), "Pod start time was not captured. Pod Name %s.", podName) + + gomega.Eventually(func() bool { + currentPodStartTime := GetPodStartTime(ns, podName) + testenvInstance.Log.Info("Checking pod reset for Pod Name", "PodName", podName, "Current Pod Start Time", currentPodStartTime, "Previous Pod Start Time", podStartTime) + return currentPodStartTime.Equal(podStartTime) + }, deployment.GetTimeout(), PollInterval).Should(gomega.Equal(true), "Pod reset was detected. Pod Name %s.", podName) +} + // VerifyClusterManagerBundlePush verify that bundle push was pushed on all indexers func VerifyClusterManagerBundlePush(ctx context.Context, deployment *Deployment, testenvInstance *TestCaseEnv, ns string, replicas int, previousBundleHash string) { gomega.Eventually(func() bool { @@ -1169,9 +1206,6 @@ func VerifyNoPodReset(ctx context.Context, deployment *Deployment, testenvInstan // Get current Age on all splunk pods and compare with previous currentSplunkPodAge := GetPodsStartTime(ns) for podName, currentpodAge := range currentSplunkPodAge { - if strings.Contains(podName, "monitoring-console") { - continue - } // Only compare if the pod was present in previous pod iteration testenvInstance.Log.Info("Checking Pod reset for Pod Name", "PodName", podName, "Current Pod Age", currentpodAge) if _, ok := podStartTimeMap[podName]; ok {