diff --git a/cmd/thv-operator/api/v1alpha1/mcpserver_types.go b/cmd/thv-operator/api/v1alpha1/mcpserver_types.go
index dd665f4250..c2ff2f9a53 100644
--- a/cmd/thv-operator/api/v1alpha1/mcpserver_types.go
+++ b/cmd/thv-operator/api/v1alpha1/mcpserver_types.go
@@ -147,6 +147,18 @@ const (
 	ConditionReasonSessionStorageNotApplicable = "SessionStorageWarningNotApplicable"
 )
 
+// ConditionRateLimitConfigValid is the condition type reporting whether the rate limit configuration is valid.
+const ConditionRateLimitConfigValid = "RateLimitConfigValid"
+
+const (
+	// ConditionReasonRateLimitConfigValid is the reason used when the rate limit configuration is valid.
+	ConditionReasonRateLimitConfigValid = "RateLimitConfigValid"
+	// ConditionReasonRateLimitPerUserRequiresAuth is the reason used when perUser limits lack authentication.
+	ConditionReasonRateLimitPerUserRequiresAuth = "PerUserRequiresAuth"
+	// ConditionReasonRateLimitNotApplicable is the reason used when rate limiting is not configured.
+	ConditionReasonRateLimitNotApplicable = "RateLimitNotApplicable"
+)
+
 // SessionStorageProviderRedis is the provider name for Redis-backed session storage.
 const SessionStorageProviderRedis = "redis"
 
@@ -155,6 +167,8 @@ const SessionStorageProviderRedis = "redis"
 // +kubebuilder:validation:XValidation:rule="!(has(self.oidcConfig) && has(self.oidcConfigRef))",message="oidcConfig and oidcConfigRef are mutually exclusive; use oidcConfigRef to reference a shared MCPOIDCConfig"
 // +kubebuilder:validation:XValidation:rule="!(has(self.telemetry) && has(self.telemetryConfigRef))",message="telemetry and telemetryConfigRef are mutually exclusive; migrate to telemetryConfigRef"
 // +kubebuilder:validation:XValidation:rule="!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider == 'redis')",message="rateLimiting requires sessionStorage with provider 'redis'"
+// +kubebuilder:validation:XValidation:rule="!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) || has(self.oidcConfig) || has(self.oidcConfigRef) || has(self.externalAuthConfigRef)",message="rateLimiting.perUser requires authentication (oidcConfig, oidcConfigRef, or externalAuthConfigRef)"
+// +kubebuilder:validation:XValidation:rule="!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t, !has(t.perUser)) || has(self.oidcConfig) || has(self.oidcConfigRef) || has(self.externalAuthConfigRef)",message="per-tool perUser rate limiting requires authentication (oidcConfig, oidcConfigRef, or externalAuthConfigRef)"
 //
 //nolint:lll // CEL validation rules exceed line length limit
 type MCPServerSpec struct {
@@ -488,16 +502,23 @@ type SessionStorageConfig struct {
 }
 
 // RateLimitConfig defines rate limiting configuration for an MCP server.
-// At least one of shared or tools must be configured.
+// At least one of shared, perUser, or tools must be configured.
 //
-// +kubebuilder:validation:XValidation:rule="has(self.shared) || (has(self.tools) && size(self.tools) > 0)",message="at least one of shared or tools must be configured"
+// +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser) || (has(self.tools) && size(self.tools) > 0)",message="at least one of shared, perUser, or tools must be configured"
 //
 //nolint:lll // CEL validation rules exceed line length limit
 type RateLimitConfig struct {
-	// Shared defines a token bucket shared across all users for the entire server.
+	// Shared is a token bucket shared across all users for the entire server.
 	// +optional
 	Shared *RateLimitBucket `json:"shared,omitempty"`
 
+	// PerUser is a token bucket applied independently to each authenticated user
+	// at the server level. Requires authentication to be enabled.
+	// Each unique userID creates Redis keys that expire after 2x refillPeriod.
+	// Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
+	// +optional
+	PerUser *RateLimitBucket `json:"perUser,omitempty"`
+
 	// Tools defines per-tool rate limit overrides.
 	// Each entry applies additional rate limits to calls targeting a specific tool name.
 	// A request must pass both the server-level limit and the per-tool limit.
@@ -507,7 +528,8 @@ type RateLimitConfig struct {
 	Tools []ToolRateLimitConfig `json:"tools,omitempty"`
 }
 
-// RateLimitBucket defines a token bucket configuration.
+// RateLimitBucket defines a token bucket configuration with a maximum capacity
+// and a refill period. Used by both shared (global) and per-user rate limits.
 type RateLimitBucket struct {
 	// MaxTokens is the maximum number of tokens (bucket capacity).
 	// This is also the burst size: the maximum number of requests that can be served
@@ -524,15 +546,24 @@ type RateLimitBucket struct {
 }
 
 // ToolRateLimitConfig defines rate limits for a specific tool.
+// At least one of shared or perUser must be configured.
+//
+// +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser)",message="at least one of shared or perUser must be configured"
+//
+//nolint:lll // kubebuilder marker exceeds line length
 type ToolRateLimitConfig struct {
 	// Name is the MCP tool name this limit applies to.
 	// +kubebuilder:validation:Required
 	// +kubebuilder:validation:MinLength=1
 	Name string `json:"name"`
 
-	// Shared defines a token bucket shared across all users for this specific tool.
-	// +kubebuilder:validation:Required
-	Shared *RateLimitBucket `json:"shared"`
+	// Shared token bucket for this specific tool.
+	// +optional
+	Shared *RateLimitBucket `json:"shared,omitempty"`
+
+	// PerUser token bucket configuration for this tool.
+	// +optional
+	PerUser *RateLimitBucket `json:"perUser,omitempty"`
 }
 
 // Permission profile types
diff --git a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go
index 83408b7eb8..bd254deac7 100644
--- a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go
+++ b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go
@@ -2649,6 +2649,11 @@ func (in *RateLimitConfig) DeepCopyInto(out *RateLimitConfig) {
 		*out = new(RateLimitBucket)
 		**out = **in
 	}
+	if in.PerUser != nil {
+		in, out := &in.PerUser, &out.PerUser
+		*out = new(RateLimitBucket)
+		**out = **in
+	}
 	if in.Tools != nil {
 		in, out := &in.Tools, &out.Tools
 		*out = make([]ToolRateLimitConfig, len(*in))
@@ -3188,6 +3193,11 @@ func (in *ToolRateLimitConfig) DeepCopyInto(out *ToolRateLimitConfig) {
 		*out = new(RateLimitBucket)
 		**out = **in
 	}
+	if in.PerUser != nil {
+		in, out := &in.PerUser, &out.PerUser
+		*out = new(RateLimitBucket)
+		**out = **in
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ToolRateLimitConfig.
diff --git a/cmd/thv-operator/controllers/mcpserver_controller.go b/cmd/thv-operator/controllers/mcpserver_controller.go
index 29ddff9b2d..18192eaccb 100644
--- a/cmd/thv-operator/controllers/mcpserver_controller.go
+++ b/cmd/thv-operator/controllers/mcpserver_controller.go
@@ -196,9 +196,10 @@ func (r *MCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
 	// Validate CABundleRef if specified
 	r.validateCABundleRef(ctx, mcpServer)
 
-	// Validate stdio replica cap and session storage requirements
+	// Validate stdio replica cap, session storage, and rate limit config
 	r.validateStdioReplicaCap(ctx, mcpServer)
 	r.validateSessionStorageForReplicas(ctx, mcpServer)
+	r.validateRateLimitConfig(ctx, mcpServer)
 
 	// Validate PodTemplateSpec early - before other validations
 	// This ensures we fail fast if the spec is invalid
@@ -2289,6 +2290,61 @@ func (r *MCPServerReconciler) validateSessionStorageForReplicas(ctx context.Cont
 	}
 }
 
+// setRateLimitConfigCondition records the given status, reason, and message as the
+// RateLimitConfigValid condition in mcpServer.Status.Conditions.
+func setRateLimitConfigCondition(mcpServer *mcpv1alpha1.MCPServer, status metav1.ConditionStatus, reason, message string) {
+	cond := metav1.Condition{Type: mcpv1alpha1.ConditionRateLimitConfigValid, Status: status}
+	cond.Reason, cond.Message = reason, message
+	// Stamp the generation of the object the verdict was computed for.
+	cond.ObservedGeneration = mcpServer.Generation
+
+	meta.SetStatusCondition(&mcpServer.Status.Conditions, cond)
+}
+
+// validateRateLimitConfig checks that perUser rate limits, whether declared at the server
+// level or on an individual tool, are backed by authentication (oidcConfig, oidcConfigRef,
+// or externalAuthConfigRef) and publishes the verdict as the RateLimitConfigValid condition.
+//
+// CEL admission validation is the primary gate; this check is defense-in-depth only. A False
+// condition does not stop reconciliation, because per-user buckets are silently skipped when
+// userID is empty (graceful degradation). A failed status update is logged, not returned.
+func (r *MCPServerReconciler) validateRateLimitConfig(ctx context.Context, mcpServer *mcpv1alpha1.MCPServer) {
+	// publish stores the verdict on the object and persists it through the status client.
+	publish := func(status metav1.ConditionStatus, reason, message string) {
+		setRateLimitConfigCondition(mcpServer, status, reason, message)
+		if err := r.Status().Update(ctx, mcpServer); err != nil {
+			log.FromContext(ctx).Error(err, "Failed to update MCPServer status after rate limit validation")
+		}
+	}
+
+	spec := &mcpServer.Spec
+	limits := spec.RateLimiting
+	// Nothing to validate when rate limiting is absent from the spec.
+	if limits == nil {
+		publish(metav1.ConditionTrue, mcpv1alpha1.ConditionReasonRateLimitNotApplicable,
+			"rate limiting is not configured")
+		return
+	}
+
+	// A perUser bucket can sit at the server level or on any entry of the tools list.
+	usesPerUser := limits.PerUser != nil
+	for i := 0; !usesPerUser && i < len(limits.Tools); i++ {
+		usesPerUser = limits.Tools[i].PerUser != nil
+	}
+
+	// Any one of the three auth settings counts as authentication being enabled.
+	noAuth := spec.OIDCConfig == nil && spec.OIDCConfigRef == nil && spec.ExternalAuthConfigRef == nil
+
+	if usesPerUser && noAuth {
+		publish(metav1.ConditionFalse, mcpv1alpha1.ConditionReasonRateLimitPerUserRequiresAuth,
+			"perUser rate limiting requires authentication to be enabled (oidcConfig, oidcConfigRef, or externalAuthConfigRef)")
+		return
+	}
+
+	publish(metav1.ConditionTrue, mcpv1alpha1.ConditionReasonRateLimitConfigValid,
+		"rate limit configuration is valid")
+}
+
 // SetupWithManager sets up the controller with the Manager.
 func (r *MCPServerReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	// Create a handler that maps MCPExternalAuthConfig changes to MCPServer reconciliation requests
diff --git a/cmd/thv-operator/controllers/mcpserver_replicas_test.go b/cmd/thv-operator/controllers/mcpserver_replicas_test.go
index 6d3ef53b5f..08d9326e98 100644
--- a/cmd/thv-operator/controllers/mcpserver_replicas_test.go
+++ b/cmd/thv-operator/controllers/mcpserver_replicas_test.go
@@ -979,3 +979,156 @@ func TestUpdateMCPServerStatusExcludesTerminatingPods(t *testing.T) {
 	assert.Equal(t, int32(2), updatedMCPServer.Status.ReadyReplicas,
 		"ReadyReplicas should exclude terminating pods")
 }
+
+func TestRateLimitConfigValidation(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name         string
+		spec         mcpv1alpha1.MCPServerSpec
+		expectStatus metav1.ConditionStatus
+		expectReason string
+	}{
+		{
+			name: "no-rate-limiting",
+			spec: mcpv1alpha1.MCPServerSpec{
+				Image:     "test-image:latest",
+				Transport: "sse",
+				ProxyPort: 8080,
+			},
+			expectStatus: metav1.ConditionTrue,
+			expectReason: mcpv1alpha1.ConditionReasonRateLimitNotApplicable,
+		},
+		{
+			name: "peruser-with-auth",
+			spec: mcpv1alpha1.MCPServerSpec{
+				Image:     "test-image:latest",
+				Transport: "sse",
+				ProxyPort: 8080,
+				SessionStorage: &mcpv1alpha1.SessionStorageConfig{
+					Provider: mcpv1alpha1.SessionStorageProviderRedis,
+					Address:  "redis:6379",
+				},
+				OIDCConfig: &mcpv1alpha1.OIDCConfigRef{Type: "kubernetes"},
+				RateLimiting: &mcpv1alpha1.RateLimitConfig{
+					PerUser: &mcpv1alpha1.RateLimitBucket{
+						MaxTokens:    100,
+						RefillPeriod: metav1.Duration{Duration: time.Minute},
+					},
+				},
+			},
+			expectStatus: metav1.ConditionTrue,
+			expectReason: mcpv1alpha1.ConditionReasonRateLimitConfigValid,
+		},
+		{
+			name: "peruser-without-auth",
+			spec: mcpv1alpha1.MCPServerSpec{
+				Image:     "test-image:latest",
+				Transport: "sse",
+				ProxyPort: 8080,
+				SessionStorage: &mcpv1alpha1.SessionStorageConfig{
+					Provider: mcpv1alpha1.SessionStorageProviderRedis,
+					Address:  "redis:6379",
+				},
+				RateLimiting: &mcpv1alpha1.RateLimitConfig{
+					PerUser: &mcpv1alpha1.RateLimitBucket{
+						MaxTokens:    100,
+						RefillPeriod: metav1.Duration{Duration: time.Minute},
+					},
+				},
+			},
+			expectStatus: metav1.ConditionFalse,
+			expectReason: mcpv1alpha1.ConditionReasonRateLimitPerUserRequiresAuth,
+		},
+		{
+			name: "per-tool-peruser-without-auth",
+			spec: mcpv1alpha1.MCPServerSpec{
+				Image:     "test-image:latest",
+				Transport: "sse",
+				ProxyPort: 8080,
+				SessionStorage: &mcpv1alpha1.SessionStorageConfig{
+					Provider: mcpv1alpha1.SessionStorageProviderRedis,
+					Address:  "redis:6379",
+				},
+				RateLimiting: &mcpv1alpha1.RateLimitConfig{
+					Tools: []mcpv1alpha1.ToolRateLimitConfig{
+						{
+							Name: "search",
+							PerUser: &mcpv1alpha1.RateLimitBucket{
+								MaxTokens:    10,
+								RefillPeriod: metav1.Duration{Duration: time.Minute},
+							},
+						},
+					},
+				},
+			},
+			expectStatus: metav1.ConditionFalse,
+			expectReason: mcpv1alpha1.ConditionReasonRateLimitPerUserRequiresAuth,
+		},
+		{
+			name: "shared-only-no-auth",
+			spec: mcpv1alpha1.MCPServerSpec{
+				Image:     "test-image:latest",
+				Transport: "sse",
+				ProxyPort: 8080,
+				SessionStorage: &mcpv1alpha1.SessionStorageConfig{
+					Provider: mcpv1alpha1.SessionStorageProviderRedis,
+					Address:  "redis:6379",
+				},
+				RateLimiting: &mcpv1alpha1.RateLimitConfig{
+					Shared: &mcpv1alpha1.RateLimitBucket{
+						MaxTokens:    1000,
+						RefillPeriod: metav1.Duration{Duration: time.Minute},
+					},
+				},
+			},
+			expectStatus: metav1.ConditionTrue,
+			expectReason: mcpv1alpha1.ConditionReasonRateLimitConfigValid,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+
+			name := "rl-" + tt.name
+			namespace := testNamespaceDefault
+
+			mcpServer := &mcpv1alpha1.MCPServer{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      name,
+					Namespace: namespace,
+				},
+				Spec: tt.spec,
+			}
+
+			testScheme := createTestScheme()
+			fakeClient := fake.NewClientBuilder().
+				WithScheme(testScheme).
+				WithObjects(mcpServer).
+				WithStatusSubresource(&mcpv1alpha1.MCPServer{}).
+				Build()
+
+			reconciler := newTestMCPServerReconciler(fakeClient, testScheme, kubernetes.PlatformKubernetes)
+
+			_, err := reconciler.Reconcile(t.Context(), ctrl.Request{
+				NamespacedName: types.NamespacedName{Name: name, Namespace: namespace},
+			})
+			require.NoError(t, err)
+
+			updated := &mcpv1alpha1.MCPServer{}
+			err = fakeClient.Get(t.Context(), types.NamespacedName{Name: name, Namespace: namespace}, updated)
+			require.NoError(t, err)
+
+			var found bool
+			for _, cond := range updated.Status.Conditions {
+				if cond.Type == mcpv1alpha1.ConditionRateLimitConfigValid {
+					found = true
+					assert.Equal(t, tt.expectStatus, cond.Status)
+					assert.Equal(t, tt.expectReason, cond.Reason)
+				}
+			}
+			assert.True(t, found, "ConditionRateLimitConfigValid condition should be set")
+		})
+	}
+}
diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml
index 4a7059b1fe..f111475c81 100644
--- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml
+++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml
@@ -502,8 +502,33 @@ spec:
                   RateLimiting defines rate limiting configuration for the MCP server.
                   Requires Redis session storage to be configured for distributed rate limiting.
                 properties:
+                  perUser:
+                    description: |-
+                      PerUser is a token bucket applied independently to each authenticated user
+                      at the server level. Requires authentication to be enabled.
+                      Each unique userID creates Redis keys that expire after 2x refillPeriod.
+                      Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
+                    properties:
+                      maxTokens:
+                        description: |-
+                          MaxTokens is the maximum number of tokens (bucket capacity).
+                          This is also the burst size: the maximum number of requests that can be served
+                          instantaneously before the bucket is depleted.
+                        format: int32
+                        minimum: 1
+                        type: integer
+                      refillPeriod:
+                        description: |-
+                          RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                          The effective refill rate is maxTokens / refillPeriod tokens per second.
+                          Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                        type: string
+                    required:
+                    - maxTokens
+                    - refillPeriod
+                    type: object
                   shared:
-                    description: Shared defines a token bucket shared across all users
+                    description: Shared is a token bucket shared across all users
                       for the entire server.
                     properties:
                       maxTokens:
@@ -530,17 +555,39 @@ spec:
                       Each entry applies additional rate limits to calls targeting a specific tool name.
                       A request must pass both the server-level limit and the per-tool limit.
                     items:
-                      description: ToolRateLimitConfig defines rate limits for a specific
-                        tool.
+                      description: |-
+                        ToolRateLimitConfig defines rate limits for a specific tool.
+                        At least one of shared or perUser must be configured.
                       properties:
                         name:
                           description: Name is the MCP tool name this limit applies
                             to.
                           minLength: 1
                           type: string
+                        perUser:
+                          description: PerUser token bucket configuration for this
+                            tool.
+                          properties:
+                            maxTokens:
+                              description: |-
+                                MaxTokens is the maximum number of tokens (bucket capacity).
+                                This is also the burst size: the maximum number of requests that can be served
+                                instantaneously before the bucket is depleted.
+                              format: int32
+                              minimum: 1
+                              type: integer
+                            refillPeriod:
+                              description: |-
+                                RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                              type: string
+                          required:
+                          - maxTokens
+                          - refillPeriod
+                          type: object
                         shared:
-                          description: Shared defines a token bucket shared across
-                            all users for this specific tool.
+                          description: Shared token bucket for this specific tool.
                           properties:
                             maxTokens:
                               description: |-
@@ -562,17 +609,19 @@ spec:
                           type: object
                       required:
                       - name
-                      - shared
                       type: object
+                      x-kubernetes-validations:
+                      - message: at least one of shared or perUser must be configured
+                        rule: has(self.shared) || has(self.perUser)
                     type: array
                     x-kubernetes-list-map-keys:
                     - name
                     x-kubernetes-list-type: map
                 type: object
                 x-kubernetes-validations:
-                - message: at least one of shared or tools must be configured
-                  rule: has(self.shared) || (has(self.tools) && size(self.tools) >
-                    0)
+                - message: at least one of shared, perUser, or tools must be configured
+                  rule: has(self.shared) || has(self.perUser) || (has(self.tools)
+                    && size(self.tools) > 0)
               replicas:
                 description: |-
                   Replicas is the desired number of proxy runner (thv run) pod replicas.
@@ -965,6 +1014,15 @@ spec:
             - message: rateLimiting requires sessionStorage with provider 'redis'
               rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider
                 == ''redis'')'
+            - message: rateLimiting.perUser requires authentication (oidcConfig, oidcConfigRef,
+                or externalAuthConfigRef)
+              rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) ||
+                has(self.oidcConfig) || has(self.oidcConfigRef) || has(self.externalAuthConfigRef)'
+            - message: per-tool perUser rate limiting requires authentication (oidcConfig,
+                oidcConfigRef, or externalAuthConfigRef)
+              rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t,
+                !has(t.perUser)) || has(self.oidcConfig) || has(self.oidcConfigRef)
+                || has(self.externalAuthConfigRef)'
           status:
             description: MCPServerStatus defines the observed state of MCPServer
             properties:
diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml
index a7232513e7..a00102f26f 100644
--- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml
+++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml
@@ -505,8 +505,33 @@ spec:
                   RateLimiting defines rate limiting configuration for the MCP server.
                   Requires Redis session storage to be configured for distributed rate limiting.
                 properties:
+                  perUser:
+                    description: |-
+                      PerUser is a token bucket applied independently to each authenticated user
+                      at the server level. Requires authentication to be enabled.
+                      Each unique userID creates Redis keys that expire after 2x refillPeriod.
+                      Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
+                    properties:
+                      maxTokens:
+                        description: |-
+                          MaxTokens is the maximum number of tokens (bucket capacity).
+                          This is also the burst size: the maximum number of requests that can be served
+                          instantaneously before the bucket is depleted.
+                        format: int32
+                        minimum: 1
+                        type: integer
+                      refillPeriod:
+                        description: |-
+                          RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                          The effective refill rate is maxTokens / refillPeriod tokens per second.
+                          Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                        type: string
+                    required:
+                    - maxTokens
+                    - refillPeriod
+                    type: object
                   shared:
-                    description: Shared defines a token bucket shared across all users
+                    description: Shared is a token bucket shared across all users
                       for the entire server.
                     properties:
                       maxTokens:
@@ -533,17 +558,39 @@ spec:
                       Each entry applies additional rate limits to calls targeting a specific tool name.
                       A request must pass both the server-level limit and the per-tool limit.
                     items:
-                      description: ToolRateLimitConfig defines rate limits for a specific
-                        tool.
+                      description: |-
+                        ToolRateLimitConfig defines rate limits for a specific tool.
+                        At least one of shared or perUser must be configured.
                       properties:
                         name:
                           description: Name is the MCP tool name this limit applies
                             to.
                           minLength: 1
                           type: string
+                        perUser:
+                          description: PerUser token bucket configuration for this
+                            tool.
+                          properties:
+                            maxTokens:
+                              description: |-
+                                MaxTokens is the maximum number of tokens (bucket capacity).
+                                This is also the burst size: the maximum number of requests that can be served
+                                instantaneously before the bucket is depleted.
+                              format: int32
+                              minimum: 1
+                              type: integer
+                            refillPeriod:
+                              description: |-
+                                RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                              type: string
+                          required:
+                          - maxTokens
+                          - refillPeriod
+                          type: object
                         shared:
-                          description: Shared defines a token bucket shared across
-                            all users for this specific tool.
+                          description: Shared token bucket for this specific tool.
                           properties:
                             maxTokens:
                               description: |-
@@ -565,17 +612,19 @@ spec:
                           type: object
                       required:
                       - name
-                      - shared
                       type: object
+                      x-kubernetes-validations:
+                      - message: at least one of shared or perUser must be configured
+                        rule: has(self.shared) || has(self.perUser)
                     type: array
                     x-kubernetes-list-map-keys:
                     - name
                     x-kubernetes-list-type: map
                 type: object
                 x-kubernetes-validations:
-                - message: at least one of shared or tools must be configured
-                  rule: has(self.shared) || (has(self.tools) && size(self.tools) >
-                    0)
+                - message: at least one of shared, perUser, or tools must be configured
+                  rule: has(self.shared) || has(self.perUser) || (has(self.tools)
+                    && size(self.tools) > 0)
               replicas:
                 description: |-
                   Replicas is the desired number of proxy runner (thv run) pod replicas.
@@ -968,6 +1017,15 @@ spec:
             - message: rateLimiting requires sessionStorage with provider 'redis'
               rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider
                 == ''redis'')'
+            - message: rateLimiting.perUser requires authentication (oidcConfig, oidcConfigRef,
+                or externalAuthConfigRef)
+              rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) ||
+                has(self.oidcConfig) || has(self.oidcConfigRef) || has(self.externalAuthConfigRef)'
+            - message: per-tool perUser rate limiting requires authentication (oidcConfig,
+                oidcConfigRef, or externalAuthConfigRef)
+              rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t,
+                !has(t.perUser)) || has(self.oidcConfig) || has(self.oidcConfigRef)
+                || has(self.externalAuthConfigRef)'
           status:
             description: MCPServerStatus defines the observed state of MCPServer
             properties:
diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md
index b9f5a49f18..26e5a2f47f 100644
--- a/docs/operator/crd-api.md
+++ b/docs/operator/crd-api.md
@@ -2905,7 +2905,8 @@ _Appears in:_
 
 
 
-RateLimitBucket defines a token bucket configuration.
+RateLimitBucket defines a token bucket configuration with a maximum capacity
+and a refill period. Used by both shared (global) and per-user rate limits.
 
 
 
@@ -2924,7 +2925,7 @@ _Appears in:_
 
 
 RateLimitConfig defines rate limiting configuration for an MCP server.
-At least one of shared or tools must be configured.
+At least one of shared, perUser, or tools must be configured.
 
 
 
@@ -2933,7 +2934,8 @@ _Appears in:_
 
 | Field | Description | Default | Validation |
 | --- | --- | --- | --- |
-| `shared` _[api.v1alpha1.RateLimitBucket](#apiv1alpha1ratelimitbucket)_ | Shared defines a token bucket shared across all users for the entire server. |  | Optional: \{\} <br /> |
+| `shared` _[api.v1alpha1.RateLimitBucket](#apiv1alpha1ratelimitbucket)_ | Shared is a token bucket shared across all users for the entire server. |  | Optional: \{\} <br /> |
+| `perUser` _[api.v1alpha1.RateLimitBucket](#apiv1alpha1ratelimitbucket)_ | PerUser is a token bucket applied independently to each authenticated user<br />at the server level. Requires authentication to be enabled.<br />Each unique userID creates Redis keys that expire after 2x refillPeriod.<br />Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. |  | Optional: \{\} <br /> |
 | `tools` _[api.v1alpha1.ToolRateLimitConfig](#apiv1alpha1toolratelimitconfig) array_ | Tools defines per-tool rate limit overrides.<br />Each entry applies additional rate limits to calls targeting a specific tool name.<br />A request must pass both the server-level limit and the per-tool limit. |  | Optional: \{\} <br /> |
 
 
@@ -3418,6 +3420,7 @@ _Appears in:_
 
 
 ToolRateLimitConfig defines rate limits for a specific tool.
+At least one of shared or perUser must be configured.
 
 
 
@@ -3427,7 +3430,8 @@ _Appears in:_
 | Field | Description | Default | Validation |
 | --- | --- | --- | --- |
 | `name` _string_ | Name is the MCP tool name this limit applies to. |  | MinLength: 1 <br />Required: \{\} <br /> |
-| `shared` _[api.v1alpha1.RateLimitBucket](#apiv1alpha1ratelimitbucket)_ | Shared defines a token bucket shared across all users for this specific tool. |  | Required: \{\} <br /> |
+| `shared` _[api.v1alpha1.RateLimitBucket](#apiv1alpha1ratelimitbucket)_ | Shared is a token bucket shared across all users for this specific tool. |  | Optional: \{\} <br /> |
+| `perUser` _[api.v1alpha1.RateLimitBucket](#apiv1alpha1ratelimitbucket)_ | PerUser is a token bucket applied independently to each authenticated user for this specific tool. |  | Optional: \{\} <br /> |
 
 
 #### api.v1alpha1.URLSource
diff --git a/docs/server/docs.go b/docs/server/docs.go
index 2319396919..fdf70f16ec 100644
--- a/docs/server/docs.go
+++ b/docs/server/docs.go
@@ -45,7 +45,7 @@ const docTemplate = `{
                 "type": "object"
             },
             "github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket": {
-                "description": "Shared defines a token bucket shared across all users for this specific tool.\n+kubebuilder:validation:Required",
+                "description": "PerUser token bucket configuration for this tool.\n+optional",
                 "properties": {
                     "maxTokens": {
                         "description": "MaxTokens is the maximum number of tokens (bucket capacity).\nThis is also the burst size: the maximum number of requests that can be served\ninstantaneously before the bucket is depleted.\n+kubebuilder:validation:Required\n+kubebuilder:validation:Minimum=1",
@@ -60,6 +60,9 @@ const docTemplate = `{
             "github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitConfig": {
                 "description": "RateLimitConfig contains the CRD rate limiting configuration.\nWhen set, rate limiting middleware is added to the proxy middleware chain.",
                 "properties": {
+                    "perUser": {
+                        "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket"
+                    },
                     "shared": {
                         "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket"
                     },
@@ -80,6 +83,9 @@ const docTemplate = `{
                         "description": "Name is the MCP tool name this limit applies to.\n+kubebuilder:validation:Required\n+kubebuilder:validation:MinLength=1",
                         "type": "string"
                     },
+                    "perUser": {
+                        "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket"
+                    },
                     "shared": {
                         "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket"
                     }
diff --git a/docs/server/swagger.json b/docs/server/swagger.json
index e7aa68b69e..95e69cd871 100644
--- a/docs/server/swagger.json
+++ b/docs/server/swagger.json
@@ -38,7 +38,7 @@
                 "type": "object"
             },
             "github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket": {
-                "description": "Shared defines a token bucket shared across all users for this specific tool.\n+kubebuilder:validation:Required",
+                "description": "PerUser token bucket configuration for this tool.\n+optional",
                 "properties": {
                     "maxTokens": {
                         "description": "MaxTokens is the maximum number of tokens (bucket capacity).\nThis is also the burst size: the maximum number of requests that can be served\ninstantaneously before the bucket is depleted.\n+kubebuilder:validation:Required\n+kubebuilder:validation:Minimum=1",
@@ -53,6 +53,9 @@
             "github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitConfig": {
                 "description": "RateLimitConfig contains the CRD rate limiting configuration.\nWhen set, rate limiting middleware is added to the proxy middleware chain.",
                 "properties": {
+                    "perUser": {
+                        "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket"
+                    },
                     "shared": {
                         "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket"
                     },
@@ -73,6 +76,9 @@
                         "description": "Name is the MCP tool name this limit applies to.\n+kubebuilder:validation:Required\n+kubebuilder:validation:MinLength=1",
                         "type": "string"
                     },
+                    "perUser": {
+                        "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket"
+                    },
                     "shared": {
                         "$ref": "#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket"
                     }
diff --git a/docs/server/swagger.yaml b/docs/server/swagger.yaml
index e12b491388..6227788b23 100644
--- a/docs/server/swagger.yaml
+++ b/docs/server/swagger.yaml
@@ -33,8 +33,8 @@ components:
       type: object
     github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket:
       description: |-
-        Shared defines a token bucket shared across all users for this specific tool.
-        +kubebuilder:validation:Required
+        PerUser token bucket configuration for this tool.
+        +optional
       properties:
         maxTokens:
           description: |-
@@ -52,6 +52,8 @@ components:
         RateLimitConfig contains the CRD rate limiting configuration.
         When set, rate limiting middleware is added to the proxy middleware chain.
       properties:
+        perUser:
+          $ref: '#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket'
         shared:
           $ref: '#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket'
         tools:
@@ -75,6 +77,8 @@ components:
             +kubebuilder:validation:Required
             +kubebuilder:validation:MinLength=1
           type: string
+        perUser:
+          $ref: '#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket'
         shared:
           $ref: '#/components/schemas/github_com_stacklok_toolhive_cmd_thv-operator_api_v1alpha1.RateLimitBucket'
       type: object
diff --git a/pkg/ratelimit/limiter.go b/pkg/ratelimit/limiter.go
index 6a4415bb02..0ff06f412f 100644
--- a/pkg/ratelimit/limiter.go
+++ b/pkg/ratelimit/limiter.go
@@ -22,7 +22,7 @@ import (
 type Limiter interface {
 	// Allow checks whether a request is permitted.
 	// toolName is the MCP tool being called (empty for non-tool requests).
-	// userID is the authenticated user (reserved for #4550, currently no-op).
+	// userID is the authenticated user (empty for unauthenticated requests).
 	Allow(ctx context.Context, toolName, userID string) (*Decision, error)
 }
 
@@ -54,6 +54,14 @@ func NewLimiter(client redis.Cmdable, namespace, name string, crd *v1alpha1.Rate
 		l.serverBucket = b
 	}
 
+	if crd.PerUser != nil {
+		spec, err := newBucketSpec(namespace, name, crd.PerUser)
+		if err != nil {
+			return nil, fmt.Errorf("perUser bucket: %w", err)
+		}
+		l.perUserSpec = &spec
+	}
+
 	for _, t := range crd.Tools {
 		if t.Shared != nil {
 			b, err := newBucket(namespace, name, "shared:tool:"+t.Name, t.Shared)
@@ -65,24 +73,43 @@ func NewLimiter(client redis.Cmdable, namespace, name string, crd *v1alpha1.Rate
 			}
 			l.toolBuckets[t.Name] = b
 		}
+		if t.PerUser != nil {
+			spec, err := newBucketSpec(namespace, name, t.PerUser)
+			if err != nil {
+				return nil, fmt.Errorf("tool %q perUser bucket: %w", t.Name, err)
+			}
+			if l.perUserTools == nil {
+				l.perUserTools = make(map[string]bucketSpec)
+			}
+			l.perUserTools[t.Name] = spec
+		}
 	}
 
 	return l, nil
 }
 
+// bucketSpec holds deferred bucket parameters for per-user buckets that are
+// created on the fly in Allow() because the userID is not known at construction time.
+type bucketSpec struct {
+	namespace    string
+	serverName   string
+	maxTokens    int32
+	refillPeriod time.Duration
+}
+
 // limiter is the concrete implementation of Limiter.
 type limiter struct {
 	client       redis.Cmdable
-	serverBucket *bucket.TokenBucket            // nil when no global server limit
-	toolBuckets  map[string]*bucket.TokenBucket // tool name -> bucket
+	serverBucket *bucket.TokenBucket            // nil when no shared server limit
+	toolBuckets  map[string]*bucket.TokenBucket // tool name -> shared bucket
+	perUserSpec  *bucketSpec                    // nil when no server-level per-user limit
+	perUserTools map[string]bucketSpec          // tool name -> per-user bucket spec; nil when none
 }
 
 // Allow atomically checks all applicable rate limit buckets for the request.
 // Tokens are only consumed if ALL buckets have sufficient capacity, preventing
-// a rejected per-tool call from draining the server-level budget.
-func (l *limiter) Allow(ctx context.Context, toolName, _ string) (*Decision, error) {
-	// TODO(#4550): per-user rate limiting — currently ignored.
-
+// a rejected per-tool or per-user call from draining other budgets.
+func (l *limiter) Allow(ctx context.Context, toolName, userID string) (*Decision, error) {
 	// Collect applicable buckets in priority order.
 	var buckets []*bucket.TokenBucket
 	if l.serverBucket != nil {
@@ -94,6 +121,36 @@ func (l *limiter) Allow(ctx context.Context, toolName, _ string) (*Decision, err
 		}
 	}
 
+	// Per-user buckets are created on the fly because userID is request-scoped.
+	// bucket.New only allocates a struct — all state lives in Redis, so creating
+	// a new TokenBucket per request is safe (no local state to lose).
+	//
+	// Key prefixes deviate from RFC THV-0057 to prevent cross-type collisions:
+	// RFC uses "user:{userId}:tool:{toolName}" for both scopes, but a userID
+	// containing ":tool:" would collide with the per-tool key. Instead we use
+	// distinct prefixes: "user:" for server-level, "user-tool:" for tool-level.
+	if userID != "" {
+		if l.perUserSpec != nil {
+			s := l.perUserSpec
+			buckets = append(buckets, bucket.New(
+				s.namespace, s.serverName,
+				"user:"+userID,
+				s.maxTokens, s.refillPeriod,
+			))
+		}
+		if toolName != "" && l.perUserTools != nil {
+			if s, ok := l.perUserTools[toolName]; ok {
+				// Key prefix "user-tool:" is distinct from "user:" to prevent
+				// collisions when a userID contains delimiter characters.
+				buckets = append(buckets, bucket.New(
+					s.namespace, s.serverName,
+					"user-tool:"+toolName+":"+userID,
+					s.maxTokens, s.refillPeriod,
+				))
+			}
+		}
+	}
+
 	if len(buckets) == 0 {
 		return &Decision{Allowed: true}, nil
 	}
@@ -119,14 +176,38 @@ func (noopLimiter) Allow(context.Context, string, string) (*Decision, error) {
 	return &Decision{Allowed: true}, nil
 }
 
-// newBucket validates a CRD bucket spec and creates a TokenBucket.
-func newBucket(namespace, serverName, suffix string, b *v1alpha1.RateLimitBucket) (*bucket.TokenBucket, error) {
+// validateBucketCRD checks that a CRD bucket spec has valid parameters.
+func validateBucketCRD(b *v1alpha1.RateLimitBucket) (int32, time.Duration, error) {
 	if b.MaxTokens < 1 {
-		return nil, fmt.Errorf("maxTokens must be >= 1, got %d", b.MaxTokens)
+		return 0, 0, fmt.Errorf("maxTokens must be >= 1, got %d", b.MaxTokens)
 	}
 	d := b.RefillPeriod.Duration
 	if d <= 0 {
-		return nil, fmt.Errorf("refillPeriod must be positive, got %s", d)
+		return 0, 0, fmt.Errorf("refillPeriod must be positive, got %s", d)
+	}
+	return b.MaxTokens, d, nil
+}
+
+// newBucket validates a CRD bucket spec and creates a TokenBucket.
+func newBucket(namespace, serverName, suffix string, b *v1alpha1.RateLimitBucket) (*bucket.TokenBucket, error) {
+	maxTokens, refillPeriod, err := validateBucketCRD(b)
+	if err != nil {
+		return nil, err
+	}
+	return bucket.New(namespace, serverName, suffix, maxTokens, refillPeriod), nil
+}
+
+// newBucketSpec validates a CRD bucket spec and creates a deferred bucketSpec
+// for per-user buckets that are materialized at Allow() time.
+func newBucketSpec(namespace, serverName string, b *v1alpha1.RateLimitBucket) (bucketSpec, error) {
+	maxTokens, refillPeriod, err := validateBucketCRD(b)
+	if err != nil {
+		return bucketSpec{}, err
 	}
-	return bucket.New(namespace, serverName, suffix, b.MaxTokens, d), nil
+	return bucketSpec{
+		namespace:    namespace,
+		serverName:   serverName,
+		maxTokens:    maxTokens,
+		refillPeriod: refillPeriod,
+	}, nil
 }
diff --git a/pkg/ratelimit/limiter_test.go b/pkg/ratelimit/limiter_test.go
index ce68954cdf..a825303d99 100644
--- a/pkg/ratelimit/limiter_test.go
+++ b/pkg/ratelimit/limiter_test.go
@@ -4,7 +4,6 @@
 package ratelimit
 
 import (
-	"context"
 	"testing"
 	"time"
 
@@ -34,7 +33,7 @@ func TestNewLimiter_NilCRDReturnsNoop(t *testing.T) {
 	l, err := NewLimiter(client, "ns", "srv", nil)
 	require.NoError(t, err)
 
-	d, err := l.Allow(context.Background(), "anything", "user-a")
+	d, err := l.Allow(t.Context(), "anything", "user-a")
 	require.NoError(t, err)
 	assert.True(t, d.Allowed)
 }
@@ -74,7 +73,7 @@ func TestNewLimiter_ZeroDuration(t *testing.T) {
 func TestLimiter_ServerGlobalExhausted(t *testing.T) {
 	t.Parallel()
 	client, _ := newTestClient(t)
-	ctx := context.Background()
+	ctx := t.Context()
 
 	crd := &v1alpha1.RateLimitConfig{
 		Shared: &v1alpha1.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}},
@@ -97,7 +96,7 @@ func TestLimiter_ServerGlobalExhausted(t *testing.T) {
 func TestLimiter_PerToolIsolation(t *testing.T) {
 	t.Parallel()
 	client, _ := newTestClient(t)
-	ctx := context.Background()
+	ctx := t.Context()
 
 	crd := &v1alpha1.RateLimitConfig{
 		Tools: []v1alpha1.ToolRateLimitConfig{
@@ -127,7 +126,7 @@ func TestLimiter_PerToolIsolation(t *testing.T) {
 func TestLimiter_ServerAndPerToolBothRequired(t *testing.T) {
 	t.Parallel()
 	client, _ := newTestClient(t)
-	ctx := context.Background()
+	ctx := t.Context()
 
 	crd := &v1alpha1.RateLimitConfig{
 		Shared: &v1alpha1.RateLimitBucket{MaxTokens: 5, RefillPeriod: metav1.Duration{Duration: time.Minute}},
@@ -170,32 +169,190 @@ func TestLimiter_RedisUnavailableReturnsError(t *testing.T) {
 
 	mr.Close()
 
-	_, err = l.Allow(context.Background(), "", "")
+	_, err = l.Allow(t.Context(), "", "")
 	assert.Error(t, err)
 }
 
-func TestLimiter_UserIDNoOp(t *testing.T) {
+func TestLimiter_PerUserServerLevel(t *testing.T) {
 	t.Parallel()
 	client, _ := newTestClient(t)
-	ctx := context.Background()
+	ctx := t.Context()
 
 	crd := &v1alpha1.RateLimitConfig{
-		Shared: &v1alpha1.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+		PerUser: &v1alpha1.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}},
 	}
 	l, err := NewLimiter(client, "ns", "srv", crd)
 	require.NoError(t, err)
 
-	// Different users share the same global bucket (per-user not yet implemented).
+	// User A exhausts their 2 tokens.
+	for range 2 {
+		d, err := l.Allow(ctx, "", "user-a")
+		require.NoError(t, err)
+		require.True(t, d.Allowed)
+	}
 	d, err := l.Allow(ctx, "", "user-a")
 	require.NoError(t, err)
-	require.True(t, d.Allowed)
+	assert.False(t, d.Allowed)
+	assert.Greater(t, d.RetryAfter, time.Duration(0))
 
+	// User B is independent — still has full budget.
 	d, err = l.Allow(ctx, "", "user-b")
 	require.NoError(t, err)
+	assert.True(t, d.Allowed)
+}
+
+func TestLimiter_PerToolPerUserIsolation(t *testing.T) {
+	t.Parallel()
+	client, _ := newTestClient(t)
+	ctx := t.Context()
+
+	crd := &v1alpha1.RateLimitConfig{
+		Tools: []v1alpha1.ToolRateLimitConfig{
+			{
+				Name:    "search",
+				PerUser: &v1alpha1.RateLimitBucket{MaxTokens: 1, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+			},
+		},
+	}
+	l, err := NewLimiter(client, "ns", "srv", crd)
+	require.NoError(t, err)
+
+	// User A uses their 1 token for "search".
+	d, err := l.Allow(ctx, "search", "user-a")
+	require.NoError(t, err)
 	require.True(t, d.Allowed)
 
-	// Third call from any user is rejected.
-	d, err = l.Allow(ctx, "", "user-c")
+	// User A rejected for "search".
+	d, err = l.Allow(ctx, "search", "user-a")
+	require.NoError(t, err)
+	assert.False(t, d.Allowed)
+
+	// User B can still use "search".
+	d, err = l.Allow(ctx, "search", "user-b")
+	require.NoError(t, err)
+	assert.True(t, d.Allowed)
+
+	// User A can use a different tool (no limit configured for "list").
+	d, err = l.Allow(ctx, "list", "user-a")
+	require.NoError(t, err)
+	assert.True(t, d.Allowed)
+}
+
+func TestLimiter_ServerAndToolPerUserBothRequired(t *testing.T) {
+	t.Parallel()
+	client, _ := newTestClient(t)
+	ctx := t.Context()
+
+	crd := &v1alpha1.RateLimitConfig{
+		PerUser: &v1alpha1.RateLimitBucket{MaxTokens: 5, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+		Tools: []v1alpha1.ToolRateLimitConfig{
+			{
+				Name:    "search",
+				PerUser: &v1alpha1.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+			},
+		},
+	}
+	l, err := NewLimiter(client, "ns", "srv", crd)
+	require.NoError(t, err)
+
+	// User A makes 2 "search" calls — both pass.
+	for range 2 {
+		d, err := l.Allow(ctx, "search", "user-a")
+		require.NoError(t, err)
+		require.True(t, d.Allowed)
+	}
+
+	// Third "search" rejected by per-tool per-user limit (server per-user still has 3).
+	d, err := l.Allow(ctx, "search", "user-a")
+	require.NoError(t, err)
+	assert.False(t, d.Allowed)
+
+	// "list" (no per-tool limit) still allowed for user A.
+	d, err = l.Allow(ctx, "list", "user-a")
+	require.NoError(t, err)
+	assert.True(t, d.Allowed)
+}
+
+func TestLimiter_PerUserRejectionDoesNotDrainShared(t *testing.T) {
+	t.Parallel()
+	client, _ := newTestClient(t)
+	ctx := t.Context()
+
+	// Shared: 3 tokens, PerUser: 1 token.
+	// A noisy user hitting their per-user limit must not consume shared tokens.
+	crd := &v1alpha1.RateLimitConfig{
+		Shared:  &v1alpha1.RateLimitBucket{MaxTokens: 3, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+		PerUser: &v1alpha1.RateLimitBucket{MaxTokens: 1, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+	}
+	l, err := NewLimiter(client, "ns", "srv", crd)
+	require.NoError(t, err)
+
+	// User A: first call passes (shared=2, user-a=0).
+	d, err := l.Allow(ctx, "", "user-a")
+	require.NoError(t, err)
+	require.True(t, d.Allowed)
+
+	// User A: second call rejected by per-user limit. Shared must NOT be drained.
+	d, err = l.Allow(ctx, "", "user-a")
 	require.NoError(t, err)
 	assert.False(t, d.Allowed)
+
+	// Users B and C should each succeed (shared still has 2 tokens).
+	d, err = l.Allow(ctx, "", "user-b")
+	require.NoError(t, err)
+	assert.True(t, d.Allowed, "user-b should not be blocked — shared bucket should not have been drained by user-a's rejected request")
+
+	d, err = l.Allow(ctx, "", "user-c")
+	require.NoError(t, err)
+	assert.True(t, d.Allowed, "user-c should not be blocked — shared bucket should still have tokens")
+
+	// Now shared is exhausted (3 consumed: a, b, c). User D is rejected by shared.
+	d, err = l.Allow(ctx, "", "user-d")
+	require.NoError(t, err)
+	assert.False(t, d.Allowed, "user-d should be rejected — shared bucket is now exhausted")
+}
+
+func TestLimiter_RedisUnavailablePerUser(t *testing.T) {
+	t.Parallel()
+	client, mr := newTestClient(t)
+
+	crd := &v1alpha1.RateLimitConfig{
+		PerUser: &v1alpha1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+	}
+	l, err := NewLimiter(client, "ns", "srv", crd)
+	require.NoError(t, err)
+
+	mr.Close()
+
+	_, err = l.Allow(t.Context(), "", "user-a")
+	assert.Error(t, err)
+}
+
+func TestNewLimiter_PerUserZeroMaxTokens(t *testing.T) {
+	t.Parallel()
+	client, _ := newTestClient(t)
+
+	crd := &v1alpha1.RateLimitConfig{
+		PerUser: &v1alpha1.RateLimitBucket{MaxTokens: 0, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+	}
+	_, err := NewLimiter(client, "ns", "srv", crd)
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), "perUser bucket: maxTokens must be >= 1")
+}
+
+func TestNewLimiter_ToolPerUserZeroDuration(t *testing.T) {
+	t.Parallel()
+	client, _ := newTestClient(t)
+
+	crd := &v1alpha1.RateLimitConfig{
+		Tools: []v1alpha1.ToolRateLimitConfig{
+			{
+				Name:    "search",
+				PerUser: &v1alpha1.RateLimitBucket{MaxTokens: 5, RefillPeriod: metav1.Duration{Duration: 0}},
+			},
+		},
+	}
+	_, err := NewLimiter(client, "ns", "srv", crd)
+	assert.Error(t, err)
+	assert.Contains(t, err.Error(), `tool "search" perUser bucket: refillPeriod must be positive`)
 }