Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions deployments/charts/quick-start/templates/mek-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,13 @@ spec:
exit 1
fi

# Generate a unique kid for this MEK to make key material mismatches
# detectable. A static kid (e.g. "key1") causes get_mek() to silently
# return the wrong key after MEK regeneration. See NVIDIA/OSMO#731.
KID="key-$(openssl rand -hex 8)"

# Create JWK JSON structure and encode it
JWK_JSON='{"k":"'$RANDOM_KEY'","kid":"key1","kty":"oct"}'
JWK_JSON='{"k":"'$RANDOM_KEY'","kid":"'$KID'","kty":"oct"}'
ENCODED_JWK=$(echo -n "$JWK_JSON" | base64 | tr -d '\n')

# Get current timestamp
Expand All @@ -70,9 +75,9 @@ spec:
data:
mek.yaml: |
# MEK generated $TIMESTAMP
currentMek: key1
currentMek: $KID
meks:
key1: $ENCODED_JWK
$KID: $ENCODED_JWK
EOF

echo "Generated MEK ConfigMap"
Expand Down
27 changes: 17 additions & 10 deletions deployments/scripts/deploy-k8s.sh
Original file line number Diff line number Diff line change
Expand Up @@ -270,24 +270,31 @@ create_secrets() {
$RUN_KUBECTL "delete secret redis-secret --namespace $OSMO_NAMESPACE --ignore-not-found=true"
$RUN_KUBECTL "create secret generic redis-secret --from-literal=redis-password=$REDIS_PASSWORD --namespace $OSMO_NAMESPACE"

# Generate and create MEK
log_info "Generating Master Encryption Key (MEK)..."
local random_key=$(openssl rand -base64 32 | tr -d '\n')
local jwk_json="{\"k\":\"$random_key\",\"kid\":\"key1\",\"kty\":\"oct\"}"
local encoded_jwk=$(echo -n "$jwk_json" | base64 | tr -d '\n')

local mek_manifest="apiVersion: v1
# Generate and create MEK (skip if already exists to avoid key material mismatch)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Take a look at deploy_service.rst to see if there are updates to the commands there

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated both docs/deployment_guide/getting_started/deploy_service.rst and docs/deployment_guide/appendix/deploy_minimal.rst to match the deploy script behavior:

  • Replaced all hardcoded "kid":"key1" with dynamic KID="key-$(openssl rand -hex 8)" generation
  • Updated ConfigMap YAML examples to use $KID variable
  • Added a .. note:: explaining why unique key IDs matter (detecting key material mismatches)
  • Added a .. tip:: warning not to re-create the MEK ConfigMap if it already exists

See commit ebd427b.

if $RUN_KUBECTL "get configmap mek-config -n $OSMO_NAMESPACE" >/dev/null 2>&1; then
log_info "MEK ConfigMap already exists, skipping generation"
else
log_info "Generating Master Encryption Key (MEK)..."
local random_key=$(openssl rand -base64 32 | tr -d '\n')
# Use a unique kid per generation to make key material mismatches
# detectable. See https://github.com/NVIDIA/OSMO/issues/731
local kid="key-$(openssl rand -hex 8)"
local jwk_json="{\"k\":\"$random_key\",\"kid\":\"$kid\",\"kty\":\"oct\"}"
local encoded_jwk=$(echo -n "$jwk_json" | base64 | tr -d '\n')

local mek_manifest="apiVersion: v1
kind: ConfigMap
metadata:
name: mek-config
namespace: $OSMO_NAMESPACE
data:
mek.yaml: |
currentMek: key1
currentMek: $kid
meks:
key1: $encoded_jwk"
$kid: $encoded_jwk"

$RUN_KUBECTL_APPLY_STDIN "$mek_manifest"
$RUN_KUBECTL_APPLY_STDIN "$mek_manifest"
fi

log_success "Secrets created"
}
Expand Down
7 changes: 4 additions & 3 deletions run/start_service_kind.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,10 @@ def _generate_mek() -> None:

random_key = base64.b64encode(secrets.token_bytes(32)).decode('utf-8')

kid = f'key-{secrets.token_hex(8)}'
jwk_json = {
'k': random_key,
'kid': 'key1',
'kid': kid,
'kty': 'oct'
}

Expand All @@ -149,9 +150,9 @@ def _generate_mek() -> None:
data:
mek.yaml: |
# MEK generated {time.strftime('%Y-%m-%d %H:%M:%S')}
currentMek: key1
currentMek: {kid}
meks:
key1: {encoded_jwk}
{kid}: {encoded_jwk}
"""

with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as temp_file:
Expand Down
39 changes: 38 additions & 1 deletion src/utils/connectors/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,19 @@ def get_dataset_configs(self) -> 'DatasetConfig':
def get_method(self) -> Optional[Literal['dev']]:
return self.config.method

@staticmethod
def _is_jwe_compact(value: str) -> bool:
"""Check if a string looks like a JWE compact serialization.

JWE compact format has 5 base64url segments separated by dots:
header.encryptedKey.iv.ciphertext.tag
The header is base64url-encoded JSON starting with '{"alg":' which
encodes to 'eyJ'.

This distinguishes JWE from JWS/JWT (3 dots) and plain JSON (0 dots).
"""
return isinstance(value, str) and value.startswith("eyJ") and value.count('.') == 4

def decrypt_credential(self, db_row) -> Dict:
result = {}
payload = PostgresConnector.decode_hstore(db_row.payload)
Expand All @@ -622,6 +635,14 @@ def decrypt_credential(self, db_row) -> Dict:
self.generate_update_secret_func(cmd, cmd_args))
result[key] = decrypted.value
except (JWException, osmo_errors.OSMONotFoundError):
if self._is_jwe_compact(value):
raise osmo_errors.OSMOServerError(
f"Cannot decrypt credential key '{key}' for user "
f"'{db_row.user_name}' with current MEK: key material "
f"mismatch. The MEK ConfigMap was likely recreated with "
f"new key material while encrypted data remains in the "
f"database. See https://github.com/NVIDIA/OSMO/issues/731"
)
result[key] = value
encrypted = self.secret_manager.encrypt(value, db_row.user_name)
cmd = (
Expand Down Expand Up @@ -2695,7 +2716,23 @@ def _decrypt(result_data: Any,
new_encrypted = new_encrypted_list[0]
return decrypted.value, new_encrypted
except (JWException, osmo_errors.OSMONotFoundError):
# Encrypt the plain text secret
if PostgresConnector._is_jwe_compact(secret):
# Value is already JWE-encrypted but cannot be decrypted
# with the current MEK. This happens when the MEK ConfigMap
# is regenerated with new key material. Raise rather than
# wrapping in another JWE layer (which causes exponential
# config growth) or returning ciphertext to application code.
# See https://github.com/NVIDIA/OSMO/issues/731
raise osmo_errors.OSMOServerError(
f"Cannot decrypt config key '{top_level_key}' with "
f"current MEK: key material mismatch. The MEK ConfigMap "
f"was likely recreated with new key material while "
f"encrypted data remains in the database. To recover, "
f"reset the affected config values to plaintext in the "
f"database and restart the service. "
f"See https://github.com/NVIDIA/OSMO/issues/731"
)
# Genuinely unencrypted plaintext — encrypt it
encrypted = postgres.secret_manager.encrypt(secret, '')
encrypt_keys.add(top_level_key)
return secret, encrypted.value
Expand Down