From 3eeb3ba288444c99ccbeda83864ece313c24d88c Mon Sep 17 00:00:00 2001
From: Amit Portnoy <1131991+amitport@users.noreply.github.com>
Date: Sat, 1 Mar 2025 08:19:57 +0200
Subject: [PATCH] max_seq_length should not be larger than any options

when loading an auto-model, max_seq_length is read directly from
huggingface and it cannot be overwritten easily.
---
 sentence_transformers/models/Transformer.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/sentence_transformers/models/Transformer.py b/sentence_transformers/models/Transformer.py
index a95f12d7c..65c59f119 100644
--- a/sentence_transformers/models/Transformer.py
+++ b/sentence_transformers/models/Transformer.py
@@ -88,16 +88,18 @@ def __init__(
             **tokenizer_args,
         )
 
-        # No max_seq_length set. Try to infer from model
-        if max_seq_length is None:
-            if (
-                hasattr(self.auto_model, "config")
-                and hasattr(self.auto_model.config, "max_position_embeddings")
-                and hasattr(self.tokenizer, "model_max_length")
-            ):
-                max_seq_length = min(self.auto_model.config.max_position_embeddings, self.tokenizer.model_max_length)
-
-        self.max_seq_length = max_seq_length
+        max_seq_options = []
+        if max_seq_length is not None:
+            max_seq_options.append(max_seq_length)
+        if (
+            hasattr(self.auto_model, "config")
+            and hasattr(self.auto_model.config, "max_position_embeddings")
+        ):
+            max_seq_options.append(self.auto_model.config.max_position_embeddings)
+        if hasattr(self.tokenizer, "model_max_length"):
+            max_seq_options.append(self.tokenizer.model_max_length)
+
+        self.max_seq_length = min(max_seq_options)
 
         if tokenizer_name_or_path is not None:
             self.auto_model.config.tokenizer_class = self.tokenizer.__class__.__name__