apache · bharos · Apr 4, 2026
diff --git a/api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java
@@ -22,13 +22,15 @@
 
 import java.nio.ByteBuffer;
 import java.util.Collection;
+import java.util.Comparator;
 import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
 import org.apache.iceberg.ContentFile;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.expressions.ExpressionVisitors.BoundExpressionVisitor;
+import org.apache.iceberg.types.Comparators;
 import org.apache.iceberg.types.Conversions;
 import org.apache.iceberg.types.Types.StructType;
 import org.apache.iceberg.util.NaNUtil;
@@ -462,6 +464,43 @@ public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
 
     @Override
     public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
+      // Strict evaluation: ROWS_MUST_MATCH is returned only when the column's lower and upper
+      // bounds both begin with the prefix; every other path ends in ROWS_MIGHT_NOT_MATCH.
+      int fieldId = ref.fieldId();
+      if (isNestedColumn(fieldId) || canContainNulls(fieldId)) {
+        // nested columns and columns that may contain nulls are never reported as matching
+        return ROWS_MIGHT_NOT_MATCH;
+      }
+
+      String prefix = (String) lit.value();
+
+      // the prefix test needs both a lower and an upper bound for this field
+      boolean hasBothBounds =
+          lowerBounds != null
+              && lowerBounds.containsKey(fieldId)
+              && upperBounds != null
+              && upperBounds.containsKey(fieldId);
+      if (hasBothBounds) {
+        CharSequence lower = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(fieldId));
+        CharSequence upper = Conversions.fromByteBuffer(ref.type(), upperBounds.get(fieldId));
+        Comparator<CharSequence> cmp = Comparators.charSequences();
+        int prefixLength = prefix.length();
+
+        // a bound shorter than the prefix cannot start with it, so compare lengths before slicing
+        boolean lowerHasPrefix =
+            lower.length() >= prefixLength
+                && cmp.compare(lower.subSequence(0, prefixLength), prefix) == 0;
+        boolean bothBoundsHavePrefix =
+            lowerHasPrefix
+                && upper.length() >= prefixLength
+                && cmp.compare(upper.subSequence(0, prefixLength), prefix) == 0;
+
+        if (bothBoundsHavePrefix) {
+          // both bounds start with the prefix, so all rows must start with the prefix
+          return ROWS_MUST_MATCH;
+        }
+      }
+
       return ROWS_MIGHT_NOT_MATCH;
     }
 

diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java b/api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java
@@ -33,6 +33,7 @@
 import static org.apache.iceberg.expressions.Expressions.notNaN;
 import static org.apache.iceberg.expressions.Expressions.notNull;
 import static org.apache.iceberg.expressions.Expressions.or;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
 import static org.apache.iceberg.types.Conversions.toByteBuffer;
 import static org.apache.iceberg.types.Types.NestedField.optional;
 import static org.apache.iceberg.types.Types.NestedField.required;
@@ -684,4 +685,114 @@ SCHEMA, lessThanOrEqual("struct.nested_col_with_stats", INT_MAX_VALUE))
         new StrictMetricsEvaluator(SCHEMA, notNull("struct.nested_col_with_stats")).eval(FILE);
     assertThat(shouldRead).as("notNull nested column should not match").isFalse();
   }
+
+  // Data file for the startsWith tests with a narrow string range. Field id 3 (queried by the
+  // tests below through the column named "required"; SCHEMA is declared outside this hunk) has
+  // 50 values, an empty null-count map, and string bounds ["abc", "abd"].
+  private static final DataFile STRING_FILE =
+      new TestDataFile(
+          // presumably file path, partition tuple and record count - confirm against TestDataFile
+          "string_file.avro",
+          Row.of(),
+          50,
+          // any value counts, including nulls: field 3 -> 50
+          ImmutableMap.of(3, 50L),
+          // null value counts: empty map, no null count is recorded for any field
+          ImmutableMap.of(),
+          // nan value counts: not provided
+          null,
+          // lower bounds: field 3 -> "abc"
+          ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")),
+          // upper bounds: field 3 -> "abd"
+          ImmutableMap.of(3, toByteBuffer(StringType.get(), "abd")));
+
+  // Data file for the startsWith tests with a wider string range. Field id 3 (queried by the
+  // tests below through the column named "required"; SCHEMA is declared outside this hunk) has
+  // 50 values, an empty null-count map, and string bounds ["aa", "dC"], so the two bounds do
+  // not share a first character.
+  private static final DataFile STRING_FILE_2 =
+      new TestDataFile(
+          // presumably file path, partition tuple and record count - confirm against TestDataFile
+          "string_file_2.avro",
+          Row.of(),
+          50,
+          // any value counts, including nulls: field 3 -> 50
+          ImmutableMap.of(3, 50L),
+          // null value counts: empty map, no null count is recorded for any field
+          ImmutableMap.of(),
+          // nan value counts: not provided
+          null,
+          // lower bounds: field 3 -> "aa"
+          ImmutableMap.of(3, toByteBuffer(StringType.get(), "aa")),
+          // upper bounds: field 3 -> "dC"
+          ImmutableMap.of(3, toByteBuffer(StringType.get(), "dC")));
+
+  @Test
+  void testStartsWithBothBoundsMatchPrefix() {
+    // STRING_FILE's bounds are "abc" and "abd"; both begin with "ab"
+    StrictMetricsEvaluator evaluator =
+        new StrictMetricsEvaluator(SCHEMA, startsWith("required", "ab"));
+    assertThat(evaluator.eval(STRING_FILE))
+        .as("Should match: both bounds start with the prefix")
+        .isTrue();
+  }
+
+  @Test
+  void testStartsWithSingleCharPrefixBothBoundsMatch() {
+    // STRING_FILE's bounds are "abc" and "abd"; both begin with the one-character prefix "a"
+    StrictMetricsEvaluator evaluator =
+        new StrictMetricsEvaluator(SCHEMA, startsWith("required", "a"));
+    assertThat(evaluator.eval(STRING_FILE))
+        .as("Should match: both bounds start with the single char prefix")
+        .isTrue();
+  }
+
+  @Test
+  void testStartsWithOnlyLowerBoundMatchesPrefix() {
+    // the lower bound "abc" starts with "abc" but the upper bound "abd" does not
+    StrictMetricsEvaluator evaluator =
+        new StrictMetricsEvaluator(SCHEMA, startsWith("required", "abc"));
+    assertThat(evaluator.eval(STRING_FILE))
+        .as("Should not match: upper bound does not start with the prefix")
+        .isFalse();
+  }
+
+  @Test
+  void testStartsWithBoundsDoNotMatchPrefix() {
+    // neither "abc" nor "abd" starts with "zzz"
+    StrictMetricsEvaluator evaluator =
+        new StrictMetricsEvaluator(SCHEMA, startsWith("required", "zzz"));
+    assertThat(evaluator.eval(STRING_FILE))
+        .as("Should not match: no bounds start with the prefix")
+        .isFalse();
+  }
+
+  @Test
+  void testStartsWithWiderRange() {
+    // STRING_FILE_2's bounds are "aa" and "dC": only the lower bound starts with "a"
+    StrictMetricsEvaluator lowerOnlyEvaluator =
+        new StrictMetricsEvaluator(SCHEMA, startsWith("required", "a"));
+    assertThat(lowerOnlyEvaluator.eval(STRING_FILE_2))
+        .as("Should not match: upper bound does not start with the prefix")
+        .isFalse();
+
+    // neither "aa" nor "dC" starts with "e"
+    StrictMetricsEvaluator neitherBoundEvaluator =
+        new StrictMetricsEvaluator(SCHEMA, startsWith("required", "e"));
+    assertThat(neitherBoundEvaluator.eval(STRING_FILE_2))
+        .as("Should not match: no bounds start with the prefix")
+        .isFalse();
+  }
+
+  @Test
+  void testStartsWithNoStats() {
+    // FILE is declared outside this hunk; per the assertion message it carries no bounds for
+    // the "required" column - confirm against its definition
+    StrictMetricsEvaluator evaluator =
+        new StrictMetricsEvaluator(SCHEMA, startsWith("required", "a"));
+    assertThat(evaluator.eval(FILE))
+        .as("Should not match: no bounds available for column")
+        .isFalse();
+  }
+
+  @Test
+  void testStartsWithAllNulls() {
+    // FILE is declared outside this hunk; its "all_nulls" column is presumably entirely null,
+    // as the column name and assertion message suggest - confirm against its definition
+    StrictMetricsEvaluator evaluator =
+        new StrictMetricsEvaluator(SCHEMA, startsWith("all_nulls", "a"));
+    assertThat(evaluator.eval(FILE))
+        .as("Should not match: all null values do not satisfy startsWith")
+        .isFalse();
+  }
+
+  @Test
+  void testStartsWithSomeNulls() {
+    // FILE_2 is declared outside this hunk; its "some_nulls" column presumably holds a mix of
+    // null and non-null values - confirm against its definition
+    StrictMetricsEvaluator evaluator =
+        new StrictMetricsEvaluator(SCHEMA, startsWith("some_nulls", "b"));
+    assertThat(evaluator.eval(FILE_2))
+        .as("Should not match: some nulls means not all rows can satisfy startsWith")
+        .isFalse();
+  }
+
+  @Test
+  void testStartsWithPrefixLongerThanBounds() {
+    // "abcdef" has six characters while the bounds "abc" and "abd" have only three
+    StrictMetricsEvaluator evaluator =
+        new StrictMetricsEvaluator(SCHEMA, startsWith("required", "abcdef"));
+    assertThat(evaluator.eval(STRING_FILE))
+        .as("Should not match: prefix is longer than the bounds")
+        .isFalse();
+  }
 }