facebookincubator · simonhollis · Mar 18, 2026 · Mar 18, 2026 · simonmar · Mar 19, 2026
diff --git a/glean/config/server/server_config.thrift b/glean/config/server/server_config.thrift
@@ -178,6 +178,13 @@ union SchemaLocation {
   5: string indexconfig;
 }
 
+// ACL filtering policy for query-time enforcement
+struct ACLPolicy {
+  // List of repository names where ACL filtering is enabled.
+  // Empty list = disabled for all repos.
+  1: list<RepoName> enabled_repos = [];
+}
+
 // Configeration for Glean Servers
 struct Config {
   1: DatabaseRetentionPolicy retention;
@@ -338,6 +345,12 @@ struct Config {
   // Default storage backend for newly created databases. Can be overriden
   // by command-line options. See also db_create_version.
   41: optional string db_create_storage;
+
+  // ACL filtering configuration. When enabled, query results are filtered
+  // based on user's group membership. Requires both JustKnobs
+  // (code_indexing/glean/check_acls) to be enabled AND the repo to be
+  // in the enabled_repos list.
+  42: optional ACLPolicy acl_policy;
 }
 
 // The following were automatically generated and may benefit from renaming.

diff --git a/glean/if/glean.thrift b/glean/if/glean.thrift
@@ -208,6 +208,13 @@ struct Batch {
 
   // The schema ID, which must match the schema ID of the DB
   7: optional SchemaId schema_id;
+
+  // ACL configuration for this batch.
+  // Maps file/directory paths to lists of ACL group ID strings.
+  // Each path maps to one or more group IDs (e.g., {"src/alpha": ["1"],
+  // "src/bravo": ["2", "3"]}).
+  // Must be provided if ACL is enabled for the database.
+  9: optional map<string, list<string>> (hs.type = "HashMap") acl_config;
 }
 
 struct Subst {
@@ -260,6 +267,14 @@ exception DatabaseNotIncomplete {
   1: DatabaseStatus status;
 }
 
+// Conflict when merging ACL configurations
+// Thrown when a batch provides an ACL mapping that conflicts with existing DB config
+exception ACLConfigConflict {
+  1: string message;
+  2: string conflicting_key;
+  3: string existing_value;
+  4: string new_value;
+}
 exception UnknownSchemaId {
   1: SchemaId schema_id;
 }
@@ -780,6 +795,7 @@ struct UserQueryResults {
 
   9: optional string type;
   // The inferred type of the query
+
 }
 
 // struct versions of exception types, needed because the
@@ -830,6 +846,9 @@ struct UserQueryClientInfo {
   // User making the query
   3: string application;
   // Name of program making the query.
+  4: optional list<string> acl_group_names;
+  // ACL group names for query-time filtering.
+  // Names are resolved to IDs server-side using the DB's name-to-ID mapping.
 }
 
 struct ListDatabases {
@@ -866,6 +885,13 @@ struct SendJsonBatch {
   // passed to finishBatch to check that the write has completed and
   // obtain the substitution.
   3: bool remember = false;
+
+  // ACL configuration for this batch.
+  // Maps file/directory paths to lists of ACL group ID strings.
+  // Each path maps to one or more group IDs (e.g., {"src/alpha": ["1"],
+  // "src/bravo": ["2", "3"]}).
+  // Must be provided if ACL is enabled for the database.
+  4: optional map<string, list<string>> (hs.type = "HashMap") acl_config;
 }
 
 struct SendJsonBatchResponse {

diff --git a/glean/rts/ownership/acl.cpp b/glean/rts/ownership/acl.cpp
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "glean/rts/ownership/acl.h"
+#include "glean/rts/timer.h"
+
+#include <folly/container/F14Map.h>
+#include <set>
+
+namespace facebook {
+namespace glean {
+namespace rts {
+
+namespace {
+
+// Create an OR-set from a list of UsetIds and add it to the Usets container.
+Uset* makeOrSet(Usets& usets, const std::vector<UsetId>& ids) {
+  CHECK(!ids.empty());
+  std::set<uint32_t> idSet(ids.begin(), ids.end());
+  auto entry = std::make_unique<Uset>(SetU32::from(idSet), Or, 1);
+  return usets.add(std::move(entry));
+}
+
+// Create an AND-set from a list of Uset pointers.
+// Each Uset represents one OR-level in the CNF.
+Uset* makeAndSet(Usets& usets, const std::vector<Uset*>& orSets) {
+  CHECK(!orSets.empty());
+  if (orSets.size() == 1) {
+    return orSets[0];
+  }
+  std::set<uint32_t> ids;
+  for (auto* orSet : orSets) {
+    usets.promote(orSet);
+    ids.insert(orSet->id);
+  }
+  auto entry = std::make_unique<Uset>(SetU32::from(ids), And, 1);
+  return usets.add(std::move(entry));
+}
+
+} // namespace
+
+void augmentOwnershipWithACL(
+    ComputedOwnership& ownership,
+    const std::vector<UnitACLAssignment>& assignments) {
+  if (assignments.empty()) {
+    return;
+  }
+
+  auto t = makeAutoTimer("augmentOwnershipWithACL");
+  auto& usets = ownership.sets_;
+  auto& facts = ownership.facts_;
+  const auto firstSetId = usets.getFirstId();
+
+  // Step 1: Build UnitId → ACL CNF Uset mapping
+  folly::F14FastMap<UnitId, Uset*> unitACLMap;
+  unitACLMap.reserve(assignments.size());
+
+  for (const auto& assignment : assignments) {
+    if (assignment.levels.empty()) {
+      continue;
+    }
+
+    std::vector<Uset*> orSets;
+    orSets.reserve(assignment.levels.size());
+    for (const auto& levelGroups : assignment.levels) {
+      if (levelGroups.empty()) {
+        continue;
+      }
+      auto* orSet = makeOrSet(usets, levelGroups);
+      orSets.push_back(orSet);
+    }
+
+    if (orSets.empty()) {
+      continue;
+    }
+
+    Uset* cnf = makeAndSet(usets, orSets);
+    unitACLMap[assignment.unitId] = cnf;
+  }
+
+  if (unitACLMap.empty()) {
+    VLOG(1) << "augmentOwnershipWithACL: no unit ACL assignments, skipping";
+    return;
+  }
+
+  VLOG(1) << "augmentOwnershipWithACL: " << unitACLMap.size()
+          << " units with ACL assignments";
+
+  // Promote all ACL CNF Usets so they have IDs.
+  for (auto& [unitId, cnf] : unitACLMap) {
+    usets.promote(cnf);
+  }
+
+  // Step 2: Build UsetId → Uset* index for resolving promoted sets.
+  //
+  // After computeOwnership, ALL fact owners are promoted Usets with
+  // IDs >= firstSetId — even single-unit facts get wrapped in an OR-set
+  // and promoted. The old code checked (ownerUsetId < firstSetId) which
+  // was NEVER true, so no facts were ever augmented. We must resolve
+  // promoted sets to their leaf UnitIds to find ACL matches.
+  folly::F14FastMap<UsetId, Uset*> idToUset;
+  usets.foreach([&](Uset* entry) {
+    if (entry->promoted()) {
+      idToUset[entry->id] = entry;
+    }
+  });
+
+  // Step 3: Walk facts_ and augment ownership.
+  // facts_ is a vector of (Id, UsetId) interval boundaries.
+  size_t augmented = 0;
+
+  // Cache: memoize owner UsetId → augmented UsetId to avoid duplicate work.
+  folly::F14FastMap<UsetId, UsetId> augmentCache;
+
+  for (auto& [factId, ownerUsetId] : facts) {
+    if (ownerUsetId == INVALID_USET) {
+      continue;
+    }
+
+    auto cacheIt = augmentCache.find(ownerUsetId);
+    if (cacheIt != augmentCache.end()) {
+      if (cacheIt->second != ownerUsetId) {
+        ownerUsetId = cacheIt->second;
+        ++augmented;
+      }
+      continue;
+    }
+
+    UsetId originalOwner = ownerUsetId;
+
+    // Collect unique ACL CNF IDs for leaf UnitIds in this owner's set.
+    std::set<uint32_t> aclCnfIds;
+
+    if (ownerUsetId < firstSetId) {
+      // Direct UnitId (rare after computeOwnership but handle it)
+      auto it = unitACLMap.find(ownerUsetId);
+      if (it != unitACLMap.end()) {
+        aclCnfIds.insert(it->second->id);
+      }
+    } else {
+      // Promoted set — resolve and examine leaf members.
+      // After computeOwnership, sets are flat OR-sets of UnitIds
+      // (no nested set references), so checking immediate members
+      // is sufficient.
+      auto usetIt = idToUset.find(ownerUsetId);
+      if (usetIt != idToUset.end()) {
+        usetIt->second->exp.set.foreach([&](uint32_t member) {
+          if (member < firstSetId) {
+            auto it = unitACLMap.find(member);
+            if (it != unitACLMap.end()) {
+              aclCnfIds.insert(it->second->id);
+            }
+          }
+        });
+      }
+    }
+
+    if (aclCnfIds.empty()) {
+      augmentCache[originalOwner] = originalOwner;
+      continue;
+    }
+
+    // Create AND(existingOwner, aclCnf1, aclCnf2, ...)
+    std::set<uint32_t> andMembers = {ownerUsetId};
+    andMembers.insert(aclCnfIds.begin(), aclCnfIds.end());
+    auto andEntry = std::make_unique<Uset>(SetU32::from(andMembers), And, 1);
+    auto* andSet = usets.add(std::move(andEntry));
+    usets.promote(andSet);
+
+    augmentCache[originalOwner] = andSet->id;
+    ownerUsetId = andSet->id;
+    ++augmented;
+  }
+
+  VLOG(1) << "augmentOwnershipWithACL: augmented " << augmented
+          << " fact intervals out of " << facts.size();
+  t.log("augmentOwnershipWithACL");
+}
+
+} // namespace rts
+} // namespace glean
+} // namespace facebook
diff --git a/glean/rts/ownership/acl.h b/glean/rts/ownership/acl.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include "glean/rts/ownership.h"
+#include "glean/rts/ownership/uset.h"
+
+#include <string>
+#include <vector>
+
+namespace facebook {
+namespace glean {
+namespace rts {
+
+/// Entry mapping a directory prefix to a list of ACL group UsetIds.
+/// All groups at the same prefix level are ORed together.
+struct ACLConfigEntry {
+  std::string prefix;
+  std::vector<UsetId> groupUsetIds;
+};
+
+/// Per-unit ACL assignment: a UnitId and its matching ACL levels.
+/// Each inner vector represents groups at one directory prefix level (ORed).
+/// The outer vector represents levels that are ANDed together.
+struct UnitACLAssignment {
+  UnitId unitId;
+  std::vector<std::vector<UsetId>> levels; // outer=AND, inner=OR
+};
+
+/// Augment computed ownership with ACL constraints.
+///
+/// For each unit that has ACL assignments, this function:
+/// 1. Creates OR(groups) for each directory level
+/// 2. ANDs all level OR-sets to get a CNF UsetId per unit
+/// 3. Walks facts_ and ANDs each fact's existing owner with the unit's CNF
+///
+/// @param ownership  The computed ownership (modified in-place)
+/// @param assignments  Per-unit ACL assignments (UnitId → levels of group IDs)
+void augmentOwnershipWithACL(
+    ComputedOwnership& ownership,
+    const std::vector<UnitACLAssignment>& assignments);
+
+} // namespace rts
+} // namespace glean
+} // namespace facebook
diff --git a/glean/rts/ownership/setu32.h b/glean/rts/ownership/setu32.h
@@ -471,6 +471,9 @@ class SetU32 {
   /// Append a new value which must be >= the largest value in the set
   void append(uint32_t value);
 
+  /// Append all blocks from [start, finish)
+  void append(const_iterator start, const_iterator finish);
+
   static SetU32 from(const std::set<uint32_t>& set) {
     SetU32 setu32;
     for (auto elt : set) {
@@ -480,16 +483,11 @@ class SetU32 {
   }
 
   /**
-   * Merge two sets. If `right` is a subset of `left` or vice versa, returns a
-   * pointer to the superset. Otherwise, stores the result in `result` and
-   * returns a pointer to it.
+   * Iterate over all elements of the set.
    */
-  static const SetU32*
-  merge(SetU32& result, const SetU32& left, const SetU32& right);
-
   template <typename F>
   void foreach(F&& f) const {
-    for (auto& block : *this) {
+    for (const auto& block : *this) {
       auto id = block.hdr.id() << 8;
       switch (block.hdr.type()) {
         case SetU32::Hdr::Sparse: {
@@ -528,14 +526,11 @@ class SetU32 {
   MutableEliasFanoList toEliasFano(uint32_t upper) const;
   static SetU32 fromEliasFano(const EliasFanoList& list);
 
-  static void dump(SetU32&);
+  static const SetU32*
+  merge(SetU32& result, const SetU32& left, const SetU32& right);
 
- private:
-  static bool fitsSparse(uint8_t m, uint8_t n) {
-    return int(m) + n < 32;
-  }
+  static void dump(SetU32&);
 
-  void append(const_iterator start, const_iterator finish);
   void append(uint32_t id, Bits256 w);
 
   void appendMerge(
@@ -545,6 +540,11 @@ class SetU32 {
       const_iterator right_end);
   void appendMerge(Block left, Block right);
 
+ private:
+  static bool fitsSparse(uint8_t m, uint8_t n) {
+    return int(m) + n < 32;
+  }
+
   std::vector<Hdr> hdrs;
   std::vector<Bits256> dense;
   std::vector<uint8_t> sparse;