diff --git a/glean/config/server/server_config.thrift b/glean/config/server/server_config.thrift
index 44fcda2cf..f50f6d909 100644
--- a/glean/config/server/server_config.thrift
+++ b/glean/config/server/server_config.thrift
@@ -178,6 +178,13 @@ union SchemaLocation {
   5: string indexconfig;
 }
 
+// ACL filtering policy for query-time enforcement.
+struct ACLPolicy {
+  // Names of the repositories for which ACL filtering is enabled.
+  // An empty list (the default) disables filtering for every repo.
+  1: list<string> enabled_repos = [];
+}
+
 // Configeration for Glean Servers
 struct Config {
   1: DatabaseRetentionPolicy retention;
@@ -338,6 +345,12 @@ struct Config {
   // Default storage backend for newly created databases. Can be overriden
   // by command-line options. See also db_create_version.
   41: optional string db_create_storage;
+
+  // ACL filtering configuration. When enabled, query results are filtered
+  // based on the user's group membership. Requires both the JustKnobs
+  // switch (code_indexing/glean/check_acls) to be enabled AND the repo to
+  // be in the enabled_repos list.
+  42: optional ACLPolicy acl_policy;
 }
 
 // The following were automatically generated and may benefit from renaming.
diff --git a/glean/if/glean.thrift b/glean/if/glean.thrift
index c60e0bb90..4a009e119 100644
--- a/glean/if/glean.thrift
+++ b/glean/if/glean.thrift
@@ -208,6 +208,13 @@ struct Batch {
 
   // The schema ID, which must match the schema ID of the DB
   7: optional SchemaId schema_id;
+
+  // ACL configuration for this batch.
+  // Maps file/directory paths to lists of ACL group ID strings.
+  // Each path maps to one or more group IDs (e.g., {"src/alpha": ["1"],
+  // "src/bravo": ["2", "3"]}).
+  // Must be provided if ACL is enabled for the database.
+  9: optional map<string, list<string>> (hs.type = "HashMap") acl_config;
 }
 
 struct Subst {
@@ -260,6 +267,15 @@ exception DatabaseNotIncomplete {
   1: DatabaseStatus status;
 }
 
+// Conflict when merging ACL configurations
+// Thrown when a batch provides an ACL mapping that conflicts with existing DB config
+exception ACLConfigConflict {
+  1: string message;
+  2: string conflicting_key;
+  3: string existing_value;
+  4: string new_value;
+}
+
 exception UnknownSchemaId {
   1: SchemaId schema_id;
 }
@@ -830,6 +846,9 @@ struct UserQueryClientInfo {
   // User making the query
   3: string application;
   // Name of program making the query.
+  4: optional list<string> acl_group_names;
+  // ACL group names for query-time filtering.
+  // Names are resolved to IDs server-side using the DB's name-to-ID mapping.
 }
 
 struct ListDatabases {
@@ -866,6 +885,13 @@ struct SendJsonBatch {
   // passed to finishBatch to check that the write has completed and
   // obtain the substitution.
   3: bool remember = false;
+
+  // ACL configuration for this batch.
+  // Maps file/directory paths to lists of ACL group ID strings.
+  // Each path maps to one or more group IDs (e.g., {"src/alpha": ["1"],
+  // "src/bravo": ["2", "3"]}).
+  // Must be provided if ACL is enabled for the database.
+  4: optional map<string, list<string>> (hs.type = "HashMap") acl_config;
 }
 
 struct SendJsonBatchResponse {
diff --git a/glean/rts/ownership/acl.cpp b/glean/rts/ownership/acl.cpp
new file mode 100644
index 000000000..2b4638fe2
--- /dev/null
+++ b/glean/rts/ownership/acl.cpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "glean/rts/ownership/acl.h"
+#include "glean/rts/timer.h"
+
+// NOTE(review): confirm these two include targets against the original change.
+#include <folly/container/F14Map.h>
+#include <set>
+
+namespace facebook {
+namespace glean {
+namespace rts {
+
+namespace {
+
+// Create an OR-set from a list of UsetIds and add it to the Usets container.
+// `ids` must be non-empty; duplicates collapse in the intermediate std::set.
+Uset* makeOrSet(Usets& usets, const std::vector<UsetId>& ids) {
+  CHECK(!ids.empty());
+  std::set<uint32_t> idSet(ids.begin(), ids.end());
+  auto entry = std::make_unique<Uset>(SetU32::from(idSet), Or, 1);
+  return usets.add(std::move(entry));
+}
+
+// Create an AND-set from a list of Uset pointers.
+// Each Uset represents one OR-level in the CNF. A single level is returned
+// unchanged; otherwise every level is promoted so that its id can be
+// referenced as a member of the new AND-set.
+Uset* makeAndSet(Usets& usets, const std::vector<Uset*>& orSets) {
+  CHECK(!orSets.empty());
+  if (orSets.size() == 1) {
+    return orSets[0];
+  }
+  std::set<uint32_t> ids;
+  for (auto* orSet : orSets) {
+    usets.promote(orSet);
+    ids.insert(orSet->id);
+  }
+  auto entry = std::make_unique<Uset>(SetU32::from(ids), And, 1);
+  return usets.add(std::move(entry));
+}
+
+} // namespace
+
+void augmentOwnershipWithACL(
+    ComputedOwnership& ownership,
+    const std::vector<UnitACLAssignment>& assignments) {
+  if (assignments.empty()) {
+    return;
+  }
+
+  auto t = makeAutoTimer("augmentOwnershipWithACL");
+  auto& usets = ownership.sets_;
+  auto& facts = ownership.facts_;
+  const auto firstSetId = usets.getFirstId();
+
+  // Step 1: Build UnitId → ACL CNF Uset mapping
+  folly::F14FastMap<UnitId, Uset*> unitACLMap;
+  unitACLMap.reserve(assignments.size());
+
+  for (const auto& assignment : assignments) {
+    if (assignment.levels.empty()) {
+      continue;
+    }
+
+    std::vector<Uset*> orSets;
+    orSets.reserve(assignment.levels.size());
+    for (const auto& levelGroups : assignment.levels) {
+      if (levelGroups.empty()) {
+        continue;
+      }
+      auto* orSet = makeOrSet(usets, levelGroups);
+      orSets.push_back(orSet);
+    }
+
+    if (orSets.empty()) {
+      continue;
+    }
+
+    Uset* cnf = makeAndSet(usets, orSets);
+    // NOTE(review): a repeated unitId silently replaces the earlier CNF
+    // here — confirm callers pass at most one assignment per unit.
+    unitACLMap[assignment.unitId] = cnf;
+  }
+
+  if (unitACLMap.empty()) {
+    VLOG(1) << "augmentOwnershipWithACL: no unit ACL assignments, skipping";
+    return;
+  }
+
+  VLOG(1) << "augmentOwnershipWithACL: " << unitACLMap.size()
+          << " units with ACL assignments";
+
+  // Promote all ACL CNF Usets so they have IDs.
+  for (auto& [unitId, cnf] : unitACLMap) {
+    usets.promote(cnf);
+  }
+
+  // Step 2: Build UsetId → Uset* index for resolving promoted sets.
+  //
+  // After computeOwnership, fact owners are promoted Usets with
+  // IDs >= firstSetId — even single-unit facts get wrapped in an OR-set
+  // and promoted — so comparing an owner against firstSetId alone never
+  // matches a unit. Owners are therefore resolved through this index to
+  // their member UnitIds to find ACL matches.
+  folly::F14FastMap<UsetId, Uset*> idToUset;
+  usets.foreach([&](Uset* entry) {
+    if (entry->promoted()) {
+      idToUset[entry->id] = entry;
+    }
+  });
+
+  // Step 3: Walk facts_ and augment ownership.
+  // facts_ is a vector of (Id, UsetId) interval boundaries.
+  size_t augmented = 0;
+
+  // Cache: memoize owner UsetId → augmented UsetId to avoid duplicate work.
+  folly::F14FastMap<UsetId, UsetId> augmentCache;
+
+  for (auto& [factId, ownerUsetId] : facts) {
+    if (ownerUsetId == INVALID_USET) {
+      continue;
+    }
+
+    auto cacheIt = augmentCache.find(ownerUsetId);
+    if (cacheIt != augmentCache.end()) {
+      if (cacheIt->second != ownerUsetId) {
+        ownerUsetId = cacheIt->second;
+        ++augmented;
+      }
+      continue;
+    }
+
+    UsetId originalOwner = ownerUsetId;
+
+    // Collect unique ACL CNF IDs for leaf UnitIds in this owner's set.
+    std::set<uint32_t> aclCnfIds;
+
+    if (ownerUsetId < firstSetId) {
+      // Direct UnitId (rare after computeOwnership but handle it)
+      auto it = unitACLMap.find(ownerUsetId);
+      if (it != unitACLMap.end()) {
+        aclCnfIds.insert(it->second->id);
+      }
+    } else {
+      // Promoted set — resolve and examine leaf members.
+      // After computeOwnership, sets are flat OR-sets of UnitIds
+      // (no nested set references), so checking immediate members
+      // is sufficient.
+      auto usetIt = idToUset.find(ownerUsetId);
+      if (usetIt != idToUset.end()) {
+        usetIt->second->exp.set.foreach([&](uint32_t member) {
+          if (member < firstSetId) {
+            auto it = unitACLMap.find(member);
+            if (it != unitACLMap.end()) {
+              aclCnfIds.insert(it->second->id);
+            }
+          }
+        });
+      }
+    }
+
+    if (aclCnfIds.empty()) {
+      augmentCache[originalOwner] = originalOwner;
+      continue;
+    }
+
+    // NOTE(review): the ACL CNFs of all ACL'd members are ANDed onto the
+    // whole owner set, so a fact shared by an ACL'd unit and another unit
+    // also requires the ACL'd unit's groups when reached through the
+    // other unit — confirm this conservative behaviour is intended.
+    // Create AND(existingOwner, aclCnf1, aclCnf2, ...)
+    std::set<uint32_t> andMembers = {ownerUsetId};
+    andMembers.insert(aclCnfIds.begin(), aclCnfIds.end());
+    auto andEntry = std::make_unique<Uset>(SetU32::from(andMembers), And, 1);
+    auto* andSet = usets.add(std::move(andEntry));
+    usets.promote(andSet);
+
+    augmentCache[originalOwner] = andSet->id;
+    ownerUsetId = andSet->id;
+    ++augmented;
+  }
+
+  VLOG(1) << "augmentOwnershipWithACL: augmented " << augmented
+          << " fact intervals out of " << facts.size();
+  t.log("augmentOwnershipWithACL");
+}
+
+} // namespace rts
+} // namespace glean
+} // namespace facebook
diff --git a/glean/rts/ownership/acl.h b/glean/rts/ownership/acl.h
new file mode 100644
index 000000000..866338c3e
--- /dev/null
+++ b/glean/rts/ownership/acl.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include "glean/rts/ownership.h"
+#include "glean/rts/ownership/uset.h"
+
+#include <string>
+#include <vector>
+
+namespace facebook {
+namespace glean {
+namespace rts {
+
+/// Entry mapping a directory prefix to a list of ACL group UsetIds.
+/// All groups at the same prefix level are ORed together.
+struct ACLConfigEntry {
+  std::string prefix;
+  std::vector<UsetId> groupUsetIds;
+};
+
+/// Per-unit ACL assignment: a UnitId and its matching ACL levels.
+/// Each inner vector represents groups at one directory prefix level (ORed).
+/// The outer vector represents levels that are ANDed together.
+struct UnitACLAssignment {
+  UnitId unitId;
+  std::vector<std::vector<UsetId>> levels; // outer=AND, inner=OR
+};
+
+/// Augment computed ownership with ACL constraints.
+///
+/// For each unit that has ACL assignments, this function:
+///   1. Creates OR(groups) for each directory level
+///   2. ANDs all level OR-sets to get a CNF UsetId per unit
+///   3. Walks facts_ and ANDs each fact's existing owner with the unit's CNF
+///
+/// @param ownership The computed ownership (modified in-place)
+/// @param assignments Per-unit ACL assignments (UnitId → levels of group IDs)
+void augmentOwnershipWithACL(
+    ComputedOwnership& ownership,
+    const std::vector<UnitACLAssignment>& assignments);
+
+} // namespace rts
+} // namespace glean
+} // namespace facebook
diff --git a/glean/rts/ownership/setu32.h b/glean/rts/ownership/setu32.h
index b1c0680ca..fb8d895b4 100644
--- a/glean/rts/ownership/setu32.h
+++ b/glean/rts/ownership/setu32.h
@@ -471,6 +471,9 @@ class SetU32 {
   /// Append a new value which must be >= the largest value in the set
   void append(uint32_t value);
 
+  /// Append all blocks from [start, finish)
+  void append(const_iterator start, const_iterator finish);
+
   static SetU32 from(const std::set<uint32_t>& set) {
     SetU32 setu32;
     for (auto elt : set) {
@@ -480,16 +483,11 @@ class SetU32 {
   }
 
   /**
-   * Merge two sets. If `right` is a subset of `left` or vice versa, returns a
-   * pointer to the superset. Otherwise, stores the result in `result` and
-   * returns a pointer to it.
+   * Iterate over all elements of the set.
+   */
-  static const SetU32*
-  merge(SetU32& result, const SetU32& left, const SetU32& right);
-
   template
   void foreach(F&& f) const {
-    for (auto& block : *this) {
+    for (const auto& block : *this) {
       auto id = block.hdr.id() << 8;
       switch (block.hdr.type()) {
         case SetU32::Hdr::Sparse: {
@@ -528,14 +526,11 @@ class SetU32 {
   MutableEliasFanoList toEliasFano(uint32_t upper) const;
   static SetU32 fromEliasFano(const EliasFanoList& list);
 
-  static void dump(SetU32&);
+  static const SetU32*
+  merge(SetU32& result, const SetU32& left, const SetU32& right);
 
- private:
-  static bool fitsSparse(uint8_t m, uint8_t n) {
-    return int(m) + n < 32;
-  }
+  static void dump(SetU32&);
 
-  void append(const_iterator start, const_iterator finish);
   void append(uint32_t id, Bits256 w);
 
   void appendMerge(
@@ -545,6 +540,11 @@ class SetU32 {
       const_iterator right_end);
   void appendMerge(Block left, Block right);
 
+ private:
+  static bool fitsSparse(uint8_t m, uint8_t n) {
+    return int(m) + n < 32;
+  }
+
   std::vector hdrs;
   std::vector dense;
   std::vector sparse;
diff --git a/glean/rts/ownership/slice.h b/glean/rts/ownership/slice.h
index e677b5b2f..08a20ab0f 100644
--- a/glean/rts/ownership/slice.h
+++ b/glean/rts/ownership/slice.h
@@ -80,13 +80,21 @@ struct Slices {
     if (usetid == INVALID_USET) {
       return false;
     }
+    // AND semantics: when several slices cover the same UsetId (e.g., an
+    // ownership slice and an ACL slice for the same layer), every covering
+    // slice must report it visible; a UsetId covered by no slice is hidden.
+    // With non-overlapping slices at most one slice covers an id, so the
+    // result equals the previous first-match behaviour.
+    bool found = false;
     for (auto slice : slices_) {
       if (slice->inRange(usetid)) {
-        auto visible = slice->visible(usetid);
-        return visible;
+        found = true;
+        if (!slice->visible(usetid)) {
+          return false;
+        }
       }
     }
-    return false;
+    return found;
   }
 
   UsetId first() const {
diff --git a/glean/rts/serialize.h b/glean/rts/serialize.h
index 6e1911269..63862f02b 100644
--- a/glean/rts/serialize.h
+++ b/glean/rts/serialize.h
@@ -210,9 +210,11 @@ using Nat = int64_t;
 using Binary = folly::ByteRange;
 using String = std::string;
 using List = std::vector; // TODO: generalise
+using StringList = std::vector;
 using Map = std::map; // TODO: generalise
+using StringMap = std::map;
 
-using Field = std::variant;
+using Field = std::variant;
 using Object = std::vector>;
 
 enum Type : uint32_t {
@@ -237,6 +239,10 @@ template <>
 Type typeOf>() {
   return ListTy;
 }
+template <>
+Type typeOf>() {
+  return ListTy;
+}
 
 inline void put(binary::Output& out, Nat x) {
   out.packed(folly::encodeZigZag(x));
@@ -298,9 +304,15 @@ inline void put(binary::Output& out, const Object& obj) {
     } else if (std::holds_alternative(val)) {
       field(BinaryTy, num);
       put(out, std::get(val));
+    } else if (std::holds_alternative(val)) {
+      field(StringTy, num); // NOTE(review): the typeOf specialization added above returns ListTy — confirm StringTy is the intended tag here
+      put(out, std::get(val));
     } else if (std::holds_alternative(val)) {
       field(MapTy, num);
       put(out, std::get(val));
+    } else if (std::holds_alternative(val)) {
+      field(MapTy, num);
+      put(out, std::get(val));
     }
   }
   out.fixed(uint8_t(0)); // object terminator