-
Notifications
You must be signed in to change notification settings - Fork 85
Add ACL ownership augmentation and AND-slice semantics to C++ runtime #704
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -208,6 +208,13 @@ struct Batch { | |
|
|
||
| // The schema ID, which must match the schema ID of the DB | ||
| 7: optional SchemaId schema_id; | ||
|
|
||
| // ACL configuration for this batch. | ||
| // Maps file/directory paths to lists of ACL group ID strings. | ||
| // Each path maps to one or more group IDs (e.g., {"src/alpha": ["1"], | ||
| // "src/bravo": ["2", "3"]}). | ||
| // Must be provided if ACL is enabled for the database. | ||
| 9: optional map<string, list<string>> (hs.type = "HashMap") acl_config; | ||
| } | ||
|
|
||
| struct Subst { | ||
|
|
@@ -260,6 +267,14 @@ exception DatabaseNotIncomplete { | |
| 1: DatabaseStatus status; | ||
| } | ||
|
|
||
| // Conflict when merging ACL configurations | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm pretty suspicious of this, as we discussed: the ACL config should be fixed for a given revision of the repo. Also I don't think the server has any business enforcing this.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. We can remove this conflict resolution logic. |
||
| // Thrown when a batch provides an ACL mapping that conflicts with existing DB config | ||
| exception ACLConfigConflict { | ||
| 1: string message; | ||
| 2: string conflicting_key; | ||
| 3: string existing_value; | ||
| 4: string new_value; | ||
| } | ||
| exception UnknownSchemaId { | ||
| 1: SchemaId schema_id; | ||
| } | ||
|
|
@@ -780,6 +795,7 @@ struct UserQueryResults { | |
|
|
||
| 9: optional string type; | ||
| // The inferred type of the query | ||
|
|
||
| } | ||
|
|
||
| // struct versions of exception types, needed because the | ||
|
|
@@ -830,6 +846,9 @@ struct UserQueryClientInfo { | |
| // User making the query | ||
| 3: string application; | ||
| // Name of program making the query. | ||
| 4: optional list<string> acl_group_names; | ||
| // ACL group names for query-time filtering. | ||
| // Names are resolved to IDs server-side using the DB's name-to-ID mapping. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this line is redundant, IDs are not a concept the user of this API needs to care about.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is needed since remote Glean clients need a way of passing this information to queries.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry for not being clear, I was referring to just the part of the comment beginning "Names are resolved..", it's redundant detail from the user's point of view
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed |
||
| } | ||
|
|
||
| struct ListDatabases { | ||
|
|
@@ -866,6 +885,13 @@ struct SendJsonBatch { | |
| // passed to finishBatch to check that the write has completed and | ||
| // obtain the substitution. | ||
| 3: bool remember = false; | ||
|
|
||
| // ACL configuration for this batch. | ||
| // Maps file/directory paths to lists of ACL group ID strings. | ||
| // Each path maps to one or more group IDs (e.g., {"src/alpha": ["1"], | ||
| // "src/bravo": ["2", "3"]}). | ||
| // Must be provided if ACL is enabled for the database. | ||
| 4: optional map<string, list<string>> (hs.type = "HashMap") acl_config; | ||
| } | ||
|
|
||
| struct SendJsonBatchResponse { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,187 @@ | ||
| /* | ||
| * Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| * All rights reserved. | ||
| * | ||
| * This source code is licensed under the BSD-style license found in the | ||
| * LICENSE file in the root directory of this source tree. | ||
| */ | ||
|
|
||
| #include "glean/rts/ownership/acl.h" | ||
| #include "glean/rts/timer.h" | ||
|
|
||
| #include <folly/container/F14Map.h> | ||
| #include <set> | ||
|
|
||
| namespace facebook { | ||
| namespace glean { | ||
| namespace rts { | ||
|
|
||
| namespace { | ||
|
|
||
| // Create an OR-set from a list of UsetIds and add it to the Usets container. | ||
| Uset* makeOrSet(Usets& usets, const std::vector<UsetId>& ids) { | ||
| CHECK(!ids.empty()); | ||
| std::set<uint32_t> idSet(ids.begin(), ids.end()); | ||
| auto entry = std::make_unique<Uset>(SetU32::from(idSet), Or, 1); | ||
| return usets.add(std::move(entry)); | ||
| } | ||
|
|
||
| // Create an AND-set from a list of Uset pointers. | ||
| // Each Uset represents one OR-level in the CNF. | ||
| Uset* makeAndSet(Usets& usets, const std::vector<Uset*>& orSets) { | ||
| CHECK(!orSets.empty()); | ||
| if (orSets.size() == 1) { | ||
| return orSets[0]; | ||
| } | ||
| std::set<uint32_t> ids; | ||
| for (auto* orSet : orSets) { | ||
| usets.promote(orSet); | ||
| ids.insert(orSet->id); | ||
| } | ||
| auto entry = std::make_unique<Uset>(SetU32::from(ids), And, 1); | ||
| return usets.add(std::move(entry)); | ||
| } | ||
|
|
||
| } // namespace | ||
|
|
||
| void augmentOwnershipWithACL( | ||
| ComputedOwnership& ownership, | ||
| const std::vector<UnitACLAssignment>& assignments) { | ||
| if (assignments.empty()) { | ||
| return; | ||
| } | ||
|
|
||
| auto t = makeAutoTimer("augmentOwnershipWithACL"); | ||
| auto& usets = ownership.sets_; | ||
| auto& facts = ownership.facts_; | ||
| const auto firstSetId = usets.getFirstId(); | ||
|
|
||
| // Step 1: Build UnitId → ACL CNF Uset mapping | ||
| folly::F14FastMap<UnitId, Uset*> unitACLMap; | ||
| unitACLMap.reserve(assignments.size()); | ||
|
|
||
| for (const auto& assignment : assignments) { | ||
| if (assignment.levels.empty()) { | ||
| continue; | ||
| } | ||
|
|
||
| std::vector<Uset*> orSets; | ||
| orSets.reserve(assignment.levels.size()); | ||
| for (const auto& levelGroups : assignment.levels) { | ||
| if (levelGroups.empty()) { | ||
| continue; | ||
| } | ||
| auto* orSet = makeOrSet(usets, levelGroups); | ||
| orSets.push_back(orSet); | ||
| } | ||
|
|
||
| if (orSets.empty()) { | ||
| continue; | ||
| } | ||
|
|
||
| Uset* cnf = makeAndSet(usets, orSets); | ||
| unitACLMap[assignment.unitId] = cnf; | ||
| } | ||
|
|
||
| if (unitACLMap.empty()) { | ||
| VLOG(1) << "augmentOwnershipWithACL: no unit ACL assignments, skipping"; | ||
| return; | ||
| } | ||
|
|
||
| VLOG(1) << "augmentOwnershipWithACL: " << unitACLMap.size() | ||
| << " units with ACL assignments"; | ||
|
|
||
| // Promote all ACL CNF Usets so they have IDs. | ||
| for (auto& [unitId, cnf] : unitACLMap) { | ||
| usets.promote(cnf); | ||
| } | ||
|
|
||
| // Step 2: Build UsetId → Uset* index for resolving promoted sets. | ||
| // | ||
| // After computeOwnership, ALL fact owners are promoted Usets with | ||
| // IDs >= firstSetId — even single-unit facts get wrapped in an OR-set | ||
| // and promoted. The old code checked (ownerUsetId < firstSetId) which | ||
| // was NEVER true, so no facts were ever augmented. We must resolve | ||
| // promoted sets to their leaf UnitIds to find ACL matches. | ||
| folly::F14FastMap<UsetId, Uset*> idToUset; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you only need this because Isn't there a problem with stacked DBs here? The Building this mapping seems pretty ugly and potentially expensive. You could imagine either storing the mapping in the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the incremental case, we ensure that stacked UsetIds are > any used ID in the base, so they don't conflict. Then, we reverse-walk the stack to lookup USets, so we eventually cover all the mappings from the stack. Specifically here, a UsetId could refer to a Uset from the base DB that isn't in this ComputedOwnership container. However, since the stacked DB only has stacked DB facts in it, we should be OK. I'm thinking over the performance question.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can have a fact with an owner UsetId that refers to a set in the base DB, so you won't find it in the mapping, I think this ends up working out OK with the code as it is but I'm not sure - at the least I would suggest testing it and adding a comment.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. Adding testcases and some logging to expose when a computed ownership UsetId belongs to a base DB's Usets container not the stacked one. |
||
| usets.foreach([&](Uset* entry) { | ||
| if (entry->promoted()) { | ||
| idToUset[entry->id] = entry; | ||
| } | ||
| }); | ||
|
|
||
| // Step 3: Walk facts_ and augment ownership. | ||
| // facts_ is a vector of (Id, UsetId) interval boundaries. | ||
| size_t augmented = 0; | ||
|
|
||
| // Cache: memoize owner UsetId → augmented UsetId to avoid duplicate work. | ||
| folly::F14FastMap<UsetId, UsetId> augmentCache; | ||
|
|
||
| for (auto& [factId, ownerUsetId] : facts) { | ||
| if (ownerUsetId == INVALID_USET) { | ||
| continue; | ||
| } | ||
|
|
||
| auto cacheIt = augmentCache.find(ownerUsetId); | ||
| if (cacheIt != augmentCache.end()) { | ||
| if (cacheIt->second != ownerUsetId) { | ||
| ownerUsetId = cacheIt->second; | ||
| ++augmented; | ||
| } | ||
| continue; | ||
| } | ||
|
|
||
| UsetId originalOwner = ownerUsetId; | ||
|
|
||
| // Collect unique ACL CNF IDs for leaf UnitIds in this owner's set. | ||
| std::set<uint32_t> aclCnfIds; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They're type aliases, but agree |
||
|
|
||
| if (ownerUsetId < firstSetId) { | ||
| // Direct UnitId (rare after computeOwnership but handle it) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would expect this to be quite common because most facts are owned by a single unit, so I'm surprised. Why do you say it's rare?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, it should be an error case. Since we should by now have created an OR set, there shouldn't be any bare UnitIds here. However, you're right that in many cases, the size of the OR set will be length=1
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding a test case, logging and assertion for this too. |
||
| auto it = unitACLMap.find(ownerUsetId); | ||
| if (it != unitACLMap.end()) { | ||
| aclCnfIds.insert(it->second->id); | ||
| } | ||
| } else { | ||
| // Promoted set — resolve and examine leaf members. | ||
| // After computeOwnership, sets are flat OR-sets of UnitIds | ||
| // (no nested set references), so checking immediate members | ||
| // is sufficient. | ||
| auto usetIt = idToUset.find(ownerUsetId); | ||
| if (usetIt != idToUset.end()) { | ||
| usetIt->second->exp.set.foreach([&](uint32_t member) { | ||
| if (member < firstSetId) { | ||
| auto it = unitACLMap.find(member); | ||
| if (it != unitACLMap.end()) { | ||
| aclCnfIds.insert(it->second->id); | ||
| } | ||
| } | ||
| }); | ||
| } | ||
| } | ||
|
|
||
| if (aclCnfIds.empty()) { | ||
| augmentCache[originalOwner] = originalOwner; | ||
| continue; | ||
| } | ||
|
|
||
| // Create AND(existingOwner, aclCnf1, aclCnf2, ...) | ||
| std::set<uint32_t> andMembers = {ownerUsetId}; | ||
| andMembers.insert(aclCnfIds.begin(), aclCnfIds.end()); | ||
| auto andEntry = std::make_unique<Uset>(SetU32::from(andMembers), And, 1); | ||
| auto* andSet = usets.add(std::move(andEntry)); | ||
| usets.promote(andSet); | ||
|
|
||
| augmentCache[originalOwner] = andSet->id; | ||
| ownerUsetId = andSet->id; | ||
| ++augmented; | ||
| } | ||
|
|
||
| VLOG(1) << "augmentOwnershipWithACL: augmented " << augmented | ||
| << " fact intervals out of " << facts.size(); | ||
| t.log("augmentOwnershipWithACL"); | ||
| } | ||
|
|
||
| } // namespace rts | ||
| } // namespace glean | ||
| } // namespace facebook | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| /* | ||
| * Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| * All rights reserved. | ||
| * | ||
| * This source code is licensed under the BSD-style license found in the | ||
| * LICENSE file in the root directory of this source tree. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include "glean/rts/ownership.h" | ||
| #include "glean/rts/ownership/uset.h" | ||
|
|
||
| #include <string> | ||
| #include <vector> | ||
|
|
||
| namespace facebook { | ||
| namespace glean { | ||
| namespace rts { | ||
|
|
||
| /// Entry mapping a directory prefix to a list of ACL group UsetIds. | ||
| /// All groups at the same prefix level are ORed together. | ||
| struct ACLConfigEntry { | ||
| std::string prefix; | ||
| std::vector<UsetId> groupUsetIds; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not construct the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This allows caching of the recursive construction. example: dir:
[a,b] -> UsetID(1) and dir to usetIDs: Not too bad in this example, but as directories get deeper, the efficiency of creation is more significant. |
||
| }; | ||
|
|
||
| /// Per-unit ACL assignment: a UnitId and its matching ACL levels. | ||
| /// Each inner vector represents groups at one directory prefix level (ORed). | ||
| /// The outer vector represents levels that are ANDed together. | ||
| struct UnitACLAssignment { | ||
| UnitId unitId; | ||
| std::vector<std::vector<UsetId>> levels; // outer=AND, inner=OR | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as above, why not construct the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here, I think your suggestion makes sense. There's no inheritance needed per-file.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe doing something else is more complex. Here's an attempt: #705 |
||
| }; | ||
|
|
||
| /// Augment computed ownership with ACL constraints. | ||
| /// | ||
| /// For each unit that has ACL assignments, this function: | ||
| /// 1. Creates OR(groups) for each directory level | ||
| /// 2. ANDs all level OR-sets to get a CNF UsetId per unit | ||
| /// 3. Walks facts_ and ANDs each fact's existing owner with the unit's CNF | ||
| /// | ||
| /// @param ownership The computed ownership (modified in-place) | ||
| /// @param assignments Per-unit ACL assignments (UnitId → levels of group IDs) | ||
| void augmentOwnershipWithACL( | ||
| ComputedOwnership& ownership, | ||
| const std::vector<UnitACLAssignment>& assignments); | ||
|
|
||
| } // namespace rts | ||
| } // namespace glean | ||
| } // namespace facebook | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note that JustKnobs is not open-source
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for pointing out. The plan here is to use #ifdef to have different paths for OSS (no JustKnobs check) and internal (with JustKnobs check)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a general point, we try to avoid
#ifdefin the codebase as far as possible because it's a maintenance headache. There are a few alternatives, e.g. make an API that has a stub implementation in the OSS build.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will use the facebook/ vs github/ directory pattern with facebook/Glean/Query/ACL.hs having the internal implementation and github/Glean/Query/ACL.hs being a stub