Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions glean.cabal.in
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,7 @@ library db
Glean.Query.Expand
Glean.Query.Flatten
Glean.Query.Flatten.Types
Glean.Query.Lint
Glean.Query.Opt
Glean.Query.Prune
Glean.Query.Reorder
Expand Down
5 changes: 4 additions & 1 deletion glean/db/Glean/Database/Config.hs
Original file line number Diff line number Diff line change
Expand Up @@ -204,15 +204,17 @@ data Config = Config
data DebugFlags = DebugFlags
{ tcDebug :: !Bool
, queryDebug :: !Bool
, queryLint :: !Bool
}

instance Default DebugFlags where
def = DebugFlags { tcDebug = False, queryDebug = False }
def = DebugFlags { tcDebug = False, queryDebug = False, queryLint = False }

instance Semigroup DebugFlags where
a <> b = DebugFlags
{ tcDebug = tcDebug a || tcDebug b
, queryDebug = queryDebug a || queryDebug b
, queryLint = queryLint a || queryLint b
}

instance Monoid DebugFlags where
Expand Down Expand Up @@ -561,6 +563,7 @@ options = do
debugParser = do
tcDebug <- switch (long "debug-tc")
queryDebug <- switch (long "debug-query")
queryLint <- switch (long "debug-query-lint")
return DebugFlags{..}

serverConfigThriftSource = option (eitherReader ThriftSource.parse)
Expand Down
1 change: 1 addition & 0 deletions glean/db/Glean/Database/Env.hs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ getDebugEnv = do
where
add "tc" = return def { tcDebug = True }
add "query" = return def { queryDebug = True }
add "lint" = return def { queryLint = True }
add other = do
logWarning $ "Unkonwn GLEAN_DEBUG class: " <> other
return def
Expand Down
290 changes: 290 additions & 0 deletions glean/db/Glean/Query/Lint.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
{-
Copyright (c) Meta Platforms, Inc. and affiliates.
All rights reserved.

This source code is licensed under the BSD-style license found in the
LICENSE file in the root directory of this source tree.
-}

module Glean.Query.Lint (
lintQuery
) where

import Control.Monad
import Control.Monad.Except
import Control.Monad.State
import Data.List (foldl')
import Data.IntSet (IntSet)
import qualified Data.IntSet as IntSet
import Compat.Prettyprinter

import Glean.Angle.Types
(Type_(..), latestAngleVersion, tempPredicateId)
import Glean.Database.Schema.Types
(DbSchema, PredicateDetails(..), lookupPid, lookupPredicateId)
import Glean.Display
import Glean.Query.Codegen.Types
import Glean.RTS.Term as RTS
import Glean.RTS.Types as RTS
import Glean.Schema.Util (tupleSchema)

-- | The lint monad: tracks the set of bound variable IDs in state,
-- and can throw a pretty-printed error.
type L a = StateT IntSet (Except (Doc ())) a

runL :: L a -> Either (Doc ()) a
runL m = runExcept (evalStateT m IntSet.empty)

lintError :: Doc () -> L a
lintError = throwError

-- | Record a variable as bound.
bindVar :: Var -> L ()
bindVar v = modify (IntSet.insert (varId v))

-- | Check that a variable is currently bound.
requireBound :: Var -> L ()
requireBound v = do
bound <- get
unless (varId v `IntSet.member` bound) $
lintError $ "unbound variable:" <+> displayDefault v

-- | Run an action in a nested scope. The bound set is saved and
-- restored afterwards, so bindings made inside do not escape.
scoped :: L a -> L a
scoped m = do
saved <- get
a <- m
put saved
return a

{-
Check the query for internal consistency. lintQuery will check
the following properties of the query:

- type-correctness, e.g.
- in A = B, the types of A and B are equivalent
- in FactGenerator and DerivedFactGenerator, the types of the key
and value pattern match those of the predicate in the schema
- arguments of primitives have the correct type
- bind-before-use: for all variables, MatchBind occurs before MatchVar

-}
lintQuery :: DbSchema -> CodegenQuery -> Either (Doc ()) ()
lintQuery schema QueryWithInfo{..} = runL $ do
let CgQuery head body = qiQuery
lintStatements schema body
lintPat head
case qiGenerator of
Nothing -> checkEqType "query head" head qiReturnType
Just gen -> do
lintGenerator gen
lintGeneratorTypes schema head gen

-- ---------------------------------------------------------------------------
-- Bind-before-use and type checking for statement lists

lintStatements :: DbSchema -> [CgStatement] -> L ()
lintStatements schema = mapM_ (lintStatement schema)

lintStatement :: DbSchema -> CgStatement -> L ()
lintStatement schema = \case
CgStatement pat gen -> do
-- The generator is evaluated first; its patterns can bind variables
lintGenerator gen
-- Then the LHS pattern matches the generator result, and can use
-- variables bound by the generator as well as previously bound ones
lintPat pat
-- Type check: pattern type should match generator result type
lintStmtTypes schema pat gen

CgAllStatement var expr stmts -> do
-- The inner statements are evaluated in a nested scope
scoped $ do
lintStatements schema stmts
lintPat expr
-- The variable bound by 'all' is available to subsequent statements
bindVar var

CgNegation stmts ->
-- Negation introduces its own scope; bindings do not escape
scoped $ lintStatements schema stmts

CgDisjunction stmtss -> do
-- Each branch starts with the same bound set. Variables bound
-- in all branches are available afterwards.
bounds <- forM stmtss $ \stmts -> scoped $ do
lintStatements schema stmts
get
case bounds of
[] -> return ()
(b:bs) -> put (foldl' IntSet.intersection b bs)

CgConditional{..} -> do
outer <- get
lintStatements schema cond
-- then branch sees cond bindings
lintStatements schema then_
thenBound <- get
-- else branch only sees pre-condition bindings
put outer
lintStatements schema else_
elseBound <- get
-- Variables bound in both branches are available after
put (IntSet.intersection thenBound elseBound)

-- ---------------------------------------------------------------------------
-- Check uses and record bindings in a single depth-first left-to-right
-- traversal, so that a MatchBind makes the variable immediately
-- available for a later MatchVar in the same pattern.

lintPat :: Pat -> L ()
lintPat = mapM_ lintMatch

lintMatch :: Match () Var -> L ()
lintMatch = \case
MatchBind v -> bindVar v
MatchVar v -> requireBound v
MatchAnd a b -> do
lintPat a
lintPat b
MatchPrefix _ rest ->
lintPat rest
MatchArrayPrefix _ pre whole -> do
mapM_ lintPat pre
lintPat whole
_ -> return ()

-- | Lint expression-position subterms of a generator, then bind
-- pattern-position variables.
lintGenerator :: Generator -> L ()
lintGenerator = \case
FactGenerator _ kpat vpat _ -> do
lintPat kpat
lintPat vpat
TermGenerator expr ->
lintPat expr
DerivedFactGenerator _ key value -> do
lintPat key
lintPat value
ArrayElementGenerator _ arr ->
lintPat arr
SetElementGenerator _ set ->
lintPat set
PrimCall _ args _ ->
mapM_ lintPat args

-- | Type-check the result generator (qiGenerator). The head expression
-- is the fact ID that the generator will look up.
lintGeneratorTypes :: DbSchema -> Pat -> Generator -> L ()
lintGeneratorTypes schema head gen = case gen of
FactGenerator pidRef kpat vpat _ -> do
mdetails <- lookupPredicate schema pidRef
forM_ mdetails $ \details -> do
checkEqType
("result generator key of" <+> displayDefault pidRef)
kpat (predicateKeyType details)
checkEqType
("result generator value of" <+> displayDefault pidRef)
vpat (predicateValueType details)
checkEqType "result generator fact ID" head (PredicateTy () pidRef)
_ -> return ()

-- ---------------------------------------------------------------------------
-- Type checking

-- | Check the type consistency of a statement: pattern type vs
-- generator result type, and predicate key/value types for fact
-- generators.
lintStmtTypes :: DbSchema -> Pat -> Generator -> L ()
lintStmtTypes schema pat gen = case gen of
FactGenerator pidRef kpat vpat _ -> do
mdetails <- lookupPredicate schema pidRef
forM_ mdetails $ \details -> do
checkEqType
("key of" <+> displayDefault pidRef) kpat (predicateKeyType details)
checkEqType
("value of" <+> displayDefault pidRef) vpat (predicateValueType details)
-- The LHS pattern matches the fact ID, which has the predicate type
checkEqType "fact generator result" pat (PredicateTy () pidRef)

DerivedFactGenerator pidRef key value -> do
mdetails <- lookupPredicate schema pidRef
forM_ mdetails $ \details -> do
checkEqType
("key of" <+> displayDefault pidRef) key (predicateKeyType details)
checkEqType
("value of" <+> displayDefault pidRef) value (predicateValueType details)
checkEqType "derived fact generator result" pat (PredicateTy () pidRef)

TermGenerator expr ->
case (patType pat, patType expr) of
(Just pty, Just ety) -> typesShouldMatch "term generator" pty ety
_ -> return ()

ArrayElementGenerator eltTy _arr ->
checkEqType "array element generator" pat eltTy

SetElementGenerator eltTy _set ->
checkEqType "set element generator" pat eltTy

PrimCall _op _args retTy ->
checkEqType "primcall result" pat retTy

lookupPredicate :: DbSchema -> PidRef -> L (Maybe PredicateDetails)
lookupPredicate schema (PidRef pid predId)
-- The temporary predicate used for derived fact generators is not
-- stored in the schema, so skip it.
| predId == tempPredicateId = return Nothing
| otherwise =
case lookupPid pid schema of
Nothing -> case lookupPredicateId predId schema of
Nothing ->
lintError $ "unknown predicate:" <+> displayDefault predId
Just details -> return (Just details)
Just details -> return (Just details)

-- | Check that the type of a pattern matches an expected type.
-- When the pattern's type cannot be determined, the check is skipped.
checkEqType :: Doc () -> Pat -> Type -> L ()
checkEqType ctx pat expected =
case patType pat of
Nothing -> return ()
Just actual -> typesShouldMatch ctx actual expected

typesShouldMatch :: Doc () -> Type -> Type -> L ()
typesShouldMatch ctx actual expected
| eqType latestAngleVersion actual expected = return ()
| otherwise = throwError $ vcat
[ "type mismatch in" <+> ctx <> ":"
, " expected:" <+> displayDefault expected
, " actual:" <+> displayDefault actual
]

-- ---------------------------------------------------------------------------
-- Inferring the type of a pattern

-- | Attempt to determine the type of a pattern. Returns Nothing when
-- the type cannot be determined from the pattern structure alone
-- (e.g. an empty array or an Alt without surrounding context).
patType :: Pat -> Maybe Type
patType (Byte _) = Just ByteTy
patType (Nat _) = Just NatTy
patType (RTS.String _) = Just StringTy
patType (ByteArray _) = Just (ArrayTy ByteTy)
patType (Array []) = Nothing
patType (Array (t:_)) = ArrayTy <$> patType t
patType (Tuple ts) = tupleSchema <$> traverse patType ts
patType (Alt _ _) = Nothing
patType (Ref m) = matchType m

matchType :: Match () Var -> Maybe Type
matchType (MatchWild ty) = Just ty
matchType (MatchNever ty) = Just ty
matchType (MatchBind v) = Just (varType v)
matchType (MatchVar v) = Just (varType v)
matchType (MatchFid _) = Nothing
matchType (MatchAnd a _) = patType a
matchType (MatchPrefix _ _) = Just StringTy
matchType (MatchArrayPrefix ty _ _) = Just (ArrayTy ty)
matchType (MatchExt _) = Nothing
7 changes: 7 additions & 0 deletions glean/db/Glean/Query/UserQuery.hs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ import Glean.Query.Flatten
import Glean.Query.Opt
import Glean.Query.Reorder
import Glean.Query.Incremental (makeIncremental)
import Glean.Query.Lint (lintQuery)
import Glean.RTS as RTS
import Glean.RTS.Bytecode.Disassemble
import qualified Glean.RTS.Bytecode.Gen.Version as Bytecode
Expand Down Expand Up @@ -987,6 +988,12 @@ compileAngleQuery rec ver dbSchema mode source stored debug = do
reordered <- checkBadQuery id $ runExcept $ reorder dbSchema optimised
ifDebug $ "reordered query: " <> show (displayDefault (qiQuery reordered))

when (queryLint debug) $
case lintQuery dbSchema reordered of
Left err -> throwIO $ Thrift.BadQuery $
Text.pack $ show $ "query lint:" <+> err
Right () -> return ()

final <- case mode of
NoExtraSteps -> return reordered
IncrementalDerivation getStats -> do
Expand Down
1 change: 1 addition & 0 deletions glean/hs/Glean/RTS/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ eqType version a b = case (a,b) of
(NatTy, NatTy) -> True
(StringTy, StringTy) -> True
(ArrayTy a, ArrayTy b) -> eqType version a b
(SetTy a, SetTy b) -> eqType version a b
(RecordTy as, RecordTy bs) ->
let isTuple = all (Text.isInfixOf "tuplefield" . fieldDefName)
-- previous to version 7 records were always compared structurally
Expand Down
9 changes: 7 additions & 2 deletions glean/test/lib/Glean/Database/Test.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ module Glean.Database.Test
, setLMDBNoUnpack
, enableTcDebug
, enableQueryDebug
, enableQueryLint
, enableRocksDBCache
, withTestEnv
, kickOffTestDB
Expand Down Expand Up @@ -129,6 +130,10 @@ enableQueryDebug :: Setting
enableQueryDebug cfg = cfg
{ cfgDebug = (cfgDebug cfg) { queryDebug = True } }

enableQueryLint :: Setting
enableQueryLint cfg = cfg
{ cfgDebug = (cfgDebug cfg) { queryLint = True } }

enableRocksDBCache :: Setting
enableRocksDBCache cfg = cfg
{ cfgServerConfig = (cfgServerConfig cfg) <&> \scfg -> scfg
Expand All @@ -146,12 +151,12 @@ withTestEnv settings action =
\(cfgAPI :: NullConfigProvider) -> do
let
dbConfig = foldl' (\acc f -> f acc)
def
(enableQueryLint def
{ cfgDataStore = tmpDataStore
, cfgSchemaLocation = Just schemaLocationFiles
, cfgServerConfig = ThriftSource.value def
{ ServerConfig.config_db_rocksdb_cache_mb = 0 }
}
})
settings

withDatabases evb dbConfig cfgAPI action
Expand Down
Loading
Loading