{-# Language MultiWayIf #-} {-# Language PatternSynonyms #-} {-# Language ViewPatterns #-} module Fixme.Scan.Git.Local where import Prelude hiding (init) import Fixme.Prelude hiding (indent) import Fixme.Types import Fixme.State import Fixme.Scan as Scan import Fixme.Log import HBS2.Storage import HBS2.Storage.Compact import HBS2.System.Dir import HBS2.Git.Local.CLI import DBPipe.SQLite hiding (field) import Data.Config.Suckless import Data.Text.Fuzzy.Tokenize import Data.ByteString.Char8 qualified as BS import Data.ByteString.Lazy.Char8 qualified as LBS8 import Data.ByteString.Lazy (ByteString) import Data.Either import Data.Fixed import Data.List qualified as List import Data.List.Split (chunksOf) import Data.Maybe import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HM import Data.HashSet qualified as HS import Data.HashSet (HashSet) import Data.Text qualified as Text import Data.Text.Encoding (decodeUtf8With) import Data.Text.Encoding.Error (ignore) import Data.Word import Text.InterpolatedString.Perl6 (qc) import Data.Coerce import Data.Generics.Product.Fields (field) import Lens.Micro.Platform import System.Process.Typed import Control.Monad.Trans.Cont import Control.Monad.Trans.Maybe import System.IO qualified as IO import System.IO.Temp (emptySystemTempFile) import System.TimeIt import Data.Map qualified as Map import Streaming.Prelude qualified as S data ScanGitArgs = PrintBlobs | PrintFixme | ScanRunDry | ScanAllCommits deriving stock (Eq,Ord,Show,Data,Generic) pattern ScanGitArgs :: forall {c} . ScanGitArgs -> Syntax c pattern ScanGitArgs w <- ( scanGitArg -> Just w ) scanGitArg :: Syntax c -> Maybe ScanGitArgs scanGitArg = \case SymbolVal "print-blobs" -> Just PrintBlobs SymbolVal "print-fixme" -> Just PrintFixme SymbolVal "dry" -> Just ScanRunDry SymbolVal "all-commits" -> Just ScanAllCommits _ -> Nothing {- HLINT ignore "Functor law" -} listCommits :: FixmePerks m => FixmeM m [(GitHash, HashMap FixmeAttrName FixmeAttrVal)] listCommits = do gd <- fixmeGetGitDirCLIOpt days <- asks fixmeEnvGitScanDays >>= readTVarIO <&> fmap ( \x -> "--since" <+> squotes (pretty x <+> "days ago")) <&> fromMaybe mempty <&> show let cmd = [qc|git {gd} log --all --format="%H '%cn' '%ce' %ct" {days}|] debug $ yellow "listCommits" <+> pretty cmd gitRunCommand cmd <&> fromRight mempty <&> LBS8.lines <&> mapMaybe extract where extract :: ByteString -> Maybe (GitHash, HashMap FixmeAttrName FixmeAttrVal) extract lbs = do let txt = decodeUtf8With ignore (LBS8.toStrict lbs) let r = tokenize @Text spec txt case r of [co, n, e, t] -> do let gh = fromStringMay @GitHash (Text.unpack co) let bag = [ ("commit", co) , ("commit-time", t) , ("committer-name", n) , ("committer-email", e) , ("committer", [qc|{n} <{e}>|]) ] & fmap ( over _1 FixmeAttrName . over _2 FixmeAttrVal) & HM.fromList (,) <$> gh <*> pure bag _ -> Nothing spec = sq <> delims " \t" listRefs :: FixmePerks m => Bool -> FixmeM m [(GitHash, GitRef)] listRefs every = do gd <- fixmeGetGitDirCLIOpt gitRunCommand [qc|git {gd} show-ref --dereference|] <&> fromRight mempty <&> fmap LBS8.words . LBS8.lines <&> mapMaybe (\case [h,b] -> (,) <$> fromStringMay @GitHash (LBS8.unpack h) <*> pure (GitRef (LBS8.toStrict b)) _ -> Nothing ) >>= filterM filt where filt _ | every = pure True filt (h,_) = do done <- withState $ isProcessed $ ViaSerialise h pure (not done) listBlobs :: (FixmePerks m, MonadReader FixmeEnv m) => GitHash -> m [(FilePath,GitHash)] listBlobs co = do gd <- fixmeGetGitDirCLIOpt gitRunCommand [qc|git {gd} ls-tree -r -l -t {pretty co}|] <&> fromRight mempty <&> fmap LBS8.words . LBS8.lines <&> mapMaybe (\case [a,"blob",h,_,fn] -> (LBS8.unpack fn,) <$> fromStringMay @GitHash (LBS8.unpack h) _ -> Nothing) filterBlobs0 :: FixmePerks m => [(Bool,FilePattern)] -> [(FilePath,GitHash)] -> FixmeM m [(FilePath,GitHash)] filterBlobs0 pat xs = do -- pat <- asks fixmeEnvFileMask >>= readTVarIO <&> fmap (True,) let src = [ ((f,h),f) | (f,h) <- xs ] let r = [(h,f) | (_,(f,h),_) <- matchMany pat src] & HM.fromList & HM.toList pure $ [ (b,a) | (a,b) <- r ] filterBlobs :: FixmePerks m => [(FilePath,GitHash)] -> FixmeM m [(FilePath,GitHash)] filterBlobs xs = do pat <- asks fixmeEnvFileMask >>= readTVarIO <&> fmap (True,) filterBlobs0 pat xs scanGitLogLocal :: FixmePerks m => FilePath -> ( CompactStorage HbSync -> FixmeM m () ) -> FixmeM m () scanGitLogLocal refMask play = do warn $ red "scanGitLogLocal" <+> pretty refMask (t,refs) <- timeItT $ listRefs False let hashes = fmap fst refs warn $ yellow "listRefs in" <+> pretty (realToFrac t :: Fixed E6) let pat = [(True, refMask)] -- FIXME: use-cache-to-skip-already-processed-tips logz <- withState do S.toList_ $ for_ hashes $ \h -> do done <- lift $ isProcessed (ViaSerialise h) unless done do blobs <- lift $ lift $ (listBlobs h >>= filterBlobs0 pat) when (List.null blobs) do lift $ insertProcessed (ViaSerialise h) for_ blobs $ \(_,b) -> do S.yield (h,b) warn $ yellow "STEP 3" <+> "for each tree --- find log" warn $ vcat (fmap pretty logz) warn $ yellow "STEP 4" <+> "for each log --- scan log" withState $ transactional do flip runContT pure do for_ logz $ \(commitHash, h) -> callCC \shit -> do warn $ blue "SCAN BLOB" <+> pretty h tmp <- ContT $ bracket (liftIO (emptySystemTempFile "fixme-log")) rm blob <- lift $ lift $ gitCatBlob h liftIO (LBS8.writeFile tmp blob) esto <- lift $ try @_ @CompactStorageOpenError $ compactStorageOpen @HbSync readonly tmp -- skip even problematic commit lift $ insertProcessed (ViaSerialise commitHash) either (const $ warn $ "skip malformed/unknown log" <+> pretty h) (const none) esto sto <- either (const $ shit ()) pure esto lift $ lift $ play sto compactStorageClose sto scanGitLocal :: FixmePerks m => [ScanGitArgs] -> Maybe FilePath -> FixmeM m () scanGitLocal args p = do env <- ask flip runContT pure do (dbFn, _) <- ContT $ withSystemTempFile "fixme-db" . curry tempDb <- newDBPipeEnv dbPipeOptsDef dbFn withDB tempDb do ddl [qc| create table co ( cohash text not null , ts int null , primary key (cohash) ) |] ddl [qc| create table coattr ( cohash text not null , name text not null , value text not null , primary key (cohash,name) ) |] ddl [qc| create table blob ( hash text not null , cohash text not null , path text not null , primary key (hash,cohash,path) ) |] -- update_ [qc|ATTACH DATABASE '{dbpath}' as fixme|] let onlyNewCommits xs | ScanAllCommits `elem` args = pure xs | otherwise = lift $ filterM (newCommit . view _1) xs co <- lift listCommits >>= onlyNewCommits lift do withDB tempDb $ transactional do for_ co $ \(commit, attr) -> do debug $ "commit" <+> pretty commit blobs <- lift $ listBlobs commit >>= withFixmeEnv env . filterBlobs let ts = HM.lookup "commit-time" attr >>= readMay @Word64 . Text.unpack . coerce insert [qc| insert into co (cohash,ts) values (?,?) on conflict (cohash) do nothing |] (commit,ts) for_ (HM.toList attr) $ \(a,b) -> do insert [qc| insert into coattr(cohash,name,value) values(?,?,?) on conflict (cohash,name) do nothing |] (commit,a,b) for_ blobs $ \(fp,h) -> do insert [qc| insert into blob (hash,cohash,path) values (?,?,?) on conflict (hash,cohash,path) do nothing |] (h,commit,fp) blobs <- withDB tempDb do select_ @_ @(GitHash, FilePath) [qc|select distinct hash, path from blob order by path|] when ( PrintBlobs `elem` args ) do for_ blobs $ \(h,fp) -> do notice $ pretty h <+> pretty fp callCC \fucked -> do gitCat <- ContT $ bracket startGitCatFile (hClose . getStdin) let ssin = getStdin gitCat let ssout = getStdout gitCat liftIO $ IO.hSetBuffering ssin LineBuffering for_ blobs $ \(h,fp) -> callCC \next -> do seen <- lift (withState $ selectObjectHash h) <&> isJust when seen do trace $ red "ALREADY SEEN BLOB" <+> pretty h next () liftIO $ IO.hPrint ssin (pretty h) >> IO.hFlush ssin prefix <- liftIO (BS.hGetLine ssout) <&> BS.words case prefix of [bh, "blob", ssize] -> do let mslen = readMay @Int (BS.unpack ssize) len <- ContT $ maybe1 mslen (pure ()) blob <- liftIO $ LBS8.hGet ssout len void $ liftIO $ BS.hGetLine ssout poor <- lift (Scan.scanBlob (Just fp) blob) rich <- withDB tempDb do let q = [qc| WITH CommitAttributes AS ( SELECT co.cohash, co.ts, coattr.name, coattr.value FROM co JOIN coattr ON co.cohash = coattr.cohash ), MinCommitTimes AS ( SELECT blob.hash, MIN(co.ts) as mintime FROM blob JOIN co ON blob.cohash = co.cohash WHERE co.ts IS NOT NULL GROUP BY blob.hash ), RelevantCommits AS ( SELECT blob.hash, blob.cohash, blob.path FROM blob JOIN MinCommitTimes ON blob.hash = MinCommitTimes.hash JOIN co ON blob.cohash = co.cohash AND co.ts = MinCommitTimes.mintime ) SELECT CommitAttributes.name, CommitAttributes.value FROM RelevantCommits JOIN CommitAttributes ON RelevantCommits.cohash = CommitAttributes.cohash WHERE RelevantCommits.hash = ? |] what <- select @(FixmeAttrName,FixmeAttrVal) q (Only h) <&> HM.fromList <&> (<> HM.fromList [ ("blob",fromString $ show (pretty h)) , ("file",fromString fp) ]) for poor $ \f -> do let lno = maybe mempty ( HM.singleton "line" . FixmeAttrVal . Text.pack . show ) (fixmeStart f) let ts = HM.lookup "commit-time" what <&> Text.unpack . coerce >>= readMay <&> FixmeTimestamp pure $ set (field @"fixmeTs") ts $ over (field @"fixmeAttr") (<> (what <> lno)) f let fxpos1 = [ (fixmeTitle fx, [i :: Int]) | (i,fx) <- zip [0..] rich -- , fixmeTitle fx /= mempty ] & Map.fromListWith (flip (<>)) let mt e = do let seed = [ (fst e, i) | i <- snd e ] flip fix (0,[],seed) $ \next (num,acc,rest) -> case rest of [] -> acc (x:xs) -> next (succ num, (x,num) : acc, xs) let fxpos2 = [ mt e | e <- Map.toList fxpos1 ] & mconcat & Map.fromList fixmies <- for (zip [0..] rich) $ \(i,fx) -> do let title = fixmeTitle fx let kb = Map.lookup (title,i) fxpos2 let ka = HM.lookup "file" (fixmeAttr fx) let kk = (,,) <$> ka <*> pure title <*> kb case kk of Nothing -> pure fx Just (a,b,c) -> do let ks = [qc|{show (pretty a)}#{show (pretty b)}:{show c}|] :: Text let ksh = hashObject @HbSync (serialise ks) & pretty & show & Text.pack & FixmeAttrVal let kh = HM.singleton "fixme-key" ksh let kv = HM.singleton "fixme-key-string" (FixmeAttrVal ks) <> kh pure $ over (field @"fixmeAttr") (<> kv) fx when ( PrintFixme `elem` args ) do for_ fixmies $ \fixme -> do notice $ pretty fixme when ( ScanRunDry `elem` args ) $ fucked () debug $ "actually-import-fixmies" <+> pretty h liftIO $ withFixmeEnv env $ withState $ transactional do insertBlob h for_ fixmies insertFixme _ -> fucked () unless ( ScanRunDry `elem` args ) do lift runLogActions liftIO $ withFixmeEnv env $ withState $ transactional do for_ co $ \w -> do insertCommit (view _1 w) gitListStage :: (FixmePerks m) => FixmeM m [Either (FilePath, GitHash) (FilePath, GitHash)] gitListStage = do gd <- fixmeGetGitDirCLIOpt modified <- gitRunCommand [qc|git {gd} status --porcelain|] <&> fromRight mempty <&> fmap LBS8.words . LBS8.lines <&> mapMaybe ( \case ["M", fn] -> Just (LBS8.unpack fn) _ -> Nothing ) new <- S.toList_ $ do for_ modified $ \fn -> void $ runMaybeT do e <- gitRunCommand [qc|git {gd} hash-object {fn}|] >>= toMPlus <&> maybe mempty LBS8.unpack . headMay . LBS8.words <&> fromStringMay @GitHash >>= toMPlus lift (S.yield $ (fn,e)) old <- gitRunCommand [qc|git {gd} ls-files -s|] <&> fromRight mempty <&> fmap LBS8.words . LBS8.lines <&> mapMaybe ( \case [_, h, _, fn] -> (LBS8.unpack fn,) <$> fromStringMay @GitHash (LBS8.unpack h) _ -> Nothing ) new1 <- filterBlobs new <&> fmap Left old1 <- filterBlobs old <&> fmap Right pure (old1 <> new1) getMetaDataFromGitBlame :: FixmePerks m => FilePath -> Fixme -> FixmeM m Fixme getMetaDataFromGitBlame f fx0 = do gd <- fixmeGetGitDirCLIOpt fromMaybe mempty <$> runMaybeT do l0 <- fixmeStart fx0 & toMPlus <&> fromIntegral <&> succ let cmd = [qc|git {gd} blame {f} -L{l0},{l0} -t -l -p|] s0 <- gitRunCommand cmd <&> LBS8.unpack . fromRight mempty s <- parseTop s0 & toMPlus let ko = headMay (words <$> lines s0) >>= headMay >>= (\z -> do if z == "0000000000000000000000000000000000000000" then Nothing else Just z ) >>= fromStringMay @GitHash pieces <- for s $ \case ListVal (SymbolVal "committer" : StringLikeList w) | isJust ko -> do let co = FixmeAttrVal $ fromString $ unwords w pure $ mempty { fixmeAttr = HM.singleton "committer-name" co } ListVal (SymbolVal "committer-mail" : StringLikeList w) | isJust ko -> do let co = FixmeAttrVal $ fromString $ unwords w pure $ mempty { fixmeAttr = HM.singleton "committer-email" co } ListVal [SymbolVal "committer-time", TimeStampLike t] | isJust ko -> do let ct = FixmeAttrVal $ fromString $ show t pure $ mempty { fixmeAttr = HM.singleton "commit-time" ct, fixmeTs = Just t } _ -> pure mempty let coco = mempty { fixmeAttr = maybe mempty (HM.singleton "commit" . fromString . show . pretty) ko } pure $ mconcat pieces <> coco gitExtractFileMetaData :: FixmePerks m => [FilePath] -> FixmeM m (HashMap FilePath Fixme) gitExtractFileMetaData fns = do -- FIXME: magic-number let chunks = chunksOf 64 fns gd <- fixmeGetGitDirCLIOpt commitz <- S.toList_ $ for_ chunks $ \chu -> do let filez = unwords chu let cmd = [qc|git {gd} log --diff-filter=AMR --pretty=format:'entry %H %at "%an" "%ae"' -- {filez}|] ss <- gitRunCommand cmd <&> fromRight mempty <&> fmap LBS8.unpack . LBS8.lines for_ ss $ \s -> do let syn = parseTop s & fromRight mempty case syn of [ListVal [SymbolVal "entry", SymbolVal (Id e), LitIntVal t, StringLike n, StringLike m]] -> do -- liftIO $ print $ pretty e <+> pretty syn S.yield (fromString @GitHash (Text.unpack e), (t,n,m) ) _ -> pure () let co = HM.fromList commitz & HM.toList rich0 <- S.toList_ $ do for_ co $ \(c, (t,n,m)) -> do let pat = [ (True, f) | f <- fns ] blobz <- lift $ listBlobs c >>= filterBlobs0 pat for_ blobz $ \(f,h) -> do let attr = HM.fromList [ ("commit", FixmeAttrVal (fromString $ show $ pretty c)) , ("commit-time", FixmeAttrVal (fromString $ show $ pretty t)) , ("committer-name", FixmeAttrVal (fromString n)) , ("committer-email", FixmeAttrVal (fromString m)) , ("committer", FixmeAttrVal (fromString $ [qc|{n} <{m}>|])) , ("file", FixmeAttrVal (fromString f)) , ("blob", FixmeAttrVal (fromString $ show $ pretty $ h)) ] let what = mempty { fixmeAttr = attr } S.yield (f,t,what) let rich = List.sortBy (\a b -> compare (view _2 a) (view _2 b)) rich0 pure $ HM.fromList [ (view _1 w, view _3 w) | w <- rich ] -- TODO: move-outta-here runLogActions :: FixmePerks m => FixmeM m () runLogActions = do debug $ yellow "runLogActions" actions <- asks fixmeEnvReadLogActions >>= readTVarIO for_ actions $ \(ReadLogAction a) -> do liftIO (a (List noContext [])) updateIndexes data GitBlobInfo = GitBlobInfo FilePath GitHash deriving stock (Eq,Ord,Data,Generic,Show) instance Hashable GitBlobInfo data GitIndexEntry = GitCommit Word64 (HashSet GitBlobInfo) deriving stock (Eq,Ord,Data,Generic,Show) instance Serialise GitBlobInfo instance Serialise GitIndexEntry listCommitForIndex :: forall m . (FixmePerks m, MonadReader FixmeEnv m) => ( (GitHash, GitIndexEntry) -> m ()) -> m () listCommitForIndex fn = do gd <- fixmeGetGitDirCLIOpt let cmd = [qc|git {gd} log --all --format="%H %ct"|] debug $ yellow "listCommits" <+> pretty cmd s0 <- gitRunCommand cmd <&> fromRight mempty <&> fmap (words . LBS8.unpack) . LBS8.lines <&> mapMaybe ( \case [a,b] -> (,) <$> fromStringMay @GitHash a <*> makeIndexEntry0 a b _ -> Nothing ) for_ s0 $ \(h, GitCommit w _) -> do blobz <- listBlobs h <&> HS.fromList . fmap ( uncurry GitBlobInfo ) fn (h, GitCommit w blobz) where makeIndexEntry0 _ t = GitCommit <$> readMay t <*> pure mempty gitCatBlob :: (FixmePerks m, MonadReader FixmeEnv m) => GitHash -> m ByteString gitCatBlob h = do gd <- fixmeGetGitDirCLIOpt (_,s,_) <- readProcess $ shell [qc|git {gd} cat-file blob {pretty h}|] pure s startGitCatFile :: (FixmePerks m, MonadReader FixmeEnv m) => m (Process Handle Handle ()) startGitCatFile = do gd <- fixmeGetGitDirCLIOpt let cmd = [qc|git {gd} cat-file --batch|] debug $ pretty cmd let config = setStdin createPipe $ setStdout createPipe $ setStderr closed $ shell cmd startProcess config