From ee2614601138915ae69874619549780d9a578e99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophile=20Choutri?= Date: Fri, 6 Oct 2023 17:29:41 +0200 Subject: [PATCH] [FLORA-414] Store archive hashes --- src/core/Flora/Import/Package/Bulk.hs | 54 ++++++++++++++++----------- src/core/Flora/Import/Types.hs | 8 ++++ 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/src/core/Flora/Import/Package/Bulk.hs b/src/core/Flora/Import/Package/Bulk.hs index 7c3483e1..57eb0ed1 100644 --- a/src/core/Flora/Import/Package/Bulk.hs +++ b/src/core/Flora/Import/Package/Bulk.hs @@ -11,6 +11,7 @@ import Codec.Archive.Tar (Entries) import Codec.Archive.Tar qualified as Tar import Codec.Archive.Tar.Entry qualified as Tar import Codec.Archive.Tar.Index qualified as Tar +import Data.Poolboy (WorkQueue) import Codec.Compression.GZip qualified as GZip import Control.Monad (when, (>=>)) import Data.ByteString qualified as BS @@ -22,13 +23,14 @@ import Data.Set (Set) import Data.Set qualified as Set import Data.Text (Text) import Data.Text qualified as Text -import Data.Time (UTCTime) import Data.Time.Clock.POSIX (posixSecondsToUTCTime) import Effectful import Effectful.Log qualified as Log +import Effectful.Log (Log) import Effectful.PostgreSQL.Transact.Effect (DB, getPool, runDB) import Effectful.Reader.Static (Reader, ask, runReader) -import Effectful.Time (runTime) +import Effectful.Time qualified as Time +import Effectful.Time (Time) import Log (Logger, defaultLogLevel) import Streamly.Data.Fold qualified as SFold import Streamly.Prelude qualified as S @@ -45,6 +47,7 @@ import Flora.Import.Package , persistImportOutput , withWorkerDbPool ) +import Flora.Import.Types import Flora.Model.Package import Flora.Model.Package.Update qualified as Update import Flora.Model.PackageIndex.Query qualified as Query @@ -105,7 +108,9 @@ importFromIndex appLogger user (repositoryName, repositoryURL) index directImpor in Tar.entryContent entry & \case Tar.NormalFile bs _ | ".cabal" `isSuffixOf` entryPath && entryTime > time -> - (entryPath, entryTime, BL.toStrict bs) `S.cons` acc + (CabalFile entryPath entryTime (BL.toStrict bs)) `S.cons` acc + | ".json" `isSuffixOf` entryPath && entryTime > time -> + (JSONFile entryPath (BL.toStrict bs)) `S.cons` acc _ -> acc -- | Finds all cabal files in the specified directory, and inserts them into the database after extracting the relevant data @@ -121,7 +126,8 @@ importAllFilesInDirectory appLogger user (repositoryName, repositoryURL) dir dir liftIO $ System.createDirectoryIfMissing True dir packages <- buildPackageListFromDirectory dir liftIO . putStrLn $ "🔎 Searching cabal files in " <> dir - importFromStream appLogger user (repositoryName, repositoryURL, packages) directImport $ findAllCabalFilesInDirectory dir + importFromStream appLogger user (repositoryName, repositoryURL, packages) directImport $ + findAllCabalFilesInDirectory dir importFromStream :: (Reader PoolConfig :> es, DB :> es, IOE :> es) @@ -129,7 +135,7 @@ importFromStream -> UserId -> (Text, Text, Set PackageName) -> Bool - -> S.AsyncT IO (String, UTCTime, BS.ByteString) + -> S.AsyncT IO ImportSubject -> Eff es () importFromStream appLogger user (repositoryName, repositoryURL, repositoryPackages) directImport stream = do pool <- getPool @@ -160,26 +166,30 @@ importFromStream appLogger user (repositoryName, repositoryURL, repositoryPackag when (currentCount `mod` 400 == 0) $ displayStats currentCount return currentCount - processFile wq pool poolConfig = - runEff - . runReader poolConfig - . runDB pool - . runTime - . Log.runLog "flora-cli" appLogger defaultLogLevel - . ( \(path, timestamp, content) -> - loadContent path content - >>= ( extractPackageDataFromCabal user (repositoryName, repositoryPackages) timestamp - >=> \importedPackage -> - if directImport - then persistImportOutput wq importedPackage - else enqueueImportJob importedPackage - ) - ) + processFile wq pool poolConfig importSubject = + (processingAction wq importSubject) + & Log.runLog "flora-cli" appLogger defaultLogLevel + & Time.runTime + & runDB pool + & runReader poolConfig + & runEff + processingAction :: (DB :> es, Log :> es, Time :> es, IOE :> es) => WorkQueue -> ImportSubject -> Eff es () + processingAction wq subject = + case subject of + (CabalFile path timestamp content) -> + loadContent path content + >>= ( extractPackageDataFromCabal user (repositoryName, repositoryPackages) timestamp + >=> \importedPackage -> + if directImport + then persistImportOutput wq importedPackage + else enqueueImportJob importedPackage + ) + (JSONFile path content) -> undefined displayStats :: MonadIO m => Int -> m () displayStats currentCount = liftIO . putStrLn $ "✅ Processed " <> show currentCount <> " new cabal files" -findAllCabalFilesInDirectory :: FilePath -> S.AsyncT IO (String, UTCTime, BS.ByteString) +findAllCabalFilesInDirectory :: FilePath -> S.AsyncT IO ImportSubject findAllCabalFilesInDirectory workdir = S.concatMapM traversePath $ S.fromList [workdir] where traversePath p = do @@ -191,7 +201,7 @@ findAllCabalFilesInDirectory workdir = S.concatMapM traversePath $ S.fromList [w False | ".cabal" `isSuffixOf` p -> do content <- BS.readFile p timestamp <- System.getModificationTime p - return $ S.fromPure (p, timestamp, content) + return $ S.fromPure (CabalFile p timestamp content) _ -> return S.nil buildPackageListFromArchive :: Entries e -> Either e (Set PackageName) diff --git a/src/core/Flora/Import/Types.hs b/src/core/Flora/Import/Types.hs index 5316020a..b324deec 100644 --- a/src/core/Flora/Import/Types.hs +++ b/src/core/Flora/Import/Types.hs @@ -1,7 +1,9 @@ module Flora.Import.Types where import Control.Exception +import Data.ByteString (ByteString) import Data.Text (Text) +import Data.Time (UTCTime) import Flora.Model.Package data ImportError @@ -12,3 +14,9 @@ data ImportError | CabalFileCouldNotBeParsed FilePath deriving stock (Eq, Show) deriving anyclass (Exception) + +data ImportSubject + = CabalFile FilePath UTCTime ByteString + | JSONFile FilePath ByteString + deriving stock (Eq, Show) + deriving anyclass (Exception)