Skip to content

Commit

Permalink
[FLORA-414] Store archive hashes
Browse files Browse the repository at this point in the history
  • Loading branch information
tchoutri committed Oct 7, 2023
1 parent bb49142 commit ee26146
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 22 deletions.
54 changes: 32 additions & 22 deletions src/core/Flora/Import/Package/Bulk.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import Codec.Archive.Tar (Entries)
import Codec.Archive.Tar qualified as Tar
import Codec.Archive.Tar.Entry qualified as Tar
import Codec.Archive.Tar.Index qualified as Tar
import Data.Poolboy (WorkQueue)
import Codec.Compression.GZip qualified as GZip
import Control.Monad (when, (>=>))
import Data.ByteString qualified as BS
Expand All @@ -22,13 +23,14 @@ import Data.Set (Set)
import Data.Set qualified as Set
import Data.Text (Text)
import Data.Text qualified as Text
import Data.Time (UTCTime)
import Data.Time.Clock.POSIX (posixSecondsToUTCTime)
import Effectful
import Effectful.Log qualified as Log
import Effectful.Log (Log)
import Effectful.PostgreSQL.Transact.Effect (DB, getPool, runDB)
import Effectful.Reader.Static (Reader, ask, runReader)
import Effectful.Time (runTime)
import Effectful.Time qualified as Time
import Effectful.Time (Time)
import Log (Logger, defaultLogLevel)
import Streamly.Data.Fold qualified as SFold
import Streamly.Prelude qualified as S
Expand All @@ -45,6 +47,7 @@ import Flora.Import.Package
, persistImportOutput
, withWorkerDbPool
)
import Flora.Import.Types
import Flora.Model.Package
import Flora.Model.Package.Update qualified as Update
import Flora.Model.PackageIndex.Query qualified as Query
Expand Down Expand Up @@ -105,7 +108,9 @@ importFromIndex appLogger user (repositoryName, repositoryURL) index directImpor
in Tar.entryContent entry & \case
Tar.NormalFile bs _
| ".cabal" `isSuffixOf` entryPath && entryTime > time ->
(entryPath, entryTime, BL.toStrict bs) `S.cons` acc
(CabalFile entryPath entryTime (BL.toStrict bs)) `S.cons` acc
| ".json" `isSuffixOf` entryPath && entryTime > time ->
(JSONFile entryPath (BL.toStrict bs)) `S.cons` acc
_ -> acc

-- | Finds all cabal files in the specified directory, and inserts them into the database after extracting the relevant data
Expand All @@ -121,15 +126,16 @@ importAllFilesInDirectory appLogger user (repositoryName, repositoryURL) dir dir
liftIO $ System.createDirectoryIfMissing True dir
packages <- buildPackageListFromDirectory dir
liftIO . putStrLn $ "🔎 Searching cabal files in " <> dir
importFromStream appLogger user (repositoryName, repositoryURL, packages) directImport $ findAllCabalFilesInDirectory dir
importFromStream appLogger user (repositoryName, repositoryURL, packages) directImport $
findAllCabalFilesInDirectory dir

importFromStream
:: (Reader PoolConfig :> es, DB :> es, IOE :> es)
=> Logger
-> UserId
-> (Text, Text, Set PackageName)
-> Bool
-> S.AsyncT IO (String, UTCTime, BS.ByteString)
-> S.AsyncT IO ImportSubject
-> Eff es ()
importFromStream appLogger user (repositoryName, repositoryURL, repositoryPackages) directImport stream = do
pool <- getPool
Expand Down Expand Up @@ -160,26 +166,30 @@ importFromStream appLogger user (repositoryName, repositoryURL, repositoryPackag
when (currentCount `mod` 400 == 0) $
displayStats currentCount
return currentCount
processFile wq pool poolConfig =
runEff
. runReader poolConfig
. runDB pool
. runTime
. Log.runLog "flora-cli" appLogger defaultLogLevel
. ( \(path, timestamp, content) ->
loadContent path content
>>= ( extractPackageDataFromCabal user (repositoryName, repositoryPackages) timestamp
>=> \importedPackage ->
if directImport
then persistImportOutput wq importedPackage
else enqueueImportJob importedPackage
)
)
processFile wq pool poolConfig importSubject =
(processingAction wq importSubject)
& Log.runLog "flora-cli" appLogger defaultLogLevel
& Time.runTime
& runDB pool
& runReader poolConfig
& runEff
processingAction :: (DB :> es, Log :> es, Time :> es, IOE :> es) => WorkQueue -> ImportSubject -> Eff es ()
processingAction wq subject =
case subject of
(CabalFile path timestamp content) ->
loadContent path content
>>= ( extractPackageDataFromCabal user (repositoryName, repositoryPackages) timestamp
>=> \importedPackage ->
if directImport
then persistImportOutput wq importedPackage
else enqueueImportJob importedPackage
)
(JSONFile path content) -> undefined
displayStats :: MonadIO m => Int -> m ()
displayStats currentCount =
liftIO . putStrLn $ "✅ Processed " <> show currentCount <> " new cabal files"

findAllCabalFilesInDirectory :: FilePath -> S.AsyncT IO (String, UTCTime, BS.ByteString)
findAllCabalFilesInDirectory :: FilePath -> S.AsyncT IO ImportSubject
findAllCabalFilesInDirectory workdir = S.concatMapM traversePath $ S.fromList [workdir]
where
traversePath p = do
Expand All @@ -191,7 +201,7 @@ findAllCabalFilesInDirectory workdir = S.concatMapM traversePath $ S.fromList [w
False | ".cabal" `isSuffixOf` p -> do
content <- BS.readFile p
timestamp <- System.getModificationTime p
return $ S.fromPure (p, timestamp, content)
return $ S.fromPure (CabalFile p timestamp content)
_ -> return S.nil

buildPackageListFromArchive :: Entries e -> Either e (Set PackageName)
Expand Down
8 changes: 8 additions & 0 deletions src/core/Flora/Import/Types.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
module Flora.Import.Types where

import Control.Exception
import Data.ByteString (ByteString)
import Data.Text (Text)
import Data.Time (UTCTime)
import Flora.Model.Package

data ImportError
Expand All @@ -12,3 +14,9 @@ data ImportError
| CabalFileCouldNotBeParsed FilePath
deriving stock (Eq, Show)
deriving anyclass (Exception)

data ImportSubject
= CabalFile FilePath UTCTime ByteString
| JSONFile FilePath ByteString
deriving stock (Eq, Show)
deriving anyclass (Exception)

0 comments on commit ee26146

Please sign in to comment.