Skip to content

Commit

Permalink
Automatically schedule readme fetching job after index import (#195)
Browse files Browse the repository at this point in the history
* Automatically schedule readme fetching job after index import

* Schedule README import right after index import
  • Loading branch information
tchoutri authored Sep 10, 2022
1 parent 24250a1 commit 5071115
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 26 deletions.
6 changes: 5 additions & 1 deletion migrations/20211106123053_create_releases.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
create type readme_status as enum ('imported', 'inexistent', 'not-imported');

-- A release belongs to a package, and contains multiple components.
create table if not exists releases (
release_id uuid primary key,
Expand All @@ -8,9 +10,11 @@ create table if not exists releases (
uploaded_at timestamptz,
created_at timestamptz not null,
updated_at timestamptz not null,
readme text
readme text,
readme_status readme_status not null
);

create index on releases(package_id);
create index on releases(uploaded_at);
create index on releases(readme_status);
create unique index on releases(package_id, version);
3 changes: 2 additions & 1 deletion src/Flora/Import/Package.hs
Original file line number Diff line number Diff line change
Expand Up @@ -250,13 +250,14 @@ extractPackageDataFromCabal userId genericDesc = do
Release
{ releaseId
, packageId
, readme = Nothing
, version = packageVersion
, archiveChecksum = mempty
, metadata = metadata
, uploadedAt = Nothing
, createdAt = timestamp
, updatedAt = timestamp
, readme = Nothing
, readmeStatus = NotImported
}

let lib = extractLibrary package release Nothing Nothing <$> allLibraries packageDesc
Expand Down
2 changes: 1 addition & 1 deletion src/Flora/Model/Release/Query.hs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ getPackageReleasesWithoutReadme =
from releases as r
join packages as p
on p.package_id = r.package_id
where r.readme is null
where r.readme_status is 'not-imported'
|]

getPackageReleasesWithoutUploadTimestamp :: [DB, IOE] :>> es => Eff es (Vector (ReleaseId, Version, PackageName))
Expand Down
36 changes: 33 additions & 3 deletions src/Flora/Model/Release/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,24 @@ module Flora.Model.Release.Types
, TextHtml (..)
, Release (..)
, ReleaseMetadata (..)
, ReadmeStatus (..)
)
where

import Data.Aeson
import Data.Aeson.Orphans ()
import Data.Text (Text)
import Data.ByteString (ByteString)
import Data.Text (Text, unpack)
import Data.Text.Display
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Time (UTCTime)
import Data.Typeable (Typeable)
import Data.UUID (UUID)
import Database.PostgreSQL.Entity.Types (Entity, GenericEntity, TableName)
import Database.PostgreSQL.Simple (FromRow, ToRow)
import Database.PostgreSQL.Simple.FromField (FromField (..))
import Database.PostgreSQL.Simple.FromField (FromField (..), ResultError (..), returnError)
import Database.PostgreSQL.Simple.Newtypes (Aeson (..))
import Database.PostgreSQL.Simple.ToField (ToField (..))
import Database.PostgreSQL.Simple.ToField (Action (..), ToField (..))
import Distribution.SPDX.License ()
import Distribution.SPDX.License qualified as SPDX
import Distribution.Types.Version
Expand Down Expand Up @@ -68,6 +71,8 @@ data Release = Release
, updatedAt :: UTCTime
-- ^ Last update timestamp for this release
, readme :: Maybe TextHtml
-- ^ Content of the release's README
, readmeStatus :: ReadmeStatus
}
deriving stock (Eq, Show, Generic)
deriving anyclass (FromRow, ToRow)
Expand All @@ -78,6 +83,31 @@ data Release = Release
instance Ord Release where
compare x y = compare (x.version) (y.version)

data ReadmeStatus
= Imported
| Inexistent
| NotImported
deriving stock (Eq, Ord, Show, Enum, Bounded, Generic)

parseReadmeStatus :: ByteString -> Maybe ReadmeStatus
parseReadmeStatus "imported" = pure Imported
parseReadmeStatus "inexistent" = pure Inexistent
parseReadmeStatus "not-imported" = pure NotImported
parseReadmeStatus _ = Nothing

instance Display ReadmeStatus where
displayBuilder Imported = "imported"
displayBuilder Inexistent = "inexistent"
displayBuilder NotImported = "not-imported"

instance FromField ReadmeStatus where
fromField f Nothing = returnError UnexpectedNull f ""
fromField _ (Just bs) | Just status <- parseReadmeStatus bs = pure status
fromField f (Just bs) = returnError ConversionFailed f $ unpack $ "Conversion error: Expected component to be one of " <> display @[ReadmeStatus] [minBound .. maxBound] <> ", but instead got " <> decodeUtf8 bs

instance ToField ReadmeStatus where
toField = Escape . encodeUtf8 . display

data ReleaseMetadata = ReleaseMetadata
{ license :: SPDX.License
, sourceRepos :: [Text]
Expand Down
12 changes: 7 additions & 5 deletions src/Flora/Model/Release/Update.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import Effectful
import Effectful.PostgreSQL.Transact.Effect

import Data.Time (UTCTime)
import Flora.Model.Release.Types (Release, ReleaseId, TextHtml (..))
import Flora.Model.Release.Types (ReadmeStatus (..), Release, ReleaseId, TextHtml (..))

insertRelease :: ([DB, IOE] :>> es) => Release -> Eff es ()
insertRelease = dbtToEff . insert @Release
Expand All @@ -24,14 +24,16 @@ upsertRelease release = dbtToEff $ upsert @Release release [[field| updated_at |
refreshLatestVersions :: ([DB, IOE] :>> es) => Eff es ()
refreshLatestVersions = dbtToEff $ void $ execute Update [sql| REFRESH MATERIALIZED VIEW CONCURRENTLY "latest_versions" |] ()

updateReadme :: ([DB, IOE] :>> es) => ReleaseId -> Maybe TextHtml -> Eff es ()
updateReadme releaseId readmeBody =
updateReadme :: ([DB, IOE] :>> es) => ReleaseId -> Maybe TextHtml -> ReadmeStatus -> Eff es ()
updateReadme releaseId readmeBody status =
dbtToEff $
void $
updateFieldsBy @Release
[[field| readme |]]
[ [field| readme |]
, [field| readme_status |]
]
([field| release_id |], releaseId)
(Only readmeBody)
(readmeBody, status)

updateUploadTime :: ([DB, IOE] :>> es) => ReleaseId -> UTCTime -> Eff es ()
updateUploadTime releaseId timestamp =
Expand Down
28 changes: 16 additions & 12 deletions src/Flora/OddJobs.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,38 +19,39 @@ module Flora.OddJobs
where

import Commonmark qualified
import Control.Concurrent (forkIO)
import Control.Exception
import Control.Monad
import Control.Monad.IO.Class
import Data.Aeson (Result (..), fromJSON, toJSON)
import Data.Pool
import Data.Text
import Data.Text.Display
import Data.Text.Lazy qualified as TL
import Data.Text.Lazy.Encoding qualified as TL
import Data.Time qualified as Time
import Database.PostgreSQL.Entity.DBT
import Database.PostgreSQL.Simple (Only (..))
import Database.PostgreSQL.Simple qualified as PG
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Distribution.Types.Version
import Effectful.Log (localDomainEff', logMessageEff')
import Effectful.PostgreSQL.Transact.Effect
import Log
import Lucid qualified
import Network.HTTP.Types (notFound404, statusCode)
import OddJobs.Job (Job (..), createJob, scheduleJob)
import Servant.Client (ClientError (..))
import Servant.Client.Core (ResponseF (..))
import System.Process.Typed qualified as System

import Control.Monad
import Control.Monad.IO.Class
import Data.Text.Lazy.Encoding qualified as TL
import Data.Time qualified as Time
import Database.PostgreSQL.Entity.DBT
import Database.PostgreSQL.Simple (Only (..))
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Effectful.PostgreSQL.Transact.Effect
import Flora.Model.Package
import Flora.Model.Release.Query qualified as Query
import Flora.Model.Release.Types
import Flora.Model.Release.Update (updateReadme)
import Flora.Model.Release.Update qualified as Update
import Flora.OddJobs.Types
import Flora.ThirdParties.Hackage.API (VersionedPackage (..))
import Flora.ThirdParties.Hackage.Client qualified as Hackage
import System.Process.Typed qualified as System

scheduleReadmeJob :: Pool PG.Connection -> ReleaseId -> PackageName -> Version -> IO Job
scheduleReadmeJob pool rid package version =
Expand Down Expand Up @@ -114,7 +115,7 @@ makeReadme pay@MkReadmePayload{..} = localDomain "fetch-readme" $ do
Left e@(FailureResponse _ response) -> do
-- If the README simply doesn't exist, we skip it by marking it as successful.
if response.responseStatusCode == notFound404
then updateReadme mpReleaseId Nothing
then Update.updateReadme mpReleaseId Nothing Inexistent
else throw e
Left e -> throw e
Right bodyText -> do
Expand All @@ -131,7 +132,7 @@ makeReadme pay@MkReadmePayload{..} = localDomain "fetch-readme" $ do
let readmeBody :: Lucid.Html ()
readmeBody = Lucid.toHtmlRaw @Text $ TL.toStrict htmlTxt

Update.updateReadme mpReleaseId (Just $ MkTextHtml readmeBody)
Update.updateReadme mpReleaseId (Just $ MkTextHtml readmeBody) Imported

fetchUploadTime :: FetchUploadTimePayload -> JobsRunner ()
fetchUploadTime payload@FetchUploadTimePayload{packageName, packageVersion, releaseId} = localDomain "fetch-upload-time" $ do
Expand Down Expand Up @@ -159,7 +160,10 @@ fetchNewIndex = localDomain "index-import" $ do
System.runProcess_ "cd 01-index && tar -xf 01-index.tar"
System.runProcess_ "make import-from-hackage"
logInfo_ "New index processed"
releases <- Query.getPackageReleasesWithoutReadme
pool <- getPool
liftIO $ forkIO $ forM_ releases $ \(releaseId, version, packagename) -> do
scheduleReadmeJob pool releaseId packagename version
liftIO $ void $ scheduleIndexImportJob pool

runner :: Job -> JobsRunner ()
Expand Down
6 changes: 3 additions & 3 deletions src/FloraWeb/Server/Pages/Admin.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
module FloraWeb.Server.Pages.Admin where

import Control.Concurrent (forkIO)
import Control.Concurrent.Async qualified as Async
import Control.Monad
import Control.Monad.IO.Class
import Data.Proxy (Proxy (..))
import Database.PostgreSQL.Entity.DBT
Expand All @@ -11,9 +14,6 @@ import OddJobs.Types qualified as OddJobs
import Optics.Core
import Servant (HasServer (..), hoistServer)

import Control.Concurrent (forkIO)
import Control.Concurrent.Async qualified as Async
import Control.Monad
import Flora.Environment (FloraEnv (..))
import Flora.Model.Admin.Report
import Flora.Model.Package.Query qualified as Query
Expand Down

0 comments on commit 5071115

Please sign in to comment.