feat: keep proceedings cache up to date via celery (#8449)
* refactor: better control proceedings caching

* refactor: move methods from views to utils

* chore: revert accidental settings change

* fix: eliminate circular import

get_schedule() with name=None should perhaps be an anti-pattern

* feat: task to recompute proceedings daily

* chore: proceedings cache lifetime = 1 day

* fix: ensure finalization is immediately reflected

* chore: update beat comments in docker-compose

* style: undo a couple whitespace changes

* test: update / refactor tests

* test: test task

* refactor: disallow positional arg to task

* refactor: add trivial test of old task
jennifer-richards authored Feb 5, 2025
1 parent 1fbedd7 commit 060320d
Showing 8 changed files with 542 additions and 368 deletions.
1 change: 1 addition & 0 deletions docker-compose.yml
@@ -101,6 +101,7 @@ services:
# stop_grace_period: 1m
# volumes:
# - .:/workspace
# - app-assets:/assets

volumes:
postgresdb-data:
32 changes: 32 additions & 0 deletions ietf/meeting/tasks.py
@@ -3,10 +3,42 @@
# Celery task definitions
#
from celery import shared_task
from django.utils import timezone

from ietf.utils import log
from .models import Meeting
from .utils import generate_proceedings_content
from .views import generate_agenda_data


@shared_task
def agenda_data_refresh():
    generate_agenda_data(force_refresh=True)


@shared_task
def proceedings_content_refresh_task(*, all=False):
    """Refresh meeting proceedings cache

    If `all` is `False`, then refreshes the cache for meetings whose numbers modulo
    24 equal the current hour number (0-23). Scheduling the task once per hour will
    then result in all proceedings being recomputed daily, with no more than two per
    hour (now) or a few per hour in the next decade. That keeps the computation time
    to under a couple minutes on our current production system.

    If `all` is `True`, refreshes all meetings.
    """
    now = timezone.now()

    for meeting in Meeting.objects.filter(type_id="ietf").order_by("number"):
        if meeting.proceedings_format_version == 1:
            continue  # skip v1 proceedings, they're stored externally
        num = meeting.get_number()  # convert str -> int
        if num is None:
            log.log(
                f"Not refreshing proceedings for meeting {meeting.number}: "
                f"type is 'ietf' but get_number() returned None"
            )
        elif all or (num % 24 == now.hour):
            log.log(f"Refreshing proceedings for meeting {meeting.number}...")
            generate_proceedings_content(meeting, force_refresh=True)
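
This diff does not show how the task is scheduled. The docstring above assumes it runs once per hour, presumably under celery beat (the commit also updates the beat comments in docker-compose). A minimal scheduling sketch follows; the Celery app instance, schedule name, and minute offset are assumptions, not part of this commit:

# Hypothetical scheduling sketch (not from this commit): run the refresh task
# hourly so the modulo-24 bucketing spreads proceedings recomputation over a day.
from celery import Celery
from celery.schedules import crontab

app = Celery("ietf")  # assumed app instance; the datatracker defines its own elsewhere

app.conf.beat_schedule = {
    "refresh-proceedings-hourly": {
        "task": "ietf.meeting.tasks.proceedings_content_refresh_task",
        "schedule": crontab(minute=5),  # once per hour, at five past
    },
}
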
51 changes: 51 additions & 0 deletions ietf/meeting/tests_tasks.py
@@ -0,0 +1,51 @@
# Copyright The IETF Trust 2025, All Rights Reserved

import datetime
from mock import patch, call
from ietf.utils.test_utils import TestCase
from .factories import MeetingFactory
from .tasks import proceedings_content_refresh_task, agenda_data_refresh


class TaskTests(TestCase):
    @patch("ietf.meeting.tasks.generate_agenda_data")
    def test_agenda_data_refresh(self, mock_generate):
        agenda_data_refresh()
        self.assertTrue(mock_generate.called)
        self.assertEqual(mock_generate.call_args, call(force_refresh=True))

    @patch("ietf.meeting.tasks.generate_proceedings_content")
    def test_proceedings_content_refresh_task(self, mock_generate):
        # Generate a couple of meetings
        meeting120 = MeetingFactory(type_id="ietf", number="120")  # 24 * 5
        meeting127 = MeetingFactory(type_id="ietf", number="127")  # 24 * 5 + 7

        # Times to be returned
        now_utc = datetime.datetime.now(tz=datetime.timezone.utc)
        hour_00_utc = now_utc.replace(hour=0)
        hour_01_utc = now_utc.replace(hour=1)
        hour_07_utc = now_utc.replace(hour=7)

        # hour 00 - should call meeting with number % 24 == 0
        with patch("ietf.meeting.tasks.timezone.now", return_value=hour_00_utc):
            proceedings_content_refresh_task()
        self.assertEqual(mock_generate.call_count, 1)
        self.assertEqual(mock_generate.call_args, call(meeting120, force_refresh=True))
        mock_generate.reset_mock()

        # hour 01 - should call no meetings
        with patch("ietf.meeting.tasks.timezone.now", return_value=hour_01_utc):
            proceedings_content_refresh_task()
        self.assertEqual(mock_generate.call_count, 0)

        # hour 07 - should call meeting with number % 24 == 7
        with patch("ietf.meeting.tasks.timezone.now", return_value=hour_07_utc):
            proceedings_content_refresh_task()
        self.assertEqual(mock_generate.call_count, 1)
        self.assertEqual(mock_generate.call_args, call(meeting127, force_refresh=True))
        mock_generate.reset_mock()

        # With all=True, all meetings should be refreshed regardless of time.
        # Reuse hour_01_utc, which matched no meetings above.
        with patch("ietf.meeting.tasks.timezone.now", return_value=hour_01_utc):
            proceedings_content_refresh_task(all=True)
        self.assertEqual(mock_generate.call_count, 2)
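
Because `all` is keyword-only (see the "disallow positional arg to task" item in the commit message), callers must name it explicitly. A small usage sketch, assuming a running celery worker; not part of the commit itself:

# Queue a full refresh of every meeting's proceedings from application code.
from ietf.meeting.tasks import proceedings_content_refresh_task

proceedings_content_refresh_task.delay(all=True)  # asynchronous, via a celery worker
# proceedings_content_refresh_task(all=True)      # or run synchronously in-process
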
91 changes: 76 additions & 15 deletions ietf/meeting/tests_views.py
@@ -32,6 +32,7 @@
from django.http import QueryDict, FileResponse
from django.template import Context, Template
from django.utils import timezone
from django.utils.safestring import mark_safe
from django.utils.text import slugify

import debug # pyflakes:ignore
@@ -46,7 +47,7 @@
from ietf.meeting.helpers import send_interim_minutes_reminder, populate_important_dates, update_important_dates
from ietf.meeting.models import Session, TimeSlot, Meeting, SchedTimeSessAssignment, Schedule, SessionPresentation, SlideSubmission, SchedulingEvent, Room, Constraint, ConstraintName
from ietf.meeting.test_data import make_meeting_test_data, make_interim_meeting, make_interim_test_data
from ietf.meeting.utils import condition_slide_order
from ietf.meeting.utils import condition_slide_order, generate_proceedings_content
from ietf.meeting.utils import add_event_info_to_session_qs, participants_for_meeting
from ietf.meeting.utils import create_recording, delete_recording, get_next_sequence, bluesheet_data
from ietf.meeting.views import session_draft_list, parse_agenda_filter_params, sessions_post_save, agenda_extract_schedule
@@ -8296,8 +8297,7 @@ def _proceedings_file():
        path = Path(settings.BASE_DIR) / 'meeting/test_procmat.pdf'
        return path.open('rb')

    def _assertMeetingHostsDisplayed(self, response, meeting):
        pq = PyQuery(response.content)
    def _assertMeetingHostsDisplayed(self, pq: PyQuery, meeting):
        host_divs = pq('div.host-logo')
        self.assertEqual(len(host_divs), meeting.meetinghosts.count(), 'Should have a logo for every meeting host')
        self.assertEqual(
@@ -8313,12 +8313,11 @@ def _assertMeetingHostsDisplayed(self, response, meeting):
            'Correct image and name for each host should appear in the correct order'
        )

    def _assertProceedingsMaterialsDisplayed(self, response, meeting):
    def _assertProceedingsMaterialsDisplayed(self, pq: PyQuery, meeting):
        """Checks that all (and only) active materials are linked with correct href and title"""
        expected_materials = [
            m for m in meeting.proceedings_materials.order_by('type__order') if m.active()
        ]
        pq = PyQuery(response.content)
        links = pq('div.proceedings-material a')
        self.assertEqual(len(links), len(expected_materials), 'Should have an entry for each active ProceedingsMaterial')
        self.assertEqual(
@@ -8327,20 +8326,18 @@ def _assertProceedingsMaterialsDisplayed(self, response, meeting):
            'Correct title and link for each ProceedingsMaterial should appear in the correct order'
        )

    def _assertGroupSessions(self, response, meeting):
    def _assertGroupSessions(self, pq: PyQuery):
        """Checks that group/sessions are present"""
        pq = PyQuery(response.content)
        sections = ["plenaries", "gen", "iab", "editorial", "irtf", "training"]
        for section in sections:
            self.assertEqual(len(pq(f"#{section}")), 1, f"{section} section should exist in proceedings")

    def test_proceedings(self):
        """Proceedings should be displayed correctly
        Currently only tests that the view responds with a 200 response code and checks the ProceedingsMaterials
        at the top of the proceedings. Ought to actually test the display of the individual group/session
        materials as well.
        Proceedings contents are tested in detail when testing generate_proceedings_content.
        """
        # number must be >97 (settings.PROCEEDINGS_VERSION_CHANGES)
        meeting = make_meeting_test_data(meeting=MeetingFactory(type_id='ietf', number='100'))
        session = Session.objects.filter(meeting=meeting, group__acronym="mars").first()
        GroupEventFactory(group=session.group,type='status_update')
@@ -8365,16 +8362,72 @@ def test_proceedings(self):
        self._create_proceedings_materials(meeting)

        url = urlreverse("ietf.meeting.views.proceedings", kwargs=dict(num=meeting.number))
        r = self.client.get(url)
        cached_content = mark_safe("<p>Fake proceedings content</p>")
        with patch("ietf.meeting.views.generate_proceedings_content") as mock_gpc:
            mock_gpc.return_value = cached_content
            r = self.client.get(url)
        self.assertEqual(r.status_code, 200)
        self.assertIn(cached_content, r.content.decode())
        self.assertTemplateUsed(r, "meeting/proceedings_wrapper.html")
        self.assertTemplateNotUsed(r, "meeting/proceedings.html")

        # These are rendered in proceedings_wrapper.html, so test them here
        if len(meeting.city) > 0:
            self.assertContains(r, meeting.city)
        if len(meeting.venue_name) > 0:
            self.assertContains(r, meeting.venue_name)
        self._assertMeetingHostsDisplayed(PyQuery(r.content), meeting)

@patch("ietf.meeting.utils.caches")
def test_generate_proceedings_content(self, mock_caches):
# number must be >97 (settings.PROCEEDINGS_VERSION_CHANGES)
meeting = make_meeting_test_data(meeting=MeetingFactory(type_id='ietf', number='100'))

# First, check that by default a value in the cache is used without doing any other computation
mock_default_cache = mock_caches["default"]
mock_default_cache.get.return_value = "a cached value"
result = generate_proceedings_content(meeting)
self.assertEqual(result, "a cached value")
self.assertFalse(mock_default_cache.set.called)
self.assertTrue(mock_default_cache.get.called)
cache_key = mock_default_cache.get.call_args.args[0]
mock_default_cache.get.reset_mock()

# Now set up for actual computation of the proceedings content.
session = Session.objects.filter(meeting=meeting, group__acronym="mars").first()
GroupEventFactory(group=session.group,type='status_update')
SessionPresentationFactory(document__type_id='recording',session=session)
SessionPresentationFactory(document__type_id='recording',session=session,document__title="Audio recording for tests")

# Add various group sessions
groups = []
parent_groups = [
GroupFactory.create(type_id="area", acronym="gen"),
GroupFactory.create(acronym="iab"),
GroupFactory.create(acronym="irtf"),
]
for parent in parent_groups:
groups.append(GroupFactory.create(parent=parent))
for acronym in ["rsab", "edu"]:
groups.append(GroupFactory.create(acronym=acronym))
for group in groups:
SessionFactory(meeting=meeting, group=group)

self.write_materials_files(meeting, session)
self._create_proceedings_materials(meeting)

# Now "empty" the mock cache and see that we compute the expected proceedings content.
mock_default_cache.get.return_value = None
proceedings_content = generate_proceedings_content(meeting)
self.assertTrue(mock_default_cache.get.called)
self.assertEqual(mock_default_cache.get.call_args.args[0], cache_key, "same cache key each time")
self.assertTrue(mock_default_cache.set.called)
self.assertEqual(mock_default_cache.set.call_args, call(cache_key, proceedings_content, timeout=86400))
mock_default_cache.get.reset_mock()
mock_default_cache.set.reset_mock()

# standard items on every proceedings
pq = PyQuery(r.content)
pq = PyQuery(proceedings_content)
self.assertNotEqual(
pq('a[href="{}"]'.format(
urlreverse('ietf.meeting.views.proceedings_overview', kwargs=dict(num=meeting.number)))
@@ -8405,9 +8458,17 @@ def test_proceedings(self):
        )

        # configurable contents
        self._assertMeetingHostsDisplayed(r, meeting)
        self._assertProceedingsMaterialsDisplayed(r, meeting)
        self._assertGroupSessions(r, meeting)
        self._assertProceedingsMaterialsDisplayed(pq, meeting)
        self._assertGroupSessions(pq)

        # Finally, repeat the first cache test, but now with force_refresh=True. The cached value
        # should be ignored and we should recompute the proceedings as before.
        mock_default_cache.get.return_value = "a cached value"
        result = generate_proceedings_content(meeting, force_refresh=True)
        self.assertEqual(result, proceedings_content)  # should have recomputed the same thing
        self.assertFalse(mock_default_cache.get.called, "don't bother reading cache when force_refresh is True")
        self.assertTrue(mock_default_cache.set.called)
        self.assertEqual(mock_default_cache.set.call_args, call(cache_key, proceedings_content, timeout=86400))

    def test_named_session(self):
        """Session with a name should appear separately in the proceedings"""
