Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: keep proceedings cache up to date via celery #8449

Merged
merged 15 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ services:
# stop_grace_period: 1m
# volumes:
# - .:/workspace
# - app-assets:/assets

volumes:
postgresdb-data:
Expand Down
32 changes: 32 additions & 0 deletions ietf/meeting/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,42 @@
# Celery task definitions
#
from celery import shared_task
from django.utils import timezone

from ietf.utils import log
from .models import Meeting
from .utils import generate_proceedings_content
from .views import generate_agenda_data


@shared_task
def agenda_data_refresh():
generate_agenda_data(force_refresh=True)


@shared_task
def proceedings_content_refresh_task(*, all=False):
"""Refresh meeting proceedings cache

If `all` is `False`, then refreshes the cache for meetings whose numbers modulo
24 equal the current hour number (0-23). Scheduling the task once per hour will
then result in all proceedings being recomputed daily, with no more than two per
hour (now) or a few per hour in the next decade. That keeps the computation time
to under a couple minutes on our current production system.

If `all` is True, refreshes all meetings
"""
now = timezone.now()

for meeting in Meeting.objects.filter(type_id="ietf").order_by("number"):
if meeting.proceedings_format_version == 1:
continue # skip v1 proceedings, they're stored externally
num = meeting.get_number() # convert str -> int
if num is None:
log.log(
f"Not refreshing proceedings for meeting {meeting.number}: "
f"type is 'ietf' but get_number() returned None"
)
elif all or (num % 24 == now.hour):
log.log(f"Refreshing proceedings for meeting {meeting.number}...")
generate_proceedings_content(meeting, force_refresh=True)
51 changes: 51 additions & 0 deletions ietf/meeting/tests_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright The IETF Trust 2025, All Rights Reserved

import datetime
from mock import patch, call
from ietf.utils.test_utils import TestCase
from .factories import MeetingFactory
from .tasks import proceedings_content_refresh_task, agenda_data_refresh


class TaskTests(TestCase):
@patch("ietf.meeting.tasks.generate_agenda_data")
def test_agenda_data_refresh(self, mock_generate):
agenda_data_refresh()
self.assertTrue(mock_generate.called)
self.assertEqual(mock_generate.call_args, call(force_refresh=True))

@patch("ietf.meeting.tasks.generate_proceedings_content")
def test_proceedings_content_refresh_task(self, mock_generate):
# Generate a couple of meetings
meeting120 = MeetingFactory(type_id="ietf", number="120") # 24 * 5
meeting127 = MeetingFactory(type_id="ietf", number="127") # 24 * 5 + 7

# Times to be returned
now_utc = datetime.datetime.now(tz=datetime.timezone.utc)
hour_00_utc = now_utc.replace(hour=0)
hour_01_utc = now_utc.replace(hour=1)
hour_07_utc = now_utc.replace(hour=7)

# hour 00 - should call meeting with number % 24 == 0
with patch("ietf.meeting.tasks.timezone.now", return_value=hour_00_utc):
proceedings_content_refresh_task()
self.assertEqual(mock_generate.call_count, 1)
self.assertEqual(mock_generate.call_args, call(meeting120, force_refresh=True))
mock_generate.reset_mock()

# hour 01 - should call no meetings
with patch("ietf.meeting.tasks.timezone.now", return_value=hour_01_utc):
proceedings_content_refresh_task()
self.assertEqual(mock_generate.call_count, 0)

# hour 07 - should call meeting with number % 24 == 0
with patch("ietf.meeting.tasks.timezone.now", return_value=hour_07_utc):
proceedings_content_refresh_task()
self.assertEqual(mock_generate.call_count, 1)
self.assertEqual(mock_generate.call_args, call(meeting127, force_refresh=True))
mock_generate.reset_mock()

# With all=True, all should be called regardless of time. Reuse hour_01_utc which called none before
with patch("ietf.meeting.tasks.timezone.now", return_value=hour_01_utc):
proceedings_content_refresh_task(all=True)
self.assertEqual(mock_generate.call_count, 2)
91 changes: 76 additions & 15 deletions ietf/meeting/tests_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from django.http import QueryDict, FileResponse
from django.template import Context, Template
from django.utils import timezone
from django.utils.safestring import mark_safe
from django.utils.text import slugify

import debug # pyflakes:ignore
Expand All @@ -46,7 +47,7 @@
from ietf.meeting.helpers import send_interim_minutes_reminder, populate_important_dates, update_important_dates
from ietf.meeting.models import Session, TimeSlot, Meeting, SchedTimeSessAssignment, Schedule, SessionPresentation, SlideSubmission, SchedulingEvent, Room, Constraint, ConstraintName
from ietf.meeting.test_data import make_meeting_test_data, make_interim_meeting, make_interim_test_data
from ietf.meeting.utils import condition_slide_order
from ietf.meeting.utils import condition_slide_order, generate_proceedings_content
from ietf.meeting.utils import add_event_info_to_session_qs, participants_for_meeting
from ietf.meeting.utils import create_recording, delete_recording, get_next_sequence, bluesheet_data
from ietf.meeting.views import session_draft_list, parse_agenda_filter_params, sessions_post_save, agenda_extract_schedule
Expand Down Expand Up @@ -8296,8 +8297,7 @@ def _proceedings_file():
path = Path(settings.BASE_DIR) / 'meeting/test_procmat.pdf'
return path.open('rb')

def _assertMeetingHostsDisplayed(self, response, meeting):
pq = PyQuery(response.content)
def _assertMeetingHostsDisplayed(self, pq: PyQuery, meeting):
host_divs = pq('div.host-logo')
self.assertEqual(len(host_divs), meeting.meetinghosts.count(), 'Should have a logo for every meeting host')
self.assertEqual(
Expand All @@ -8313,12 +8313,11 @@ def _assertMeetingHostsDisplayed(self, response, meeting):
'Correct image and name for each host should appear in the correct order'
)

def _assertProceedingsMaterialsDisplayed(self, response, meeting):
def _assertProceedingsMaterialsDisplayed(self, pq: PyQuery, meeting):
"""Checks that all (and only) active materials are linked with correct href and title"""
expected_materials = [
m for m in meeting.proceedings_materials.order_by('type__order') if m.active()
]
pq = PyQuery(response.content)
links = pq('div.proceedings-material a')
self.assertEqual(len(links), len(expected_materials), 'Should have an entry for each active ProceedingsMaterial')
self.assertEqual(
Expand All @@ -8327,20 +8326,18 @@ def _assertProceedingsMaterialsDisplayed(self, response, meeting):
'Correct title and link for each ProceedingsMaterial should appear in the correct order'
)

def _assertGroupSessions(self, response, meeting):
def _assertGroupSessions(self, pq: PyQuery):
"""Checks that group/sessions are present"""
pq = PyQuery(response.content)
sections = ["plenaries", "gen", "iab", "editorial", "irtf", "training"]
for section in sections:
self.assertEqual(len(pq(f"#{section}")), 1, f"{section} section should exists in proceedings")

def test_proceedings(self):
"""Proceedings should be displayed correctly

Currently only tests that the view responds with a 200 response code and checks the ProceedingsMaterials
at the top of the proceedings. Ought to actually test the display of the individual group/session
materials as well.
Proceedings contents are tested in detail when testing generate_proceedings_content.
"""
# number must be >97 (settings.PROCEEDINGS_VERSION_CHANGES)
meeting = make_meeting_test_data(meeting=MeetingFactory(type_id='ietf', number='100'))
session = Session.objects.filter(meeting=meeting, group__acronym="mars").first()
GroupEventFactory(group=session.group,type='status_update')
Expand All @@ -8365,16 +8362,72 @@ def test_proceedings(self):
self._create_proceedings_materials(meeting)

url = urlreverse("ietf.meeting.views.proceedings", kwargs=dict(num=meeting.number))
r = self.client.get(url)
cached_content = mark_safe("<p>Fake proceedings content</p>")
with patch("ietf.meeting.views.generate_proceedings_content") as mock_gpc:
mock_gpc.return_value = cached_content
r = self.client.get(url)
self.assertEqual(r.status_code, 200)
self.assertIn(cached_content, r.content.decode())
self.assertTemplateUsed(r, "meeting/proceedings_wrapper.html")
self.assertTemplateNotUsed(r, "meeting/proceedings.html")

# These are rendered in proceedings_wrapper.html, so test them here
if len(meeting.city) > 0:
self.assertContains(r, meeting.city)
if len(meeting.venue_name) > 0:
self.assertContains(r, meeting.venue_name)
self._assertMeetingHostsDisplayed(PyQuery(r.content), meeting)

@patch("ietf.meeting.utils.caches")
def test_generate_proceedings_content(self, mock_caches):
# number must be >97 (settings.PROCEEDINGS_VERSION_CHANGES)
meeting = make_meeting_test_data(meeting=MeetingFactory(type_id='ietf', number='100'))

# First, check that by default a value in the cache is used without doing any other computation
mock_default_cache = mock_caches["default"]
mock_default_cache.get.return_value = "a cached value"
result = generate_proceedings_content(meeting)
self.assertEqual(result, "a cached value")
self.assertFalse(mock_default_cache.set.called)
self.assertTrue(mock_default_cache.get.called)
cache_key = mock_default_cache.get.call_args.args[0]
mock_default_cache.get.reset_mock()

# Now set up for actual computation of the proceedings content.
session = Session.objects.filter(meeting=meeting, group__acronym="mars").first()
GroupEventFactory(group=session.group,type='status_update')
SessionPresentationFactory(document__type_id='recording',session=session)
SessionPresentationFactory(document__type_id='recording',session=session,document__title="Audio recording for tests")

# Add various group sessions
groups = []
parent_groups = [
GroupFactory.create(type_id="area", acronym="gen"),
GroupFactory.create(acronym="iab"),
GroupFactory.create(acronym="irtf"),
]
for parent in parent_groups:
groups.append(GroupFactory.create(parent=parent))
for acronym in ["rsab", "edu"]:
groups.append(GroupFactory.create(acronym=acronym))
for group in groups:
SessionFactory(meeting=meeting, group=group)

self.write_materials_files(meeting, session)
self._create_proceedings_materials(meeting)

# Now "empty" the mock cache and see that we compute the expected proceedings content.
mock_default_cache.get.return_value = None
proceedings_content = generate_proceedings_content(meeting)
self.assertTrue(mock_default_cache.get.called)
self.assertEqual(mock_default_cache.get.call_args.args[0], cache_key, "same cache key each time")
self.assertTrue(mock_default_cache.set.called)
self.assertEqual(mock_default_cache.set.call_args, call(cache_key, proceedings_content, timeout=86400))
mock_default_cache.get.reset_mock()
mock_default_cache.set.reset_mock()

# standard items on every proceedings
pq = PyQuery(r.content)
pq = PyQuery(proceedings_content)
self.assertNotEqual(
pq('a[href="{}"]'.format(
urlreverse('ietf.meeting.views.proceedings_overview', kwargs=dict(num=meeting.number)))
Expand Down Expand Up @@ -8405,9 +8458,17 @@ def test_proceedings(self):
)

# configurable contents
self._assertMeetingHostsDisplayed(r, meeting)
self._assertProceedingsMaterialsDisplayed(r, meeting)
self._assertGroupSessions(r, meeting)
self._assertProceedingsMaterialsDisplayed(pq, meeting)
self._assertGroupSessions(pq)

# Finally, repeat the first cache test, but now with force_refresh=True. The cached value
# should be ignored and we should recompute the proceedings as before.
mock_default_cache.get.return_value = "a cached value"
result = generate_proceedings_content(meeting, force_refresh=True)
self.assertEqual(result, proceedings_content) # should have recomputed the same thing
self.assertFalse(mock_default_cache.get.called, "don't bother reading cache when force_refresh is True")
self.assertTrue(mock_default_cache.set.called)
self.assertEqual(mock_default_cache.set.call_args, call(cache_key, proceedings_content, timeout=86400))

def test_named_session(self):
"""Session with a name should appear separately in the proceedings"""
Expand Down
Loading
Loading