Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: keep proceedings cache up to date via celery #8449

Merged
merged 15 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ services:
# stop_grace_period: 1m
# volumes:
# - .:/workspace
# - app-assets:/assets

volumes:
postgresdb-data:
Expand Down
32 changes: 32 additions & 0 deletions ietf/meeting/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,42 @@
# Celery task definitions
#
from celery import shared_task
from django.utils import timezone

from ietf.utils import log
from .models import Meeting
from .utils import generate_proceedings_content
from .views import generate_agenda_data


@shared_task
def agenda_data_refresh():
generate_agenda_data(force_refresh=True)


@shared_task
def proceedings_content_refresh_task(all=False):
"""Refresh meeting proceedings cache

If `all` is `False`, then refreshes the cache for meetings whose numbers modulo
24 equal the current hour number (0-23). Scheduling the task once per hour will
then result in all proceedings being recomputed daily, with no more than two per
hour (now) or a few per hour in the next decade. That keeps the computation time
to under a couple minutes on our current production system.

If `all` is True, refreshes all meetings
"""
now = timezone.now()

for meeting in Meeting.objects.filter(type_id="ietf").order_by("number"):
if meeting.proceedings_format_version == 1:
continue # skip v1 proceedings, they're stored externally
num = meeting.get_number() # convert str -> int
if num is None:
log.log(
f"Not refreshing proceedings for meeting {meeting.number}: "
f"type is 'ietf' but get_number() returned None"
)
elif all or (num % 24 == now.hour):
log.log(f"Refreshing proceedings for meeting {meeting.number}...")
generate_proceedings_content(meeting, force_refresh=True)
171 changes: 170 additions & 1 deletion ietf/meeting/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import datetime
import itertools
import os
from hashlib import sha384

import pytz
import subprocess
import tempfile
Expand All @@ -12,7 +14,8 @@

from django.conf import settings
from django.contrib import messages
from django.db.models import OuterRef, Subquery, TextField, Q, Value
from django.core.cache import caches
from django.db.models import OuterRef, Subquery, TextField, Q, Value, Max
from django.db.models.functions import Coalesce
from django.template.loader import render_to_string
from django.utils import timezone
Expand Down Expand Up @@ -997,3 +1000,169 @@ def participants_for_meeting(meeting):
sessions = meeting.session_set.filter(Q(type='plenary') | Q(group__type__in=['wg', 'rg']))
attended = Attended.objects.filter(session__in=sessions).values_list('person', flat=True).distinct()
return (checked_in, attended)


def generate_proceedings_content(meeting, force_refresh=False):
"""Render proceedings content for a meeting and update cache

:meeting: meeting whose proceedings should be rendered
:force_refresh: true to force regeneration and cache refresh
"""
cache = caches["default"]
cache_version = Document.objects.filter(session__meeting__number=meeting.number).aggregate(Max('time'))["time__max"]
# Include proceedings_final in the bare_key so we'll always reflect that accurately, even at the cost of
# a recomputation in the view
bare_key = f"proceedings.{meeting.number}.{cache_version}.final={meeting.proceedings_final}"
cache_key = sha384(bare_key.encode("utf8")).hexdigest()
if not force_refresh:
cached_content = cache.get(cache_key, None)
if cached_content is not None:
return cached_content

def area_and_group_acronyms_from_session(s):
area = s.group_parent_at_the_time()
if area == None:
area = s.group.parent
group = s.group_at_the_time()
return (area.acronym, group.acronym)

schedule = meeting.schedule
sessions = (
meeting.session_set.with_current_status()
.filter(Q(timeslotassignments__schedule__in=[schedule, schedule.base if schedule else None])
| Q(current_status='notmeet'))
.select_related()
.order_by('-current_status')
)

plenaries, _ = organize_proceedings_sessions(
sessions.filter(name__icontains='plenary')
.exclude(current_status='notmeet')
)
irtf_meeting, irtf_not_meeting = organize_proceedings_sessions(
sessions.filter(group__parent__acronym = 'irtf').order_by('group__acronym')
)
# per Colin (datatracker #5010) - don't report not meeting rags
irtf_not_meeting = [item for item in irtf_not_meeting if item["group"].type_id != "rag"]
irtf = {"meeting_groups":irtf_meeting, "not_meeting_groups":irtf_not_meeting}

training, _ = organize_proceedings_sessions(
sessions.filter(group__acronym__in=['edu','iaoc'], type_id__in=['regular', 'other',])
.exclude(current_status='notmeet')
)
iab, _ = organize_proceedings_sessions(
sessions.filter(group__parent__acronym = 'iab')
.exclude(current_status='notmeet')
)
editorial, _ = organize_proceedings_sessions(
sessions.filter(group__acronym__in=['rsab','rswg'])
.exclude(current_status='notmeet')
)

ietf = sessions.filter(group__parent__type__slug = 'area').exclude(group__acronym__in=['edu','iepg','tools'])
ietf = list(ietf)
ietf.sort(key=lambda s: area_and_group_acronyms_from_session(s))
ietf_areas = []
for area, area_sessions in itertools.groupby(ietf, key=lambda s: s.group_parent_at_the_time()):
meeting_groups, not_meeting_groups = organize_proceedings_sessions(area_sessions)
ietf_areas.append((area, meeting_groups, not_meeting_groups))

with timezone.override(meeting.tz()):
rendered_content = render_to_string(
"meeting/proceedings.html",
{
'meeting': meeting,
'plenaries': plenaries,
'training': training,
'irtf': irtf,
'iab': iab,
'editorial': editorial,
'ietf_areas': ietf_areas,
'meetinghost_logo': {
'max_height': settings.MEETINGHOST_LOGO_MAX_DISPLAY_HEIGHT,
'max_width': settings.MEETINGHOST_LOGO_MAX_DISPLAY_WIDTH,
}
},
)
cache.set(
cache_key,
rendered_content,
timeout=86400, # one day, in seconds
)
return rendered_content


def organize_proceedings_sessions(sessions):
# Collect sessions by Group, then bin by session name (including sessions with blank names).
# If all of a group's sessions are 'notmeet', the processed data goes in not_meeting_sessions.
# Otherwise, the data goes in meeting_sessions.
meeting_groups = []
not_meeting_groups = []
for group_acronym, group_sessions in itertools.groupby(sessions, key=lambda s: s.group.acronym):
by_name = {}
is_meeting = False
all_canceled = True
group = None
for s in sorted(
group_sessions,
key=lambda gs: (
gs.official_timeslotassignment().timeslot.time
if gs.official_timeslotassignment() else datetime.datetime(datetime.MAXYEAR, 1, 1)
),
):
group = s.group
if s.current_status != 'notmeet':
is_meeting = True
if s.current_status != 'canceled':
all_canceled = False
by_name.setdefault(s.name, [])
if s.current_status != 'notmeet' or s.presentations.exists():
by_name[s.name].append(s) # for notmeet, only include sessions with materials
for sess_name, ss in by_name.items():
session = ss[0] if ss else None
def _format_materials(items):
"""Format session/material for template

Input is a list of (session, materials) pairs. The materials value can be a single value or a list.
"""
material_times = {} # key is material, value is first timestamp it appeared
for s, mats in items:
tsa = s.official_timeslotassignment()
timestamp = tsa.timeslot.time if tsa else None
if not isinstance(mats, list):
mats = [mats]
for mat in mats:
if mat and mat not in material_times:
material_times[mat] = timestamp
n_mats = len(material_times)
result = []
if n_mats == 1:
result.append({'material': list(material_times)[0]}) # no 'time' when only a single material
elif n_mats > 1:
for mat, timestamp in material_times.items():
result.append({'material': mat, 'time': timestamp})
return result

entry = {
'group': group,
'name': sess_name,
'session': session,
'canceled': all_canceled,
'has_materials': s.presentations.exists(),
'agendas': _format_materials((s, s.agenda()) for s in ss),
'minutes': _format_materials((s, s.minutes()) for s in ss),
'bluesheets': _format_materials((s, s.bluesheets()) for s in ss),
'recordings': _format_materials((s, s.recordings()) for s in ss),
'meetecho_recordings': _format_materials((s, [s.session_recording_url()]) for s in ss),
'chatlogs': _format_materials((s, s.chatlogs()) for s in ss),
'slides': _format_materials((s, s.slides()) for s in ss),
'drafts': _format_materials((s, s.drafts()) for s in ss),
'last_update': session.last_update if hasattr(session, 'last_update') else None
}
if session and session.meeting.type_id == 'ietf' and not session.meeting.proceedings_final:
entry['attendances'] = _format_materials((s, s) for s in ss if Attended.objects.filter(session=s).exists())
if is_meeting:
meeting_groups.append(entry)
else:
not_meeting_groups.append(entry)
return meeting_groups, not_meeting_groups
Loading
Loading