diff --git a/api/views.py b/api/views.py index 00b16bbc..b1541345 100644 --- a/api/views.py +++ b/api/views.py @@ -1,6 +1,7 @@ from collections import Sequence -from django.shortcuts import get_object_or_404 +from django.http import Http404 +from django.shortcuts import get_object_or_404, redirect from rest_framework import viewsets from rest_framework.generics import ListAPIView from rest_framework.response import Response @@ -9,7 +10,7 @@ from api.versioning import check_api_version_redirect from core.filters import parse_querystring from core.forms import get_table_dynamic_form -from core.models import Dataset, Table +from core.models import Dataset, DataUrlRedirect, Table from core.templatetags.utils import obfuscate from . import paginators @@ -21,7 +22,14 @@ class DatasetViewSet(viewsets.ModelViewSet): @check_api_version_redirect def retrieve(self, request, slug): - obj = get_object_or_404(self.get_queryset(), slug=slug) + try: + obj = self.get_queryset().get(slug=slug) + except Dataset.DoesNotExist: + redirect_url = DataUrlRedirect.redirect_from(request) + if redirect_url: + return redirect(redirect_url) + raise Http404 + serializer = DatasetDetailSerializer(obj, context=self.get_serializer_context(),) return Response(serializer.data) @@ -96,6 +104,9 @@ def handle_exception(self, exc): @check_api_version_redirect def get(self, *args, **kwargs): + redirect_url = DataUrlRedirect.redirect_from(self.request) + if redirect_url: + return redirect(redirect_url) return super().get(*args, **kwargs) diff --git a/core/admin.py b/core/admin.py index 0932aaf2..c0df00c4 100644 --- a/core/admin.py +++ b/core/admin.py @@ -190,4 +190,17 @@ def has_delete_permission(self, request, *args, **kwargs): return request.user.is_superuser +class DataUrlRedirectAdmin(admin.ModelAdmin): + list_display = [ + "id", + "dataset_prev", + "dataset_dest", + "tablename_prev", + "tablename_dest", + "field_prev", + "field_dest", + ] + + admin.site.register(models.TableFile, TableFileAdmin) +admin.site.register(models.DataUrlRedirect, DataUrlRedirectAdmin) diff --git a/core/migrations/0028_auto_20201016_1519.py b/core/migrations/0028_auto_20201016_1519.py new file mode 100644 index 00000000..5ce1c68c --- /dev/null +++ b/core/migrations/0028_auto_20201016_1519.py @@ -0,0 +1,25 @@ +# Generated by Django 3.1.1 on 2020-10-16 18:19 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("core", "0027_auto_20201007_1707"), + ] + + operations = [ + migrations.CreateModel( + name="DataUrlRedirect", + fields=[ + ("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("dataset_prev", models.SlugField(default="")), + ("dataset_dest", models.SlugField(default="")), + ("tablename_prev", models.SlugField(default="")), + ("tablename_dest", models.SlugField(default="")), + ("field_prev", models.SlugField(default="")), + ("field_dest", models.SlugField(default="")), + ], + ), + ] diff --git a/core/models.py b/core/models.py index adcd4cea..ffbbdd8e 100644 --- a/core/models.py +++ b/core/models.py @@ -2,8 +2,9 @@ import random import string from collections import OrderedDict, namedtuple +from copy import deepcopy from textwrap import dedent -from urllib.parse import urlparse +from urllib.parse import urlencode, urlparse import django.contrib.postgres.indexes as pg_indexes import django.db.models.indexes as django_indexes @@ -675,3 +676,89 @@ def readable_size(self): @property def admin_url(self): return reverse("admin:core_tablefile_change", args=[self.id]) + + +class DataUrlRedirect(models.Model): + dataset_prev = models.SlugField(default="", blank=True) + dataset_dest = models.SlugField(default="", blank=True) + tablename_prev = models.SlugField(default="", blank=True) + tablename_dest = models.SlugField(default="", blank=True) + field_prev = models.SlugField(default="", blank=True) + field_dest = models.SlugField(default="", blank=True) + + @property + def redirect_map(self): + map = {} + dataset_url_names = [ + "core:dataset-detail", + "core:dataset-files-detail", + "api-v1:dataset-detail", + ] + + if self.dataset_prev != self.dataset_dest: + map.update( + **{ + reverse(n, args=[self.dataset_prev]): reverse(n, args=[self.dataset_dest]) + for n in dataset_url_names + } + ) + + if self.tablename_dest != self.tablename_prev: + table_url_names = [ + "core:dataset-table-detail", + "api-v1:dataset-table-data", + ] + map.update( + **{ + reverse(n, args=[self.dataset_prev, self.tablename_prev]): reverse( + n, args=[self.dataset_dest, self.tablename_dest] + ) + for n in table_url_names + } + ) + + return map + + @classmethod + def redirect_from(cls, request): + path = request.path + redirects = {} + fields_map = {} + + for r in cls.objects.all().iterator(): + redirects.update(**r.redirect_map) + + if r.field_prev != r.field_dest: + key = (r.dataset_dest, r.tablename_dest, r.field_prev) + fields_map[key] = r.field_dest + + # Order prefixes begining by the most complex ones + redirect_url = "" + for url_prefix in sorted(redirects, reverse=True): + if path.startswith(url_prefix): + redirect_url_prefix = redirects[url_prefix] + redirect_url = path.replace(url_prefix, redirect_url_prefix) + break + + qs = "" + query_params = getattr(request, "GET", {}) + redirect_qs = deepcopy(query_params) + if query_params: + url = redirect_url or path + for key in fields_map: + dataset, tablename, field_prev = key + has_field_update = all([f"/{dataset}/" in url, f"/{tablename}/" in url, field_prev in redirect_qs]) + if has_field_update: + value = redirect_qs[field_prev] + redirect_qs.pop(field_prev) + redirect_qs[fields_map[key]] = value + + qs = urlencode(redirect_qs) + + if not redirect_url and qs and set(query_params.keys()) != set(redirect_qs.keys()): + redirect_url = path + + if not redirect_url: + return "" + + return redirect_url + (f"?{qs}" if qs else "") diff --git a/core/tests/test_views.py b/core/tests/test_views.py index 0c0d7440..38c54aa6 100644 --- a/core/tests/test_views.py +++ b/core/tests/test_views.py @@ -1,11 +1,12 @@ from unittest.mock import patch +from django.conf import settings from django.core.management import call_command -from django.test import override_settings +from django.test import TestCase, override_settings from django.urls import reverse from model_bakery import baker -from core.models import TableFile +from core.models import Dataset, DataUrlRedirect, TableFile from core.tests.utils import BaseTestCaseWithSampleDataset from traffic_control.tests.util import TrafficControlClient from utils.tests import DjangoAssertionsMixin @@ -123,3 +124,211 @@ def test_return_empty_list_if_no_visible_table(self): assert 200 == response.status_code self.assertTemplateUsed(response, "404.html") assert response.context["message"] + + +class DataRedirectsTests(TestCase): + client_class = TrafficControlClient + + def setUp(self): + self.client.force_login(baker.make(settings.AUTH_USER_MODEL)) + self.dataset_redirects = [ + baker.make(DataUrlRedirect, dataset_prev="gastos_diretos", dataset_dest="govbr"), + baker.make(DataUrlRedirect, dataset_prev="bens_candidatos", dataset_dest="bem_declarado"), + ] + + def test_redirect_dataset_urls(self): + response = self.client.get("/dataset/gastos_diretos/") + + self.assertRedirects(response, "/dataset/govbr/", fetch_redirect_response=False) + + def test_dataset_redirect_fetch_data_from_db(self): + for ds_redirect in self.dataset_redirects: + url = reverse("core:dataset-detail", args=[ds_redirect.dataset_prev]) + redirect_url = reverse("core:dataset-detail", args=[ds_redirect.dataset_dest]) + + response = self.client.get(url) + + self.assertRedirects(response, redirect_url, fetch_redirect_response=False) + + def test_dataset_redirect_fetch_data_from_db_for_files_urls(self): + for ds_redirect in self.dataset_redirects: + url = reverse("core:dataset-files-detail", args=[ds_redirect.dataset_prev]) + redirect_url = reverse("core:dataset-files-detail", args=[ds_redirect.dataset_dest]) + + response = self.client.get(url) + + self.assertRedirects(response, redirect_url, fetch_redirect_response=False) + + def test_dataset_redirect_fetch_data_from_db_for_api_urls(self): + for ds_redirect in self.dataset_redirects: + url = reverse("api-v1:dataset-detail", args=[ds_redirect.dataset_prev]) + redirect_url = reverse("api-v1:dataset-detail", args=[ds_redirect.dataset_dest]) + + response = self.client.get(url) + + self.assertRedirects(response, redirect_url, fetch_redirect_response=False) + + def test_dataset_redirect_fetch_data_from_db_for_tabledata(self): + for ds_redirect in self.dataset_redirects: + url = reverse("core:dataset-table-detail", args=[ds_redirect.dataset_prev, "caso"]) + redirect_url = reverse("core:dataset-table-detail", args=[ds_redirect.dataset_dest, "caso"]) + + response = self.client.get(url) + + self.assertRedirects(response, redirect_url, fetch_redirect_response=False) + + def test_dataset_redirect_fetch_data_from_db_for_api_tabledata(self): + for ds_redirect in self.dataset_redirects: + url = reverse("api-v1:dataset-table-data", args=[ds_redirect.dataset_prev, "caso"]) + redirect_url = reverse("api-v1:dataset-table-data", args=[ds_redirect.dataset_dest, "caso"]) + + response = self.client.get(url) + + self.assertRedirects(response, redirect_url, fetch_redirect_response=False) + + def test_dataset_redirect_fetch_data_from_db_for_tabledata_with_table_redirect(self): + ds_redirect = self.dataset_redirects[0] + ds_redirect.tablename_prev = "caso2019" + ds_redirect.tablename_dest = "caso2020" + ds_redirect.save() + + url = reverse("core:dataset-table-detail", args=[ds_redirect.dataset_prev, "caso2019"]) + redirect_url = reverse("core:dataset-table-detail", args=[ds_redirect.dataset_dest, "caso2020"]) + + response = self.client.get(url) + + self.assertRedirects(response, redirect_url, fetch_redirect_response=False) + + def test_dataset_redirect_fetch_data_from_db_for_api_tabledata_with_table_redirect(self): + ds_redirect = self.dataset_redirects[0] + ds_redirect.tablename_prev = "caso2019" + ds_redirect.tablename_dest = "caso2020" + ds_redirect.save() + + url = reverse("api-v1:dataset-table-data", args=[ds_redirect.dataset_prev, "caso2019"]) + redirect_url = reverse("api-v1:dataset-table-data", args=[ds_redirect.dataset_dest, "caso2020"]) + + response = self.client.get(url) + + self.assertRedirects(response, redirect_url, fetch_redirect_response=False) + + def test_table_only_redirect(self): + baker.make(Dataset, slug="covid19") + ds_redirect = self.dataset_redirects[0] + ds_redirect.dataset_prev = "covid19" + ds_redirect.dataset_dest = "covid19" + ds_redirect.tablename_prev = "caso2020" + ds_redirect.tablename_dest = "caso2021" + ds_redirect.save() + + url = reverse("core:dataset-table-detail", args=["covid19", "caso2020"]) + redirect_url = reverse("core:dataset-table-detail", args=["covid19", "caso2021"]) + + response = self.client.get(url) + + self.assertRedirects(response, redirect_url, fetch_redirect_response=False) + + # API as well + url = reverse("api-v1:dataset-table-data", args=["covid19", "caso2020"]) + redirect_url = reverse("api-v1:dataset-table-data", args=["covid19", "caso2021"]) + + def test_avoid_infinite_redirect_if_same_dataset_config(self): + ds_redirect = self.dataset_redirects[0] + ds_redirect.dataset_prev = "foo" + ds_redirect.dataset_dest = "foo" + ds_redirect.save() + + url = reverse("core:dataset-detail", args=[ds_redirect.dataset_prev]) + response = self.client.get(url) + + assert 404 == response.status_code + + def test_avoid_infinite_redirect_if_same_dataset_config_in_table(self): + ds_redirect = self.dataset_redirects[0] + ds_redirect.dataset_prev = "foo" + ds_redirect.dataset_dest = "foo" + ds_redirect.tablename_prev = "bar" + ds_redirect.tablename_dest = "bar" + ds_redirect.save() + + url = reverse("api-v1:dataset-table-data", args=["foo", "bar"]) + response = self.client.get(url) + + assert 404 == response.status_code + + def test_redirect_dataset_plus_field_name_redirect(self): + ds_redirect = self.dataset_redirects[0] + ds_redirect.tablename_prev = "caso2020" + ds_redirect.tablename_dest = "caso2020" + ds_redirect.field_prev = "query1" + ds_redirect.field_dest = "newquery" + ds_redirect.save() + + url = reverse("api-v1:dataset-table-data", args=[ds_redirect.dataset_prev, "caso2020"]) + redirect_url = ( + reverse("api-v1:dataset-table-data", args=[ds_redirect.dataset_dest, "caso2020"]) + + "?newquery=foo&query2=bar" # noqa + ) + response = self.client.get(url, data={"query1": "foo", "query2": "bar"}) + + self.assertRedirects(response, redirect_url, fetch_redirect_response=False) + + def test_redirect_dataset_plus_table_pluse_field_name_redirect(self): + ds_redirect = self.dataset_redirects[0] + ds_redirect.tablename_prev = "caso2019" + ds_redirect.tablename_dest = "caso2020" + ds_redirect.field_prev = "query1" + ds_redirect.field_dest = "newquery" + ds_redirect.save() + + url = reverse("api-v1:dataset-table-data", args=[ds_redirect.dataset_prev, "caso2019"]) + redirect_url = ( + reverse("api-v1:dataset-table-data", args=[ds_redirect.dataset_dest, "caso2020"]) + + "?newquery=foo&query2=bar" # noqa + ) + response = self.client.get(url, data={"query1": "foo", "query2": "bar"}) + + self.assertRedirects(response, redirect_url, fetch_redirect_response=False) + + def test_redirect_only_querystring_using_new_fieldname(self): + ds_redirect = self.dataset_redirects[0] + ds_redirect.dataset_dest = "dataset" + ds_redirect.dataset_prev = "dataset" + ds_redirect.tablename_prev = "caso2020" + ds_redirect.tablename_dest = "caso2020" + ds_redirect.field_prev = "query1" + ds_redirect.field_dest = "newquery" + ds_redirect.save() + baker.make("core.Table", dataset__slug="dataset", name="caso2020", dataset__show=True) + + # API endpoint + url = reverse("api-v1:dataset-table-data", args=["dataset", "caso2020"]) + redirect_url = reverse("api-v1:dataset-table-data", args=["dataset", "caso2020"]) + "?newquery=foo&query2=bar" + response = self.client.get(url, data={"query1": "foo", "query2": "bar"}) + self.assertRedirects(response, redirect_url, fetch_redirect_response=False) + + # Dataset table route + url = reverse("core:dataset-table-detail", args=["dataset", "caso2020"]) + redirect_url = reverse("core:dataset-table-detail", args=["dataset", "caso2020"]) + "?newquery=foo&query2=bar" + response = self.client.get(url, data={"query1": "foo", "query2": "bar"}) + self.assertRedirects(response, redirect_url, fetch_redirect_response=False) + + def test_avoid_infinite_redirect_if_same_fieldname_config(self): + ds_redirect = self.dataset_redirects[0] + ds_redirect.dataset_dest = "dataset" + ds_redirect.dataset_prev = "dataset" + ds_redirect.tablename_prev = "caso2020" + ds_redirect.tablename_dest = "caso2020" + ds_redirect.field_prev = "query1" + ds_redirect.field_dest = "query1" + ds_redirect.save() + + # API url + url = reverse("api-v1:dataset-table-data", args=[ds_redirect.dataset_prev, ds_redirect.tablename_prev]) + response = self.client.get(url, data={"query1": "foo", "query2": "bar"}) + self.assertEqual(404, response.status_code) + + # Dataset table route + url = reverse("core:dataset-table-detail", args=[ds_redirect.dataset_prev, ds_redirect.tablename_prev]) + response = self.client.get(url, data={"query1": "foo", "query2": "bar"}) + self.assertEqual(404, response.status_code) diff --git a/core/views.py b/core/views.py index 400e13e2..cd085334 100644 --- a/core/views.py +++ b/core/views.py @@ -7,13 +7,13 @@ from django.core.paginator import Paginator from django.db.models import Q from django.http import StreamingHttpResponse -from django.shortcuts import get_object_or_404, redirect, render +from django.shortcuts import redirect, render from django.urls import reverse from core.filters import parse_querystring from core.forms import ContactForm, DatasetSearchForm, get_table_dynamic_form from core.middlewares import disable_non_logged_user_cache -from core.models import Dataset, Table +from core.models import Dataset, DataUrlRedirect, Table from core.templatetags.utils import obfuscate from core.util import cached_http_get_json from data_activities_log.activites import recent_activities @@ -109,6 +109,10 @@ def dataset_detail(request, slug, tablename=""): try: dataset = Dataset.objects.get(slug=slug) except Dataset.DoesNotExist: + redirect_url = DataUrlRedirect.redirect_from(request) + if redirect_url: + return redirect(redirect_url) + context = {"message": "Dataset does not exist"} return render(request, "404.html", context, status=404) @@ -122,6 +126,9 @@ def dataset_detail(request, slug, tablename=""): except Table.DoesNotExist: context = {"message": "Table does not exist"} try: + redirect_url = DataUrlRedirect.redirect_from(request) + if redirect_url: + return redirect(redirect_url) # log 404 request only if hidden table exist hidden_table = dataset.get_table(tablename, allow_hidden=True) if hidden_table: @@ -130,6 +137,11 @@ def dataset_detail(request, slug, tablename=""): pass return render(request, "404.html", context, status=404) + # check for querystring fields that should be redirected + redirect_url = DataUrlRedirect.redirect_from(request) + if redirect_url: + return redirect(redirect_url) + querystring = request.GET.copy() page_number = querystring.pop("page", ["1"])[0].strip() or "1" items_per_page = querystring.pop("items", [str(settings.ROWS_PER_PAGE)])[0].strip() or str(settings.ROWS_PER_PAGE) @@ -233,7 +245,16 @@ def contributors(request): def dataset_files_detail(request, slug): - dataset = get_object_or_404(Dataset, slug=slug) + try: + dataset = Dataset.objects.get(slug=slug) + except Dataset.DoesNotExist: + redirect_url = DataUrlRedirect.redirect_from(request) + if redirect_url: + return redirect(redirect_url) + + context = {"message": "Dataset does not exist"} + return render(request, "404.html", context, status=404) + try: all_files = dataset.all_files except ObjectDoesNotExist: