From d9b97d8ca9f0ba13392bdc58c7836b708b2369f9 Mon Sep 17 00:00:00 2001 From: Johannes Hentschel Date: Wed, 13 Sep 2023 23:46:17 +0200 Subject: [PATCH 1/3] replaces old annotation workflow with new version_release workflow (annotation workflow will make its re-entry as pre-commit hook) --- .github/workflows/annotation_branch.yml | 41 ----- .github/workflows/helper.py | 72 ++++++++ .github/workflows/main_branch.yml | 32 ---- .github/workflows/update_pages.py | 225 ------------------------ .github/workflows/version_release.yml | 68 +++++++ 5 files changed, 140 insertions(+), 298 deletions(-) delete mode 100644 .github/workflows/annotation_branch.yml create mode 100644 .github/workflows/helper.py delete mode 100644 .github/workflows/main_branch.yml delete mode 100644 .github/workflows/update_pages.py create mode 100644 .github/workflows/version_release.yml diff --git a/.github/workflows/annotation_branch.yml b/.github/workflows/annotation_branch.yml deleted file mode 100644 index 00ea142..0000000 --- a/.github/workflows/annotation_branch.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: Push to annotation branch - -on: - push: - branches-ignore: - - main - - -jobs: - ms3_review: - if: > - (github.event.pusher.name != 'ms3-bot' && github.event.pusher.name != 'github-actions[bot]') - runs-on: ubuntu-latest - steps: - - name: Checkout corpus repository - uses: actions/checkout@v3 - with: - fetch-depth: 0 - ref: ${{ github.event.pull_request.head.ref }} - token: ${{ secrets.MS3_BOT_TOKEN }} - path: ${{ github.event.repository.name }} - - - name: Run 'ms3 review' via dcml_corpus_workflow action - uses: DCMLab/dcml_corpus_workflow@v2.6.2 - id: act_docker - with: - ms3-command: ${{ github.event_name }} - env: - Token: "${{ secrets.MS3_BOT_TOKEN }}" - commitFrom: "" # it start from main branch - comment_msg: "${{ github.event.head_commit.message }}" - directory: "${{ github.workspace }}" - working_dir: ${{ github.event.repository.name }} - - - name: Cancel the run if skipped - 
working-directory: ${{ github.event.repository.name }} - if: (steps.act_docker.outputs.skipped == 'true') - run: | - gh run cancel ${{ github.run_id }} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/helper.py b/.github/workflows/helper.py new file mode 100644 index 0000000..f6105ad --- /dev/null +++ b/.github/workflows/helper.py @@ -0,0 +1,72 @@ +import json +import argparse +import re +import os +def create_new_tag(tag): + if not (re.match(r'^v\d', tag)): + raise Exception(f'tag: {tag} is not giving in the correct format e.i v0.0') + + # Notice that this will make tag version of three digits become two digits + # e.i 3.2.1 -> 3.3 + digits_tags = (re.match(r'^v\d+.\d+', tag)).group()[1::].split('.') + if len(digits_tags) != 2: + raise Exception(f'tag: {tag} must contain two version digits') + + new_digit = int(digits_tags[1]) + 1 + return "v" + str(digits_tags[0]) + "." + str(new_digit) + +def store_tag(tag): + with open(os.environ['GITHUB_OUTPUT'], 'a') as fh: + print(f'new_tag={tag}', file=fh) + +def update_files_with_tag(old_tag, new_tag): + # This function needs to take care of updating + # .zenodo.json and CITATION.cff + # TO-DO: make zenodo.json robust to search for version tags + if os.path.isfile(".zenodo.json"): + try: + with open(".zenodo.json", "r") as f: + data = json.load(f) + + data["version"] = data["version"].replace(old_tag,new_tag) + data["title"] = data["title"].replace(old_tag,new_tag) + + with open(".zenodo.json", "w") as f: + json.dump(data, f) + + except Exception as e: + print(e) + + if os.path.isfile("CITATION.cff"): + try: + with open("CITATION.cff", "r", encoding="utf-8") as file: + citation = file.read() + modified_citation = citation.replace(old_tag, new_tag) + with open("CITATION.cff", "w", encoding="utf-8") as file: + file.write(modified_citation) + except Exception as e: + print(e) + +def main(args): + tag = args.tag + new_tag = "v2.0" + if not tag: + print(f"Warning: a latest release with a tag 
does not exist in current repository, starting from {new_tag}") + else: + new_tag = create_new_tag(tag) + print(f"Repository with tag: {tag}, creating a new tag with: {new_tag}") + update_files_with_tag(tag,new_tag) + store_tag(new_tag) + +def run(): + args = parser.parse_args() + main(args) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--tag", type=str, + help="Require: latest tag", + required=True + ) + run() \ No newline at end of file diff --git a/.github/workflows/main_branch.yml b/.github/workflows/main_branch.yml deleted file mode 100644 index 250464b..0000000 --- a/.github/workflows/main_branch.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Push to main branch - -on: - push: - branches: - - main -jobs: - ms3_review: - if: > - contains(github.event.head_commit.message,'dcml_corpus_workflow') - || (github.actor != 'ms3-bot' && github.event.pusher.name != 'github-actions[bot]') - runs-on: ubuntu-latest - steps: - - - name: Checkout corpus repository - uses: actions/checkout@v3 - with: - fetch-depth: 0 - token: ${{ secrets.MS3_BOT_TOKEN }} - path: ${{ github.event.repository.name }} - - - name: Run 'ms3 review' via dcml_corpus_workflow action - uses: DCMLab/dcml_corpus_workflow@v2.6.2 - id: act_docker - with: - ms3-command: "push_to_main" - env: - Token: "${{ secrets.MS3_BOT_TOKEN }}" - commitFrom: "${{ github.event.before }}" - comment_msg: "${{ github.event.head_commit.message }}" - directory: "${{ github.workspace }}" - working_dir: ${{ github.event.repository.name }} diff --git a/.github/workflows/update_pages.py b/.github/workflows/update_pages.py deleted file mode 100644 index 2078a97..0000000 --- a/.github/workflows/update_pages.py +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 -import argparse -import os -import sys -import io -import base64 -from shutil import copy - -import corpusstats -import pandas as pd -from ms3 import resolve_dir - -INDEX_FNAME = "index.md" -GANTT_FNAME = 
"gantt.md" -STATS_FNAME = "stats.md" -JEKYLL_CFG_FNAME = "_config.yml" -STYLE_FNAME = "assets/css/style.scss" - - -def make_index_file(gantt=True, stats=True): - file = "" - if gantt: - file += f"* [Modulation plans]({GANTT_FNAME})\n" - if stats: - file +=f"* [Corpus state]({STATS_FNAME})\n" - return file - -def generate_stats_text(pie_string, table_string): - STATS_FILE = f""" -# Corpus Status - -## Vital statistics - -{table_string} - -## Completion ratios - -{pie_string} -""" - return STATS_FILE - - -JEKYLL_CFG_FILE = "theme: jekyll-theme-tactile " - -STYLE_FILE = """--- ---- - -@import "{{ site.theme }}"; - -.inner { - max-width: 95%; - width: 1024px; -} -""" - - - - - - -def write_to_file(args, filename, content_str): - path = check_dir(".") if args.out is None else args.out - fname = os.path.join(path, filename) - _ = check_and_create( - os.path.dirname(fname) - ) # in case the file name included path components - with open(fname, "w", encoding="utf-8") as f: - f.writelines(content_str) - - -def write_gantt_file(args, gantt_path=None): - if gantt_path is None: - gantt_path = ( - check_dir("gantt") - if args.out is None - else check_dir(os.path.join(args.out, "gantt")) - ) - fnames = sorted(os.listdir(gantt_path)) - file_content = "\n".join( - f'' - for f in fnames) - write_to_file(args, GANTT_FNAME, file_content) - - -def write_stats_file(args): - try: - p = corpusstats.Provider(args.github, args.token) - except: - print(f"corpusstats failed with the following message: {sys.exc_info()[1]}") - return False - pie_string = "" - pie_array = [] - for s in p.tabular_stats: - plot = p.pie_chart(s) - img = io.BytesIO() - plot.savefig(img, format="png") - img.seek(0) - img = base64.encodebytes(img.getvalue()).decode("utf-8") - pie_array.append( - f'
' - ) - pie_string = "".join(pie_array) - - vital_stats = pd.DataFrame.from_dict(p.stats, orient="index") - vital_stats = vital_stats.iloc[0:6, 0:2] - vital_stats = vital_stats.to_markdown(index=False, headers=[]) - full_text = generate_stats_text(pie_string, vital_stats) - write_to_file(args, STATS_FNAME, full_text) - return True - - - -def check_and_create(d): - """ Turn input into an existing, absolute directory path. - """ - if not os.path.isdir(d): - d = resolve_dir(os.path.join(os.getcwd(), d)) - if not os.path.isdir(d): - os.makedirs(d) - print(f"Created directory {d}") - return resolve_dir(d) - - -def check_dir(d): - if not os.path.isdir(d): - d = resolve_dir(os.path.join(os.getcwd(), d)) - if not os.path.isdir(d): - print(d + " needs to be an existing directory") - return - return resolve_dir(d) - - -def copy_gantt_files(args): - destination = check_dir(".") if args.out is None else args.out - destination = check_and_create(os.path.join(destination, 'gantt')) - for file in sorted(os.listdir(args.dir)): - if file.endswith('.html'): - source = os.path.join(args.dir, file) - copy(source, destination) - print(f"Copied {source} to {destination}.") - return destination - -def main(args): - given = sum(arg is not None for arg in (args.github, args.token)) - stats, gantt = False, False - if given == 2: - stats = write_stats_file(args) - elif given == 1: - print(f"You need to specify both a repository and a token.") - if args.dir is not None: - destination = copy_gantt_files(args) - write_gantt_file(args, destination) - gantt=True - if sum((stats, gantt)) > 0: - index_file = make_index_file(gantt=gantt, stats=stats) - write_to_file(args, INDEX_FNAME, index_file) - write_to_file(args, JEKYLL_CFG_FNAME, JEKYLL_CFG_FILE) - write_to_file(args, STYLE_FNAME, STYLE_FILE) - else: - print("No page was generated.") - - -################################################################################ -# COMMANDLINE INTERFACE 
-################################################################################ -if __name__ == "__main__": - parser = argparse.ArgumentParser( - formatter_class=argparse.RawDescriptionHelpFormatter, - description="""\ ---------------------------------------------------------- -| Script for updating GitHub pages for a DCML subcorpus | ---------------------------------------------------------- - -Description goes here - -""", - ) - parser.add_argument( - "-g", - "--github", - metavar="owner/repository", - help="If you want to generate corpusstats, you need to pass the repo in the form owner/repository_name and an access token.", - ) - parser.add_argument( - "-t", - "--token", - metavar="ACCESS_TOKEN", - help="Token that grants access to the repository in question.", - ) - parser.add_argument( - "-d", - "--dir", - metavar="DIR", - type=check_dir, - help="Pass a directory to scan it for gantt charts and write the file gantt.md", - ) - parser.add_argument( - "-o", - "--out", - metavar="OUT_DIR", - type=check_and_create, - help="""Output directory.""", - ) - parser.add_argument( - "-l", - "--level", - default="INFO", - help="Set logging to one of the levels {DEBUG, INFO, WARNING, ERROR, CRITICAL}.", - ) - args = parser.parse_args() - # logging_levels = { - # 'DEBUG': logging.DEBUG, - # 'INFO': logging.INFO, - # 'WARNING': logging.WARNING, - # 'ERROR': logging.ERROR, - # 'CRITICAL': logging.CRITICAL, - # 'D': logging.DEBUG, - # 'I': logging.INFO, - # 'W': logging.WARNING, - # 'E': logging.ERROR, - # 'C': logging.CRITICAL - # } - # logging.basicConfig(level=logging_levels[args.level.upper()]) - main(args) diff --git a/.github/workflows/version_release.yml b/.github/workflows/version_release.yml new file mode 100644 index 0000000..66b5478 --- /dev/null +++ b/.github/workflows/version_release.yml @@ -0,0 +1,68 @@ +on: + pull_request: + types: + - closed + +jobs: + if_merged: + if: github.event.pull_request.merged == true + runs-on: ubuntu-latest + steps: + + - name: 
Checkout corpus repository + uses: actions/checkout@v3 + with: + token: ${{ secrets.MS3_BOT_TOKEN }} + + # this step could be replaced with dcml_docker action + - name: "Get ms3 package & transform" + id: tag + continue-on-error: true + run: | + res=$(gh api -H "Accept: application/vnd.github+json" repos/${{ github.event.repository.full_name }}/releases/latest --jq '.tag_name') + echo "tag_version=${res}" >> $GITHUB_OUTPUT + env: + GITHUB_TOKEN: ${{ secrets.MS3_BOT_TOKEN }} + + # this step could be replaced with dcml_docker action + - name: Setup Github credentials + run: | + git config --global user.name "ms3-bot" + git config --global user.email dcml.annotators@epfl.ch + + - name: "Generate new tag version" + id: generate_tag + run: | + ls -a + python .github/workflows/helper.py --tag "${{ steps.tag.outputs.tag_version }}" + ls -a + cat .zenodo.json + + # this step could be replaced with dcml_docker action + - name: "Get ms3 package & transform" + run: | + pip install --upgrade pip + pip install ms3 + ls -a + ms3 transform -M -N -X -F -D + ls -a + + - name: "Create release" + uses: "marvinpinto/action-automatic-releases@latest" + with: + repo_token: "${{ secrets.MS3_BOT_TOKEN }}" + automatic_release_tag: "${{ steps.generate_tag.outputs.new_tag }}" + prerelease: false + title: "${{ github.event.pull_request.title }}" + files: | + .zenodo.json + CITATION.cff + ${{ github.event.repository.name }}.zip + ${{ github.event.repository.name }}.datapackage.json + abc.datapackage.json.errors + + - name: "Update release body" + run: | + gh release edit "${{ steps.generate_tag.outputs.new_tag }}" --notes "${{ github.event.pull_request.body }}" --repo "${{ github.event.repository.full_name }}" + env: + GITHUB_TOKEN: ${{ secrets.MS3_BOT_TOKEN }} \ No newline at end of file From 427d5295ec4832a37bd0c2b7c5ed6c608966cc14 Mon Sep 17 00:00:00 2001 From: Johannes Hentschel Date: Wed, 13 Sep 2023 23:47:27 +0200 Subject: [PATCH 2/3] adds PyCharm and VScode folders to .gitignore --- 
.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index eed6331..80f7a88 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ *.mscz, .~* .mscbackup +.idea/ +.vscode/ From 007fad929ffdecfe1ce6d504986bb9d7e5090e59 Mon Sep 17 00:00:00 2001 From: Johannes Hentschel Date: Wed, 13 Sep 2023 23:57:06 +0200 Subject: [PATCH 3/3] adds template files --- .zenodo.json | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 29 +++++++++++++++++--- 2 files changed, 100 insertions(+), 3 deletions(-) create mode 100644 .zenodo.json diff --git a/.zenodo.json b/.zenodo.json new file mode 100644 index 0000000..90b90b0 --- /dev/null +++ b/.zenodo.json @@ -0,0 +1,74 @@ +{ + "license": "CC-BY-NC-SA-4.0", + "description": "

This corpus of annotated MuseScore files has been created within the DCML corpus initiative and employs the DCML harmony annotation standard. It is one of nine similar corpora that have been grouped together into An Annotated Corpus of Tonal Piano Music from the Long 19th Century, which comes with a data report that is currently under review.

\n\n

The dataset lives on GitHub (link under "Related identifiers") and is stored on Zenodo purely for conservation and automatic DOI generation for new GitHub releases. For technical reasons, we include only brief, generic instructions on how to use the data. For more detailed documentation, please refer to the dataset's GitHub page.

\n\n

What is included

\n\n

The dataset includes annotated MuseScore .mscx files that have been created with MuseScore 3.6.2 and can be opened with MuseScore 3 or any later version. Apart from that, the score information (measures, notes, harmony labels) has been extracted in the form of TSV files, which can be found in the folders measures, notes, and harmonies, respectively. They have been extracted with the Python library ms3, whose documentation has a column glossary for looking up the meaning of a column.

\n\n

Getting the data

\n\n

You can download the dataset as a ZIP file from Zenodo or GitHub. Please note that these automatically generated ZIP files do not include submodules, which would appear as empty folders. If you need ZIP files, you will need to find the submodule repositories (e.g. via GitHub) and download them individually.

\n\n

Apart from that, there is the possibility to git-clone the GitHub repository to your disk. This has the advantage that it allows you to version-control any changes you want to make to the dataset and to ask for your changes to be included ("merged") in a future version.

", + "contributors": [ + { + "orcid": "0000-0002-6329-7492", + "type": "DataCollector", + "name": "Amelia Brey" + } + ],"title": "{{ pretty_repo_name }}", + "keywords": [ + "music research", + "music theory", + "music analysis", + "music history", + "corpus studies", + "corpora", + "symbolic dataset", + "scores", + "annotated dataset", + "harmony", + "key annotations", + "chord annotations", + "phrase annotations", + "cadence annotations" + ], + "grants": [ + { + "id": "10.13039/501100001711::105216_182811" + } + ], + "upload_type": "dataset", + "version": "{{ corpus_release }}", + "communities": [ + { + "identifier": "dcml" + }, + { + "identifier": "epfl" + } + ], + "publication_date": "2023-05-23", + "creators": [ + { + "orcid": "0000-0002-1986-9545", + "affiliation": "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne", + "name": "Johannes Hentschel" + }, + { + "orcid": "0000-0003-1455-5990", + "affiliation": "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne", + "name": "Yannis Rammos" + }, + { + "orcid": "0000-0002-4323-7257", + "affiliation": "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne", + "name": "Martin Rohrmeier" + } + ], + "access_right": "open", + "related_identifiers": [ + { + "scheme": "url", + "identifier": "https://github.com/DCMLab/{{ repo_name }}/tree/{{ corpus_release }}", + "relation": "references" + }, + { + "scheme": "url", + "identifier": "https://dcmlab.github.io/{{ repo_name }}/", + "relation": "isDocumentedBy" + } + ] +} + diff --git a/README.md b/README.md index 5a7ebe9..b20339e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,28 @@ -# annotation_workflow_template +![Version](https://img.shields.io/github/v/release/DCMLab/{{ repo_name }}?display_name=tag) +[![DOI](https://zenodo.org/badge/{{ zenodo_badge_id }}.svg)](https://zenodo.org/badge/latestdoi/{{ zenodo_badge_id }}) +![GitHub repo size](https://img.shields.io/github/repo-size/DCMLab/{{ repo_name }}) 
+![License](https://img.shields.io/badge/license-CC%20BY--NC--SA%204.0-9cf) -This repo holds the current version of the DCML annotation workflow which is pulled by all subcorpus repos upon push to their main branch. -Please note that the `meta_ corpora` branch should be used with collections of corpora. +This is a README file for a data repository originating from the [DCML corpus initiative](https://github.com/DCMLab/dcml_corpora) +and serves as welcome page for both + +* the GitHub repo [https://github.com/DCMLab/{{ repo_name }}](https://github.com/DCMLab/{{ repo_name }}) and the corresponding +* documentation page [https://dcmlab.github.io/{{ repo_name }}](https://dcmlab.github.io/{{ repo_name }}) + +For information on how to obtain and use the dataset, please refer to [this documentation page](https://dcmlab.github.io/{{ repo_name }}/introduction). + +# {{ pretty_repo_name }} + + +## Cite as + +## Version history + +See the [GitHub releases](https://github.com/DCMLab/{{ repo_name }}/releases). + +## License + +Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License ([CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)). + +## File naming convention \ No newline at end of file