diff --git a/.github/workflows/dali_tests.yml b/.github/workflows/dali_tests.yml index b0e6e8de8..b1b8103ef 100644 --- a/.github/workflows/dali_tests.yml +++ b/.github/workflows/dali_tests.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python: [3.7, 3.8, 3.9] + python: [3.8, 3.9] os: [ubuntu-latest] steps: diff --git a/.github/workflows/sphinx-linkcheck.yml b/.github/workflows/sphinx-linkcheck.yml index c703aba4c..995e1c13d 100644 --- a/.github/workflows/sphinx-linkcheck.yml +++ b/.github/workflows/sphinx-linkcheck.yml @@ -13,10 +13,10 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up Python 3.7 + - name: Set up Python 3.9 uses: actions/setup-python@v1 with: - python-version: 3.7 + python-version: 3.9 - name: python dependencies run: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9bc20365e..e0e2950b1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python: [3.7, 3.8, 3.9] + python: [3.8, 3.9] os: [ubuntu-latest, windows-latest] steps: diff --git a/docs/source/conf.py b/docs/source/conf.py index 733a40c7c..2679646c9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,7 +52,7 @@ def package_list_from_file(file): """List up package name (not containing version and extras) from a package list file""" mocked_packages = [] - with open(file, "r") as fp: + with open(file) as fp: for ln in fp.readlines(): # Example: `tqdm>=4.41.0` => `tqdm` # `[` is for package with extras diff --git a/docs/source/solo/methods/base.rst b/docs/source/solo/methods/base.rst index 94929df99..3b3464a1a 100644 --- a/docs/source/solo/methods/base.rst +++ b/docs/source/solo/methods/base.rst @@ -45,9 +45,9 @@ validation_step .. automethod:: solo.methods.base.BaseMethod.validation_step :noindex: -validation_epoch_end +on_validation_epoch_end ~~~~~~~~~~~~~~~~~~~~ -.. automethod:: solo.methods.base.BaseMethod.validation_epoch_end +.. automethod:: solo.methods.base.BaseMethod.on_validation_epoch_end :noindex: @@ -104,7 +104,7 @@ validation_step .. automethod:: solo.methods.base.BaseMethod.validation_step :noindex: -validation_epoch_end +on_validation_epoch_end ~~~~~~~~~~~~~~~~~~~~ -.. automethod:: solo.methods.base.BaseMethod.validation_epoch_end +.. automethod:: solo.methods.base.BaseMethod.on_validation_epoch_end :noindex: diff --git a/docs/source/solo/methods/linear.rst b/docs/source/solo/methods/linear.rst index 10f785b9d..cc17505cc 100644 --- a/docs/source/solo/methods/linear.rst +++ b/docs/source/solo/methods/linear.rst @@ -35,7 +35,7 @@ validation_step .. automethod:: solo.methods.linear.LinearModel.validation_step :noindex: -validation_epoch_end +on_validation_epoch_end ~~~~~~~~~~~~~~~~~~~~ -.. automethod:: solo.methods.linear.LinearModel.validation_epoch_end +.. 
automethod:: solo.methods.linear.LinearModel.on_validation_epoch_end :noindex: diff --git a/main_umap.py b/main_umap.py index a6607b665..aa61bd199 100644 --- a/main_umap.py +++ b/main_umap.py @@ -21,6 +21,8 @@ import os from pathlib import Path +from omegaconf import OmegaConf + from solo.args.umap import parse_args_umap from solo.data.classification_dataloader import prepare_data from solo.methods import METHODS @@ -38,15 +40,14 @@ def main(): # load arguments with open(args_path) as f: method_args = json.load(f) + cfg = OmegaConf.create(method_args) # build the model model = ( METHODS[method_args["method"]] - .load_from_checkpoint(ckpt_path, strict=False, **method_args) + .load_from_checkpoint(ckpt_path, strict=False, cfg=cfg) .backbone ) - model.cuda() - # prepare data train_loader, val_loader = prepare_data( args.dataset, @@ -55,7 +56,7 @@ def main(): data_format=args.data_format, batch_size=args.batch_size, num_workers=args.num_workers, - auto_augment=args.auto_augment, + auto_augment=False, ) umap = OfflineUMAP() diff --git a/requirements.txt b/requirements.txt index 55fe0ddce..bafca8262 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,8 @@ torch>=1.10.0 torchvision>=0.11.1 einops -pytorch-lightning>=1.7.0, <1.9.0 +pytorch-lightning==2.0.2 torchmetrics>=0.6.0, <0.12.0 -lightning-bolts>=0.6.0 tqdm wandb scipy diff --git a/scripts/finetune/imagenet-100/mae.yaml b/scripts/finetune/imagenet-100/mae.yaml index a51b72549..a8e2dfb50 100644 --- a/scripts/finetune/imagenet-100/mae.yaml +++ b/scripts/finetune/imagenet-100/mae.yaml @@ -49,4 +49,4 @@ devices: [0, 1, 2, 3, 4, 5, 6, 7] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/finetune/imagenet/mae.yaml b/scripts/finetune/imagenet/mae.yaml index f3c0453a5..fc821348a 100644 --- a/scripts/finetune/imagenet/mae.yaml +++ b/scripts/finetune/imagenet/mae.yaml @@ -49,4 +49,4 @@ devices: [0, 1, 2, 3, 4, 5, 6, 7] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/barlow.yaml b/scripts/linear/imagenet-100/barlow.yaml index 534859bd4..f984833a0 100644 --- a/scripts/linear/imagenet-100/barlow.yaml +++ b/scripts/linear/imagenet-100/barlow.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/byol.yaml b/scripts/linear/imagenet-100/byol.yaml index e167722ba..5fcfe9e3f 100644 --- a/scripts/linear/imagenet-100/byol.yaml +++ b/scripts/linear/imagenet-100/byol.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/deepclusterv2.yaml b/scripts/linear/imagenet-100/deepclusterv2.yaml index 4d4061930..0c68b6d4e 100644 --- a/scripts/linear/imagenet-100/deepclusterv2.yaml +++ b/scripts/linear/imagenet-100/deepclusterv2.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/dino.yaml b/scripts/linear/imagenet-100/dino.yaml index edacd281b..261da18f3 100644 --- a/scripts/linear/imagenet-100/dino.yaml +++ b/scripts/linear/imagenet-100/dino.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/mocov2plus.yaml b/scripts/linear/imagenet-100/mocov2plus.yaml index 
55d15a03c..6f08d9fc2 100644 --- a/scripts/linear/imagenet-100/mocov2plus.yaml +++ b/scripts/linear/imagenet-100/mocov2plus.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/mocov3.yaml b/scripts/linear/imagenet-100/mocov3.yaml index 30beaf130..d13dbf2b3 100644 --- a/scripts/linear/imagenet-100/mocov3.yaml +++ b/scripts/linear/imagenet-100/mocov3.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/mocov3_vit.yaml b/scripts/linear/imagenet-100/mocov3_vit.yaml index 92a298e81..58f79c2bb 100644 --- a/scripts/linear/imagenet-100/mocov3_vit.yaml +++ b/scripts/linear/imagenet-100/mocov3_vit.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/nnclr.yaml b/scripts/linear/imagenet-100/nnclr.yaml index ac197f94f..c4e27a443 100644 --- a/scripts/linear/imagenet-100/nnclr.yaml +++ b/scripts/linear/imagenet-100/nnclr.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/ressl.yaml b/scripts/linear/imagenet-100/ressl.yaml index e8e87d8bd..800811224 100644 --- a/scripts/linear/imagenet-100/ressl.yaml +++ b/scripts/linear/imagenet-100/ressl.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/simclr.yaml b/scripts/linear/imagenet-100/simclr.yaml index 04e312fe0..a40694c8c 100644 --- a/scripts/linear/imagenet-100/simclr.yaml +++ b/scripts/linear/imagenet-100/simclr.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/simsiam.yaml b/scripts/linear/imagenet-100/simsiam.yaml index b7d9ddae5..7ecd4b4ab 100644 --- a/scripts/linear/imagenet-100/simsiam.yaml +++ b/scripts/linear/imagenet-100/simsiam.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/swav.yaml b/scripts/linear/imagenet-100/swav.yaml index f0155b52b..08e606ff6 100644 --- a/scripts/linear/imagenet-100/swav.yaml +++ b/scripts/linear/imagenet-100/swav.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/vibcreg.yaml b/scripts/linear/imagenet-100/vibcreg.yaml index d4ad39f70..463d70fde 100644 --- a/scripts/linear/imagenet-100/vibcreg.yaml +++ b/scripts/linear/imagenet-100/vibcreg.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet-100/vicreg.yaml b/scripts/linear/imagenet-100/vicreg.yaml index 0d0150b22..253b5c74e 100644 --- a/scripts/linear/imagenet-100/vicreg.yaml +++ b/scripts/linear/imagenet-100/vicreg.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet/barlow.yaml b/scripts/linear/imagenet/barlow.yaml index 61d32abcd..0d0947d50 100644 --- a/scripts/linear/imagenet/barlow.yaml +++ 
b/scripts/linear/imagenet/barlow.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet/byol.yaml b/scripts/linear/imagenet/byol.yaml index 12aef3266..ba44afdee 100644 --- a/scripts/linear/imagenet/byol.yaml +++ b/scripts/linear/imagenet/byol.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/linear/imagenet/mocov2plus.yaml b/scripts/linear/imagenet/mocov2plus.yaml index 8b1a0ea8d..3cf52182d 100644 --- a/scripts/linear/imagenet/mocov2plus.yaml +++ b/scripts/linear/imagenet/mocov2plus.yaml @@ -42,4 +42,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar-multicrop/swav.yaml b/scripts/pretrain/cifar-multicrop/swav.yaml index 5a9c436c7..c36b7669f 100644 --- a/scripts/pretrain/cifar-multicrop/swav.yaml +++ b/scripts/pretrain/cifar-multicrop/swav.yaml @@ -55,4 +55,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/barlow.yaml b/scripts/pretrain/cifar/barlow.yaml index 86c1aa684..728f14ba7 100644 --- a/scripts/pretrain/cifar/barlow.yaml +++ b/scripts/pretrain/cifar/barlow.yaml @@ -50,4 +50,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/byol.yaml b/scripts/pretrain/cifar/byol.yaml index d3c163be9..eec69496f 100644 --- a/scripts/pretrain/cifar/byol.yaml +++ b/scripts/pretrain/cifar/byol.yaml @@ -53,4 +53,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/deepclusterv2.yaml b/scripts/pretrain/cifar/deepclusterv2.yaml index c3159f234..f8847859c 100644 --- a/scripts/pretrain/cifar/deepclusterv2.yaml +++ b/scripts/pretrain/cifar/deepclusterv2.yaml @@ -53,4 +53,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/dino.yaml b/scripts/pretrain/cifar/dino.yaml index 843cbb6c7..008e3abfe 100644 --- a/scripts/pretrain/cifar/dino.yaml +++ b/scripts/pretrain/cifar/dino.yaml @@ -53,4 +53,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/mae.yaml b/scripts/pretrain/cifar/mae.yaml index 939c40fcd..0d8f8bad7 100644 --- a/scripts/pretrain/cifar/mae.yaml +++ b/scripts/pretrain/cifar/mae.yaml @@ -53,4 +53,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/mocov2plus.yaml b/scripts/pretrain/cifar/mocov2plus.yaml index 6e3b137bc..8c990b196 100644 --- a/scripts/pretrain/cifar/mocov2plus.yaml +++ b/scripts/pretrain/cifar/mocov2plus.yaml @@ -50,4 +50,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/mocov3.yaml b/scripts/pretrain/cifar/mocov3.yaml index a23b8c1e5..9eccbd2d5 100644 --- a/scripts/pretrain/cifar/mocov3.yaml +++ b/scripts/pretrain/cifar/mocov3.yaml @@ -54,4 +54,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/nnbyol.yaml b/scripts/pretrain/cifar/nnbyol.yaml 
index 331101099..5cec47bac 100644 --- a/scripts/pretrain/cifar/nnbyol.yaml +++ b/scripts/pretrain/cifar/nnbyol.yaml @@ -54,4 +54,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/nnclr.yaml b/scripts/pretrain/cifar/nnclr.yaml index d10f35278..2786f365e 100644 --- a/scripts/pretrain/cifar/nnclr.yaml +++ b/scripts/pretrain/cifar/nnclr.yaml @@ -52,4 +52,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/nnsiam.yaml b/scripts/pretrain/cifar/nnsiam.yaml index 9d1102c69..3d611e7ac 100644 --- a/scripts/pretrain/cifar/nnsiam.yaml +++ b/scripts/pretrain/cifar/nnsiam.yaml @@ -50,4 +50,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/ressl.yaml b/scripts/pretrain/cifar/ressl.yaml index 1bc44a70c..7272f622f 100644 --- a/scripts/pretrain/cifar/ressl.yaml +++ b/scripts/pretrain/cifar/ressl.yaml @@ -53,4 +53,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/simclr.yaml b/scripts/pretrain/cifar/simclr.yaml index 6902362d4..0531365a7 100644 --- a/scripts/pretrain/cifar/simclr.yaml +++ b/scripts/pretrain/cifar/simclr.yaml @@ -50,4 +50,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/simsiam.yaml b/scripts/pretrain/cifar/simsiam.yaml index bc9874a4f..dec94d430 100644 --- a/scripts/pretrain/cifar/simsiam.yaml +++ b/scripts/pretrain/cifar/simsiam.yaml @@ -47,4 +47,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/supcon.yaml b/scripts/pretrain/cifar/supcon.yaml index 392069de7..365317b85 100644 --- a/scripts/pretrain/cifar/supcon.yaml +++ b/scripts/pretrain/cifar/supcon.yaml @@ -46,4 +46,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/swav.yaml b/scripts/pretrain/cifar/swav.yaml index 14f71dd14..01d6c431b 100644 --- a/scripts/pretrain/cifar/swav.yaml +++ b/scripts/pretrain/cifar/swav.yaml @@ -54,4 +54,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/vibcreg.yaml b/scripts/pretrain/cifar/vibcreg.yaml index a8deb2cc5..ebc2404f9 100644 --- a/scripts/pretrain/cifar/vibcreg.yaml +++ b/scripts/pretrain/cifar/vibcreg.yaml @@ -74,4 +74,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/vicreg.yaml b/scripts/pretrain/cifar/vicreg.yaml index 4f04c7097..0a8db3111 100644 --- a/scripts/pretrain/cifar/vicreg.yaml +++ b/scripts/pretrain/cifar/vicreg.yaml @@ -80,4 +80,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/cifar/wmse.yaml b/scripts/pretrain/cifar/wmse.yaml index 76d6f6529..7b77e45ec 100644 --- a/scripts/pretrain/cifar/wmse.yaml +++ b/scripts/pretrain/cifar/wmse.yaml @@ -70,4 +70,4 @@ devices: [0] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/custom/byol.yaml b/scripts/pretrain/custom/byol.yaml index 
943ee52b9..517dcb4a7 100644 --- a/scripts/pretrain/custom/byol.yaml +++ b/scripts/pretrain/custom/byol.yaml @@ -60,4 +60,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100-multicrop/byol.yaml b/scripts/pretrain/imagenet-100-multicrop/byol.yaml index 345647d39..f17ede003 100644 --- a/scripts/pretrain/imagenet-100-multicrop/byol.yaml +++ b/scripts/pretrain/imagenet-100-multicrop/byol.yaml @@ -53,4 +53,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100-multicrop/simclr.yaml b/scripts/pretrain/imagenet-100-multicrop/simclr.yaml index 082bb6666..cbd804bdf 100644 --- a/scripts/pretrain/imagenet-100-multicrop/simclr.yaml +++ b/scripts/pretrain/imagenet-100-multicrop/simclr.yaml @@ -50,4 +50,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100-multicrop/supcon.yaml b/scripts/pretrain/imagenet-100-multicrop/supcon.yaml index e6b44e8cb..5de0a77c4 100644 --- a/scripts/pretrain/imagenet-100-multicrop/supcon.yaml +++ b/scripts/pretrain/imagenet-100-multicrop/supcon.yaml @@ -46,4 +46,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/barlow.yaml b/scripts/pretrain/imagenet-100/barlow.yaml index 6b56eb81c..ddd2da670 100644 --- a/scripts/pretrain/imagenet-100/barlow.yaml +++ b/scripts/pretrain/imagenet-100/barlow.yaml @@ -50,4 +50,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/byol.yaml b/scripts/pretrain/imagenet-100/byol.yaml index c4a0170bc..35cd7d560 100644 --- a/scripts/pretrain/imagenet-100/byol.yaml +++ b/scripts/pretrain/imagenet-100/byol.yaml @@ -53,4 +53,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/deepclusterv2.yaml b/scripts/pretrain/imagenet-100/deepclusterv2.yaml index 673bd8a96..f6c023f0d 100644 --- a/scripts/pretrain/imagenet-100/deepclusterv2.yaml +++ b/scripts/pretrain/imagenet-100/deepclusterv2.yaml @@ -55,4 +55,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/dino.yaml b/scripts/pretrain/imagenet-100/dino.yaml index b38fbd75a..1129e125c 100644 --- a/scripts/pretrain/imagenet-100/dino.yaml +++ b/scripts/pretrain/imagenet-100/dino.yaml @@ -54,4 +54,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/dino_vit.yaml b/scripts/pretrain/imagenet-100/dino_vit.yaml index 2d70d023b..89ff43b6e 100644 --- a/scripts/pretrain/imagenet-100/dino_vit.yaml +++ b/scripts/pretrain/imagenet-100/dino_vit.yaml @@ -51,4 +51,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/mae.yaml b/scripts/pretrain/imagenet-100/mae.yaml index bab22bcdb..7366cd647 100644 --- a/scripts/pretrain/imagenet-100/mae.yaml +++ b/scripts/pretrain/imagenet-100/mae.yaml @@ -51,4 +51,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff 
--git a/scripts/pretrain/imagenet-100/mocov2plus.yaml b/scripts/pretrain/imagenet-100/mocov2plus.yaml index a097a9ddb..afbe0b4cf 100644 --- a/scripts/pretrain/imagenet-100/mocov2plus.yaml +++ b/scripts/pretrain/imagenet-100/mocov2plus.yaml @@ -50,4 +50,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/mocov3.yaml b/scripts/pretrain/imagenet-100/mocov3.yaml index fc44e8808..df5d4def3 100644 --- a/scripts/pretrain/imagenet-100/mocov3.yaml +++ b/scripts/pretrain/imagenet-100/mocov3.yaml @@ -54,4 +54,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/mocov3_vit.yaml b/scripts/pretrain/imagenet-100/mocov3_vit.yaml index 1eda53764..af942c546 100644 --- a/scripts/pretrain/imagenet-100/mocov3_vit.yaml +++ b/scripts/pretrain/imagenet-100/mocov3_vit.yaml @@ -50,4 +50,4 @@ devices: [0, 1, 2, 3, 4, 5, 6, 7] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/nnclr.yaml b/scripts/pretrain/imagenet-100/nnclr.yaml index 08ec68063..422b7beca 100644 --- a/scripts/pretrain/imagenet-100/nnclr.yaml +++ b/scripts/pretrain/imagenet-100/nnclr.yaml @@ -52,4 +52,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/ressl.yaml b/scripts/pretrain/imagenet-100/ressl.yaml index 1e7402df0..70416d60e 100644 --- a/scripts/pretrain/imagenet-100/ressl.yaml +++ b/scripts/pretrain/imagenet-100/ressl.yaml @@ -53,4 +53,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/simclr.yaml b/scripts/pretrain/imagenet-100/simclr.yaml index 478851c14..8a07198f8 100644 --- a/scripts/pretrain/imagenet-100/simclr.yaml +++ b/scripts/pretrain/imagenet-100/simclr.yaml @@ -50,4 +50,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/simsiam.yaml b/scripts/pretrain/imagenet-100/simsiam.yaml index 06c27a740..dab8055b7 100644 --- a/scripts/pretrain/imagenet-100/simsiam.yaml +++ b/scripts/pretrain/imagenet-100/simsiam.yaml @@ -48,4 +48,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/supcon.yaml b/scripts/pretrain/imagenet-100/supcon.yaml index a101c56db..0b91b8815 100644 --- a/scripts/pretrain/imagenet-100/supcon.yaml +++ b/scripts/pretrain/imagenet-100/supcon.yaml @@ -46,4 +46,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/swav.yaml b/scripts/pretrain/imagenet-100/swav.yaml index 17aae7878..1833f54a2 100644 --- a/scripts/pretrain/imagenet-100/swav.yaml +++ b/scripts/pretrain/imagenet-100/swav.yaml @@ -54,4 +54,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/vibcreg.yaml b/scripts/pretrain/imagenet-100/vibcreg.yaml index 7f343983f..ba9c89100 100644 --- a/scripts/pretrain/imagenet-100/vibcreg.yaml +++ b/scripts/pretrain/imagenet-100/vibcreg.yaml @@ -53,4 +53,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" 
-precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/vicreg.yaml b/scripts/pretrain/imagenet-100/vicreg.yaml index 7263b5b4c..68e817fb2 100644 --- a/scripts/pretrain/imagenet-100/vicreg.yaml +++ b/scripts/pretrain/imagenet-100/vicreg.yaml @@ -81,4 +81,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet-100/wmse.yaml b/scripts/pretrain/imagenet-100/wmse.yaml index 80713b5eb..3e1747842 100644 --- a/scripts/pretrain/imagenet-100/wmse.yaml +++ b/scripts/pretrain/imagenet-100/wmse.yaml @@ -47,4 +47,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet/barlow.yaml b/scripts/pretrain/imagenet/barlow.yaml index 5ef3d958a..e799282bf 100644 --- a/scripts/pretrain/imagenet/barlow.yaml +++ b/scripts/pretrain/imagenet/barlow.yaml @@ -51,4 +51,4 @@ devices: [0, 1, 2, 3] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet/byol.yaml b/scripts/pretrain/imagenet/byol.yaml index 2636108a8..23a1069c4 100644 --- a/scripts/pretrain/imagenet/byol.yaml +++ b/scripts/pretrain/imagenet/byol.yaml @@ -53,5 +53,5 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed accumulate_grad_batches: 16 diff --git a/scripts/pretrain/imagenet/mae.yaml b/scripts/pretrain/imagenet/mae.yaml index b9e327c8d..96886368c 100644 --- a/scripts/pretrain/imagenet/mae.yaml +++ b/scripts/pretrain/imagenet/mae.yaml @@ -54,4 +54,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/scripts/pretrain/imagenet/mocov2plus.yaml b/scripts/pretrain/imagenet/mocov2plus.yaml index d86043de3..0fabde502 100644 --- a/scripts/pretrain/imagenet/mocov2plus.yaml +++ b/scripts/pretrain/imagenet/mocov2plus.yaml @@ -50,4 +50,4 @@ devices: [0, 1] sync_batchnorm: True accelerator: "gpu" strategy: "ddp" -precision: 16 +precision: 16-mixed diff --git a/solo/args/linear.py b/solo/args/linear.py index 78e8f8650..f89c60091 100644 --- a/solo/args/linear.py +++ b/solo/args/linear.py @@ -1,5 +1,4 @@ import os -from multiprocessing.managers import BaseManager import omegaconf from omegaconf import OmegaConf @@ -158,7 +157,7 @@ def parse_cfg(cfg: omegaconf.DictConfig): # even if the custom dataset doesn't have any labels cfg.data.num_classes = max( 1, - len([entry.name for entry in os.scandir(cfg.data.train_path) if entry.is_dir]), + sum(entry.is_dir() for entry in os.scandir(cfg.data.train_path)), ) if cfg.data.format == "dali": diff --git a/solo/args/pretrain.py b/solo/args/pretrain.py index 6c16d8028..36b5ff6db 100644 --- a/solo/args/pretrain.py +++ b/solo/args/pretrain.py @@ -124,7 +124,7 @@ def parse_cfg(cfg: omegaconf.DictConfig): # even if the custom dataset doesn't have any labels cfg.data.num_classes = max( 1, - len([entry.name for entry in os.scandir(cfg.data.train_path) if entry.is_dir]), + sum(entry.is_dir() for entry in os.scandir(cfg.data.train_path)), ) # find number of big/small crops diff --git a/solo/backbones/poolformer/poolformer.py b/solo/backbones/poolformer/poolformer.py index e558b75e6..b93f772c4 100644 --- a/solo/backbones/poolformer/poolformer.py +++ b/solo/backbones/poolformer/poolformer.py @@ -25,8 +25,7 @@ import torch import torch.nn as nn from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD -from 
timm.models.layers import DropPath, trunc_normal_ -from timm.models.layers.helpers import to_2tuple +from timm.models.layers import DropPath, trunc_normal_, to_2tuple from timm.models.registry import register_model @@ -197,10 +196,10 @@ def __init__( self.use_layer_scale = use_layer_scale if use_layer_scale: self.layer_scale_1 = nn.Parameter( - layer_scale_init_value * torch.ones((dim)), requires_grad=True + layer_scale_init_value * torch.ones(dim), requires_grad=True ) self.layer_scale_2 = nn.Parameter( - layer_scale_init_value * torch.ones((dim)), requires_grad=True + layer_scale_init_value * torch.ones(dim), requires_grad=True ) def forward(self, x): diff --git a/solo/backbones/wide_resnet/wide_resnet.py b/solo/backbones/wide_resnet/wide_resnet.py index 5c4214f4d..86839ad45 100644 --- a/solo/backbones/wide_resnet/wide_resnet.py +++ b/solo/backbones/wide_resnet/wide_resnet.py @@ -30,7 +30,7 @@ class WideResnetBasicBlock(nn.Module): def __init__( self, in_planes, out_planes, stride, drop_rate=0.0, activate_before_residual=False ): - super(WideResnetBasicBlock, self).__init__() + super().__init__() self.bn1 = nn.BatchNorm2d(in_planes, momentum=0.001, eps=0.001) self.relu1 = nn.LeakyReLU(negative_slope=0.1, inplace=False) self.conv1 = nn.Conv2d( @@ -73,7 +73,7 @@ def __init__( drop_rate=0.0, activate_before_residual=False, ): - super(WideResnetNetworkBlock, self).__init__() + super().__init__() self.layer = self._make_layer( block, in_planes, out_planes, nb_layers, stride, drop_rate, activate_before_residual ) @@ -100,7 +100,7 @@ def forward(self, x): class WideResNet(nn.Module): def __init__(self, first_stride=1, depth=28, widen_factor=2, drop_rate=0.0, **kwargs): - super(WideResNet, self).__init__() + super().__init__() channels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor] self.num_features = channels[-1] assert (depth - 4) % 6 == 0 diff --git a/solo/data/h5_dataset.py b/solo/data/h5_dataset.py index 46eb0a371..8aaf3236d 100644 --- a/solo/data/h5_dataset.py +++ b/solo/data/h5_dataset.py @@ -66,7 +66,7 @@ def __init__( if dataset == "imagenet100": script_folder = Path(os.path.dirname(__file__)) classes_file = script_folder / "dataset_subset" / "imagenet100_classes.txt" - with open(classes_file, "r") as f: + with open(classes_file) as f: self.classes = f.readline().strip().split() self.classes = sorted(self.classes) self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)} @@ -107,7 +107,7 @@ def _load_h5_data_info(self): f.write(f"{class_name}/{img_name} {y}\n") else: # load data info file that was already generated by previous runs - with open(h5_data_info_file, "r") as f: + with open(h5_data_info_file) as f: for line in f: class_name_img, y = line.strip().split(" ") class_name, img_name = class_name_img.split("/") diff --git a/solo/data/pretrain_dataloader.py b/solo/data/pretrain_dataloader.py index faf1fc76d..96cb7d030 100644 --- a/solo/data/pretrain_dataloader.py +++ b/solo/data/pretrain_dataloader.py @@ -172,7 +172,7 @@ def __call__(self, x: Image) -> List[torch.Tensor]: return out def __repr__(self) -> str: - return "\n".join([str(transform) for transform in self.transforms]) + return "\n".join(str(transform) for transform in self.transforms) def build_transform_pipeline(dataset, cfg): diff --git a/solo/methods/base.py b/solo/methods/base.py index 362424fe3..93b66509f 100644 --- a/solo/methods/base.py +++ b/solo/methods/base.py @@ -26,7 +26,6 @@ import torch import torch.nn as nn import torch.nn.functional as F -from 
pl_bolts.optimizers.lr_scheduler import LinearWarmupCosineAnnealingLR from solo.backbones import ( convnext_base, convnext_large, @@ -52,6 +51,7 @@ ) from solo.utils.knn import WeightedKNNClassifier from solo.utils.lars import LARS +from solo.utils.lr_scheduler import LinearWarmupCosineAnnealingLR from solo.utils.metrics import accuracy_at_k, weighted_mean from solo.utils.misc import omegaconf_select, remove_bias_and_norm_from_weight_decay from solo.utils.momentum import MomentumUpdater, initialize_momentum_params @@ -142,8 +142,8 @@ def __init__(self, cfg: omegaconf.DictConfig): warmup_start_lr (float): initial learning rate for warmup scheduler. Defaults to 0.00003. warmup_epochs (float): number of warmup epochs. Defaults to 10. - lr_decay_steps (Sequence, optional): steps to decay the learning rate if scheduler is - step. Defaults to None. + lr_decay_steps (Sequence, optional): steps to decay the learning rate if + scheduler is step. Defaults to None. interval (str): interval to update the lr scheduler. Defaults to 'step'. knn_eval: enabled (bool): enables online knn evaluation while training. @@ -179,7 +179,8 @@ def __init__(self, cfg: omegaconf.DictConfig): self.cfg: omegaconf.DictConfig = cfg - ########## Backbone ########## + ############################## + # Backbone self.backbone_args: Dict[str, Any] = cfg.backbone.kwargs assert cfg.backbone.name in BaseMethod._BACKBONES self.base_model: Callable = self._BACKBONES[cfg.backbone.name] @@ -257,6 +258,9 @@ def __init__(self, cfg: omegaconf.DictConfig): # for performance self.no_channel_last = cfg.performance.disable_channel_last + # keep track of validation metrics + self.validation_step_outputs = [] + @staticmethod def add_and_assert_specific_cfg(cfg: omegaconf.DictConfig) -> omegaconf.DictConfig: """Adds method specific default values/checks for config. @@ -279,7 +283,7 @@ def add_and_assert_specific_cfg(cfg: omegaconf.DictConfig) -> omegaconf.DictConf cfg.optimizer.kwargs = omegaconf_select(cfg, "optimizer.kwargs", {}) # default for acc grad batches - cfg.accumulate_grad_batches = omegaconf_select(cfg, "accumulate_grad_batches", None) + cfg.accumulate_grad_batches = omegaconf_select(cfg, "accumulate_grad_batches", 1) # default parameters for the scheduler cfg.scheduler.lr_decay_steps = omegaconf_select(cfg, "scheduler.lr_decay_steps", None) @@ -397,14 +401,14 @@ def configure_optimizers(self) -> Tuple[List, List]: return [optimizer], [scheduler] - def optimizer_zero_grad(self, epoch, batch_idx, optimizer, optimizer_idx): + def optimizer_zero_grad(self, epoch, batch_idx, optimizer): """ This improves performance marginally. It should be fine since we are not affected by any of the downsides descrited in https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html#torch.optim.Optimizer.zero_grad Implemented as in here - https://pytorch-lightning.readthedocs.io/en/1.5.10/guides/speed.html#set-grads-to-none + https://lightning.ai/docs/pytorch/latest/advanced/speed.html?highlight=set%20grads%20none """ try: optimizer.zero_grad(set_to_none=True) @@ -551,7 +555,11 @@ def base_validation_step(self, X: torch.Tensor, targets: torch.Tensor) -> Dict: return self._base_shared_step(X, targets) def validation_step( - self, batch: List[torch.Tensor], batch_idx: int, dataloader_idx: int = None + self, + batch: List[torch.Tensor], + batch_idx: int, + dataloader_idx: int = None, + update_validation_step_outputs: bool = True, ) -> Dict[str, Any]: """Validation step for pytorch lightning. 
It does all the shared operations, such as forwarding a batch of images, computing logits and computing metrics. @@ -559,6 +567,8 @@ def validation_step( Args: batch (List[torch.Tensor]):a batch of data in the format of [img_indexes, X, Y]. batch_idx (int): index of the batch. + update_validation_step_outputs (bool): whether or not to append the + metrics to validation_step_outputs Returns: Dict[str, Any]: dict with the batch_size (used for averaging), the classification loss @@ -579,20 +589,19 @@ def validation_step( "val_acc1": out["acc1"], "val_acc5": out["acc5"], } + if update_validation_step_outputs: + self.validation_step_outputs.append(metrics) return metrics - def validation_epoch_end(self, outs: List[Dict[str, Any]]): + def on_validation_epoch_end(self): """Averages the losses and accuracies of all the validation batches. This is needed because the last batch can be smaller than the others, slightly skewing the metrics. - - Args: - outs (List[Dict[str, Any]]): list of outputs of the validation step. """ - val_loss = weighted_mean(outs, "val_loss", "batch_size") - val_acc1 = weighted_mean(outs, "val_acc1", "batch_size") - val_acc5 = weighted_mean(outs, "val_acc5", "batch_size") + val_loss = weighted_mean(self.validation_step_outputs, "val_loss", "batch_size") + val_acc1 = weighted_mean(self.validation_step_outputs, "val_acc1", "batch_size") + val_acc5 = weighted_mean(self.validation_step_outputs, "val_acc5", "batch_size") log = {"val_loss": val_loss, "val_acc1": val_acc1, "val_acc5": val_acc5} @@ -618,7 +627,8 @@ def __init__( momentum: base_tau (float): base value of the weighting decrease coefficient in [0,1]. final_tau (float): final value of the weighting decrease coefficient in [0,1]. - classifier (bool): whether or not to train a classifier on top of the momentum backbone. + classifier (bool): whether or not to train a classifier on top of the + momentum backbone. """ super().__init__(cfg) @@ -824,56 +834,78 @@ def on_train_batch_end(self, outputs: Dict[str, Any], batch: Sequence[Any], batc self.last_step = self.trainer.global_step def validation_step( - self, batch: List[torch.Tensor], batch_idx: int, dataloader_idx: int = None + self, + batch: List[torch.Tensor], + batch_idx: int, + dataloader_idx: int = None, + update_validation_step_outputs: bool = True, ) -> Tuple[Dict[str, Any], Dict[str, Any]]: """Validation step for pytorch lightning. It performs all the shared operations for the momentum backbone and classifier, such as forwarding a batch of images in the momentum backbone and classifier and computing statistics. + Args: batch (List[torch.Tensor]): a batch of data in the format of [X, Y]. batch_idx (int): index of the batch. + update_validation_step_outputs (bool): whether or not to append the + metrics to validation_step_outputs + Returns: Tuple(Dict[str, Any], Dict[str, Any]): tuple of dicts containing the batch_size (used for averaging), the classification loss and accuracies for both the online and the momentum classifiers. 
""" - parent_metrics = super().validation_step(batch, batch_idx) + metrics = super().validation_step(batch, batch_idx, update_validation_step_outputs=False) X, targets = batch - batch_size = targets.size(0) out = self._shared_step_momentum(X, targets) - metrics = None if self.momentum_classifier is not None: - metrics = { - "batch_size": batch_size, - "momentum_val_loss": out["loss"], - "momentum_val_acc1": out["acc1"], - "momentum_val_acc5": out["acc5"], - } + metrics.update( + { + "momentum_val_loss": out["loss"], + "momentum_val_acc1": out["acc1"], + "momentum_val_acc5": out["acc5"], + } + ) - return parent_metrics, metrics + if update_validation_step_outputs: + self.validation_step_outputs.append(metrics) - def validation_epoch_end(self, outs: Tuple[List[Dict[str, Any]]]): + return metrics + + def on_validation_epoch_end(self): """Averages the losses and accuracies of the momentum backbone / classifier for all the validation batches. This is needed because the last batch can be smaller than the others, slightly skewing the metrics. - Args: - outs (Tuple[List[Dict[str, Any]]]):): list of outputs of the validation step for self - and the parent. """ - parent_outs = [out[0] for out in outs] - super().validation_epoch_end(parent_outs) + # base method metrics + val_loss = weighted_mean(self.validation_step_outputs, "val_loss", "batch_size") + val_acc1 = weighted_mean(self.validation_step_outputs, "val_acc1", "batch_size") + val_acc5 = weighted_mean(self.validation_step_outputs, "val_acc5", "batch_size") - if self.momentum_classifier is not None: - momentum_outs = [out[1] for out in outs] + log = {"val_loss": val_loss, "val_acc1": val_acc1, "val_acc5": val_acc5} + + if self.knn_eval and not self.trainer.sanity_checking: + val_knn_acc1, val_knn_acc5 = self.knn.compute() + log.update({"val_knn_acc1": val_knn_acc1, "val_knn_acc5": val_knn_acc5}) + + self.log_dict(log, sync_dist=True) - val_loss = weighted_mean(momentum_outs, "momentum_val_loss", "batch_size") - val_acc1 = weighted_mean(momentum_outs, "momentum_val_acc1", "batch_size") - val_acc5 = weighted_mean(momentum_outs, "momentum_val_acc5", "batch_size") + # momentum method metrics + if self.momentum_classifier is not None: + val_loss = weighted_mean( + self.validation_step_outputs, "momentum_val_loss", "batch_size" + ) + val_acc1 = weighted_mean( + self.validation_step_outputs, "momentum_val_acc1", "batch_size" + ) + val_acc5 = weighted_mean( + self.validation_step_outputs, "momentum_val_acc5", "batch_size" + ) log = { "momentum_val_loss": val_loss, diff --git a/solo/methods/linear.py b/solo/methods/linear.py index f67cf31bf..d84cc6e4b 100644 --- a/solo/methods/linear.py +++ b/solo/methods/linear.py @@ -19,13 +19,14 @@ import logging from typing import Any, Callable, Dict, List, Tuple, Union + import omegaconf import pytorch_lightning as pl import torch import torch.nn as nn import torch.nn.functional as F -from pl_bolts.optimizers.lr_scheduler import LinearWarmupCosineAnnealingLR from solo.utils.lars import LARS +from solo.utils.lr_scheduler import LinearWarmupCosineAnnealingLR from solo.utils.metrics import accuracy_at_k, weighted_mean from solo.utils.misc import ( omegaconf_select, @@ -79,8 +80,8 @@ def __init__( warmup_start_lr (float): initial learning rate for warmup scheduler. Defaults to 0.00003. warmup_epochs (float): number of warmup epochs. Defaults to 10. - lr_decay_steps (Sequence, optional): steps to decay the learning rate if scheduler is - step. Defaults to None. 
+ lr_decay_steps (Sequence, optional): steps to decay the learning rate + if scheduler is step. Defaults to None. interval (str): interval to update the lr scheduler. Defaults to 'step'. finetune (bool): whether or not to finetune the backbone. Defaults to False. @@ -90,9 +91,9 @@ def __init__( speeds up training considerably. Defaults to False. https://pytorch.org/tutorials/intermediate/memory_format_tutorial.html#converting-existing-models - loss_func (Callable): loss function to use (for mixup, label smoothing or default). Defaults to None - mixup_func (Callable, optional). function to convert data and targets with mixup/cutmix. - Defaults to None. + loss_func (Callable): loss function to use (for mixup, label smoothing or default). + Defaults to None mixup_func (Callable, optional). function to convert data and targets + with mixup/cutmix. Defaults to None. """ super().__init__() @@ -154,6 +155,9 @@ def __init__( for param in self.backbone.parameters(): param.requires_grad = False + # keep track of validation metrics + self.validation_step_outputs = [] + @staticmethod def add_and_assert_specific_cfg(cfg: omegaconf.DictConfig) -> omegaconf.DictConfig: """Adds method specific default values/checks for config. @@ -177,7 +181,7 @@ def add_and_assert_specific_cfg(cfg: omegaconf.DictConfig) -> omegaconf.DictConf cfg.finetune = omegaconf_select(cfg, "finetune", False) # default for acc grad batches - cfg.accumulate_grad_batches = omegaconf_select(cfg, "accumulate_grad_batches", None) + cfg.accumulate_grad_batches = omegaconf_select(cfg, "accumulate_grad_batches", 1) # default parameters for the scheduler cfg.scheduler.lr_decay_steps = omegaconf_select(cfg, "scheduler.lr_decay_steps", None) @@ -203,7 +207,10 @@ def configure_optimizers(self) -> Tuple[List, List]: if self.layer_decay > 0: assert self.finetune, "Only with use layer weight decay with finetune on." - msg = "Method should implement no_weight_decay() that returns a set of parameter names to ignore from weight decay" + msg = ( + "Method should implement no_weight_decay() that returns " + "a set of parameter names to ignore from weight decay" + ) assert hasattr(self.backbone, "no_weight_decay"), msg learnable_params = param_groups_layer_decay( @@ -364,26 +371,25 @@ def validation_step(self, batch: torch.Tensor, batch_idx: int) -> Dict[str, Any] out = self.shared_step(batch, batch_idx) - results = { + metrics = { "batch_size": out["batch_size"], "val_loss": out["loss"], "val_acc1": out["acc1"], "val_acc5": out["acc5"], } - return results + self.validation_step_outputs.append(metrics) + return metrics - def validation_epoch_end(self, outs: List[Dict[str, Any]]): + def on_validation_epoch_end(self): """Averages the losses and accuracies of all the validation batches. This is needed because the last batch can be smaller than the others, slightly skewing the metrics. - - Args: - outs (List[Dict[str, Any]]): list of outputs of the validation step. 
""" - val_loss = weighted_mean(outs, "val_loss", "batch_size") - val_acc1 = weighted_mean(outs, "val_acc1", "batch_size") - val_acc5 = weighted_mean(outs, "val_acc5", "batch_size") + val_loss = weighted_mean(self.validation_step_outputs, "val_loss", "batch_size") + val_acc1 = weighted_mean(self.validation_step_outputs, "val_acc1", "batch_size") + val_acc5 = weighted_mean(self.validation_step_outputs, "val_acc5", "batch_size") + self.validation_step_outputs.clear() log = {"val_loss": val_loss, "val_acc1": val_acc1, "val_acc5": val_acc5} self.log_dict(log, sync_dist=True) diff --git a/solo/utils/auto_umap.py b/solo/utils/auto_umap.py index c926632ec..7c3971870 100644 --- a/solo/utils/auto_umap.py +++ b/solo/utils/auto_umap.py @@ -90,8 +90,8 @@ def add_and_assert_specific_cfg(cfg: DictConfig) -> DictConfig: @staticmethod def random_string(letter_count=4, digit_count=4): tmp_random = random.Random(time.time()) - rand_str = "".join((tmp_random.choice(string.ascii_lowercase) for x in range(letter_count))) - rand_str += "".join((tmp_random.choice(string.digits) for x in range(digit_count))) + rand_str = "".join(tmp_random.choice(string.ascii_lowercase) for _ in range(letter_count)) + rand_str += "".join(tmp_random.choice(string.digits) for _ in range(digit_count)) rand_str = list(rand_str) tmp_random.shuffle(rand_str) return "".join(rand_str) @@ -150,7 +150,10 @@ def plot(self, trainer: pl.Trainer, module: pl.LightningModule): # set module to eval model and collect all feature representations module.eval() with torch.no_grad(): - for x, y in trainer.val_dataloaders[0]: + val_dataloader = trainer.val_dataloaders + if isinstance(val_dataloader, list): + val_dataloader = val_dataloader[0] + for x, y in val_dataloader: x = x.to(device, non_blocking=True) y = y.to(device, non_blocking=True) diff --git a/solo/utils/checkpointer.py b/solo/utils/checkpointer.py index 311f32393..14ded7bd0 100644 --- a/solo/utils/checkpointer.py +++ b/solo/utils/checkpointer.py @@ -79,8 +79,8 @@ def add_and_assert_specific_cfg(cfg: DictConfig) -> DictConfig: @staticmethod def random_string(letter_count=4, digit_count=4): tmp_random = random.Random(time.time()) - rand_str = "".join((tmp_random.choice(string.ascii_lowercase) for _ in range(letter_count))) - rand_str += "".join((tmp_random.choice(string.digits) for _ in range(digit_count))) + rand_str = "".join(tmp_random.choice(string.ascii_lowercase) for _ in range(letter_count)) + rand_str += "".join(tmp_random.choice(string.digits) for _ in range(digit_count)) rand_str = list(rand_str) tmp_random.shuffle(rand_str) return "".join(rand_str) diff --git a/solo/utils/lr_scheduler.py b/solo/utils/lr_scheduler.py new file mode 100644 index 000000000..38c2c4489 --- /dev/null +++ b/solo/utils/lr_scheduler.py @@ -0,0 +1,149 @@ +# Copied from Pytorch Lightning Bolts +# https://github.com/Lightning-Universe/lightning-bolts/blob/master/src/pl_bolts/optimizers/lr_scheduler.py +# To avoid a dependency + + +import math +import warnings +from typing import List + +from torch.optim import Optimizer +from torch.optim.lr_scheduler import _LRScheduler + + +class LinearWarmupCosineAnnealingLR(_LRScheduler): + """Sets the learning rate of each parameter group to follow a linear warmup schedule + between warmup_start_lr and base_lr followed by a cosine annealing schedule + between base_lr and eta_min. + + .. 
warning:: + It is recommended to call :func:`.step()` for :class:`LinearWarmupCosineAnnealingLR` + after each iteration as calling it after each epoch will keep the starting lr at + warmup_start_lr for the first epoch which is 0 in most cases. + + .. warning:: + passing epoch to :func:`.step()` is being deprecated and comes with an + EPOCH_DEPRECATION_WARNING. It calls the :func:`_get_closed_form_lr()` + method for this scheduler instead of :func:`get_lr()`. Though this does not + change the behavior of the scheduler, when passing epoch param to :func:`.step()`, + the user should call the :func:`.step()` function before calling + train and validation methods. + + Example: + >>> import torch.nn as nn + >>> from torch.optim import Adam + >>> # + >>> layer = nn.Linear(10, 1) + >>> optimizer = Adam(layer.parameters(), lr=0.02) + >>> scheduler = LinearWarmupCosineAnnealingLR(optimizer, warmup_epochs=10, max_epochs=40) + >>> # the default case + >>> for epoch in range(40): + ... # train(...) + ... # validate(...) + ... scheduler.step() + >>> # passing epoch param case + >>> for epoch in range(40): + ... scheduler.step(epoch) + ... # train(...) + ... # validate(...) + """ + + def __init__( + self, + optimizer: Optimizer, + warmup_epochs: int, + max_epochs: int, + warmup_start_lr: float = 0.0, + eta_min: float = 0.0, + last_epoch: int = -1, + ) -> None: + """ + Args: + optimizer (Optimizer): Wrapped optimizer. + warmup_epochs (int): Maximum number of iterations for linear warmup + max_epochs (int): Maximum number of iterations + warmup_start_lr (float): Learning rate to start the linear warmup. Default: 0. + eta_min (float): Minimum learning rate. Default: 0. + last_epoch (int): The index of last epoch. Default: -1. + """ + self.warmup_epochs = warmup_epochs + self.max_epochs = max_epochs + self.warmup_start_lr = warmup_start_lr + self.eta_min = eta_min + + super().__init__(optimizer, last_epoch) + + def get_lr(self) -> List[float]: + """Compute learning rate using chainable form of the scheduler.""" + if not self._get_lr_called_within_step: + warnings.warn( + "To get the last learning rate computed by the scheduler, " + "please use `get_last_lr()`.", + UserWarning, + ) + + if self.last_epoch == 0: + return [self.warmup_start_lr] * len(self.base_lrs) + if self.last_epoch < self.warmup_epochs: + return [ + group["lr"] + (base_lr - self.warmup_start_lr) / (self.warmup_epochs - 1) + for base_lr, group in zip(self.base_lrs, self.optimizer.param_groups) + ] + if self.last_epoch == self.warmup_epochs: + return self.base_lrs + if (self.last_epoch - 1 - self.max_epochs) % ( + 2 * (self.max_epochs - self.warmup_epochs) + ) == 0: + return [ + group["lr"] + + (base_lr - self.eta_min) + * (1 - math.cos(math.pi / (self.max_epochs - self.warmup_epochs))) + / 2 + for base_lr, group in zip(self.base_lrs, self.optimizer.param_groups) + ] + + return [ + ( + 1 + + math.cos( + math.pi + * (self.last_epoch - self.warmup_epochs) + / (self.max_epochs - self.warmup_epochs) + ) + ) + / ( + 1 + + math.cos( + math.pi + * (self.last_epoch - self.warmup_epochs - 1) + / (self.max_epochs - self.warmup_epochs) + ) + ) + * (group["lr"] - self.eta_min) + + self.eta_min + for group in self.optimizer.param_groups + ] + + def _get_closed_form_lr(self) -> List[float]: + """Called when epoch is passed as a param to the `step` function of the scheduler.""" + if self.last_epoch < self.warmup_epochs: + return [ + self.warmup_start_lr + + self.last_epoch * (base_lr - self.warmup_start_lr) / (self.warmup_epochs - 1) + for base_lr in 
self.base_lrs + ] + + return [ + self.eta_min + + 0.5 + * (base_lr - self.eta_min) + * ( + 1 + + math.cos( + math.pi + * (self.last_epoch - self.warmup_epochs) + / (self.max_epochs - self.warmup_epochs) + ) + ) + for base_lr in self.base_lrs + ] diff --git a/solo/utils/misc.py b/solo/utils/misc.py index 6bb3fb8f1..317a1409f 100644 --- a/solo/utils/misc.py +++ b/solo/utils/misc.py @@ -295,7 +295,8 @@ def generate_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False): """Adapted from https://github.com/facebookresearch/mae. grid_size: int of the grid height and width return: - pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token) + pos_embed: [grid_size*grid_size, embed_dim] or + [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token) """ grid_h = np.arange(grid_size, dtype=np.float32) @@ -331,7 +332,7 @@ def generate_1d_sincos_pos_embed_from_grid(embed_dim, pos): """ assert embed_dim % 2 == 0 - omega = np.arange(embed_dim // 2, dtype=np.float) + omega = np.arange(embed_dim // 2, dtype=float) omega /= embed_dim / 2.0 omega = 1.0 / 10000**omega # (D/2,) diff --git a/solo/utils/whitening.py b/solo/utils/whitening.py index 5daea1157..06524b4f8 100644 --- a/solo/utils/whitening.py +++ b/solo/utils/whitening.py @@ -36,7 +36,7 @@ def __init__(self, output_dim: int, eps: float = 0.0): to 0.0. """ - super(Whitening2d, self).__init__() + super().__init__() self.output_dim = output_dim self.eps = eps @@ -175,7 +175,7 @@ def __init__( momentum: float = 0.1, affine: bool = True, ): - super(IterNorm, self).__init__() + super().__init__() # assert dim == 4, 'IterNorm does not support 2D' self.T = T self.eps = eps
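
Note on the migration pattern applied throughout this patch: pytorch-lightning 2.0 removed the validation_epoch_end(outputs) hook, so BaseMethod, its momentum variant, and LinearModel now buffer per-batch metrics in a validation_step_outputs list and aggregate them in on_validation_epoch_end(). The sketch below shows that pattern in isolation, assuming Lightning 2.x is installed. TinyClassifier, its toy linear layer, and the metric names are illustrative only and are not part of solo-learn; the inline weighted average just mirrors what solo.utils.metrics.weighted_mean computes.

# Minimal sketch of the Lightning 2.x validation pattern adopted in this patch.
# The module buffers its own step outputs and aggregates them in
# on_validation_epoch_end(); validation_epoch_end(outputs) no longer exists.
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F


class TinyClassifier(pl.LightningModule):  # illustrative, not from the repo
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(32, 10)
        # buffer for per-batch validation metrics, cleared every epoch
        self.validation_step_outputs = []

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self.linear(x)
        loss = F.cross_entropy(logits, y)
        acc = (logits.argmax(dim=-1) == y).float().mean()
        metrics = {"batch_size": y.size(0), "val_loss": loss, "val_acc": acc}
        self.validation_step_outputs.append(metrics)
        return metrics

    def on_validation_epoch_end(self):
        outs = self.validation_step_outputs
        # weight by batch size so a smaller last batch does not skew the average
        total = sum(o["batch_size"] for o in outs)
        val_loss = sum(o["val_loss"] * o["batch_size"] for o in outs) / total
        val_acc = sum(o["val_acc"] * o["batch_size"] for o in outs) / total
        self.log_dict({"val_loss": val_loss, "val_acc": val_acc}, sync_dist=True)
        self.validation_step_outputs.clear()

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)


# The Trainer flags migrate the same way: precision=16 becomes "16-mixed",
# matching the YAML config changes above.
# trainer = pl.Trainer(accelerator="gpu", devices=1, precision="16-mixed")

Clearing the buffer at the end of the hook is what keeps one epoch's metrics from leaking into the next; the linear.py hunk above does this explicitly via self.validation_step_outputs.clear().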