From c3fa66d901f1a5dd7fa250e6299ac27827c3e59d Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Fri, 27 Oct 2023 12:08:29 +0100
Subject: [PATCH 01/19] registry wip

---
 configs/SkinLesionSegmentation.yaml           |   6 +
 deeplightning/model/__init__.py               |  12 ++
 deeplightning/{models => model}/cnn.py        |   0
 deeplightning/{models => model}/convmixer.py  |   0
 deeplightning/{models => model}/dcgan.py      |   0
 deeplightning/{models => model}/gan.py        |   0
 deeplightning/{models => model}/lenet.py      |   0
 deeplightning/{models => model}/mlpmixer.py   |   0
 .../{models => model}/mobilenetv2.py          |   0
 .../{models => model}/mobilenetv3.py          |   0
 deeplightning/{models => model}/resnet.py     |   0
 deeplightning/{models => model}/unet.py       |   0
 deeplightning/{models => model}/vit.py        |   0
 deeplightning/{models => model}/vit2.py       |   0
 deeplightning/{models => model}/vit_old.py    |   0
 deeplightning/{models => model}/vqvae.py      |   0
 deeplightning/models/__init__.py              |  12 --
 deeplightning/task/specs.py                   |  34 +++++
 deeplightning/trainer/metrics/__init__.py     |   1 +
 deeplightning/utils/metrics.py                | 131 ++++++++++++------
 20 files changed, 138 insertions(+), 58 deletions(-)
 create mode 100755 deeplightning/model/__init__.py
 rename deeplightning/{models => model}/cnn.py (100%)
 rename deeplightning/{models => model}/convmixer.py (100%)
 rename deeplightning/{models => model}/dcgan.py (100%)
 rename deeplightning/{models => model}/gan.py (100%)
 rename deeplightning/{models => model}/lenet.py (100%)
 rename deeplightning/{models => model}/mlpmixer.py (100%)
 rename deeplightning/{models => model}/mobilenetv2.py (100%)
 rename deeplightning/{models => model}/mobilenetv3.py (100%)
 rename deeplightning/{models => model}/resnet.py (100%)
 rename deeplightning/{models => model}/unet.py (100%)
 rename deeplightning/{models => model}/vit.py (100%)
 rename deeplightning/{models => model}/vit2.py (100%)
 rename deeplightning/{models => model}/vit_old.py (100%)
 rename deeplightning/{models => model}/vqvae.py (100%)
 delete mode 100755 deeplightning/models/__init__.py
 create mode 100644 deeplightning/task/specs.py
 create mode 100644 deeplightning/trainer/metrics/__init__.py

diff --git a/configs/SkinLesionSegmentation.yaml b/configs/SkinLesionSegmentation.yaml
index f9df6dd..f6e8a2f 100755
--- a/configs/SkinLesionSegmentation.yaml
+++ b/configs/SkinLesionSegmentation.yaml
@@ -3,6 +3,7 @@ modes:
   test: false
   
 task: SemanticSegmentation
+
 data:
   dataset: HAM10000
   root: /Users/pme/research/data/HAM10000
@@ -51,6 +52,11 @@ engine:
   num_nodes: 1
   precision: 32
 
+metrics:
+  train: default
+  val: default
+  test: default
+  
 train:
   num_epochs: 10
   val_every_n_epoch: 1
diff --git a/deeplightning/model/__init__.py b/deeplightning/model/__init__.py
new file mode 100755
index 0000000..38b9fc0
--- /dev/null
+++ b/deeplightning/model/__init__.py
@@ -0,0 +1,12 @@
+from deeplightning.model.cnn import *
+from deeplightning.model.convmixer import *
+from deeplightning.model.dcgan import *
+from deeplightning.model.gan import *
+from deeplightning.model.lenet import *
+from deeplightning.model.mlpmixer import *
+from deeplightning.model.mobilenetv2 import *
+from deeplightning.model.mobilenetv3 import *
+from deeplightning.model.resnet import *
+from deeplightning.model.unet import *
+from deeplightning.model.vit import *
+from deeplightning.model.vqvae import *
diff --git a/deeplightning/models/cnn.py b/deeplightning/model/cnn.py
similarity index 100%
rename from deeplightning/models/cnn.py
rename to deeplightning/model/cnn.py
diff --git a/deeplightning/models/convmixer.py b/deeplightning/model/convmixer.py
similarity index 100%
rename from deeplightning/models/convmixer.py
rename to deeplightning/model/convmixer.py
diff --git a/deeplightning/models/dcgan.py b/deeplightning/model/dcgan.py
similarity index 100%
rename from deeplightning/models/dcgan.py
rename to deeplightning/model/dcgan.py
diff --git a/deeplightning/models/gan.py b/deeplightning/model/gan.py
similarity index 100%
rename from deeplightning/models/gan.py
rename to deeplightning/model/gan.py
diff --git a/deeplightning/models/lenet.py b/deeplightning/model/lenet.py
similarity index 100%
rename from deeplightning/models/lenet.py
rename to deeplightning/model/lenet.py
diff --git a/deeplightning/models/mlpmixer.py b/deeplightning/model/mlpmixer.py
similarity index 100%
rename from deeplightning/models/mlpmixer.py
rename to deeplightning/model/mlpmixer.py
diff --git a/deeplightning/models/mobilenetv2.py b/deeplightning/model/mobilenetv2.py
similarity index 100%
rename from deeplightning/models/mobilenetv2.py
rename to deeplightning/model/mobilenetv2.py
diff --git a/deeplightning/models/mobilenetv3.py b/deeplightning/model/mobilenetv3.py
similarity index 100%
rename from deeplightning/models/mobilenetv3.py
rename to deeplightning/model/mobilenetv3.py
diff --git a/deeplightning/models/resnet.py b/deeplightning/model/resnet.py
similarity index 100%
rename from deeplightning/models/resnet.py
rename to deeplightning/model/resnet.py
diff --git a/deeplightning/models/unet.py b/deeplightning/model/unet.py
similarity index 100%
rename from deeplightning/models/unet.py
rename to deeplightning/model/unet.py
diff --git a/deeplightning/models/vit.py b/deeplightning/model/vit.py
similarity index 100%
rename from deeplightning/models/vit.py
rename to deeplightning/model/vit.py
diff --git a/deeplightning/models/vit2.py b/deeplightning/model/vit2.py
similarity index 100%
rename from deeplightning/models/vit2.py
rename to deeplightning/model/vit2.py
diff --git a/deeplightning/models/vit_old.py b/deeplightning/model/vit_old.py
similarity index 100%
rename from deeplightning/models/vit_old.py
rename to deeplightning/model/vit_old.py
diff --git a/deeplightning/models/vqvae.py b/deeplightning/model/vqvae.py
similarity index 100%
rename from deeplightning/models/vqvae.py
rename to deeplightning/model/vqvae.py
diff --git a/deeplightning/models/__init__.py b/deeplightning/models/__init__.py
deleted file mode 100755
index 0af42ee..0000000
--- a/deeplightning/models/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from deeplightning.models.cnn import *
-from deeplightning.models.convmixer import *
-from deeplightning.models.dcgan import *
-from deeplightning.models.gan import *
-from deeplightning.models.lenet import *
-from deeplightning.models.mlpmixer import *
-from deeplightning.models.mobilenetv2 import *
-from deeplightning.models.mobilenetv3 import *
-from deeplightning.models.resnet import *
-from deeplightning.models.unet import *
-from deeplightning.models.vit import *
-from deeplightning.models.vqvae import *
diff --git a/deeplightning/task/specs.py b/deeplightning/task/specs.py
new file mode 100644
index 0000000..1bf471b
--- /dev/null
+++ b/deeplightning/task/specs.py
@@ -0,0 +1,34 @@
+from typing import Any
+from omegaconf import OmegaConf
+from deeplightning.registry import METRICS_REGISTRY, TASK_REGISTRY
+
+
+__TASKS__ = [
+    "ImageClassification",
+    "SemanticSegmentation",
+]
+
+
+# TODO: register metrics with decorator
+__METRICS__ = {"classification_accuracy": None}
+
+
+class TaskSpecification():
+    def __init__(self, cfg: OmegaConf):
+        assert cfg.task in __TASKS__
+        self.task = cfg.task
+
+
+class ImageClassificationTask(TaskSpecification):
+    """Task specification for image classification and its metrics."""
+    def __init__(self, cfg: OmegaConf):
+        # the base class validates `cfg.task` and stores it as `self.task`
+        super().__init__(cfg)
+        self.metrics = ["classification_accuracy"]
+        # fail early if a listed metric was never added to the registry
+        for m in self.metrics:
+            assert m in METRICS_REGISTRY.get_element_names()
+
+
+def ImageClassificationSpec(cfg: OmegaConf) -> ImageClassificationTask:
+    return ImageClassificationTask(cfg)
\ No newline at end of file
diff --git a/deeplightning/trainer/metrics/__init__.py b/deeplightning/trainer/metrics/__init__.py
new file mode 100644
index 0000000..c58acfa
--- /dev/null
+++ b/deeplightning/trainer/metrics/__init__.py
@@ -0,0 +1 @@
+from deeplightning.trainer.metrics import *
\ No newline at end of file
diff --git a/deeplightning/utils/metrics.py b/deeplightning/utils/metrics.py
index dc2b211..9aeb698 100755
--- a/deeplightning/utils/metrics.py
+++ b/deeplightning/utils/metrics.py
@@ -9,93 +9,132 @@
 from torchmetrics.functional.classification.accuracy import accuracy
 import seaborn as sn
 import numpy as np
-from matplotlib.figure import Figure as pltFigure
+from matplotlib.figure import Figure
 from matplotlib import pyplot as plt
 
+from deeplightning.registry import METRICS_REGISTRY
 
 
-class Metric_PrecisionRecallCurve(MulticlassPrecisionRecallCurve):
-	"""Precision-Recall metric class; inherits methods from 
-	torchmetrics parent class.
+__all__ = [
+	"ClassificationAccuracy", "classification_accuracy",
+	"PrecisionRecallCurve", "precision_recall_curve",
+	"ConfusionMatrix", "confusion_matrix",
+]
+
+
+class ClassificationAccuracy(MulticlassAccuracy):
+    """Classification Accuracy metric, inheriting from torchmetrics
+    """
+    def __init__(self, cfg: OmegaConf):
+        self.num_classes = cfg.model.network.params.num_classes
+        args = {
+            "num_classes": self.num_classes,
+        }
+        super().__init__(**args)
+
+
+@METRICS_REGISTRY.register_element()
+def classification_accuracy(cfg) -> ClassificationAccuracy:
+    return ClassificationAccuracy(cfg)
+
+
+class PrecisionRecallCurve(MulticlassPrecisionRecallCurve):
+	"""Precision-Recall metric class, inheriting from torchmetrics
 	"""
 	def __init__(self, cfg: OmegaConf):
 		self.num_classes = cfg.model.network.params.num_classes
 		args = {
-			"task": "binary" if self.num_classes == 2 else "multiclass",
 			"num_classes": self.num_classes,
-			}
+		}
 		super().__init__(**args)
 
-
-	def draw(self, precision: Tensor, recall: Tensor, thresholds: Tensor, subset: str,  epoch: int) -> pltFigure:
-		"""Draw Confusion Matrix as a figure, to be logged as artifact media.
-
-		Parameters
-		----------
-		precision : precisions.
-		recall : recalls.
-		thresholds: threshold
-		subset : data subset (e.g. 'train' or 'val), to be used
-			as a label in the figure.
-		epoch : current epoch, to be used as a label in the figure.
+	def draw(self,
+		precision: Tensor,
+		recall: Tensor,
+		thresholds: Tensor,
+		stage: str,
+		epoch: int
+	) -> Figure:
+		"""Draw Precision-Recall Curve as a figure, to be logged as artifact media
+
+		Args:
+			precision: precision values
+			recall: recall values
+			thresholds: threshold values
+			stage: data subset {"train", "val", "test"}, for labelling
+			epoch: current epoch, for labelling
 		"""
-		assert self.num_classes == len(precision) and self.num_classes == len(recall)
+		assert self.num_classes == len(precision)
+		assert self.num_classes == len(recall)
 		
+		# draw figure
 		fig = plt.figure()
 		for i in range(self.num_classes):
 			plt.plot(recall[i].cpu(), precision[i].cpu(), label=i)
-		plt.title(f"Precision-Recall Curve [{subset}, epoch {epoch}]")
+		plt.title(f"Precision-Recall Curve [{stage}, epoch {epoch}]")
 		plt.xlabel("Recall")
 		plt.ylabel("Precision")
 		if self.num_classes <= 10:
 			plt.legend(loc="lower left", title="class", fontsize='small')
 		plt.close()
 		return fig
+	
 
-		
-class Metric_ConfusionMatrix(MulticlassConfusionMatrix):
-	"""Confusion Matrix metric class; inherits methods from 
-	torchmetrics parent class.
+@METRICS_REGISTRY.register_element()
+def precision_recall_curve(cfg) -> PrecisionRecallCurve:
+    return PrecisionRecallCurve(cfg)
+
+
+class ConfusionMatrix(MulticlassConfusionMatrix):
+	"""Confusion Matrix metric class, inheriting from torchmetrics
 	"""
 	def __init__(self, cfg: OmegaConf):
 		self.num_classes = cfg.model.network.params.num_classes
 		args = {
-			"task": "binary" if self.num_classes == 2 else "multiclass",
 			"num_classes": self.num_classes,
-			"normalize": "true",  # 'true' normalizes over the true labels (targets)
+			"normalize": "true",  # 'true' normalizes over true labels (targets)
 		}
 		super().__init__(**args)
 
 
-	def draw(self, confusion_matrix: Tensor, subset: str,  epoch: int) -> pltFigure:
-		"""Draw Confusion Matrix as a figure, to be logged as artifact media.
+	def draw(self, 
+		confusion_matrix: Tensor, 
+		stage: str,
+		epoch: int,
+	) -> Figure:
+		"""Draw Confusion Matrix as a figure, to be logged as artifact media
+
+		Args:
+			confusion_matrix: confusion matrix values
+			stage: data subset {"train", "val", "test"}, for labelling
+			epoch: current epoch, for labelling
 		"""
-		assert self.num_classes == confusion_matrix.shape[0] and self.num_classes == confusion_matrix.shape[1]
+		assert self.num_classes == confusion_matrix.shape[0]
+		assert self.num_classes == confusion_matrix.shape[1]
+		
+		# round confusion matrix values
 		confusion_matrix = np.round(100*confusion_matrix.cpu().numpy()).astype(int)
 		
+		# draw figure
 		fig = plt.subplot()
-		cbar_args = {"label": "Correct predictions (%), normalized by true class"}
-		sn.heatmap(data = confusion_matrix, annot = True, fmt = "g", square = True, 
-			cmap = "Blues", vmin=0, vmax=100, cbar_kws=cbar_args)
-		plt.title(f"Confusion Matrix [{subset}, epoch {epoch}]")
+		cbar_args = {
+			"label": "Correct predictions (%), normalized by true class"}
+		sn.heatmap(
+			data = confusion_matrix, 
+			annot=True, fmt="g", square=True, cmap="Blues", 
+			vmin=0, vmax=100, cbar_kws=cbar_args)
+		plt.title(f"Confusion Matrix [{stage}, epoch {epoch}]")
 		plt.xlabel("Predicted class")
 		plt.ylabel("True class")
 		plt.close()
 		return fig
-		
 
-class Metric_Accuracy(MulticlassAccuracy):
-	"""Accuracy metric class; inherits methods from 
-	torchmetrics parent class.
-	"""
-	def __init__(self, cfg: OmegaConf):
-		self.num_classes = cfg.model.network.params.num_classes
-		args = {
-			"task": "binary" if self.num_classes == 2 else "multiclass",
-			"num_classes": self.num_classes,
-		}
-		super().__init__(**args)
-	
+
+@METRICS_REGISTRY.register_element()	
+def confusion_matrix(cfg) -> ConfusionMatrix:
+    return ConfusionMatrix(cfg)
+
+
 
 def metric_accuracy(logits: Tensor, target: Tensor, task: str, num_classes: int) -> Tensor:
 	preds = torch.argmax(logits, dim=1)

From 3fbfb122b926d9ac4a9c25bb738956b6c9dc8b80 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Fri, 27 Oct 2023 12:16:36 +0100
Subject: [PATCH 02/19] empty base config

---
 configs/SkinLesionSegmentation.yaml |  4 +-
 configs/_base.yaml                  | 69 ++++++++++++++++++++++++++++
 configs/base.yaml                   | 71 -----------------------------
 3 files changed, 71 insertions(+), 73 deletions(-)
 create mode 100755 configs/_base.yaml
 delete mode 100755 configs/base.yaml

diff --git a/configs/SkinLesionSegmentation.yaml b/configs/SkinLesionSegmentation.yaml
index f6e8a2f..493df94 100755
--- a/configs/SkinLesionSegmentation.yaml
+++ b/configs/SkinLesionSegmentation.yaml
@@ -1,9 +1,9 @@
+task: SemanticSegmentation
+
 modes: 
   train: true
   test: false
   
-task: SemanticSegmentation
-
 data:
   dataset: HAM10000
   root: /Users/pme/research/data/HAM10000
diff --git a/configs/_base.yaml b/configs/_base.yaml
new file mode 100755
index 0000000..e70f97b
--- /dev/null
+++ b/configs/_base.yaml
@@ -0,0 +1,69 @@
+task: 
+
+modes: 
+  train: 
+  test: 
+  
+data:
+  dataset: 
+  root: 
+  num_workers: 
+  batch_size: 
+  module: 
+    target: 
+  train_transforms:
+  test_transforms:
+ 
+model:
+  module: 
+    target: 
+  network:
+    target: 
+    params: 
+  optimizer:
+    target: 
+    params:
+  scheduler:
+    target: 
+    params:
+    call:
+      interval: 
+      frequency: 
+  loss:
+    target: 
+    params:
+
+engine:
+  accelerator: 
+  strategy: 
+  devices: 
+  num_nodes: 
+  precision: 
+
+metrics:
+  train: 
+  val: 
+  test: 
+  
+train:
+  num_epochs: 
+  val_every_n_epoch: 
+  grad_accum_from_epoch: 
+  grad_accum_every_n_batches: 
+  ckpt_resume_path: 
+  ckpt_monitor_metric:
+  ckpt_every_n_epochs: 
+  ckpt_save_top_k: 
+  early_stop_metric: 
+  early_stop_delta: 
+  early_stop_patience: 
+
+test:
+  ckpt_test_path:
+
+logger:
+  name: 
+  project_name: 
+  tags: 
+  notes: 
+  log_every_n_steps: 
\ No newline at end of file
diff --git a/configs/base.yaml b/configs/base.yaml
deleted file mode 100755
index 55f8d5b..0000000
--- a/configs/base.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-modes: 
-  train: true
-  test: false
-  
-task: ImageClassification
-
-data:
-  root: /Users/pme/data/
-  dataset: MNIST
-  image_size: 28
-  num_channels: 1
-  num_classes: 10
-  num_workers: 4
-  batch_size: 256
-  module:
-    target: deeplightning.data.dataloaders.image.mnist.MNIST
-
-model:
-  module:
-    target: deeplightning.task.image.classification.TaskModule
-  network:
-    #target: deeplightning.models.cnn.SymbolCNN
-    target: deeplightning.models.mobilenetv3.mobilenet_v3_small 
-    params: 
-      #num_classes: 10
-      #num_channels: 1
-      num_classes: 10
-      num_channels: 1
-  optimizer:
-    target: torch.optim.SGD
-    params:
-      lr: 0.01
-      weight_decay: 0.01
-      momentum: 0.9
-  scheduler:
-    target: torch.optim.lr_scheduler.ExponentialLR
-    params:
-      gamma: 0.99
-    call:
-      interval: "epoch"
-      frequency: 1
-  loss:
-    target: torch.nn.CrossEntropyLoss
-    params:
-  
-engine:
-  accelerator: cpu  # {cpu,gpu}
-  strategy: auto    # {auto, ddp, deepspeed}
-  devices: 1        # {1, [0,1]}
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 1
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 1
-  ckpt_resume_path: null
-  ckpt_monitor_metric: val_acc  # used in `ModelCheckpoint` callback
-  ckpt_every_n_epochs: 1
-  ckpt_save_top_k: 1
-  early_stop_metric: null  # used in `EarlyStopping` callback
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-logger:
-  name: wandb
-  project_name: trial
-  tags: ["_"] # cannot be empty
-  notes: null
-  log_every_n_steps: 20
\ No newline at end of file

From 85fc6ebcb0bf2f18c38708e177dadc18d39f0e70 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Sun, 29 Oct 2023 01:46:26 +0100
Subject: [PATCH 03/19] refactor tests

---
 conda_env.yaml                                | 13 ++++----
 condaenv.sqtapdy1.requirements.txt            | 21 +++++++++++++
 configs/ImageReconstruction_vqvae.yaml        | 10 ++-----
 .../dummy_config.yaml => configs/_dummy.yaml  | 26 ++++++++++------
 deeplightning/config/load.py                  |  4 +--
 deeplightning/init/initializers.py            |  3 +-
 deeplightning/registry.py                     | 15 +++++-----
 deeplightning/task/vision/classification.py   |  2 +-
 deeplightning/trainer/trainer.py              |  4 +--
 tests/__init__.py                             |  8 +++++
 tests/run_all_tests.sh                        |  6 ++--
 tests/test_checkpoint.py                      | 30 +++++++++++++------
 tests/test_configs.py                         | 20 +++++++++++++
 tests/test_trainer.py                         | 30 +++++--------------
 14 files changed, 122 insertions(+), 70 deletions(-)
 create mode 100644 condaenv.sqtapdy1.requirements.txt
 rename tests/helpers/dummy_config.yaml => configs/_dummy.yaml (71%)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_configs.py

diff --git a/conda_env.yaml b/conda_env.yaml
index 309facc..0f0debd 100755
--- a/conda_env.yaml
+++ b/conda_env.yaml
@@ -3,8 +3,8 @@ channels:
   - defaults
   - conda-forge
 dependencies:
-  - pip==23.0.1
-  - python==3.10 # numpy==1.22 not working with python==3.9
+  - pip
+  - python==3.10
   - pip:
     - colorama==0.4.4
     - deepspeed==0.5.10
@@ -17,13 +17,14 @@ dependencies:
     - matplotlib==3.5.1
     - numpy==1.23.5
     - omegaconf==2.1.1
-    - opencv-python==4.7.0.72 #==4.1.2.30
+    - opencv-python==4.7.0.72
     - pandas==1.5.3
+    - pytest
     - pyyaml==6.0
     - seaborn==0.12.0
-    - torch==2.0.0 #==1.13.1
+    - torch==2.0.0
     - torchaudio==2.0.1
     - torchlibrosa==0.1.0
-    - torchmetrics==0.11.4 #0.11.0
-    - torchvision==0.15.1  #0.11.3
+    - torchmetrics==1.2.0
+    - torchvision==0.15.1
     - wandb==0.12.21
\ No newline at end of file
diff --git a/condaenv.sqtapdy1.requirements.txt b/condaenv.sqtapdy1.requirements.txt
new file mode 100644
index 0000000..9fb1cdc
--- /dev/null
+++ b/condaenv.sqtapdy1.requirements.txt
@@ -0,0 +1,21 @@
+colorama==0.4.4
+deepspeed==0.5.10
+einops==0.4.0
+flask==2.0.3
+imagesize==1.4.1
+ipython
+librosa==0.9.2
+lightning==2.0.0
+matplotlib==3.5.1
+numpy==1.23.5
+omegaconf==2.1.1
+opencv-python==4.7.0.72
+pandas==1.5.3
+pyyaml==6.0
+seaborn==0.12.0
+torch==2.0.0
+torchaudio==2.0.1
+torchlibrosa==0.1.0
+torchmetrics==0.11.4
+torchvision==0.15.1
+wandb==0.12.21
\ No newline at end of file
diff --git a/configs/ImageReconstruction_vqvae.yaml b/configs/ImageReconstruction_vqvae.yaml
index 08f34c2..c4579bd 100755
--- a/configs/ImageReconstruction_vqvae.yaml
+++ b/configs/ImageReconstruction_vqvae.yaml
@@ -39,8 +39,8 @@ model:
     target: deeplightning.modules.loss.vqvae_loss.VQVAE_Loss
     params:
       smooth_l1_loss: True
-      num_tokens: ${model.network.params.num_tokens}
-      kl_div_loss_weight: ${model.network.params.kl_div_loss_weight}
+      num_tokens: 8  # ${model.network.params.num_tokens}
+      kl_div_loss_weight: 0.0  # ${model.network.params.kl_div_loss_weight}
   
 engine:
   backend: deepspeed_stage_3
@@ -64,8 +64,4 @@ log_to_wandb: true
 project_name: trial
 tags: ["image", "reconstruction", "vqvae"] # cannot be empty
 notes: null
-  type: pytorch_lightning.loggers.MLFlowLogger
-  params:
-    experiment_name: Default
-    tracking_uri: mlruns
-  log_every_n_steps: 10
\ No newline at end of file
+log_every_n_steps: 20
\ No newline at end of file
diff --git a/tests/helpers/dummy_config.yaml b/configs/_dummy.yaml
similarity index 71%
rename from tests/helpers/dummy_config.yaml
rename to configs/_dummy.yaml
index 3d7860b..ffdfc28 100755
--- a/tests/helpers/dummy_config.yaml
+++ b/configs/_dummy.yaml
@@ -36,25 +36,33 @@ model:
     params:
 
 engine:
-  backend: deepspeed_stage_3
-  gpus: null
+  accelerator: cpu
+  strategy: auto
+  devices: auto
   num_nodes: 1
   precision: 32
 
+metrics:
+  train: default
+  val: default
+  test: default
+
 train:
   num_epochs: 1
   val_every_n_epoch: 1
   grad_accum_from_epoch: 0
   grad_accum_every_n_batches: 1
   ckpt_resume_path: null
+  ckpt_monitor_metric: null  # used in `ModelCheckpoint` callback
   ckpt_every_n_epochs: 1
+  ckpt_save_top_k: 1
   early_stop_metric: null
+  early_stop_delta: 0.001
+  early_stop_patience: 3
 
 logger:
-  log_to_wandb: false
-  target: pytorch_lightning.loggers.MLFlowLogger
-  params:
-    experiment_name: Default
-    tracking_uri: mlruns
-  log_every_n_steps: 10
-  flush_logs_every_n_steps: 50
\ No newline at end of file
+  name: wandb
+  project_name: unittests
+  tags: ["_"] # cannot be empty
+  notes: null
+  log_every_n_steps: 20
\ No newline at end of file
diff --git a/deeplightning/config/load.py b/deeplightning/config/load.py
index d64ed2f..c304826 100755
--- a/deeplightning/config/load.py
+++ b/deeplightning/config/load.py
@@ -21,7 +21,7 @@ def load_config(config_file: str = "configs/base.yaml") -> OmegaConf:
     """
     cfg = OmegaConf.load(config_file)
     cfg = merge_defaults(cfg)
-    cfg = check_consistency(cfg)
+    #cfg = check_consistency(cfg)
     cfg = runtime_compute(cfg)
     OmegaConf.resolve(cfg)
     #config_print(OmegaConf.to_yaml(cfg))
@@ -103,4 +103,4 @@ def log_config(cfg: OmegaConf, path: str) -> None:
     
     if not os.path.exists(path):
         os.makedirs(path, exist_ok=True)
-    OmegaConf.save(cfg, f = os.path.join(path, "cfg.yaml"))
\ No newline at end of file
+    OmegaConf.save(cfg, f=os.path.join(path, "cfg.yaml"))
\ No newline at end of file
diff --git a/deeplightning/init/initializers.py b/deeplightning/init/initializers.py
index d8cbf15..dfa916c 100755
--- a/deeplightning/init/initializers.py
+++ b/deeplightning/init/initializers.py
@@ -7,7 +7,7 @@
 from deeplightning.config.defaults import __ConfigGroups__
 from deeplightning.init.imports import init_module
 from deeplightning.trainer.trainer import DLTrainer
-from deeplightning.registry import __MetricsRegistry__
+#from deeplightning.registry import __MetricsRegistry__
 
 
 
@@ -53,4 +53,5 @@ def init_everything(cfg: OmegaConf) -> Tuple[LightningModule, LightningDataModul
 def init_metrics(cfg: OmegaConf, device: torch.device) -> dict:
     """ Initialize performance metrics
     """
+    raise NotImplementedError
     return {k: v(cfg).to(device) for k, v in __MetricsRegistry__[cfg.task].items()}
\ No newline at end of file
diff --git a/deeplightning/registry.py b/deeplightning/registry.py
index e21f972..6009bde 100755
--- a/deeplightning/registry.py
+++ b/deeplightning/registry.py
@@ -1,4 +1,4 @@
-#from lightning.pytorch.loggers import WandbLogger
+from lightning.pytorch.loggers import WandbLogger
 
 #from deeplightning.logger.wandb import wandbLogger
 """
@@ -95,7 +95,7 @@ def get_element_names(self) -> List:
 
 
 
-'''
+
 __TaskRegistry__ = [
     # Image
     "ImageClassification",
@@ -106,6 +106,12 @@ def get_element_names(self) -> List:
     "AudioClassification",
 ]
 
+__LoggerRegistry__ = {
+    "wandb": WandbLogger,
+}
+
+
+'''
 __HooksRegistry__ = {
     # Image
     "ImageClassification": {
@@ -181,9 +187,4 @@ def get_element_names(self) -> List:
     },
 }
 
-
-
-__LoggerRegistry__ = {
-    "wandb": WandbLogger,
-}
 '''
\ No newline at end of file
diff --git a/deeplightning/task/vision/classification.py b/deeplightning/task/vision/classification.py
index 8f5eaa3..6f73226 100755
--- a/deeplightning/task/vision/classification.py
+++ b/deeplightning/task/vision/classification.py
@@ -48,7 +48,7 @@ def __init__(self, cfg: OmegaConf):
 
         # Initialise metrics to track during training
         torch_device = torch.device("cuda") if cfg.engine.accelerator == "gpu" else torch.device('cpu')
-        self.metrics = init_metrics(cfg, device=torch_device)
+        #self.metrics = init_metrics(cfg, device=torch_device)
 
         # Initialise label to track metrics against
         self.step_label = "iteration"
diff --git a/deeplightning/trainer/trainer.py b/deeplightning/trainer/trainer.py
index 04566db..019c3ac 100755
--- a/deeplightning/trainer/trainer.py
+++ b/deeplightning/trainer/trainer.py
@@ -16,8 +16,6 @@
 from deeplightning.logger.helpers import add_logger_params_to_config
 from deeplightning.logger.wandb import init_wandb_metrics
 from deeplightning.utils.messages import config_print
-from deeplightning.registry import (__LoggerRegistry__, 
-                                    __HooksRegistry__)
 from deeplightning.utils.python_utils import flatten_dict
 
 
@@ -84,7 +82,7 @@ def init_logger(self, cfg: OmegaConf) -> None:
 
             # intialize step label for each metrics
             logger.step_label = init_wandb_metrics(
-                metric_names = __HooksRegistry__[cfg.task]["LOGGED_METRICS_NAMES"],
+                metric_names = [f"{x}_{y}" for x in cfg.metrics for y in cfg.metrics[x]],  #__HooksRegistry__[cfg.task]["LOGGED_METRICS_NAMES"],
                 step_label = "iteration",
             )
 
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..a6042fc
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,8 @@
+import os
+import sys
+
+
+PROJECT_PATH = os.getcwd()
+SOURCE_PATH = os.path.join(PROJECT_PATH, "deeplightning")
+
+sys.path.append(SOURCE_PATH)
\ No newline at end of file
diff --git a/tests/run_all_tests.sh b/tests/run_all_tests.sh
index 82d0be3..585bc28 100755
--- a/tests/run_all_tests.sh
+++ b/tests/run_all_tests.sh
@@ -2,8 +2,10 @@
 
 # before running the script make sure to activate `deeplightning`
 # environment and install `pytest` library
-#conda activate deeplightning
-#pip install pytest
+# ```
+# conda activate deeplightning
+# pip install pytest
+# ```
 
 # run `pytest --help` to see argument options
 
diff --git a/tests/test_checkpoint.py b/tests/test_checkpoint.py
index f355280..fa64667 100755
--- a/tests/test_checkpoint.py
+++ b/tests/test_checkpoint.py
@@ -3,7 +3,6 @@
 sys.path.insert(0, "..")
 import torch
 import lightning as pl
-from lightning.utilities import rank_zero_only
 import pytest
 
 from tests.helpers.tools import compare_model_params
@@ -16,9 +15,9 @@
 CKPT_PATH = os.path.join(TMP_DIR, "last.ckpt")
 
 
-def setup_trainer(strategy, precision, gpus):
-    
-    ckpt_callback = pl.callbacks.ModelCheckpoint(
+def setup_trainer(accelerator, strategy, devices, precision):
+
+    ckpt_callback = pl.pytorch.callbacks.ModelCheckpoint(
             dirpath = TMP_DIR, 
             save_last = True, 
             every_n_epochs = 1)
@@ -26,9 +25,10 @@ def setup_trainer(strategy, precision, gpus):
     trainer = pl.Trainer(
         max_epochs = 1,
         logger = False,
+        accelerator = accelerator,
         strategy = strategy, 
+        devices = devices,
         precision = precision, 
-        gpus = gpus,
         limit_train_batches = 2,
         limit_val_batches = 2,
         enable_model_summary = False,
@@ -43,14 +43,26 @@ def setup_trainer(strategy, precision, gpus):
     "kwargs",
     (
         pytest.param(
-            dict(strategy = None,  precision = 32, gpus = None)),
+            dict(accelerator="cpu", strategy="auto", devices="auto", precision=16)),
+        pytest.param(
+            dict(accelerator="cpu", strategy="auto", devices="auto", precision=32)),
+        pytest.param(
+            dict(accelerator="gpu", strategy="auto", devices=[0], precision=16),
+            marks = pytest.mark.skipif(
+                condition = not torch.cuda.is_available(), 
+                reason="gpu unavailable")),
         pytest.param(
-            dict(strategy = "ddp", precision = 32, gpus = [0]), 
+            dict(accelerator="gpu", strategy="auto", devices=[0], precision=32),
             marks = pytest.mark.skipif(
                 condition = not torch.cuda.is_available(), 
-                reason="single-gpu unavailable")),
+                reason="gpu unavailable")),
+        pytest.param(
+            dict(accelerator="gpu", strategy="auto", devices=[0,1], precision=16),
+            marks = pytest.mark.skipif(
+                condition = torch.cuda.device_count() < 2, 
+                reason="multi-gpu unavailable")),
         pytest.param(
-            dict(strategy = "ddp", precision = 32, gpus = [0,1]), 
+            dict(accelerator="gpu", strategy="auto", devices=[0,1], precision=32), 
             marks = pytest.mark.skipif(
                 condition = torch.cuda.device_count() < 2, 
                 reason="multi-gpu unavailable")),
diff --git a/tests/test_configs.py b/tests/test_configs.py
new file mode 100644
index 0000000..b5ab7f8
--- /dev/null
+++ b/tests/test_configs.py
@@ -0,0 +1,20 @@
+import os
+import sys
+sys.path.insert(0, "..")
+from omegaconf import OmegaConf
+import pytest
+
+from deeplightning.config.load import load_config
+
+
+def test_configs():
+
+    cfg_base = load_config(config_file="configs/_base.yaml")
+    assert OmegaConf.is_config(cfg_base)
+
+    for cfg_filename in os.listdir("configs"):
+        if cfg_filename != "_base.yaml":
+            cfg = load_config(config_file = f"configs/{cfg_filename}")
+            assert OmegaConf.is_config(cfg)
+
+   
\ No newline at end of file
diff --git a/tests/test_trainer.py b/tests/test_trainer.py
index b729f31..76035ae 100755
--- a/tests/test_trainer.py
+++ b/tests/test_trainer.py
@@ -20,23 +20,14 @@
     "kwargs",
     (
         pytest.param(
-            dict(
-                strategy = None, 
-                precision = 32,
-                gpus = None)),
+            dict(accelerator="cpu", strategy="auto", devices="auto", precision=32)),
         pytest.param(
-            dict(
-                strategy = "ddp",  
-                precision = 32, 
-                gpus = [0]), 
+            dict(accelerator="gpu", strategy="ddp", devices="auto", precision=32), 
             marks = pytest.mark.skipif(
                 condition = not torch.cuda.is_available(), 
-                reason="single-gpu unavailable")),
+                reason="gpu unavailable")),
         pytest.param(
-            dict(
-                strategy = "ddp",  
-                precision = 32, 
-                gpus = [0,1]), 
+            dict(accelerator="gpu", strategy="ddp", precision=32), 
             marks = pytest.mark.skipif(
                 condition = torch.cuda.device_count() < 2, 
                 reason="multi-gpu unavailable")),
@@ -44,19 +35,12 @@
 )
 def test_trainer(kwargs):
 
-    cfg = load_config(config_file = "helpers/dummy_config.yaml")
+    cfg = load_config(config_file = "configs/_dummy.yaml")
     
+    cfg.engine.accelerator = kwargs["accelerator"]
     cfg.engine.strategy = kwargs["strategy"]
     cfg.engine.precision = kwargs["precision"]
-    cfg.engine.devices = kwargs["gpus"]
-    # TODO extra params for quick testing
-    '''
-    cfg.test_params.limit_train_batches = 2
-    cfg.test_params.limit_val_batches = 2
-    cfg.test_params.enable_model_summary = False,
-    cfg.test_params.enable_progress_bar = False,
-    cfg.test_params.logger = False
-    '''
+    cfg.engine.devices = kwargs["devices"]
     
     model = init_model(cfg)
     trainer = init_trainer(cfg)

From 9d6cc8a1f5bba3f3adf39a386d5bf13e385f17b0 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Sun, 29 Oct 2023 22:53:16 +0000
Subject: [PATCH 04/19] cfg refactor

---
 configs/AudioClassification_lstm.yaml         | 73 ---------------
 configs/ImageClassification_cnn.yaml          | 69 --------------
 .../ImageClassification_cnn_TaskAgnostic.yaml | 67 --------------
 configs/ImageClassification_resnet.yaml       | 62 -------------
 configs/ImageGeneration_gan.yaml              | 76 ----------------
 configs/ImageReconstruction_vqvae.yaml        | 67 --------------
 configs/ObjectRecognition_vit.yaml            | 91 -------------------
 configs/PedestrianDetection_yolo.yaml         | 10 --
 configs/SpokenWordRecognition_cnn.yaml        | 79 ----------------
 configs/SymbolRecognition_cnn.yaml            | 67 --------------
 configs/SymbolRecognition_mlpmixer.yaml       | 72 ---------------
 configs/SymbolRecognition_vit.yaml            | 74 ---------------
 {configs => tests/helpers}/_dummy.yaml        |  0
 tests/test_configs.py                         | 34 +++++--
 tests/test_trainer.py                         |  2 +-
 15 files changed, 28 insertions(+), 815 deletions(-)
 delete mode 100755 configs/AudioClassification_lstm.yaml
 delete mode 100755 configs/ImageClassification_cnn.yaml
 delete mode 100755 configs/ImageClassification_cnn_TaskAgnostic.yaml
 delete mode 100755 configs/ImageClassification_resnet.yaml
 delete mode 100755 configs/ImageGeneration_gan.yaml
 delete mode 100755 configs/ImageReconstruction_vqvae.yaml
 delete mode 100755 configs/ObjectRecognition_vit.yaml
 delete mode 100755 configs/PedestrianDetection_yolo.yaml
 delete mode 100755 configs/SpokenWordRecognition_cnn.yaml
 delete mode 100755 configs/SymbolRecognition_cnn.yaml
 delete mode 100755 configs/SymbolRecognition_mlpmixer.yaml
 delete mode 100755 configs/SymbolRecognition_vit.yaml
 rename {configs => tests/helpers}/_dummy.yaml (100%)

diff --git a/configs/AudioClassification_lstm.yaml b/configs/AudioClassification_lstm.yaml
deleted file mode 100755
index 70c0c2b..0000000
--- a/configs/AudioClassification_lstm.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-modes: 
-  train: true
-  test: false
-  
-task: ImageClassification
-data:
-  dataset: FSD
-  root: /Users/pme/data/fsd
-  num_workers: 4
-  batch_size: 256
-  module: 
-    target: deeplightning.data.dataloaders.audio.fsd.FreeSpokenDigit
-  train_transforms:
-    normalize: # use `deeplightning.utils.data.compute_dataset_mean_and_stdev()`
-      mean: [0.4711]
-      stdev: [0.1464]
-  test_transforms:
-    normalize:
-      mean: [0.4711]
-      stdev: [0.1464]
- 
-model:
-  module: 
-    target: deeplightning.task.audio.audio_classif.AudioClassification
-  network:
-    target: deeplightning.models.lstm.LSTM
-    params: 
-      num_classes: .....
-      num_channels: .....
-  optimizer:
-    target: torch.optim.Adadelta
-    params:
-  scheduler:
-    target: torch.optim.lr_scheduler.ExponentialLR
-    params:
-      gamma: 0.99
-    call:
-      interval: "epoch"
-      frequency: 1
-  loss:
-    target: torch.nn.CrossEntropyLoss
-    params:
-
-engine:
-  backend: ddp
-  gpus: [0]
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 5
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 1
-  ckpt_resume_path: null
-  ckpt_every_n_epochs: 1
-  early_stop_metric: null
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-test:
-  ckpt_test_path: /PATH_TO_CKPT # used only when `modes.test=True`
-  
-logger:
-  log_to_wandb: true
-  project_name: AudioClassification_FSD_LSTM
-  tags: ["audio", "classification", "lstm"] # cannot be empty
-  notes: null
-  type: pytorch_lightning.loggers.MLFlowLogger
-  params:
-    experiment_name: Default
-    tracking_uri: mlruns
-  log_every_n_steps: 10
\ No newline at end of file
diff --git a/configs/ImageClassification_cnn.yaml b/configs/ImageClassification_cnn.yaml
deleted file mode 100755
index 072105e..0000000
--- a/configs/ImageClassification_cnn.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-modes: 
-  train: true
-  test: true
-  
-task: ImageClassification
-data:
-  dataset: MNIST
-  root: ~/data/MNIST
-  num_workers: 4
-  batch_size: 256
-  module: 
-    target: deeplightning.data.dataloaders.image.mnist.MNIST
-  image_size: 28
-  num_channels: 1
-  num_classes: 10
-
-model:
-  module: 
-    target: deeplightning.task.image.classification.ImageClassification
-  network:
-    target: deeplightning.models.cnn.SymbolCNN
-    params: 
-      num_classes: 10
-      num_channels: 1
-  optimizer:
-    target: torch.optim.Adadelta
-    params:
-  scheduler:
-    target: torch.optim.lr_scheduler.ExponentialLR
-    params:
-      gamma: 0.99
-    call:
-      interval: "epoch"
-      frequency: 1
-  loss:
-    target: torch.nn.CrossEntropyLoss
-    params:
-
-engine:
-  backend: ddp
-  gpus: [0]
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 2
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 1
-  ckpt_resume_path: null
-  ckpt_every_n_epochs: 1
-  early_stop_metric: null
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-test:
-  ckpt_test_path: /PATH_TO_CKPT # used only when `modes.test=True`
-  
-logger:
-  name: wandb
-  log_to_wandb: true
-  project_name: trial
-  tags: ["image", "classification", "cnn"] # cannot be empty
-  notes: null
-  type: pytorch_lightning.loggers.MLFlowLogger
-  params:
-    experiment_name: Default
-    tracking_uri: mlruns
-  log_every_n_steps: 10
\ No newline at end of file
diff --git a/configs/ImageClassification_cnn_TaskAgnostic.yaml b/configs/ImageClassification_cnn_TaskAgnostic.yaml
deleted file mode 100755
index c7e365e..0000000
--- a/configs/ImageClassification_cnn_TaskAgnostic.yaml
+++ /dev/null
@@ -1,67 +0,0 @@
-modes: 
-  train: true
-  test: true
-  
-task: ImageClassification
-data:
-  dataset: MNIST
-  root: ~/data/MNIST
-  num_workers: 4
-  batch_size: 256
-  module: 
-    target: deeplightning.data.dataloaders.image.mnist.MNIST
-  image_size: 28
-  num_channels: 1
-  num_classes: 10
-
-model:
-  module: 
-    target: deeplightning.task.image.classification.TaskModule
-  network:
-    target: deeplightning.models.cnn.SymbolCNN
-    params: 
-      num_classes: 10
-      num_channels: 1
-  optimizer:
-    target: torch.optim.Adadelta
-    params:
-  scheduler:
-    target: torch.optim.lr_scheduler.ExponentialLR
-    params:
-      gamma: 0.99
-    call:
-      interval: "epoch"
-      frequency: 1
-  loss:
-    target: torch.nn.CrossEntropyLoss
-    params:
-
-engine:
-  accelerator: cpu
-  strategy: ddp
-  devices: 1
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 1
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 32
-  ckpt_resume_path: null
-  ckpt_monitor_metric: val_acc  # used in `ModelCheckpoint` callback
-  ckpt_every_n_epochs: 1
-  ckpt_save_top_k: 1
-  early_stop_metric: null
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-test:
-  ckpt_test_path: /PATH_TO_CKPT # used only when `modes.test=True`
-  
-logger:
-  name: wandb
-  project_name: trial
-  tags: ["image", "classification", "cnn"] # cannot be empty
-  notes: null
-  log_every_n_steps: 20
\ No newline at end of file
diff --git a/configs/ImageClassification_resnet.yaml b/configs/ImageClassification_resnet.yaml
deleted file mode 100755
index 6dddc0e..0000000
--- a/configs/ImageClassification_resnet.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-modes: 
-  train: true
-  test: true
-  
-task: classification
-
-data:
-  root: /Users/pme12/data/
-  num_workers: 4
-  batch_size: 256
-  module: 
-    target: deeplightning.data.dataloaders.mnist.MNIST
-
-model:
-  module: 
-    target: deeplightning.task.image.classification.ImageClassification
-  network:
-    target: deeplightning.models.resnet.resnet18
-    params: 
-      num_classes: 10
-      num_channels: 1
-  optimizer:
-    target: torch.optim.Adadelta
-    params:
-  scheduler:
-    target: torch.optim.lr_scheduler.ExponentialLR
-    params:
-      gamma: 0.99
-    call:
-      interval: "epoch"
-      frequency: 1
-  loss:
-    target: torch.nn.CrossEntropyLoss
-    params:
-
-engine:
-  backend: deepspeed_stage_3
-  gpus: null
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 2
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 1
-  ckpt_resume_path: null
-  ckpt_every_n_epochs: 1
-  early_stop_metric: null
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-logger:
-  log_to_wandb: true
-  project_name: trial
-  tags: ["image", "classification", "resnet"] # cannot be empty
-  notes: null
-  type: pytorch_lightning.loggers.MLFlowLogger
-  params:
-    experiment_name: Default
-    tracking_uri: mlruns
-  log_every_n_steps: 10
\ No newline at end of file
diff --git a/configs/ImageGeneration_gan.yaml b/configs/ImageGeneration_gan.yaml
deleted file mode 100755
index 036b0a1..0000000
--- a/configs/ImageGeneration_gan.yaml
+++ /dev/null
@@ -1,76 +0,0 @@
-task: reconstruction
-data:
-  root: /Users/pme12/data/
-  dataset: MNIST
-  resize: 32 # VQVAE requires image size to be power of 2
-  num_workers: 4
-  batch_size: 128
-  module:
-    target: deeplightning.data.dataloaders.mnist.MNIST
-
-model:
-  module:
-    target: deeplightning.task.image.reconstructionVAE.ImageReconstructionGAN
-  network:
-    target: deeplightning.models.dcgan.DCGAN
-    params: 
-      image_size: 28
-      channels: 1
-      latent_dim: 100
-  optimizer:
-    discriminator:
-      target: torch.optim.Adam
-      params:
-        lr: 0.001
-      steps: 1
-    generator:
-      target: torch.optim.Adam
-      params:
-        lr: 0.001
-      steps: 1
-  scheduler:
-    discriminator:
-      target: torch.optim.lr_scheduler.ExponentialLR
-      params:
-        gamma: 0.99
-      call:
-        interval: "epoch"
-        frequency: 1
-    generator:
-      target: torch.optim.lr_scheduler.ExponentialLR
-      params:
-        gamma: 0.99
-      call:
-        interval: "epoch"
-        frequency: 1
-  loss:
-    target: torch.nn.BCELoss  # BCEWithLogitsLoss
-    params:
-  
-engine:
-  backend: deepspeed_stage_3
-  gpus: null
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 10
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 1
-  ckpt_resume_path: null
-  ckpt_every_n_epochs: 1
-  early_stop_metric: null
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-logger:
-  log_to_wandb: true
-  project_name: trial
-  tags: ["image", "generation", "gan"] # cannot be empty
-  notes: null
-  type: pytorch_lightning.loggers.MLFlowLogger
-  params:
-    experiment_name: Default
-    tracking_uri: mlruns
-  log_every_n_steps: 10
\ No newline at end of file
diff --git a/configs/ImageReconstruction_vqvae.yaml b/configs/ImageReconstruction_vqvae.yaml
deleted file mode 100755
index c4579bd..0000000
--- a/configs/ImageReconstruction_vqvae.yaml
+++ /dev/null
@@ -1,67 +0,0 @@
-task: reconstruction
-data:
-  root: /Users/pme12/data/
-  dataset: MNIST
-  resize: 32 # VQVAE requires image size to be power of 2
-  num_workers: 4
-  batch_size: 128
-  module:
-    target: deeplightning.data.dataloaders.mnist.MNIST
-
-model:
-  module:
-    target: deeplightning.task.image.reconstructionVAE.ImageReconstructionVAE
-  network:
-    target: deeplightning.models.vqvae.DiscreteVAE
-    params: 
-      image_size: 32
-      num_tokens: 8 #512
-      codebook_dim: 32 #256
-      num_layers: 1 #2
-      num_resnet_blocks: 1 #2
-      hidden_dim: 32 #128
-      channels: 1
-      temperature: 0.9
-      straight_through: False
-      kl_div_loss_weight: 0.0
-      normalization: null
-  optimizer:
-    target: torch.optim.Adadelta
-    params:
-  scheduler:
-    target: torch.optim.lr_scheduler.ExponentialLR
-    params:
-      gamma: 0.99
-    call:
-      interval: "epoch"
-      frequency: 1
-  loss:
-    target: deeplightning.modules.loss.vqvae_loss.VQVAE_Loss
-    params:
-      smooth_l1_loss: True
-      num_tokens: 8  # ${model.network.params.num_tokens}
-      kl_div_loss_weight: 0.0  # ${model.network.params.kl_div_loss_weight}
-  
-engine:
-  backend: deepspeed_stage_3
-  gpus: null
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 10
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 1
-  ckpt_resume_path: null
-  ckpt_every_n_epochs: 1
-  early_stop_metric: null
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-logger:
-log_to_wandb: true
-project_name: trial
-tags: ["image", "reconstruction", "vqvae"] # cannot be empty
-notes: null
-log_every_n_steps: 20
\ No newline at end of file
diff --git a/configs/ObjectRecognition_vit.yaml b/configs/ObjectRecognition_vit.yaml
deleted file mode 100755
index 361ba6a..0000000
--- a/configs/ObjectRecognition_vit.yaml
+++ /dev/null
@@ -1,91 +0,0 @@
-modes: 
-  train: true
-  test: true
-  
-task: classification
-
-data:
-  root: /Users/pme/data/CIFAR10
-  num_workers: 4
-  batch_size: 256
-  module: 
-    target: deeplightning.data.dataloaders.image.cifar10.CIFAR10
-  # transformations/augmentations
-  train_transforms:
-    crop:
-      size: 32
-      pad: 4
-    hflip: 0.5
-    normalize:
-      mean: [0.4914, 0.4822, 0.4465]
-      std: [0.2023, 0.1994, 0.2010]
-  test_transforms:
-    normalize:
-      mean: [0.4914, 0.4822, 0.4465]
-      std: [0.2023, 0.1994, 0.2010]
-  # the following may be required inputs for model/lr_scheduler/etc
-  image_size: [32, 32]
-  num_channels: 3
-  num_classes: 10
-  num_training_samples: 45000
-
-model:
-  module: 
-    target: deeplightning.task.image.classification.ImageClassification
-  network:
-    target: deeplightning.models.vit.VisionTransformer
-    params: 
-      image_size: "${data.image_size}"
-      num_classes: "${data.num_classes}"
-      num_channels: "${data.num_channels}"
-      embed_dim: 128
-      mlp_dim: 512
-      num_heads: 8
-      num_layers: 6
-      patch_size: 4
-      dropout: 0.2
-  optimizer:
-    target: torch.optim.Adam
-    params:
-      lr: 0.001
-  scheduler:
-    #target: torch.optim.lr_scheduler.ExponentialLR
-    target: torch.optim.lr_scheduler.CosineAnnealingLR
-    params:
-      #gamma: 0.99
-      eta_min: 0.0001
-      T_max: "AUTO"  # will be computed at runtime
-    call:
-      interval: "step"
-      frequency: 1
-  loss:
-    target: torch.nn.CrossEntropyLoss
-    params:
-
-engine:
-  backend: deepspeed_stage_3
-  gpus: null
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 10
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 1
-  ckpt_resume_path: null
-  ckpt_every_n_epochs: 1
-  early_stop_metric: null
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-logger:
-  log_to_wandb: true
-  project_name: ObjectRecognition
-  tags: ["image", "classification", "vit"] # cannot be empty
-  notes: null
-  log_every_n_steps: 10
-  type: pytorch_lightning.loggers.MLFlowLogger
-  params:
-    experiment_name: Default
-    tracking_uri: mlruns
\ No newline at end of file
diff --git a/configs/PedestrianDetection_yolo.yaml b/configs/PedestrianDetection_yolo.yaml
deleted file mode 100755
index e1ae6fd..0000000
--- a/configs/PedestrianDetection_yolo.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-task: detection
-model_cfg: "external/yolov5/models/yolov5x.yaml"
-model_ckpt: "/Users/pme/code/yolov5/yolov5x6.pt"
-input_path: "/Users/pme/Downloads/people.jpeg"
-save_path: "/Users/pme/Downloads/tests/"
-
-engine:
-  backend: null
-  gpus: null
-  num_nodes: 1
\ No newline at end of file
diff --git a/configs/SpokenWordRecognition_cnn.yaml b/configs/SpokenWordRecognition_cnn.yaml
deleted file mode 100755
index fb64364..0000000
--- a/configs/SpokenWordRecognition_cnn.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-modes: 
-  train: true
-  test: true
-  inference: false
-  
-task: classification
-
-data:
-  root: /Users/pme/data/FSD
-  num_workers: 4
-  batch_size: 256
-  module: 
-    target: deeplightning.data.dataloaders.audio.fsd.FreeSpokenDigit
-  train_transforms:
-    normalize: # use `deeplightning.utils.data.compute_dataset_mean_and_stdev()`
-      mean: [0.4711]
-      std: [0.1464]
-  test_transforms:
-    normalize:
-      mean: [0.4711]
-      std: [0.1464]
-  # the following are required as model inputs in some cases
-  image_size: 64
-  num_channels: 1
-  num_classes: 10
- 
-model:
-  module: 
-    target: deeplightning.task.image.classification.ImageClassification
-  network:
-    target: deeplightning.models.cnn.SpectrogramCNN
-    params: 
-      num_classes: 10
-      num_channels: 1
-  optimizer:
-    target: torch.optim.Adam
-    params:
-      lr: 0.001
-  scheduler:
-    target: torch.optim.lr_scheduler.ExponentialLR
-    params:
-      gamma: 0.99
-    call:
-      interval: "epoch"
-      frequency: 1
-  loss:
-    target: torch.nn.CrossEntropyLoss
-    params:
-
-engine:
-  backend: ddp
-  gpus: [0]
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 20
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 1
-  ckpt_resume_path: null
-  ckpt_every_n_epochs: 1
-  early_stop_metric: null
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-test:
-  ckpt_test_path: /PATH_TO_CKPT # used only when `modes.test=True` and `modes.train=False`
-  
-logger:
-  log_to_wandb: true
-  project_name: SpokenWordRecognition
-  tags: ["audio", "classification", "cnn"] # cannot be empty
-  notes: null
-  log_every_n_steps: 10
-  type: pytorch_lightning.loggers.MLFlowLogger
-  params:
-    experiment_name: Default
-    tracking_uri: mlruns
\ No newline at end of file
diff --git a/configs/SymbolRecognition_cnn.yaml b/configs/SymbolRecognition_cnn.yaml
deleted file mode 100755
index dcc09b4..0000000
--- a/configs/SymbolRecognition_cnn.yaml
+++ /dev/null
@@ -1,67 +0,0 @@
-modes: 
-  train: true
-  test: true
-  
-task: classification
-
-data:
-  root: /Users/pme/data/MNIST
-  num_workers: 4
-  batch_size: 256
-  module: 
-    target: deeplightning.data.dataloaders.image.mnist.MNIST
-  # the following are required as model inputs in some cases
-  image_size: 28
-  num_channels: 1
-  num_classes: 10
-
-model:
-  module: 
-    target: deeplightning.task.image.classification.ImageClassification
-  network:
-    target: deeplightning.models.cnn.SymbolCNN
-    params: 
-      num_channels: ${data.num_channels}
-      num_classes: ${data.num_classes}
-  optimizer:
-    target: torch.optim.Adam
-    params:
-      lr: 0.001
-  scheduler:
-    target: torch.optim.lr_scheduler.ExponentialLR
-    params:
-      gamma: 0.99
-    call:
-      interval: "epoch"
-      frequency: 1
-  loss:
-    target: torch.nn.CrossEntropyLoss
-    params:
-
-engine:
-  backend: deepspeed_stage_3
-  gpus: null
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 3
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 1
-  ckpt_resume_path: null
-  ckpt_every_n_epochs: 1
-  early_stop_metric: null
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-logger:
-  log_to_wandb: true
-  project_name: SymbolRecognition
-  tags: ["image", "classification", mlpmixer] # cannot be empty
-  notes: null
-  log_every_n_steps: 10
-  type: pytorch_lightning.loggers.MLFlowLogger
-  params:
-    experiment_name: Default
-    tracking_uri: mlruns
\ No newline at end of file
diff --git a/configs/SymbolRecognition_mlpmixer.yaml b/configs/SymbolRecognition_mlpmixer.yaml
deleted file mode 100755
index 1b941e2..0000000
--- a/configs/SymbolRecognition_mlpmixer.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-modes: 
-  train: true
-  test: true
-  
-task: classification
-
-data:
-  root: /Users/pme/data/MNIST
-  num_workers: 4
-  batch_size: 256
-  module: 
-    target: deeplightning.data.dataloaders.image.mnist.MNIST
-  # the following are required as model inputs in some cases
-  image_size: 28
-  num_channels: 1
-  num_classes: 10
-
-model:
-  module: 
-    target: deeplightning.task.image.classification.ImageClassification
-  network:
-    target: deeplightning.models.mlpmixer.MLPMixer
-    params: 
-      image_size: ${data.image_size}
-      num_channels: ${data.num_channels}
-      num_classes: ${data.num_classes}
-      patch_size: 4
-      dim: 512
-      depth: 6 #12
-      dropout: 0.2
-  optimizer:
-    target: torch.optim.Adam
-    params:
-      lr: 0.001
-  scheduler:
-    target: torch.optim.lr_scheduler.ExponentialLR
-    params:
-      gamma: 0.99
-    call:
-      interval: "epoch"
-      frequency: 1
-  loss:
-    target: torch.nn.CrossEntropyLoss
-    params:
-
-engine:
-  backend: deepspeed_stage_3
-  gpus: null
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 3
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 1
-  ckpt_resume_path: null
-  ckpt_every_n_epochs: 1
-  early_stop_metric: null
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-logger:
-  log_to_wandb: true
-  project_name: SymbolRecognition
-  tags: ["image", "classification", mlpmixer] # cannot be empty
-  notes: null
-  log_every_n_steps: 10
-  type: pytorch_lightning.loggers.MLFlowLogger
-  params:
-    experiment_name: Default
-    tracking_uri: mlruns
\ No newline at end of file
diff --git a/configs/SymbolRecognition_vit.yaml b/configs/SymbolRecognition_vit.yaml
deleted file mode 100755
index 94bcbe5..0000000
--- a/configs/SymbolRecognition_vit.yaml
+++ /dev/null
@@ -1,74 +0,0 @@
-modes: 
-  train: true
-  test: true
-  
-task: classification
-
-data:
-  root: /Users/pme/data/MNIST
-  num_workers: 4
-  batch_size: 256
-  module: 
-    target: deeplightning.data.dataloaders.image.mnist.MNIST
-  # the following are required as model inputs in some cases
-  image_size: 28
-  num_channels: 1
-  num_classes: 10
-
-model:
-  module: 
-    target: deeplightning.task.image.classification.ImageClassification
-  network:
-    target: deeplightning.models.vit.VisionTransformer
-    params: 
-      image_size: ${data.image_size}
-      num_classes: ${data.num_classes}
-      num_channels: ${data.num_channels}
-      hidden_dim: 512
-      embed_dim: 128
-      num_heads: 8
-      num_layers: 6
-      patch_size: 4
-      dropout: 0.2
-  optimizer:
-    target: torch.optim.Adam
-    params:
-      lr: 0.001
-  scheduler:
-    target: torch.optim.lr_scheduler.ExponentialLR
-    params:
-      gamma: 0.99
-    call:
-      interval: "step"
-      frequency: 1
-  loss:
-    target: torch.nn.CrossEntropyLoss
-    params:
-
-engine:
-  backend: deepspeed_stage_3
-  gpus: null
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 3
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 1
-  ckpt_resume_path: null
-  ckpt_every_n_epochs: 1
-  early_stop_metric: null
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-logger:
-  log_to_wandb: true
-  project_name: SymbolRecognition
-  tags: ["image", "classification", "vit"] # cannot be empty
-  notes: null
-  log_every_n_steps: 10
-  type: pytorch_lightning.loggers.MLFlowLogger
-  params:
-    experiment_name: Default
-    tracking_uri: mlruns
\ No newline at end of file
diff --git a/configs/_dummy.yaml b/tests/helpers/_dummy.yaml
similarity index 100%
rename from configs/_dummy.yaml
rename to tests/helpers/_dummy.yaml
diff --git a/tests/test_configs.py b/tests/test_configs.py
index b5ab7f8..07ad810 100644
--- a/tests/test_configs.py
+++ b/tests/test_configs.py
@@ -1,20 +1,40 @@
 import os
-import sys
-sys.path.insert(0, "..")
+from glob import glob
 from omegaconf import OmegaConf
-import pytest
+from omegaconf.listconfig import ListConfig
+from omegaconf.dictconfig import DictConfig
 
 from deeplightning.config.load import load_config
 
 
+def check_all_keys_exist(cfg_base, cfg):
+    """Check if all keys in `cfg_base` exist in `cfg`
+    """
+    if not isinstance(cfg_base, DictConfig):
+        return False
+    if not isinstance(cfg, DictConfig):
+        return False
+    for key in cfg_base.keys():
+        if isinstance(cfg_base[key], DictConfig):
+            check_all_keys_exist(cfg_base[key], cfg[key])
+        else:
+            if key not in cfg:
+                print(f"Key '{key}' not in config")
+                return False
+    return True
+
+
 def test_configs():
 
     cfg_base = load_config(config_file="configs/_base.yaml")
     assert OmegaConf.is_config(cfg_base)
 
-    for cfg_filename in os.listdir("configs"):
-        if cfg_filename != "_base.yaml":
-            cfg = load_config(config_file = f"configs/{cfg_filename}")
-            assert OmegaConf.is_config(cfg)
+    configs = glob(os.path.join("configs", "*")) + glob(os.path.join("tests/helpers", "*"))
+    configs = [x for x in configs if x.endswith(".yaml") and not x.endswith("_base.yaml")]
+
+    for cfg_fp in configs:
+        cfg = load_config(config_file=cfg_fp)
+        assert OmegaConf.is_config(cfg), f"Not a config '{cfg_fp}'"
+        assert check_all_keys_exist(cfg_base, cfg), f"Incorrect structure found in '{cfg_fp}'"
 
    
\ No newline at end of file
diff --git a/tests/test_trainer.py b/tests/test_trainer.py
index 76035ae..7e529c4 100755
--- a/tests/test_trainer.py
+++ b/tests/test_trainer.py
@@ -35,7 +35,7 @@
 )
 def test_trainer(kwargs):
 
-    cfg = load_config(config_file = "configs/_dummy.yaml")
+    cfg = load_config(config_file = "tests/helpers/_dummy.yaml")
     
     cfg.engine.accelerator = kwargs["accelerator"]
     cfg.engine.strategy = kwargs["strategy"]

From da14e62bf396d0013f1fcdba3c304c1051687427 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Sun, 29 Oct 2023 22:56:38 +0000
Subject: [PATCH 05/19] fix cfg structure

---
 tests/helpers/_dummy.yaml | 8 ++++++++
 tests/test_configs.py     | 5 ++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/helpers/_dummy.yaml b/tests/helpers/_dummy.yaml
index ffdfc28..39163bd 100755
--- a/tests/helpers/_dummy.yaml
+++ b/tests/helpers/_dummy.yaml
@@ -1,4 +1,9 @@
 task: classification
+
+modes: 
+  train: 
+  test: 
+
 data:
   root: /Users/pme12/data/
   dataset: MNIST
@@ -60,6 +65,9 @@ train:
   early_stop_delta: 0.001
   early_stop_patience: 3
 
+test:
+  ckpt_test_path:
+  
 logger:
   name: wandb
   project_name: unittests
diff --git a/tests/test_configs.py b/tests/test_configs.py
index 07ad810..719bf38 100644
--- a/tests/test_configs.py
+++ b/tests/test_configs.py
@@ -19,7 +19,6 @@ def check_all_keys_exist(cfg_base, cfg):
             check_all_keys_exist(cfg_base[key], cfg[key])
         else:
             if key not in cfg:
-                print(f"Key '{key}' not in config")
                 return False
     return True
 
@@ -34,7 +33,7 @@ def test_configs():
 
     for cfg_fp in configs:
         cfg = load_config(config_file=cfg_fp)
-        assert OmegaConf.is_config(cfg), f"Not a config '{cfg_fp}'"
-        assert check_all_keys_exist(cfg_base, cfg), f"Incorrect structure found in '{cfg_fp}'"
+        assert OmegaConf.is_config(cfg)
+        assert check_all_keys_exist(cfg_base, cfg)
 
    
\ No newline at end of file

From 0fceefaec46fe1edd4ac3d365d3e57a264331ee2 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Thu, 2 Nov 2023 22:17:13 +0000
Subject: [PATCH 06/19] add param count print to base task

---
 .gitignore                 |   2 +
 deeplightning/task/base.py | 112 +++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 deeplightning/task/base.py

diff --git a/.gitignore b/.gitignore
index c400dee..d77cacf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,8 @@ tests/mlruns/
 tests/logs/
 tests/wandb/
 tests/lightning_logs/
+# hydra
+outputs/
 
 # model weights
 *.pt
diff --git a/deeplightning/task/base.py b/deeplightning/task/base.py
new file mode 100644
index 0000000..50e41fc
--- /dev/null
+++ b/deeplightning/task/base.py
@@ -0,0 +1,112 @@
+from typing import Tuple
+from omegaconf import OmegaConf
+import torch
+from torch import Tensor
+import lightning as pl
+
+from deeplightning.init.imports import init_obj_from_config
+from deeplightning.init.initializers import init_metrics
+from deeplightning.trainer.gather import gather_on_step, gather_on_epoch
+from deeplightning.utils.messages import info_message
+
+
+
+class BaseTask(pl.LightningModule):
+    """Base task module
+
+    LOGGING: manual logging `self.logger.log()` is used. This is more 
+    flexible as Lightning automatic logging `self.log()`) only allows 
+    scalars, not histograms, images, etc./ Additionally, auto-logging 
+    doesn't log at step 0, which is useful.
+
+    HOOKS: For *training*, the input to `training_epoch_end()` is the 
+    set of outputs from `training_step()`. For *validation*, the input 
+    to `validation_epoch_end()` is the output from `validation_step_end()` 
+    and the input to `validation_step_end()` is the output from 
+    `validation_step()`.
+    See https://github.com/PyTorchLightning/pytorch-lightning/issues/9811
+
+    Args
+        cfg: yaml configuration object
+    
+    """
+
+    def __init__(self, cfg: OmegaConf):
+        super().__init__()
+        self.cfg = cfg  #TODO check if this contains logger runtime params
+
+        # Lightning performs a partial validation epoch to ensure that 
+        # everything is correct. Use this to avoid logging during that
+        self.sanity_check = True
+
+        # Initialise metrics to track during training
+        self.device = torch.device("cuda") if cfg.engine.accelerator == "gpu" else torch.device('cpu')
+        self.metrics = init_metrics(cfg, device=self.device)
+
+        # Initialise label to track metrics against
+        self.step_label = "iteration"
+
+        # Aggregation utilities
+        self.gather_on_step = gather_on_step
+        self.gather_on_epoch = gather_on_epoch
+
+
+    def num_parameters(self):
+        """Prints the number of model parameters
+
+        Lightning's model summary does not give the correct number 
+        of trainable parameters. See 
+        https://github.com/PyTorchLightning/pytorch-lightning/issues/12130
+        """
+    
+        trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
+        nontrainable_params = sum(p.numel() for p in self.model.parameters() if not p.requires_grad)
+        total_params =  trainable_params + nontrainable_params
+        
+        info_message("Trainable model parameters: {:,d}".format(trainable_params))
+        info_message("Non-trainable model parameters: {:,d}".format(nontrainable_params))
+        info_message("Total model parameters: {:,d}".format(total_params))
+    
+    
+    def forward(self, x: Tensor) -> Tensor:
+        raise NotImplementedError
+
+
+    def configure_optimizers(self) -> Tuple[dict]:
+        raise NotImplementedError
+    
+
+    def training_step(self, batch, batch_idx):
+        raise NotImplementedError
+
+
+    def training_step_end(self):
+        raise NotImplementedError
+
+
+    def on_training_epoch_end(self):
+        raise NotImplementedError
+    
+
+    def validation_step(self, batch, batch_idx):
+        raise NotImplementedError
+
+
+    def validation_step_end(self):
+        raise NotImplementedError
+
+
+    def on_validation_epoch_end(self):
+        raise NotImplementedError
+
+
+    def test_step(self, batch, batch_idx):
+        raise NotImplementedError
+
+
+    def test_step_end(self):
+        raise NotImplementedError
+
+
+    def on_test_epoch_end(self):
+        raise NotImplementedError
\ No newline at end of file

From fecf019d4c8fe7cbed1143d34cf79ebd54461f17 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Thu, 2 Nov 2023 23:57:54 +0000
Subject: [PATCH 07/19] make num model params attribute read-only

---
 deeplightning/task/base.py | 54 +++++++++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/deeplightning/task/base.py b/deeplightning/task/base.py
index 50e41fc..c343514 100644
--- a/deeplightning/task/base.py
+++ b/deeplightning/task/base.py
@@ -39,10 +39,6 @@ def __init__(self, cfg: OmegaConf):
         # everything is correct. Use this to avoid logging during that
         self.sanity_check = True
 
-        # Initialise metrics to track during training
-        self.device = torch.device("cuda") if cfg.engine.accelerator == "gpu" else torch.device('cpu')
-        self.metrics = init_metrics(cfg, device=self.device)
-
         # Initialise label to track metrics against
         self.step_label = "iteration"
 
@@ -50,24 +46,46 @@ def __init__(self, cfg: OmegaConf):
         self.gather_on_step = gather_on_step
         self.gather_on_epoch = gather_on_epoch
 
-
-    def num_parameters(self):
-        """Prints the number of model parameters
-
-        Lightning's model summary does not give the correct number 
-        of trainable parameters. See 
-        https://github.com/PyTorchLightning/pytorch-lightning/issues/12130
+    
+    def on_task_init_end(self):
+        """Attributes to initialise at the end of the `__init__` method
+        of the class that inherits from this `BaseTask` class.
         """
+        self.set_num_model_params()
+        self.print_num_model_params()
+ 
+
+    @property
+    def num_trainable_params(self):
+        return self._num_trainable_params
     
-        trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
-        nontrainable_params = sum(p.numel() for p in self.model.parameters() if not p.requires_grad)
-        total_params =  trainable_params + nontrainable_params
-        
-        info_message("Trainable model parameters: {:,d}".format(trainable_params))
-        info_message("Non-trainable model parameters: {:,d}".format(nontrainable_params))
-        info_message("Total model parameters: {:,d}".format(total_params))
+
+    @property
+    def num_nontrainable_params(self):
+        return self._num_nontrainable_params
     
+
+    @property
+    def num_total_params(self):
+        return self._num_total_params
+
+
+    def set_num_model_params(self):
+        self._num_trainable_params = sum(
+            p.numel() for p in self.model.parameters() if p.requires_grad
+        )
+        self._num_nontrainable_params = sum(
+            p.numel() for p in self.model.parameters() if not p.requires_grad
+        )
+        self._num_total_params = self._num_trainable_params + self._num_nontrainable_params
+
     
+    def print_num_model_params(self):
+        info_message("Trainable model parameters: {:,d}".format(self.num_trainable_params))
+        info_message("Non-trainable model parameters: {:,d}".format(self.num_nontrainable_params))
+        info_message("Total model parameters: {:,d}".format(self.num_total_params))
+        
+
     def forward(self, x: Tensor) -> Tensor:
         raise NotImplementedError
 

From a70a5ce73898ada1cb880273ec6751a372c53520 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Thu, 2 Nov 2023 23:59:20 +0000
Subject: [PATCH 08/19] remove temp file

---
 condaenv.sqtapdy1.requirements.txt | 21 ---------------------
 1 file changed, 21 deletions(-)
 delete mode 100644 condaenv.sqtapdy1.requirements.txt

diff --git a/condaenv.sqtapdy1.requirements.txt b/condaenv.sqtapdy1.requirements.txt
deleted file mode 100644
index 9fb1cdc..0000000
--- a/condaenv.sqtapdy1.requirements.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-colorama==0.4.4
-deepspeed==0.5.10
-einops==0.4.0
-flask==2.0.3
-imagesize==1.4.1
-ipython
-librosa==0.9.2
-lightning==2.0.0
-matplotlib==3.5.1
-numpy==1.23.5
-omegaconf==2.1.1
-opencv-python==4.7.0.72
-pandas==1.5.3
-pyyaml==6.0
-seaborn==0.12.0
-torch==2.0.0
-torchaudio==2.0.1
-torchlibrosa==0.1.0
-torchmetrics==0.11.4
-torchvision==0.15.1
-wandb==0.12.21
\ No newline at end of file

From 05e16850798e9e17434a0c0480761861c2b96388 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Fri, 3 Nov 2023 00:23:20 +0000
Subject: [PATCH 09/19] move registries initialisations to __init__.py

---
 deeplightning/__init__.py  | 7 +++++++
 deeplightning/model/cnn.py | 4 +---
 deeplightning/registry.py  | 8 --------
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/deeplightning/__init__.py b/deeplightning/__init__.py
index e69de29..466ac42 100755
--- a/deeplightning/__init__.py
+++ b/deeplightning/__init__.py
@@ -0,0 +1,7 @@
+from deeplightning.registry import Registry
+
+
+TASK_REGISTRY = Registry("tasks")
+MODEL_REGISTRY = Registry("models")
+DATA_REGISTRY = Registry("datasets")
+METRIC_REGISTRY = Registry("metrics")
\ No newline at end of file
diff --git a/deeplightning/model/cnn.py b/deeplightning/model/cnn.py
index aba6572..434b797 100755
--- a/deeplightning/model/cnn.py
+++ b/deeplightning/model/cnn.py
@@ -12,8 +12,8 @@
     "spectrogram_cnn",
 ]
 
+
 class SymbolCNN(nn.Module):
-    
     def __init__(self, num_classes: int, num_channels: int):
         super().__init__()
         self.num_classes = num_classes
@@ -39,7 +39,6 @@ def forward(self, x):
 
 
 class SpectrogramCNN(nn.Module):
-    
     def __init__(self, num_classes: int, num_channels: int):
         super().__init__()
         self.num_classes = num_classes
@@ -65,7 +64,6 @@ def __init__(self, num_classes: int, num_channels: int):
             nn.ReLU(),
             nn.MaxPool2d(2))
         self.fc = nn.Linear(64 * 5 * 5, num_classes)
-        #self.dropout = nn.Dropout(p=0.1)
 
     def forward(self, x):
         x = self.conv1(x)
diff --git a/deeplightning/registry.py b/deeplightning/registry.py
index 6009bde..e2c031f 100755
--- a/deeplightning/registry.py
+++ b/deeplightning/registry.py
@@ -88,14 +88,6 @@ def get_element_names(self) -> List:
         return sorted(list(self.elements_dict.keys()))
 
 
-TASK_REGISTRY = Registry("tasks")
-MODEL_REGISTRY = Registry("models")
-DATA_REGISTRY = Registry("datasets")
-METRICS_REGISTRY = Registry("metrics")
-
-
-
-
 __TaskRegistry__ = [
     # Image
     "ImageClassification",

From 1804f9dfd9194fe25e534c077bb8edf5f513f0e2 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Fri, 3 Nov 2023 23:28:07 +0000
Subject: [PATCH 10/19] clean metrics

---
 README.md                                     | 127 +-----------------
 configs/SkinLesionSegmentation.yaml           |   2 +-
 deeplightning/init/initializers.py            |   2 +-
 deeplightning/registry.py                     |  46 +------
 deeplightning/task/vision/segmentation.py     |  11 +-
 .../hooks/SemanticSegmentation_hooks.py       |   3 +-
 deeplightning/utils/metrics.py                |  42 +++++-
 7 files changed, 57 insertions(+), 176 deletions(-)

diff --git a/README.md b/README.md
index 2b584a7..3c8b9da 100755
--- a/README.md
+++ b/README.md
@@ -1,11 +1,12 @@
+<<< in active development >>>
+
 <h1 align="center">
   <b>Deep Lightning</b><br>
 </h1>
 <p align="center">
     <a href="https://www.python.org"><img src="https://img.shields.io/badge/Python-3.10-brightgreen" /></a>
     <a href= "https://pytorch.org"><img src="https://img.shields.io/badge/PyTorch-2.0-yellow" /></a>
-    <a href= "https://www.pytorchlightning.ai"><img src="https://img.shields.io/badge/PyTorchLightning-2.0-orange" /></a>
-    <a href= "https://www.deepspeed.ai"><img src="https://img.shields.io/badge/DeepSpeed-0.5-blue" /></a>
+    <a href= "https://www.pytorchlightning.ai"><img src="https://img.shields.io/badge/Lightning-2.0-orange" /></a>
 </p>
 
 **Deep Lightning** is a configuration-based wrapper for training Deep Learning models with focus on parallel training, cross-platform compatibility and reproducibility. The philosophy is simple: from configuration to trackable and reproducible deep learning.
@@ -40,10 +41,7 @@ trainer.fit(model, data)
   * [Run](#run)
   * [Configure](#configure)
   * [Customize](#customize)
-* [Examples](#examples) 
-* [Results](#results) 
-* [Development](#development)
-* [Further Reading](#further-reading)
+* [Examples](#examples)
 
 # Overview
 
@@ -113,25 +111,8 @@ When a training run has been initiated, a link will be displayed in the terminal
 All config fields labelled `type` correspond to target classes. The format is `MODULE.CLASS` and the code will load class `CLASS` from `MODULE.py` (relative path). Note that `MODULE` can itself be composite, `X.Y.Z`, in which case the class `CLASS` will be loaded from `X/Y/Z.py`. 
 For example, `model.optimizer.target` could be existing `deepspeed.ops.adam.FusedAdam` or user-defined in `losses.custom.MyLoss`.
  
-### Example
+Example:
 ```yaml
-modes: 
-  train: true
-  test: false
-  
-task: ImageClassification
-
-data:
-  root: /data
-  dataset: MNIST
-  image_size: 28
-  num_channels: 1
-  num_classes: 10
-  num_workers: 4
-  batch_size: 256
-  module:
-    target: deeplightning.data.dataloaders.image.mnist.MNIST
-
 model:
   module:
     target: deeplightning.task.image.classification.TaskModule
@@ -140,49 +121,6 @@ model:
     params: 
       num_classes: 10
       num_channels: 1
-  optimizer:
-    target: torch.optim.SGD
-    params:
-      lr: 0.01
-      weight_decay: 0.01
-      momentum: 0.9
-  scheduler:
-    target: torch.optim.lr_scheduler.ExponentialLR
-    params:
-      gamma: 0.99
-    call:
-      interval: "epoch"
-      frequency: 1
-  loss:
-    target: torch.nn.CrossEntropyLoss
-    params:
-  
-engine:
-  accelerator: cpu
-  strategy: auto
-  devices: 1
-  num_nodes: 1
-  precision: 32
-
-train:
-  num_epochs: 1
-  val_every_n_epoch: 1
-  grad_accum_from_epoch: 0
-  grad_accum_every_n_batches: 1
-  ckpt_resume_path: null
-  ckpt_monitor_metric: val_acc  # used in `ModelCheckpoint` callback
-  ckpt_every_n_epochs: 1
-  ckpt_save_top_k: 1
-  early_stop_metric: null  # used in `EarlyStopping` callback
-  early_stop_delta: 0.001
-  early_stop_patience: 3
-
-logger:
-  name: wandb
-  project_name: trial
-  tags: ["_"] # cannot be empty
-  notes: null
-  log_every_n_steps: 20
 ```
 
 ### Customize
@@ -199,58 +137,3 @@ Beyond changing parameters values in existing configs, you can customize the fol
 
 See [`examples`](https://github.com/pme0/DeepLightning/tree/master/examples) for details.
 
-
-# Results
-
-[results on acceleration, memory use, etc.]
-
-
-# Development
-
-### Functionalities
-- [x] tracking logger (losses, learning rate, etc.)
-- [x] artifact storing (config, image, etc.)
-- [x] parallel training
-  - [x] multi-gpu
-  - [x] multi-node
-  - [x] backend engines:
-    - [x] ddp
-    - [x] deepspeed_stage_1 
-    - [x] deepspeed_stage_2
-    - [ ] deepspeed_stage_3 (TODO resuming, sharded initialization)
-- [x] 16-bit precision
-- [x] periodic model checkpoints
-- [ ] resume training from model checkpoint --- `deepspeed` untested [[docs](https://pytorch-lightning.readthedocs.io/en/stable/advanced/advanced_gpu.html#deepspeed)] [[docs](https://pytorch-lightning.readthedocs.io/en/stable/advanced/advanced_gpu.html#collating-single-file-checkpoint-for-deepspeed-zero-stage-3)];
-- [ ] sharded loading via LightningModule hook `configure_sharded_model(self):` [[docs](https://pytorch-lightning.readthedocs.io/en/latest/advanced/model_parallel.html#enabling-module-sharding-for-maximum-memory-efficiency)];
-- [x] gradient accumulation
-- [x] early stopping
-- [x] prediction API [TODO: add batch support]
-- [ ] multiple losses/optimizers e.g. GAN; [[docs](https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html?highlight=configure_optimizers#configure-optimizers)]; though deepspeed doesn't allow this atm "DeepSpeed currently only supports single optimizer, single scheduler within the training loop." [[docs](https://pytorch-lightning.readthedocs.io/en/stable/advanced/advanced_gpu.html#deepspeed)]
-- [x] reproducible examples
-  - [x] image classification
-  - [x] image reconstruction
-- [ ] model registry
-
-
-### Notes
-
-- :triangular_flag_on_post: on `deepspeed=0.5.10`, optimizer `deepspeed.ops.adam.FusedAdam` gives `AssertionError: CUDA_HOME does not exist, unable to compile CUDA op(s)`. Mentioned in issue [#1279](https://github.com/microsoft/DeepSpeed/issues/1279);
-- :warning: effective batch size is `batch * num_gpus * num_nodes` [[docs](https://pytorch-lightning.readthedocs.io/en/stable/advanced/multi_gpu.html#batch-size)] but huge batch size can cause convergence difficulties [[paper](https://arxiv.org/abs/1706.02677)];
-- :warning: deepspeed single-file checkpointing requires caution [[docs](https://pytorch-lightning.readthedocs.io/en/latest/advanced/advanced_gpu.html#collating-single-file-checkpoint-for-deepspeed-zero-stage-3)] [[docs](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.plugins.training_type.DeepSpeedPlugin.html)]
-
-
-# Further Reading
-
-**Pytorch-Lightning** organises modules as hardware-agnostic and loop-less code; separated model and backend engine for scalable deep learning
-|| :information_source: [website](https://lightning.ai/) | :floppy_disk: [github](https://github.com/Lightning-AI/lightning) ||
-
-**Weights & Biases** tracks machine learning experiments and provided real-time visualisation via a web interface
-|| :information_source: [website](https://wandb.ai/site) | :floppy_disk: [github](https://github.com/wandb/wandb) ||
-
-**DeepSpeed** is a distributed backend which reduces the training memory footprint with a Zero Redundancy Optimizer (ZeRO). It partitions model states and gradients to save memory, unlike traditional data parallelism where memory states are replicated across data-parallel processes. This allows training of large models with large batch sizes 
-|| :information_source: [website](https://www.deepspeed.ai) | :floppy_disk: [github](https://github.com/microsoft/DeepSpeed) | :page_with_curl: [ZeRO-3](https://arxiv.org/abs/1910.02054) ||
-
-**Flask** is a server-side web framework that supports building and deploying web applications such as ML prediction APIs 
-|| :information_source: [website](https://flask.palletsprojects.com) | :floppy_disk: [github](https://github.com/pallets/flask) ||
-
-
diff --git a/configs/SkinLesionSegmentation.yaml b/configs/SkinLesionSegmentation.yaml
index 493df94..c81f0ae 100755
--- a/configs/SkinLesionSegmentation.yaml
+++ b/configs/SkinLesionSegmentation.yaml
@@ -75,7 +75,7 @@ test:
 
 logger:
   name: wandb
-  project_name: trial
+  project_name: skinlesion
   tags: ["_"] # cannot be empty
   notes: null
   log_every_n_steps: 20
\ No newline at end of file
diff --git a/deeplightning/init/initializers.py b/deeplightning/init/initializers.py
index dfa916c..bcc6e09 100755
--- a/deeplightning/init/initializers.py
+++ b/deeplightning/init/initializers.py
@@ -7,7 +7,7 @@
 from deeplightning.config.defaults import __ConfigGroups__
 from deeplightning.init.imports import init_module
 from deeplightning.trainer.trainer import DLTrainer
-#from deeplightning.registry import __MetricsRegistry__
+#from deeplightning.utils.metrics import __MetricsRegistry__
 
 
 
diff --git a/deeplightning/registry.py b/deeplightning/registry.py
index e2c031f..88a1ce0 100755
--- a/deeplightning/registry.py
+++ b/deeplightning/registry.py
@@ -1,7 +1,6 @@
 from lightning.pytorch.loggers import WandbLogger
 
 #from deeplightning.logger.wandb import wandbLogger
-"""
 from deeplightning.trainer.hooks.ImageClassification_hooks import (
     training_step__ImageClassification,
     training_step_end__ImageClassification,
@@ -23,19 +22,7 @@
     test_step_end__SemanticSegmentation,
     on_test_epoch_end__SemanticSegmentation)
 
-from deeplightning.trainer.hooks.AudioClassification_hooks import (
-    training_step__AudioClassification,
-    training_step_end__AudioClassification,
-    training_epoch_end__AudioClassification,
-    validation_step__AudioClassification,
-    validation_step_end__AudioClassification,
-    validation_epoch_end__AudioClassification,
-    test_step__AudioClassification,
-    test_step_end__AudioClassification,
-    test_epoch_end__AudioClassification)
 
-from deeplightning.utils.metrics import Metric_Accuracy, Metric_ConfusionMatrix, Metric_PrecisionRecallCurve
-"""
 
 from typing import Any, Callable, List, Type, TypeVar
 T = TypeVar('T')
@@ -103,7 +90,7 @@ def get_element_names(self) -> List:
 }
 
 
-'''
+
 __HooksRegistry__ = {
     # Image
     "ImageClassification": {
@@ -149,34 +136,3 @@ def get_element_names(self) -> List:
 }
 
 
-__MetricsRegistry__ = {
-    # Image
-	"ImageClassification": {
-        "Accuracy_train": Metric_Accuracy,
-        "Accuracy_val": Metric_Accuracy,
-        "Accuracy_test": Metric_Accuracy,
-        "ConfusionMatrix_val": Metric_ConfusionMatrix,
-        "ConfusionMatrix_test": Metric_ConfusionMatrix,
-		"PrecisionRecallCurve_val": Metric_PrecisionRecallCurve,
-		"PrecisionRecallCurve_test": Metric_PrecisionRecallCurve,
-	},
-    "ImageReconstruction": {
-        "_": None,
-    },
-    "ObjectDetection": {
-        "_": None,
-    },
-    "SemanticSegmentation": {
-        "Accuracy_train": Metric_Accuracy,
-        "Accuracy_val": Metric_Accuracy,
-        "Accuracy_test": Metric_Accuracy,
-    },
-    # Audio
-    "AudioClassification":{
-        "Accuracy": Metric_Accuracy,
-        "ConfusionMatrix": Metric_ConfusionMatrix,
-		"PrecisionRecallCurve": Metric_PrecisionRecallCurve,
-    },
-}
-
-'''
\ No newline at end of file
diff --git a/deeplightning/task/vision/segmentation.py b/deeplightning/task/vision/segmentation.py
index 4320de3..b0b5d4e 100755
--- a/deeplightning/task/vision/segmentation.py
+++ b/deeplightning/task/vision/segmentation.py
@@ -8,7 +8,8 @@
 from deeplightning.init.initializers import init_metrics
 from deeplightning.trainer.gather import gather_on_step, gather_on_epoch
 from deeplightning.utils.messages import info_message
-from deeplightning.registry import __MetricsRegistry__, __HooksRegistry__
+from deeplightning.registry import __HooksRegistry__
+from deeplightning.utils.metrics import classification_accuracy
 
 
 
@@ -48,7 +49,13 @@ def __init__(self, cfg: OmegaConf):
 
         # Initialise metrics to track during training
         torch_device = torch.device("cuda") if cfg.engine.accelerator == "gpu" else torch.device('cpu')
-        self.metrics = init_metrics(cfg, device=torch_device)
+
+        #self.metrics = init_metrics(cfg, device=torch_device)
+        self.metrics = {
+            "Accuracy_train": classification_accuracy(cfg),
+            "Accuracy_val": classification_accuracy(cfg),
+            "Accuracy_test": classification_accuracy(cfg),
+        }
 
         # Initialise label to track metrics against
         self.step_label = "iteration"
diff --git a/deeplightning/trainer/hooks/SemanticSegmentation_hooks.py b/deeplightning/trainer/hooks/SemanticSegmentation_hooks.py
index 784aca9..13c4d99 100644
--- a/deeplightning/trainer/hooks/SemanticSegmentation_hooks.py
+++ b/deeplightning/trainer/hooks/SemanticSegmentation_hooks.py
@@ -85,12 +85,13 @@ def validation_step__SemanticSegmentation(self, batch, batch_idx):
     outputs = process_model_outputs(outputs, self.model)
     preds = torch.argmax(outputs, dim=1)
     
-    raise
+    '''
     for i in range(5):
         print(batch["inputs_paths"][i])
         print(batch["masks_paths"][i])
         save_image(preds[0].unsqueeze(0).float(), fp=f"/Users/pme/Downloads/segm/mask_step{self.global_step}.jpeg")
         i += 1
+    '''
 
     # loss
     val_loss = self.loss(outputs, batch["masks"])
diff --git a/deeplightning/utils/metrics.py b/deeplightning/utils/metrics.py
index 9aeb698..1598587 100755
--- a/deeplightning/utils/metrics.py
+++ b/deeplightning/utils/metrics.py
@@ -12,7 +12,8 @@
 from matplotlib.figure import Figure
 from matplotlib import pyplot as plt
 
-from deeplightning.registry import METRICS_REGISTRY
+from deeplightning import METRIC_REGISTRY
+#from deeplightning.utils.metrics import Metric_Accuracy, Metric_ConfusionMatrix, Metric_PrecisionRecallCurve
 
 
 __all__ = [
@@ -22,6 +23,39 @@
 ]
 
 
+'''
+__MetricsRegistry__ = {
+    # Image
+	"ImageClassification": {
+        "Accuracy_train": Metric_Accuracy,
+        "Accuracy_val": Metric_Accuracy,
+        "Accuracy_test": Metric_Accuracy,
+        "ConfusionMatrix_val": Metric_ConfusionMatrix,
+        "ConfusionMatrix_test": Metric_ConfusionMatrix,
+		"PrecisionRecallCurve_val": Metric_PrecisionRecallCurve,
+		"PrecisionRecallCurve_test": Metric_PrecisionRecallCurve,
+	},
+    "ImageReconstruction": {
+        "_": None,
+    },
+    "ObjectDetection": {
+        "_": None,
+    },
+    "SemanticSegmentation": {
+        "Accuracy_train": Metric_Accuracy,
+        "Accuracy_val": Metric_Accuracy,
+        "Accuracy_test": Metric_Accuracy,
+    },
+    # Audio
+    "AudioClassification":{
+        "Accuracy": Metric_Accuracy,
+        "ConfusionMatrix": Metric_ConfusionMatrix,
+		"PrecisionRecallCurve": Metric_PrecisionRecallCurve,
+    },
+}
+'''
+
+
 class ClassificationAccuracy(MulticlassAccuracy):
     """Classification Accuracy metric, inheriting from torchmetrics
     """
@@ -33,7 +67,7 @@ def __init__(self, cfg: OmegaConf):
         super().__init__(**args)
 
 
-@METRICS_REGISTRY.register_element()
+@METRIC_REGISTRY.register_element()
 def classification_accuracy(cfg) -> ClassificationAccuracy:
     return ClassificationAccuracy(cfg)
 
@@ -80,7 +114,7 @@ def draw(self,
 		return fig
 	
 
-@METRICS_REGISTRY.register_element()
+@METRIC_REGISTRY.register_element()
 def precision_recall_curve(cfg) -> PrecisionRecallCurve:
     return PrecisionRecallCurve(cfg)
 
@@ -130,7 +164,7 @@ def draw(self,
 		return fig
 
 
-@METRICS_REGISTRY.register_element()	
+@METRIC_REGISTRY.register_element()	
 def confusion_matrix(cfg) -> ConfusionMatrix:
     return ConfusionMatrix(cfg)
 

From dce0b46d63396ff0433bebda9b6e2d543463df7f Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Sat, 4 Nov 2023 15:36:34 +0000
Subject: [PATCH 11/19] add docs for class  attributes

---
 configs/SkinLesionSegmentation.yaml |  2 +-
 deeplightning/task/base.py          | 76 +++++++++++++++--------------
 2 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/configs/SkinLesionSegmentation.yaml b/configs/SkinLesionSegmentation.yaml
index c81f0ae..6d6aef2 100755
--- a/configs/SkinLesionSegmentation.yaml
+++ b/configs/SkinLesionSegmentation.yaml
@@ -75,7 +75,7 @@ test:
 
 logger:
   name: wandb
-  project_name: skinlesion
+  project_name: skin-lesion-segmentation
   tags: ["_"] # cannot be empty
   notes: null
   log_every_n_steps: 20
\ No newline at end of file
diff --git a/deeplightning/task/base.py b/deeplightning/task/base.py
index c343514..cdc0bd8 100644
--- a/deeplightning/task/base.py
+++ b/deeplightning/task/base.py
@@ -12,65 +12,67 @@
 
 
 class BaseTask(pl.LightningModule):
-    """Base task module
-
-    LOGGING: manual logging `self.logger.log()` is used. This is more 
-    flexible as Lightning automatic logging `self.log()`) only allows 
-    scalars, not histograms, images, etc./ Additionally, auto-logging 
-    doesn't log at step 0, which is useful.
-
-    HOOKS: For *training*, the input to `training_epoch_end()` is the 
-    set of outputs from `training_step()`. For *validation*, the input 
-    to `validation_epoch_end()` is the output from `validation_step_end()` 
-    and the input to `validation_step_end()` is the output from 
-    `validation_step()`.
-    See https://github.com/PyTorchLightning/pytorch-lightning/issues/9811
-
-    Args
+    """Base task module.
+
+    Notes:
+        logging: manual logging `self.logger.log()` is used. This is more
+            flexible, as Lightning's automatic logging (`self.log()`) only
+            allows scalars, not histograms, images, etc. Additionally,
+            auto-logging doesn't log at step 0, and a step-0 log is useful.
+        hooks: For *training*, the input to `training_epoch_end()` is the 
+            set of outputs from `training_step()`. For *validation*, the 
+            input to `validation_epoch_end()` is the output from 
+            `validation_step_end()` and the input to `validation_step_end()` 
+            is the output from `validation_step()`. See 
+            https://github.com/PyTorchLightning/pytorch-lightning/issues/9811
+
+    Args:
         cfg: yaml configuration object
-    
-    """
 
-    def __init__(self, cfg: OmegaConf):
+    Attributes:
+        cfg: (OmegaConf) yaml configuration object.
+        step_label: (str) label to track/log metrics against.
+        sanity_check: (bool) Lightning performs a partial validation epoch
+            at the start, to ensure no issues at the validation stage. The 
+            attribute `sanity_check` is set to `True` initially and set to 
+            `False` after the sanity check run is complete. This is to
+            prevent logging during that preliminary run.
+    """
+    def __init__(self, cfg: OmegaConf) -> None:
         super().__init__()
         self.cfg = cfg  #TODO check if this contains logger runtime params
-
-        # Lightning performs a partial validation epoch to ensure that 
-        # everything is correct. Use this to avoid logging during that
-        self.sanity_check = True
-
-        # Initialise label to track metrics against
         self.step_label = "iteration"
-
-        # Aggregation utilities
-        self.gather_on_step = gather_on_step
-        self.gather_on_epoch = gather_on_epoch
-
+        self.sanity_check = True
     
-    def on_task_init_end(self):
-        """Attributes to initialise at the end of the `__init__` method
-        of the class that inherits from this `BaseTask` class.
+    def on_task_init_end(self) -> None:
+        """Additional attributes to initialise at the end of the `__init__` 
+        method of the class that inherits from this `BaseTask` class.
+
+        Attributes:
+            num_trainable_params: (int) number of trainable model parameters.
+            num_nontrainable_params: (int) number of nontrainable model parameters.
+            num_total_params: (int) total number of model parameters.
         """
         self.set_num_model_params()
         self.print_num_model_params()
  
 
     @property
-    def num_trainable_params(self):
+    def num_trainable_params(self) -> int:
         return self._num_trainable_params
     
 
     @property
-    def num_nontrainable_params(self):
+    def num_nontrainable_params(self) -> int:
         return self._num_nontrainable_params
     
 
     @property
-    def num_total_params(self):
+    def num_total_params(self) -> int:
         return self._num_total_params
 
 
-    def set_num_model_params(self):
+    def set_num_model_params(self) -> None:
         self._num_trainable_params = sum(
             p.numel() for p in self.model.parameters() if p.requires_grad
         )
@@ -80,7 +82,7 @@ def set_num_model_params(self):
         self._num_total_params = self._num_trainable_params + self._num_nontrainable_params
 
     
-    def print_num_model_params(self):
+    def print_num_model_params(self) -> None:
         info_message("Trainable model parameters: {:,d}".format(self.num_trainable_params))
         info_message("Non-trainable model parameters: {:,d}".format(self.num_nontrainable_params))
         info_message("Total model parameters: {:,d}".format(self.num_total_params))

From 06f74f15577b1eec165300e68e0688bc033ad6b5 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Sat, 4 Nov 2023 16:37:08 +0000
Subject: [PATCH 12/19] add base task class

---
 configs/SkinLesionSegmentation.yaml           |  2 +-
 deeplightning/task/vision/segmentation.py     | 42 +++++++------------
 .../hooks/SemanticSegmentation_hooks.py       | 12 +++++-
 deeplightning/trainer/utils.py                |  9 ----
 4 files changed, 27 insertions(+), 38 deletions(-)
 delete mode 100644 deeplightning/trainer/utils.py

diff --git a/configs/SkinLesionSegmentation.yaml b/configs/SkinLesionSegmentation.yaml
index 6d6aef2..d8c9b40 100755
--- a/configs/SkinLesionSegmentation.yaml
+++ b/configs/SkinLesionSegmentation.yaml
@@ -26,7 +26,7 @@ data:
  
 model:
   module: 
-    target: deeplightning.task.vision.segmentation.TaskModule
+    target: deeplightning.task.vision.segmentation.SemanticSegmentationTask
   network:
     target: torchvision.models.segmentation.deeplabv3_resnet50
     params: 
diff --git a/deeplightning/task/vision/segmentation.py b/deeplightning/task/vision/segmentation.py
index b0b5d4e..eef4515 100755
--- a/deeplightning/task/vision/segmentation.py
+++ b/deeplightning/task/vision/segmentation.py
@@ -10,29 +10,17 @@
 from deeplightning.utils.messages import info_message
 from deeplightning.registry import __HooksRegistry__
 from deeplightning.utils.metrics import classification_accuracy
+from deeplightning.task.base import BaseTask
 
 
+class SemanticSegmentationTask(BaseTask):
+    """ Task module for Semantic Segmentation. 
 
-class TaskModule(pl.LightningModule):
-    """ Task module for Image Classification. 
-
-    LOGGING: manual logging `self.logger.log()` is used. This
-    is more flexible as PyTorchLightning automatic logging 
-    `self.log()`) only allows scalars, not histograms, images, etc.
-    Additionally, auto-logging doesn't log at step 0, which is useful.
-
-    Parameters
-    ----------
-    cfg : yaml configuration object
-    
+    Args:
+        cfg: yaml configuration object
     """
-
     def __init__(self, cfg: OmegaConf):
-        super().__init__()
-        self.cfg = cfg  #TODO check if this contains logger runtime params
-        self.num_classes = cfg.model.network.params.num_classes
-        #self.classif_task = "binary" if self.num_classes == 2 else "multiclass"
-
+        super().__init__(cfg=cfg)
         self.loss = init_obj_from_config(cfg.model.loss)
         self.model = init_obj_from_config(cfg.model.network)
         self.optimizer = init_obj_from_config(cfg.model.optimizer, self.model.parameters())
@@ -64,15 +52,15 @@ def __init__(self, cfg: OmegaConf):
         # to make the hooks bound to the class (so that they can access class attributes 
         #  using `self.something`), the assignment must specify the class name as follows:
         # `ClassName.fn = my_fn` rather than `self.fn = my_fn`
-        TaskModule._training_step = __HooksRegistry__[cfg.task]["training_step"]
-        TaskModule._training_step_end = __HooksRegistry__[cfg.task]["training_step_end"]
-        TaskModule._on_training_epoch_end = __HooksRegistry__[cfg.task]["on_training_epoch_end"]
-        TaskModule._validation_step = __HooksRegistry__[cfg.task]["validation_step"]
-        TaskModule._validation_step_end = __HooksRegistry__[cfg.task]["validation_step_end"]
-        TaskModule._on_validation_epoch_end = __HooksRegistry__[cfg.task]["on_validation_epoch_end"]
-        TaskModule._test_step = __HooksRegistry__[cfg.task]["test_step"]
-        TaskModule._test_step_end = __HooksRegistry__[cfg.task]["test_step_end"]
-        TaskModule._on_test_epoch_end = __HooksRegistry__[cfg.task]["on_test_epoch_end"]
+        SemanticSegmentationTask._training_step = __HooksRegistry__[cfg.task]["training_step"]
+        SemanticSegmentationTask._training_step_end = __HooksRegistry__[cfg.task]["training_step_end"]
+        SemanticSegmentationTask._on_training_epoch_end = __HooksRegistry__[cfg.task]["on_training_epoch_end"]
+        SemanticSegmentationTask._validation_step = __HooksRegistry__[cfg.task]["validation_step"]
+        SemanticSegmentationTask._validation_step_end = __HooksRegistry__[cfg.task]["validation_step_end"]
+        SemanticSegmentationTask._on_validation_epoch_end = __HooksRegistry__[cfg.task]["on_validation_epoch_end"]
+        SemanticSegmentationTask._test_step = __HooksRegistry__[cfg.task]["test_step"]
+        SemanticSegmentationTask._test_step_end = __HooksRegistry__[cfg.task]["test_step_end"]
+        SemanticSegmentationTask._on_test_epoch_end = __HooksRegistry__[cfg.task]["on_test_epoch_end"]
 
         # Aggregation utilities
         self.gather_on_step = gather_on_step
diff --git a/deeplightning/trainer/hooks/SemanticSegmentation_hooks.py b/deeplightning/trainer/hooks/SemanticSegmentation_hooks.py
index 13c4d99..6f71d33 100644
--- a/deeplightning/trainer/hooks/SemanticSegmentation_hooks.py
+++ b/deeplightning/trainer/hooks/SemanticSegmentation_hooks.py
@@ -4,9 +4,19 @@
 
 from deeplightning.trainer.batch import dictionarify_batch
 from deeplightning.trainer.gather import gather_on_step, gather_on_epoch
-from deeplightning.trainer.utils import process_model_outputs
 
 
+def process_model_outputs(outputs, model):
+    """Processes model outputs and selects the appropriate elements
+    """
+    if model.__class__.__name__ == "DeepLabV3":
+        # `DeepLabV3` returns a dictionary with keys `out` (segmentation
+        # mask) and optionally `aux` if an auxiliary classifier is used.
+        return outputs["out"]
+    else:
+        return outputs
+    
+
 def training_step__SemanticSegmentation(self, batch, batch_idx):
     """ Hook for `training_step`.
 
diff --git a/deeplightning/trainer/utils.py b/deeplightning/trainer/utils.py
deleted file mode 100644
index 9bf941b..0000000
--- a/deeplightning/trainer/utils.py
+++ /dev/null
@@ -1,9 +0,0 @@
-
-def process_model_outputs(outputs, model):
-    """Processes model outouts and selects the appropriate elements
-    """
-
-    if model.__class__.__name__ == "DeepLabV3":
-        # `DeepLabV3` returns a dictionaty with keys `out` (segmentation mask)
-        # and optionally `aux` if an auxiliary classifier is used.
-        return outputs["out"]
\ No newline at end of file

From d080049e4b18f4ad03ddbb995946f17a3b6a459a Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Sat, 4 Nov 2023 16:51:58 +0000
Subject: [PATCH 13/19] bring hooks inside lightningmodule

---
 deeplightning/task/specs.py               |   1 -
 deeplightning/task/vision/segmentation.py | 241 +++++++++++++++++++---
 deeplightning/trainer/batch.py            |   2 +-
 3 files changed, 209 insertions(+), 35 deletions(-)

diff --git a/deeplightning/task/specs.py b/deeplightning/task/specs.py
index 1bf471b..97c238a 100644
--- a/deeplightning/task/specs.py
+++ b/deeplightning/task/specs.py
@@ -22,7 +22,6 @@ def __init__(self, cfg: OmegaConf):
 class ImageClassificationTask(TaskSpecification):
     def __init__(self, cfg: OmegaConf):
         super().__init__()
-        if cfg.task = 
         self.metrics = [
             "classification_accuracy",
         ]
diff --git a/deeplightning/task/vision/segmentation.py b/deeplightning/task/vision/segmentation.py
index eef4515..9c40876 100755
--- a/deeplightning/task/vision/segmentation.py
+++ b/deeplightning/task/vision/segmentation.py
@@ -11,8 +11,20 @@
 from deeplightning.registry import __HooksRegistry__
 from deeplightning.utils.metrics import classification_accuracy
 from deeplightning.task.base import BaseTask
+from deeplightning.trainer.batch import dictionarify_batch
 
 
+def process_model_outputs(outputs, model):
+    """Processes model outputs and selects the appropriate elements
+    """
+    if model.__class__.__name__ == "DeepLabV3":
+        # `DeepLabV3` returns a dictionary with keys `out` (segmentation
+        # mask) and optionally `aux` if an auxiliary classifier is used.
+        return outputs["out"]
+    else:
+        return outputs
+        
+
 class SemanticSegmentationTask(BaseTask):
     """ Task module for Semantic Segmentation. 
 
@@ -52,19 +64,19 @@ def __init__(self, cfg: OmegaConf):
         # to make the hooks bound to the class (so that they can access class attributes 
         #  using `self.something`), the assignment must specify the class name as follows:
         # `ClassName.fn = my_fn` rather than `self.fn = my_fn`
-        SemanticSegmentationTask._training_step = __HooksRegistry__[cfg.task]["training_step"]
-        SemanticSegmentationTask._training_step_end = __HooksRegistry__[cfg.task]["training_step_end"]
-        SemanticSegmentationTask._on_training_epoch_end = __HooksRegistry__[cfg.task]["on_training_epoch_end"]
-        SemanticSegmentationTask._validation_step = __HooksRegistry__[cfg.task]["validation_step"]
-        SemanticSegmentationTask._validation_step_end = __HooksRegistry__[cfg.task]["validation_step_end"]
-        SemanticSegmentationTask._on_validation_epoch_end = __HooksRegistry__[cfg.task]["on_validation_epoch_end"]
-        SemanticSegmentationTask._test_step = __HooksRegistry__[cfg.task]["test_step"]
-        SemanticSegmentationTask._test_step_end = __HooksRegistry__[cfg.task]["test_step_end"]
-        SemanticSegmentationTask._on_test_epoch_end = __HooksRegistry__[cfg.task]["on_test_epoch_end"]
+        #SemanticSegmentationTask._training_step = __HooksRegistry__[cfg.task]["training_step"]
+        #SemanticSegmentationTask._training_step_end = __HooksRegistry__[cfg.task]["training_step_end"]
+        #SemanticSegmentationTask._on_training_epoch_end = __HooksRegistry__[cfg.task]["on_training_epoch_end"]
+        #SemanticSegmentationTask._validation_step = __HooksRegistry__[cfg.task]["validation_step"]
+        #SemanticSegmentationTask._validation_step_end = __HooksRegistry__[cfg.task]["validation_step_end"]
+        #SemanticSegmentationTask._on_validation_epoch_end = __HooksRegistry__[cfg.task]["on_validation_epoch_end"]
+        #SemanticSegmentationTask._test_step = __HooksRegistry__[cfg.task]["test_step"]
+        #SemanticSegmentationTask._test_step_end = __HooksRegistry__[cfg.task]["test_step_end"]
+        #SemanticSegmentationTask._on_test_epoch_end = __HooksRegistry__[cfg.task]["on_test_epoch_end"]
 
         # Aggregation utilities
-        self.gather_on_step = gather_on_step
-        self.gather_on_epoch = gather_on_epoch
+        #self.gather_on_step = gather_on_step
+        #self.gather_on_epoch = gather_on_epoch
 
         # PyTorch-Lightning's model summary does not give the 
         # correct  number of trainable parameters; see 
@@ -102,8 +114,7 @@ def configure_optimizers(self) -> Tuple[dict]:
         `validation_step()`.
 
         https://github.com/PyTorchLightning/pytorch-lightning/issues/9811
-    """
-
+    """    
 
     def training_step(self, batch, batch_idx):
         """ Hook for `training_step`.
@@ -113,20 +124,58 @@ def training_step(self, batch, batch_idx):
         batch : object containing the data output by the dataloader.
         batch_idx : index of batch
         """
-        return self._training_step(batch, batch_idx)
+
+        # convert batch to dictionary form
+        batch = dictionarify_batch(batch, self.cfg.data.dataset)
+
+        # forward pass
+        outputs = self.model(batch["inputs"])
+        outputs = process_model_outputs(outputs, self.model)
+
+        # loss
+        train_loss = self.loss(outputs, batch["masks"])
+
+        if "train_loss" not in self.training_step_outputs:
+            self.training_step_outputs["train_loss"] = []
+        self.training_step_outputs["train_loss"].append(train_loss)
+
+        # metrics
+        self.metrics["Accuracy_train"].update(preds=outputs, target=batch["masks"])
+
+        # the output is not used but returning None gives the following warning
+        # """lightning/pytorch/loops/optimization/automatic.py:129: 
+        # UserWarning: `training_step` returned `None`. If this was 
+        # on purpose, ignore this warning..."""
+        return {"loss": train_loss}
 
 
     def training_step_end(self):
         """ Hook for `training_step_end`.
         """
-        self._training_step_end()
+        if self.global_step % self.cfg.logger.log_every_n_steps == 0:
+
+            metrics = {}
+
+            metrics["train_loss"] = torch.stack(self.training_step_outputs["train_loss"]).mean()
+            self.training_step_outputs.clear()  # free memory
+
+            # accuracy (batch only)
+            metrics["train_acc"] = self.metrics["Accuracy_train"].compute()
+            self.metrics["Accuracy_train"].reset()
+
+            # log learning rate
+            #metrics['lr'] = self.lr_schedulers().get_last_lr()[0]
+                
+            # log training metrics
+            metrics[self.step_label] = self.global_step
+            self.logger.log_metrics(metrics)
 
 
     def on_training_epoch_end(self):
         """ Hook for `on_training_epoch_end`.
         """
-        self._on_training_epoch_end()
-    
+        pass
+
 
     def validation_step(self, batch, batch_idx):
         """ Hook for `validation_step`.
@@ -134,42 +183,168 @@ def validation_step(self, batch, batch_idx):
         Parameters
         ----------
         batch : object containing the data output by the dataloader.
-        batch_idx : index of batch.
-
+        batch_idx : index of batch
         """
-        return self._validation_step(batch, batch_idx)
+
+        # convert batch to dictionary form
+        batch = dictionarify_batch(batch, self.cfg.data.dataset)
+            
+        # forward pass
+        outputs = self.model(batch["inputs"])
+        outputs = process_model_outputs(outputs, self.model)
+        preds = torch.argmax(outputs, dim=1)
+        
+        '''
+        for i in range(5):
+            print(batch["inputs_paths"][i])
+            print(batch["masks_paths"][i])
+            save_image(preds[0].unsqueeze(0).float(), fp=f"/Users/pme/Downloads/segm/mask_step{self.global_step}.jpeg")
+            i += 1
+        '''
+
+        # loss
+        val_loss = self.loss(outputs, batch["masks"])
+
+        if "val_loss" not in self.validation_step_outputs:
+            self.validation_step_outputs["val_loss"] = []
+        self.validation_step_outputs["val_loss"].append(val_loss)
+
+        # metrics
+        self.metrics["Accuracy_val"].update(preds = preds, target = batch["masks"])
+        #self.metrics["ConfusionMatrix_val"].update(preds = preds, target = batch["masks"])
+        #self.metrics["PrecisionRecallCurve_val"].update(preds = outputs, target = batch["masks"])
 
 
-    def validation_step_end(self):
+    def validation_step_end__SemanticSegmentation(self):
         """ Hook for `validation_step_end`.
         """
-        return self._validation_step_end()
+        pass
 
 
-    def on_validation_epoch_end(self):
-        """ Hook for `validation_epoch_end`.
+    def on_validation_epoch_end__SemanticSegmentation(self):
+        """ Hook for `on_validation_epoch_end`.
         """
-        self._on_validation_epoch_end()
 
-
-    def test_step(self, batch, batch_idx):
+        #TODO confirm on multi-gpu
+        #print('\nself.validation_step_outputs["val_loss"]', len(self.validation_step_outputs["val_loss"]), '\n')
+
+        metrics = {}
+        metrics["val_loss"] = torch.stack(self.validation_step_outputs["val_loss"]).mean()
+        self.validation_step_outputs.clear()  # free memory
+
+        # accuracy
+        metrics["val_acc"] = self.metrics["Accuracy_val"].compute()
+        self.metrics["Accuracy_val"].reset()
+
+        # confusion matrix
+        '''
+        cm = self.metrics["ConfusionMatrix_val"].compute()
+        figure = self.metrics["ConfusionMatrix_val"].draw(
+            confusion_matrix=cm, subset="val", epoch=self.current_epoch+1)
+        metrics["val_confusion_matrix"] = wandb.Image(figure, 
+            caption=f"Confusion Matrix [val, epoch {self.current_epoch+1}]")
+        self.metrics["ConfusionMatrix_val"].reset()
+        '''
+
+        # precision-recall
+        '''
+        precision, recall, thresholds = self.metrics["PrecisionRecallCurve_val"].compute()
+        figure = self.metrics["PrecisionRecallCurve_val"].draw(
+            precision=precision, recall=recall, thresholds=thresholds, 
+            subset="val", epoch=self.current_epoch+1)
+        metrics["val_precision_recall"] = wandb.Image(figure, 
+            caption=f"Precision-Recall Curve [val, epoch {self.current_epoch+1}]")
+        self.metrics["PrecisionRecallCurve_val"].reset()
+        '''
+
+        # log validation metrics
+        metrics[self.step_label] = self.global_step
+        if not self.sanity_check:
+            self.logger.log_metrics(metrics)
+        self.sanity_check = False
+
+        # The following is required for EarlyStopping and ModelCheckpoint callbacks to work properly. 
+        # Callbacks read from `self.log()`, not from `self.logger.log()`, so need to log there.
+        # [EarlyStopping] key `m = self.cfg.train.early_stop_metric` must exist in `metrics`
+        if self.cfg.train.early_stop_metric is not None:
+            m_earlystop = self.cfg.train.early_stop_metric
+            self.log(m_earlystop, metrics[m_earlystop], sync_dist=True)
+        # [ModelCheckpoint] key `m = self.cfg.train.ckpt_monitor_metric` must exist in `metrics`
+        if self.cfg.train.ckpt_monitor_metric is not None:
+            m_checkpoint = self.cfg.train.ckpt_monitor_metric
+            self.log(m_checkpoint, metrics[m_checkpoint], sync_dist=True)
+
+
+    def test_step__SemanticSegmentation(self, batch, batch_idx):
         """ Hook for `test_step`.
 
         Parameters
         ----------
-        batch : object containing the data output by the dataloader. 
+        batch : object containing the data output by the dataloader.
         batch_idx: index of batch.
         """
-        return self._test_step(batch, batch_idx)
 
+        # convert batch to dictionary form
+        batch = dictionarify_batch(batch, self.cfg.data.dataset)
+
+        # forward pass
+        outputs = self.model(batch["inputs"])
+        outputs = process_model_outputs(outputs, self.model)
+        preds = torch.argmax(outputs, dim=1)
+                
+        # loss
+        test_loss = self.loss(outputs, batch["masks"])
+
+        if "test_loss" not in self.test_step_outputs:
+            self.test_step_outputs["test_loss"] = []
+        self.test_step_outputs["test_loss"].append(test_loss)
 
-    def test_step_end(self):
+        # metrics
+        self.metrics["Accuracy_test"].update(preds = preds, target = batch["masks"])
+            #self.metrics["ConfusionMatrix_test"].update(preds = preds, target = batch["masks"])
+        #self.metrics["PrecisionRecallCurve_test"].update(preds = outputs, target = batch["masks"])
+                
+
+    def test_step_end__SemanticSegmentation(self):
         """ Hook for `test_step_end`.
         """
-        return self._test_step_end()
+        pass
 
 
-    def on_test_epoch_end(self):
+    def on_test_epoch_end__SemanticSegmentation(self):
         """ Hook for `on_test_epoch_end`.
         """
-        self._on_test_epoch_end()
\ No newline at end of file
+
+        metrics = {}
+        metrics["test_loss"] = torch.stack(self.test_step_outputs["test_loss"]).mean()
+        self.test_step_outputs.clear()  # free memory
+
+        # accuracy
+        metrics["test_acc"] = self.metrics["Accuracy_test"].compute()
+        self.metrics["Accuracy_test"].reset()
+
+        # confusion matrix
+        '''
+        cm = self.metrics["ConfusionMatrix_test"].compute()
+        figure = self.metrics["ConfusionMatrix_test"].draw(
+            confusion_matrix=cm, subset="test", epoch=self.current_epoch+1)
+        metrics["test_confusion_matrix"] = wandb.Image(figure, 
+            caption=f"Confusion Matrix [test, epoch {self.current_epoch+1}]")
+        self.metrics["ConfusionMatrix_test"].reset()    
+        '''
+
+        # precision-recall
+        '''
+        precision, recall, thresholds = self.metrics["PrecisionRecallCurve_test"].compute()
+        figure = self.metrics["PrecisionRecallCurve_test"].draw(
+            precision=precision, recall=recall, thresholds=thresholds, 
+            subset="test", epoch=self.current_epoch+1)
+        metrics["test_precision_recall"] = wandb.Image(figure, 
+            caption=f"Precision-Recall Curve [test, epoch {self.current_epoch+1}]")
+        self.metrics["PrecisionRecallCurve_test"].reset()
+        '''
+
+        # log test metrics
+        metrics[self.step_label] = self.global_step
+        self.logger.log_metrics(metrics)
+
diff --git a/deeplightning/trainer/batch.py b/deeplightning/trainer/batch.py
index 2fe3a04..024723a 100755
--- a/deeplightning/trainer/batch.py
+++ b/deeplightning/trainer/batch.py
@@ -1,7 +1,7 @@
 from typing import Any
 
 
-def dictionarify_batch(batch: Any, dataset: str):
+def dictionarify_batch(batch: Any, dataset: str) -> dict:
     """Convert batch to dictionary format.
     
     Typically keys in this dictionary would be `inputs`, `targets`

From 6dc1bbf88e4f921e6d8c82a5ff28d39a95a9c59d Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Sat, 4 Nov 2023 17:00:12 +0000
Subject: [PATCH 14/19] remove deprecated hooks

---
 deeplightning/task/base.py                | 12 ------------
 deeplightning/task/vision/segmentation.py | 10 +++++-----
 2 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/deeplightning/task/base.py b/deeplightning/task/base.py
index cdc0bd8..28c5c93 100644
--- a/deeplightning/task/base.py
+++ b/deeplightning/task/base.py
@@ -100,10 +100,6 @@ def training_step(self, batch, batch_idx):
         raise NotImplementedError
 
 
-    def training_step_end(self):
-        raise NotImplementedError
-
-
     def on_training_epoch_end(self):
         raise NotImplementedError
     
@@ -112,10 +108,6 @@ def validation_step(self, batch, batch_idx):
         raise NotImplementedError
 
 
-    def validation_step_end(self):
-        raise NotImplementedError
-
-
     def on_validation_epoch_end(self):
         raise NotImplementedError
 
@@ -124,9 +116,5 @@ def test_step(self, batch, batch_idx):
         raise NotImplementedError
 
 
-    def test_step_end(self):
-        raise NotImplementedError
-
-
     def on_test_epoch_end(self):
         raise NotImplementedError
\ No newline at end of file
diff --git a/deeplightning/task/vision/segmentation.py b/deeplightning/task/vision/segmentation.py
index 9c40876..145ac6e 100755
--- a/deeplightning/task/vision/segmentation.py
+++ b/deeplightning/task/vision/segmentation.py
@@ -215,13 +215,13 @@ def validation_step(self, batch, batch_idx):
         #self.metrics["PrecisionRecallCurve_val"].update(preds = outputs, target = batch["masks"])
 
 
-    def validation_step_end__SemanticSegmentation(self):
+    def validation_step_end(self):
         """ Hook for `validation_step_end`.
         """
         pass
 
 
-    def on_validation_epoch_end__SemanticSegmentation(self):
+    def on_validation_epoch_end(self):
         """ Hook for `on_validation_epoch_end`.
         """
 
@@ -275,7 +275,7 @@ def on_validation_epoch_end__SemanticSegmentation(self):
             self.log(m_checkpoint, metrics[m_checkpoint], sync_dist=True)
 
 
-    def test_step__SemanticSegmentation(self, batch, batch_idx):
+    def test_step(self, batch, batch_idx):
         """ Hook for `test_step`.
 
         Parameters
@@ -305,13 +305,13 @@ def test_step__SemanticSegmentation(self, batch, batch_idx):
         #self.metrics["PrecisionRecallCurve_test"].update(preds = outputs, target = batch["masks"])
                 
 
-    def test_step_end__SemanticSegmentation(self):
+    def test_step_end(self):
         """ Hook for `test_step_end`.
         """
         pass
 
 
-    def on_test_epoch_end__SemanticSegmentation(self):
+    def on_test_epoch_end(self):
         """ Hook for `on_test_epoch_end`.
         """
 

From 0e223b6ef37b9cbd2673ca3e2e23425212fd24e4 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Sun, 5 Nov 2023 19:48:28 +0000
Subject: [PATCH 15/19] refactor hooks for lightning2.0

---
 configs/SkinLesionSegmentation.yaml           |  2 +-
 .../data/dataloaders/vision/ham10000.py       | 37 ++++-----
 deeplightning/task/base.py                    | 24 +++++-
 deeplightning/task/vision/segmentation.py     | 82 ++++---------------
 4 files changed, 58 insertions(+), 87 deletions(-)

diff --git a/configs/SkinLesionSegmentation.yaml b/configs/SkinLesionSegmentation.yaml
index d8c9b40..c5e940c 100755
--- a/configs/SkinLesionSegmentation.yaml
+++ b/configs/SkinLesionSegmentation.yaml
@@ -58,7 +58,7 @@ metrics:
   test: default
   
 train:
-  num_epochs: 10
+  num_epochs: 2
   val_every_n_epoch: 1
   grad_accum_from_epoch: 0
   grad_accum_every_n_batches: 1
diff --git a/deeplightning/data/dataloaders/vision/ham10000.py b/deeplightning/data/dataloaders/vision/ham10000.py
index 7d1e77e..42317b2 100644
--- a/deeplightning/data/dataloaders/vision/ham10000.py
+++ b/deeplightning/data/dataloaders/vision/ham10000.py
@@ -27,19 +27,17 @@ def _extract_masks(metadata, root):
 
 
 class HAM10000_dataset(Dataset):
-    """HAM10000 Dataset ("Human Against Machine with 10000 training images") 
-    for Image Classification and Semantic Segmentation.
+    """HAM10000 Dataset for Image Classification and Semantic Segmentation.
     It contains dermatoscopic images from different populations, acquired and 
     stored by different modalities. Cases include a representative collection 
     of all important diagnostic categories in the realm of pigmented lesions.
 
-    Statistics & Details
-    --------------------
-    - images and segmentation masks size: (width,height)=(600,450)
-    - normalization constants for images: mean=(?,) and std=(?,)
-    - number of samples: 10015
-    - number of image classes: 7
-    - number of segmentation classes: 7 ()
+    images and segmentation masks size: (width,height)=(600,450)
+    normalization constants for images: mean=(?,) and std=(?,)
+    number of samples: 10015
+    number of image classes: 7
+    number of segmentation classes: 7
+
     |-------|-------------|------------------------------------------------------------------------------------------------------------|
     | label | no. samples | description                                                                                                |
     |-------|-------------|------------------------------------------------------------------------------------------------------------|
@@ -52,17 +50,16 @@ class HAM10000_dataset(Dataset):
     | VASC  | 142         | vascular lesions (angiomas, angiokeratomas, pyogenic granulomas and hemorrhage)                            |
     |-------|-------------|------------------------------------------------------------------------------------------------------------|
         
-    References
-    ----------
-    - Tschandl, P., Rosendahl, C., & Kittler, H. (2018). "The HAM10000 
-        dataset, a large collection of multi-source dermatoscopic images 
-        of common pigmented skin lesions". Scientific data, 5(1), 1-9.
-    - https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/DBW86T
-    
-    Arguments
-    ---------
-    cfg : configuration object
-    transform : Transforms to be applied to images
+    References:
+        > "Human Against Machine with 10000 training images"
+        > https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/DBW86T
+        > Tschandl, P., Rosendahl, C., & Kittler, H. (2018). "The HAM10000 
+            dataset, a large collection of multi-source dermatoscopic images 
+            of common pigmented skin lesions". Scientific data, 5(1), 1-9.
+
+    Args:
+        cfg: configuration object
+        transform: Transforms to be applied to images
 
     """
     def __init__(self, 
diff --git a/deeplightning/task/base.py b/deeplightning/task/base.py
index 28c5c93..44990fd 100644
--- a/deeplightning/task/base.py
+++ b/deeplightning/task/base.py
@@ -73,6 +73,8 @@ def num_total_params(self) -> int:
 
 
     def set_num_model_params(self) -> None:
+        """Set the number of model parameters as attributes of the class.
+        """
         self._num_trainable_params = sum(
             p.numel() for p in self.model.parameters() if p.requires_grad
         )
@@ -83,10 +85,12 @@ def set_num_model_params(self) -> None:
 
     
     def print_num_model_params(self) -> None:
+        """Print the number of model parameters.
+        """
         info_message("Trainable model parameters: {:,d}".format(self.num_trainable_params))
         info_message("Non-trainable model parameters: {:,d}".format(self.num_nontrainable_params))
         info_message("Total model parameters: {:,d}".format(self.num_total_params))
-        
+
 
     def forward(self, x: Tensor) -> Tensor:
         raise NotImplementedError
@@ -97,6 +101,12 @@ def configure_optimizers(self) -> Tuple[dict]:
     
 
     def training_step(self, batch, batch_idx):
+        """ Lightning hook for training step.
+
+        Args:
+            batch: object containing the data output by the dataloader.
+            batch_idx: index of batch
+        """
         raise NotImplementedError
 
 
@@ -105,6 +115,12 @@ def on_training_epoch_end(self):
     
 
     def validation_step(self, batch, batch_idx):
+        """ Lightning hook for validation step.
+
+        Args:
+            batch: object containing the data output by the dataloader.
+            batch_idx: index of batch
+        """
         raise NotImplementedError
 
 
@@ -113,6 +129,12 @@ def on_validation_epoch_end(self):
 
 
     def test_step(self, batch, batch_idx):
+        """ Lightning hook for test step.
+
+        Args:
+            batch: object containing the data output by the dataloader.
+            batch_idx: index of batch
+        """
         raise NotImplementedError
 
 
diff --git a/deeplightning/task/vision/segmentation.py b/deeplightning/task/vision/segmentation.py
index 145ac6e..29b8356 100755
--- a/deeplightning/task/vision/segmentation.py
+++ b/deeplightning/task/vision/segmentation.py
@@ -2,11 +2,14 @@
 from omegaconf import OmegaConf
 import torch
 from torch import Tensor
+from torchvision.utils import save_image
+
 import lightning as pl
+from lightning.pytorch.trainer.states import RunningStage
 
 from deeplightning.init.imports import init_obj_from_config
-from deeplightning.init.initializers import init_metrics
-from deeplightning.trainer.gather import gather_on_step, gather_on_epoch
+#from deeplightning.init.initializers import init_metrics
+#from deeplightning.trainer.gather import gather_on_step, gather_on_epoch
 from deeplightning.utils.messages import info_message
 from deeplightning.registry import __HooksRegistry__
 from deeplightning.utils.metrics import classification_accuracy
@@ -45,7 +48,7 @@ def __init__(self, cfg: OmegaConf):
 
         # PyTorch-Lightning performs a partial validation epoch to ensure that
         # everything is correct. Use this to avoid logging metrics to W&B for that 
-        self.sanity_check = True
+        #self.sanity_check = True
 
         # Initialise metrics to track during training
         torch_device = torch.device("cuda") if cfg.engine.accelerator == "gpu" else torch.device('cpu')
@@ -117,13 +120,6 @@ def configure_optimizers(self) -> Tuple[dict]:
     """    
 
     def training_step(self, batch, batch_idx):
-        """ Hook for `training_step`.
-
-        Parameters
-        ----------
-        batch : object containing the data output by the dataloader.
-        batch_idx : index of batch
-        """
 
         # convert batch to dictionary form
         batch = dictionarify_batch(batch, self.cfg.data.dataset)
@@ -142,49 +138,33 @@ def training_step(self, batch, batch_idx):
         # metrics
         self.metrics["Accuracy_train"].update(preds=outputs, target=batch["masks"])
 
-        # the output is not used but returning None gives the following warning
-        # """lightning/pytorch/loops/optimization/automatic.py:129: 
-        # UserWarning: `training_step` returned `None`. If this was 
-        # on purpose, ignore this warning..."""
-        return {"loss": train_loss}
-
-
-    def training_step_end(self):
-        """ Hook for `training_step_end`.
-        """
         if self.global_step % self.cfg.logger.log_every_n_steps == 0:
 
             metrics = {}
-
             metrics["train_loss"] = torch.stack(self.training_step_outputs["train_loss"]).mean()
             self.training_step_outputs.clear()  # free memory
-
             # accuracy (batch only)
             metrics["train_acc"] = self.metrics["Accuracy_train"].compute()
             self.metrics["Accuracy_train"].reset()
-
             # log learning rate
             #metrics['lr'] = self.lr_schedulers().get_last_lr()[0]
-                
+
             # log training metrics
             metrics[self.step_label] = self.global_step
             self.logger.log_metrics(metrics)
 
+        # the output is not used but returning None gives the following warning
+        # """lightning/pytorch/loops/optimization/automatic.py:129: 
+        # UserWarning: `training_step` returned `None`. If this was 
+        # on purpose, ignore this warning..."""
+        return {"loss": train_loss}
+    
 
     def on_training_epoch_end(self):
-        """ Hook for `on_training_epoch_end`.
-        """
         pass
 
 
     def validation_step(self, batch, batch_idx):
-        """ Hook for `validation_step`.
-
-        Parameters
-        ----------
-        batch : object containing the data output by the dataloader.
-        batch_idx : index of batch
-        """
 
         # convert batch to dictionary form
         batch = dictionarify_batch(batch, self.cfg.data.dataset)
@@ -194,13 +174,11 @@ def validation_step(self, batch, batch_idx):
         outputs = process_model_outputs(outputs, self.model)
         preds = torch.argmax(outputs, dim=1)
         
-        '''
         for i in range(5):
             print(batch["inputs_paths"][i])
             print(batch["masks_paths"][i])
-            save_image(preds[0].unsqueeze(0).float(), fp=f"/Users/pme/Downloads/segm/mask_step{self.global_step}.jpeg")
-            i += 1
-        '''
+            torch.save(obj=preds[i], f=f"/Users/pme/Downloads/segm/mask_step{self.global_step}_i{i}.pt")
+            save_image(preds[i].unsqueeze(0).float(), fp=f"/Users/pme/Downloads/segm/mask_step{self.global_step}_i{i}.jpeg")
 
         # loss
         val_loss = self.loss(outputs, batch["masks"])
@@ -215,18 +193,7 @@ def validation_step(self, batch, batch_idx):
         #self.metrics["PrecisionRecallCurve_val"].update(preds = outputs, target = batch["masks"])
 
 
-    def validation_step_end(self):
-        """ Hook for `validation_step_end`.
-        """
-        pass
-
-
     def on_validation_epoch_end(self):
-        """ Hook for `on_validation_epoch_end`.
-        """
-
-        #TODO confirm on multi-gpu
-        #print('\nself.validation_step_outputs["val_loss"]', len(self.validation_step_outputs["val_loss"]), '\n')
 
         metrics = {}
         metrics["val_loss"] = torch.stack(self.validation_step_outputs["val_loss"]).mean()
@@ -259,9 +226,9 @@ def on_validation_epoch_end(self):
 
         # log validation metrics
         metrics[self.step_label] = self.global_step
-        if not self.sanity_check:
+        if self.trainer.state.stage != RunningStage.SANITY_CHECKING:  # `and self.global_step > 0`
             self.logger.log_metrics(metrics)
-        self.sanity_check = False
+        #self.sanity_check = False
 
         # The following is required for EarlyStopping and ModelCheckpoint callbacks to work properly. 
         # Callbacks read from `self.log()`, not from `self.logger.log()`, so need to log there.
@@ -276,13 +243,6 @@ def on_validation_epoch_end(self):
 
 
     def test_step(self, batch, batch_idx):
-        """ Hook for `test_step`.
-
-        Parameters
-        ----------
-        batch : object containing the data output by the dataloader.
-        batch_idx: index of batch.
-        """
 
         # convert batch to dictionary form
         batch = dictionarify_batch(batch, self.cfg.data.dataset)
@@ -303,17 +263,9 @@ def test_step(self, batch, batch_idx):
         self.metrics["Accuracy_test"].update(preds = preds, target = batch["masks"])
             #self.metrics["ConfusionMatrix_test"].update(preds = preds, target = batch["masks"])
         #self.metrics["PrecisionRecallCurve_test"].update(preds = outputs, target = batch["masks"])
-                
-
-    def test_step_end(self):
-        """ Hook for `test_step_end`.
-        """
-        pass
 
 
     def on_test_epoch_end(self):
-        """ Hook for `on_test_epoch_end`.
-        """
 
         metrics = {}
         metrics["test_loss"] = torch.stack(self.test_step_outputs["test_loss"]).mean()

From 8a34bca2157bd14718c963b7ec8be3f0e65defa1 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Sun, 5 Nov 2023 21:16:53 +0000
Subject: [PATCH 16/19] cleanup

---
 deeplightning/task/vision/segmentation.py | 39 ++---------------------
 1 file changed, 3 insertions(+), 36 deletions(-)

diff --git a/deeplightning/task/vision/segmentation.py b/deeplightning/task/vision/segmentation.py
index 29b8356..87eae9a 100755
--- a/deeplightning/task/vision/segmentation.py
+++ b/deeplightning/task/vision/segmentation.py
@@ -36,57 +36,24 @@ class SemanticSegmentationTask(BaseTask):
     """
     def __init__(self, cfg: OmegaConf):
         super().__init__(cfg=cfg)
+        
         self.loss = init_obj_from_config(cfg.model.loss)
         self.model = init_obj_from_config(cfg.model.network)
         self.optimizer = init_obj_from_config(cfg.model.optimizer, self.model.parameters())
         self.scheduler = init_obj_from_config(cfg.model.scheduler, self.optimizer)
         
-        # migration from `pytorch_lightning==1.5.10` to `lightning==2.0.0`
         self.training_step_outputs = {"train_loss": []}
         self.validation_step_outputs = {"val_loss": []}
         self.test_step_outputs = {"test_loss": []}
 
-        # PyTorch-Lightning performs a partial validation epoch to ensure that
-        # everything is correct. Use this to avoid logging metrics to W&B for that 
-        #self.sanity_check = True
-
-        # Initialise metrics to track during training
-        torch_device = torch.device("cuda") if cfg.engine.accelerator == "gpu" else torch.device('cpu')
-
-        #self.metrics = init_metrics(cfg, device=torch_device)
         self.metrics = {
             "Accuracy_train": classification_accuracy(cfg),
             "Accuracy_val": classification_accuracy(cfg),
             "Accuracy_test": classification_accuracy(cfg),
         }
 
-        # Initialise label to track metrics against
-        self.step_label = "iteration"
-
-        # Define hook functions
-        # to make the hooks bound to the class (so that they can access class attributes 
-        #  using `self.something`), the assignment must specify the class name as follows:
-        # `ClassName.fn = my_fn` rather than `self.fn = my_fn`
-        #SemanticSegmentationTask._training_step = __HooksRegistry__[cfg.task]["training_step"]
-        #SemanticSegmentationTask._training_step_end = __HooksRegistry__[cfg.task]["training_step_end"]
-        #SemanticSegmentationTask._on_training_epoch_end = __HooksRegistry__[cfg.task]["on_training_epoch_end"]
-        #SemanticSegmentationTask._validation_step = __HooksRegistry__[cfg.task]["validation_step"]
-        #SemanticSegmentationTask._validation_step_end = __HooksRegistry__[cfg.task]["validation_step_end"]
-        #SemanticSegmentationTask._on_validation_epoch_end = __HooksRegistry__[cfg.task]["on_validation_epoch_end"]
-        #SemanticSegmentationTask._test_step = __HooksRegistry__[cfg.task]["test_step"]
-        #SemanticSegmentationTask._test_step_end = __HooksRegistry__[cfg.task]["test_step_end"]
-        #SemanticSegmentationTask._on_test_epoch_end = __HooksRegistry__[cfg.task]["on_test_epoch_end"]
-
-        # Aggregation utilities
-        #self.gather_on_step = gather_on_step
-        #self.gather_on_epoch = gather_on_epoch
-
-        # PyTorch-Lightning's model summary does not give the 
-        # correct  number of trainable parameters; see 
-        # https://github.com/PyTorchLightning/pytorch-lightning/issues/12130
-        self.trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
-        info_message("Trainable parameters: {:,d}".format(self.trainable_params))
-       
+        self.on_task_init_end()
+
 
     def forward(self, x: Tensor) -> Tensor:
         """ Model forward pass.

From c9d103c8c9ce3e191a8b062affead4c96cdea2df Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Mon, 6 Nov 2023 22:21:30 +0000
Subject: [PATCH 17/19] .

---
 configs/SkinLesionSegmentation.yaml       | 4 ++--
 deeplightning/task/vision/segmentation.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/configs/SkinLesionSegmentation.yaml b/configs/SkinLesionSegmentation.yaml
index c5e940c..f9177b1 100755
--- a/configs/SkinLesionSegmentation.yaml
+++ b/configs/SkinLesionSegmentation.yaml
@@ -18,7 +18,7 @@ data:
     normalize: # use `deeplightning.utils.data.compute_dataset_mean_and_stdev()`
       mean: [0., 0., 0.]
       std: [1., 1., 1.]
-    resize: [32, 32]
+    resize: [128, 128]
   test_transforms:
     normalize:
       mean: [0., 0., 0.]
@@ -58,7 +58,7 @@ metrics:
   test: default
   
 train:
-  num_epochs: 2
+  num_epochs: 20
   val_every_n_epoch: 1
   grad_accum_from_epoch: 0
   grad_accum_every_n_batches: 1
diff --git a/deeplightning/task/vision/segmentation.py b/deeplightning/task/vision/segmentation.py
index 87eae9a..dd5a5bc 100755
--- a/deeplightning/task/vision/segmentation.py
+++ b/deeplightning/task/vision/segmentation.py
@@ -145,7 +145,7 @@ def validation_step(self, batch, batch_idx):
             print(batch["inputs_paths"][i])
             print(batch["masks_paths"][i])
             torch.save(obj=preds[i], f=f"/Users/pme/Downloads/segm/mask_step{self.global_step}_i{i}.pt")
-            save_image(preds[i].unsqueeze(0).float(), fp=f"/Users/pme/Downloads/segm/mask_step{self.global_step}_i{i}.jpeg")
+            save_image(preds[i].unsqueeze(0).float(), fp=f"/Users/pme/Downloads/segm/{batch['masks_paths'][i]}_pred_step{self.global_step}.png")
 
         # loss
         val_loss = self.loss(outputs, batch["masks"])

From 3d3a4b42fd487819ebee49ca507ed8d66dbca48c Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Sun, 12 Nov 2023 21:01:20 +0000
Subject: [PATCH 18/19] rename sound -> audio

---
 deeplightning/data/dataloaders/{sound => audio}/__init__.py | 0
 deeplightning/data/dataloaders/{sound => audio}/fsd.py      | 0
 deeplightning/task/{sound => audio}/__init__.py             | 0
 deeplightning/task/{sound => audio}/classification.py       | 0
 deeplightning/viz/{sound => audio}/__init__.py              | 0
 deeplightning/viz/{sound => audio}/spectrum.py              | 0
 deeplightning/viz/{sound => audio}/waveform.py              | 0
 7 files changed, 0 insertions(+), 0 deletions(-)
 rename deeplightning/data/dataloaders/{sound => audio}/__init__.py (100%)
 rename deeplightning/data/dataloaders/{sound => audio}/fsd.py (100%)
 rename deeplightning/task/{sound => audio}/__init__.py (100%)
 rename deeplightning/task/{sound => audio}/classification.py (100%)
 rename deeplightning/viz/{sound => audio}/__init__.py (100%)
 rename deeplightning/viz/{sound => audio}/spectrum.py (100%)
 rename deeplightning/viz/{sound => audio}/waveform.py (100%)

diff --git a/deeplightning/data/dataloaders/sound/__init__.py b/deeplightning/data/dataloaders/audio/__init__.py
similarity index 100%
rename from deeplightning/data/dataloaders/sound/__init__.py
rename to deeplightning/data/dataloaders/audio/__init__.py
diff --git a/deeplightning/data/dataloaders/sound/fsd.py b/deeplightning/data/dataloaders/audio/fsd.py
similarity index 100%
rename from deeplightning/data/dataloaders/sound/fsd.py
rename to deeplightning/data/dataloaders/audio/fsd.py
diff --git a/deeplightning/task/sound/__init__.py b/deeplightning/task/audio/__init__.py
similarity index 100%
rename from deeplightning/task/sound/__init__.py
rename to deeplightning/task/audio/__init__.py
diff --git a/deeplightning/task/sound/classification.py b/deeplightning/task/audio/classification.py
similarity index 100%
rename from deeplightning/task/sound/classification.py
rename to deeplightning/task/audio/classification.py
diff --git a/deeplightning/viz/sound/__init__.py b/deeplightning/viz/audio/__init__.py
similarity index 100%
rename from deeplightning/viz/sound/__init__.py
rename to deeplightning/viz/audio/__init__.py
diff --git a/deeplightning/viz/sound/spectrum.py b/deeplightning/viz/audio/spectrum.py
similarity index 100%
rename from deeplightning/viz/sound/spectrum.py
rename to deeplightning/viz/audio/spectrum.py
diff --git a/deeplightning/viz/sound/waveform.py b/deeplightning/viz/audio/waveform.py
similarity index 100%
rename from deeplightning/viz/sound/waveform.py
rename to deeplightning/viz/audio/waveform.py

From 87f87d6924a537ef035adf294a2f261c50582fd1 Mon Sep 17 00:00:00 2001
From: pme0 <12113751+pme0@users.noreply.github.com>
Date: Sun, 12 Nov 2023 21:08:00 +0000
Subject: [PATCH 19/19] rename sound -> audio

---
 examples/audio/audio_classification_cnn/generate_media.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/audio/audio_classification_cnn/generate_media.py b/examples/audio/audio_classification_cnn/generate_media.py
index b4e2bfb..18934d4 100644
--- a/examples/audio/audio_classification_cnn/generate_media.py
+++ b/examples/audio/audio_classification_cnn/generate_media.py
@@ -13,8 +13,8 @@
 sys.path.append(parent2)
 print(f"Added to system path: '{parent2}'")
 
-from deeplightning.viz.sound.waveform import waveplot
-from deeplightning.viz.sound.spectrum import spectrogram
+from deeplightning.viz.audio.waveform import waveplot
+from deeplightning.viz.audio.spectrum import spectrogram
 
 
 def parse_args():