diff --git a/backpack/utils/examples.py b/backpack/utils/examples.py
index b52b8a903..3798ebde9 100644
--- a/backpack/utils/examples.py
+++ b/backpack/utils/examples.py
@@ -40,13 +40,14 @@ def get_mnist_dataloader(batch_size: int = 64, shuffle: bool = True) -> DataLoad
 
 
 def load_one_batch_mnist(
-    batch_size: int = 64, shuffle: bool = True
+    batch_size: int = 64, shuffle: bool = True, flat: bool = False
 ) -> Tuple[Tensor, Tensor]:
     """Return a single mini-batch (inputs, labels) from MNIST.
 
     Args:
         batch_size: Mini-batch size. Default: ``64``.
         shuffle: Randomly shuffle the data. Default: ``True``.
+        flat: Flatten the images and return a matrix ``[batch_size, 784]``. Default: ``False``.
 
     Returns:
         A single batch (inputs, labels) from MNIST.
@@ -54,6 +55,9 @@ def load_one_batch_mnist(
     dataloader = get_mnist_dataloader(batch_size, shuffle)
     X, y = next(iter(dataloader))
 
+    if flat:
+        X = X.reshape(X.shape[0], -1)
+
     return X, y
 
 
diff --git a/docs/examples.html b/docs/examples.html
index c8d887d13..8280bff9e 100644
--- a/docs/examples.html
+++ b/docs/examples.html
@@ -60,8 +60,8 @@

BackPACK on a small example

Installation

For this example to run, you will need PyTorch and TorchVision (>= 1.0)
-To install BackPACK, either use pip or clone the repo.

-
pip install backpack-for-pytorch
+To install BackPACK, either use pip or clone the repo.

+
pip install backpack-for-pytorch
 

An example: Diagonal GGN Preconditioner

@@ -90,20 +90,20 @@

Step 1: Libraries, MNIST, and the

STEP_SIZE = 0.01
DAMPING = 1.0
MAX_ITER = 100
-torch.manual_seed(0)
+torch.manual_seed(0)

Now, let’s load MNIST


-mnist_loader = torch.utils.data.dataloader.DataLoader(
-    torchvision.datasets.MNIST(
+mnist_loader = torch.utils.data.dataloader.DataLoader(
+    torchvision.datasets.MNIST(
         './data',
         train=True,
         download=True,
-        transform=torchvision.transforms.Compose([
-            torchvision.transforms.ToTensor(),
-            torchvision.transforms.Normalize(
+        transform=torchvision.transforms.Compose([
+            torchvision.transforms.ToTensor(),
+            torchvision.transforms.Normalize(
                 (0.1307,), (0.3081,)
             )
         ])),
@@ -113,32 +113,32 @@ 

Step 1: Libraries, MNIST, and the

-

We’ll create a small CNN with MaxPooling and ReLU activations, using a Sequential layer as the main model.

+

We’ll create a small CNN with MaxPooling and ReLU activations, using a Sequential layer as the main model.

-
model = torch.nn.Sequential(
-    torch.nn.Conv2d(1, 20, 5, 1),
-    torch.nn.ReLU(),
-    torch.nn.MaxPool2d(2, 2),
-    torch.nn.Conv2d(20, 50, 5, 1),
-    torch.nn.ReLU(),
-    torch.nn.MaxPool2d(2, 2),
+
model = torch.nn.Sequential(
+    torch.nn.Conv2d(1, 20, 5, 1),
+    torch.nn.ReLU(),
+    torch.nn.MaxPool2d(2, 2),
+    torch.nn.Conv2d(20, 50, 5, 1),
+    torch.nn.ReLU(),
+    torch.nn.MaxPool2d(2, 2),
     Flatten(), 
     # Pytorch <1.2 doesn't have a Flatten layer
-    torch.nn.Linear(4*4*50, 500),
-    torch.nn.ReLU(),
-    torch.nn.Linear(500, 10),
+    torch.nn.Linear(4*4*50, 500),
+    torch.nn.ReLU(),
+    torch.nn.Linear(500, 10),
 )
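For reference, the `4*4*50` input size of the first `Linear` layer follows from the shapes this network produces on 28×28 MNIST images: 28×28 → Conv2d(5×5) → 24×24 → MaxPool2d(2) → 12×12 → Conv2d(5×5) → 8×8 → MaxPool2d(2) → 4×4, with 50 output channels, so `Flatten` yields 4·4·50 = 800 features.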
 
 

We will also need a loss function and a way to measure accuracy

-
loss_function = torch.nn.CrossEntropyLoss()
+
loss_function = torch.nn.CrossEntropyLoss()
 
 def get_accuracy(output, targets):
     """Helper function to print the accuracy"""
-    predictions = output.argmax(dim=1, keepdim=True).view_as(targets)
-    return predictions.eq(targets).float().mean().item()```
+    predictions = output.argmax(dim=1, keepdim=True).view_as(targets)
+    return predictions.eq(targets).float().mean().item()```
 
 
@@ -151,23 +151,23 @@

Step 2: The optimizer

-

where 𝛼 is the step-size, 𝜆 is the damping parameter, g is the gradient, and G is the diagonal of the generalized Gauss-Newton (GGN).
-The difficult part is computing G, but BackPACK will do this;
-just like PyTorch’s autograd computes the gradient for each parameter p and stores it in p.grad, BackPACK with the DiagGGNMC extension will compute (a Monte-Carlo estimate of) the diagonal of the GGN and store it in p.diag_ggn_mc.
+

where 𝛼 is the step-size, 𝜆 is the damping parameter, g is the gradient, and G is the diagonal of the generalized Gauss-Newton (GGN).
+The difficult part is computing G, but BackPACK will do this;
+just like PyTorch’s autograd computes the gradient for each parameter p and stores it in p.grad, BackPACK with the DiagGGNMC extension will compute (a Monte-Carlo estimate of) the diagonal of the GGN and store it in p.diag_ggn_mc.
We can now simply focus on implementing the optimizer that uses this information:
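(The update rule itself is rendered as an image on the page; reconstructed from the description above and the optimizer code below, it is, presumably,

```
\theta \leftarrow \theta - \alpha \, (G + \lambda)^{-1} g
```

where the inverse is taken elementwise, since G is the diagonal of the GGN; this matches the `p.grad / (p.diag_ggn_mc + damping)` computed in `step`.)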

-
class DiagGGNOptimizer(torch.optim.Optimizer):
+
class DiagGGNOptimizer(torch.optim.Optimizer):
     def __init__(self, parameters, step_size, damping):
-        super().__init__(
+        super().__init__(
             parameters, 
             dict(step_size=step_size, damping=damping)
         )
 
     def step(self):
-        for group in self.param_groups:
+        for group in self.param_groups:
             for p in group["params"]:
-                step_direction = p.grad / (p.diag_ggn_mc + group["damping"])
-                p.data.add_(-group["step_size"], step_direction)
+                step_direction = p.grad / (p.diag_ggn_mc + group["damping"])
+                p.data.add_(-group["step_size"], step_direction)
         return loss
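Two small caveats in the optimizer shown above: `return loss` refers to a variable that is not defined inside `step` (it relies on `loss` existing in an enclosing scope), and `p.data.add_(-group["step_size"], step_direction)` uses the old two-argument form of `add_` that recent PyTorch versions deprecate. A minimal sketch of an equivalent `step` for current PyTorch (assumption: PyTorch >= 1.5, and BackPACK has already filled `p.diag_ggn_mc`):

```
    def step(self):
        """Apply the damped, diagonal-GGN-preconditioned update to every parameter."""
        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None:
                    continue
                # Elementwise preconditioning: g / (G + lambda).
                step_direction = p.grad / (p.diag_ggn_mc + group["damping"])
                # theta <- theta - alpha * step_direction (keyword form of add_).
                p.data.add_(step_direction, alpha=-group["step_size"])
```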
 
@@ -179,7 +179,7 @@

Step 3: Put on your BackPACK

extend(loss_function)

optimizer = DiagGGNOptimizer(
-    model.parameters(),
+    model.parameters(),
    step_size=STEP_SIZE,
    damping=DAMPING
)

@@ -191,7 +191,7 @@

The main loop

Traditional optimization loop: load each minibatch, compute the minibatch loss, but now call BackPACK before doing the backward pass.
-The diag_ggn_mc fields of the parameters will get filled and the optimizer will run.

+The diag_ggn_mc fields of the parameters will get filled and the optimizer will run.

for batch_idx, (x, y) in enumerate(mnist_loader):
     output = model(x)
@@ -200,13 +200,13 @@ 

The main loop

with backpack(DiagGGNMC()):
    loss = loss_function(output, y)
-    loss.backward()
-    optimizer.step()
+    loss.backward()
+    optimizer.step()

print(
-    "Iteration %3.d/%d " % (batch_idx, MAX_ITER) +
-    "Minibatch Loss %.3f " % (loss) +
-    "Accuracy %.0f" % (accuracy * 100) + "%"
+    "Iteration %3.d/%d " % (batch_idx, MAX_ITER) +
+    "Minibatch Loss %.3f " % (loss) +
+    "Accuracy %.0f" % (accuracy * 100) + "%"
)

if batch_idx >= MAX_ITER:

@@ -215,7 +215,7 @@

The main loop

If everything went fine, the output should look like

-
Iteration   0/100   Minibatch Loss 2.307   Accuracy 12%
+
Iteration   0/100   Minibatch Loss 2.307   Accuracy 12%
 Iteration   1/100   Minibatch Loss 2.318   Accuracy 8%
 Iteration   2/100   Minibatch Loss 2.329   Accuracy 8%
 Iteration   3/100   Minibatch Loss 2.281   Accuracy 19%
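As a quick sanity check (not part of the page above), after a backward pass inside the `with backpack(DiagGGNMC()):` block every parameter should carry the Monte-Carlo estimate of the GGN diagonal next to its gradient, with the same shape as the parameter itself:

```
# Assumes the training loop above has run at least one iteration.
for name, p in model.named_parameters():
    print(name, tuple(p.grad.shape), tuple(p.diag_ggn_mc.shape))  # identical shapes
```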
diff --git a/docs/index.html b/docs/index.html
index a5eeb25d6..c086476b7 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -101,10 +101,10 @@ 

"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
+from backpack.utils.examples import load_one_batch_mnist

-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = Linear(784, 10)
lossfunc = CrossEntropyLoss()
loss = lossfunc(model(X), y)
@@ -127,10 +127,11 @@

and the variance with BackPACK
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, Variance
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import Variance

-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)
@@ -150,10 +151,11 @@

and the second moment with BackPACK
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, SumGradSquared
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import SumGradSquared

-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)
@@ -173,10 +175,11 @@

and the diagonal of the Gauss-Newton with BackPACK
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, DiagGGNExact
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import DiagGGNExact

-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)
@@ -196,10 +199,11 @@

and KFAC with BackPACK
"""
from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, KFAC
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import KFAC

-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)
@@ -292,14 +296,14 @@


Install with

-
pip install backpack-for-pytorch 
+
pip install backpack-for-pytorch 
 

If you use BackPACK in your research, please cite it (download bibtex)

-
@inproceedings{dangel2020backpack,
+
@inproceedings{dangel2020backpack,
     title = {BackPACK: Packing more into Backprop},
     author = {Felix Dangel and Frederik Kunstner and Philipp Hennig},
     booktitle = {International Conference on Learning Representations},
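The quickstart snippets in `docs/index.html` (and in `code-samples.html` below) rely on the new `flat` argument added to `load_one_batch_mnist` at the top of this diff. A small sketch of what it returns (assumes torchvision can download MNIST to the default data directory):

```
from backpack.utils.examples import load_one_batch_mnist

# Default: image tensors with a channel dimension.
X, y = load_one_batch_mnist(batch_size=64)
print(X.shape)  # torch.Size([64, 1, 28, 28])

# flat=True merges channel and spatial dimensions, matching Linear(784, 10).
X, y = load_one_batch_mnist(batch_size=64, flat=True)
print(X.shape)  # torch.Size([64, 784])
```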
diff --git a/docs_src/CNAME b/docs_src/CNAME
new file mode 100644
index 000000000..b172915ff
--- /dev/null
+++ b/docs_src/CNAME
@@ -0,0 +1 @@
+backpack.pt
\ No newline at end of file
diff --git a/docs_src/README.md b/docs_src/README.md
index fe8032e5b..ca7680652 100644
--- a/docs_src/README.md
+++ b/docs_src/README.md
@@ -1,25 +1,31 @@
 **Building the web version**
 
-Requirements: [Jekyll](https://jekyllrb.com/docs/installation/) and [Sphinx](https://www.sphinx-doc.org/en/1.8/usage/installation.html)
+Requirements: [Jekyll](https://jekyllrb.com/docs/installation/) and [Sphinx](https://www.sphinx-doc.org/en/1.8/usage/installation.html) 
+and the Jekyll dependencies (install them with `bundle install` in `docs_src/splash`)
 
-Full build to output results in `../docs`
-```
-bash buildweb.sh
-```
+- Full build, with output written to `../docs`
+  ```
+  bash buildweb.sh
+  ```
 
-Local build of the Jekyll splash page 
-```
-cd splash
-bundle exec jekyll server
-```
-and go to `localhost:4000/backpack`
+- Local build of the Jekyll splash page 
+  ```
+  cd splash
+  bundle exec jekyll server
+  ```
+  and go to `localhost:4000/backpack`
+  
+  Note: The code examples on backpack.pt are defined with HTML tags in 
+  `splash/_includes/code-samples.html`. 
+  There is no Python source file that generates them. 
+  Test manually by copy-pasting from the resulting page.
 
-Local build of the documentation
-```
-cd rtd
-make
-```
-and open `/docs_src/rtd_output/index.html`
+- Local build of the documentation
+  ```
+  cd rtd
+  make
+  ```
+  and open `/docs_src/rtd_output/index.html`
 
 
 
diff --git a/docs_src/buildweb.sh b/docs_src/buildweb.sh
index a9659a915..2f79ecfc2 100644
--- a/docs_src/buildweb.sh
+++ b/docs_src/buildweb.sh
@@ -2,3 +2,4 @@ cd splash
 bundle exec jekyll build -d "../../docs"
 cd ..
 touch ../docs/.nojekyll
+cp CNAME ../docs/CNAME
\ No newline at end of file
diff --git a/docs_src/splash/_includes/code-samples.html b/docs_src/splash/_includes/code-samples.html
index 7a701bce8..62ef48348 100644
--- a/docs_src/splash/_includes/code-samples.html
+++ b/docs_src/splash/_includes/code-samples.html
@@ -44,10 +44,10 @@
 
 """
 from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
+from backpack.utils.examples import load_one_batch_mnist
 
 
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
 model = Linear(784, 10)
 lossfunc = CrossEntropyLoss()
 loss = lossfunc(model(X), y)
@@ -70,10 +70,11 @@
 and the variance with BackPACK
 """
 from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, extensions
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import Variance
 
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
 model = extend(Linear(784, 10))
 lossfunc = extend(CrossEntropyLoss())
 loss = lossfunc(model(X), y)
@@ -94,10 +95,11 @@
 and the second moment with BackPACK
 """
 from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, SumGradSquared
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import SumGradSquared
 
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
 model = extend(Linear(784, 10))
 lossfunc = extend(CrossEntropyLoss())
 loss = lossfunc(model(X), y)
@@ -117,10 +119,11 @@
 and the diagonal of the Gauss-Newton with BackPACK
 """
 from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, extensions
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import DiagGGNExact
 
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
 model = extend(Linear(784, 10))
 lossfunc = extend(CrossEntropyLoss())
 loss = lossfunc(model(X), y)
@@ -140,10 +143,11 @@
 and KFAC with BackPACK
 """
 from torch.nn import CrossEntropyLoss, Linear
-from utils import load_mnist_data
-from backpack import extend, backpack, extensions
+from backpack.utils.examples import load_one_batch_mnist
+from backpack import extend, backpack
+from backpack.extensions import KFAC
 
-X, y = load_mnist_data()
+X, y = load_one_batch_mnist(flat=True)
 model = extend(Linear(784, 10))
 lossfunc = extend(CrossEntropyLoss())
 loss = lossfunc(model(X), y)
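Each snippet above is cut off by the diff context right after the loss is computed; the BackPACK-specific part follows the same pattern in every case. A sketch of how the variance example continues (the savefield names `variance`, `sum_grad_squared`, `diag_ggn_exact`, and `kfac` are taken from BackPACK's extension documentation; double-check them against the version in use):

```
from torch.nn import CrossEntropyLoss, Linear
from backpack import backpack, extend
from backpack.extensions import Variance
from backpack.utils.examples import load_one_batch_mnist

X, y = load_one_batch_mnist(flat=True)
model = extend(Linear(784, 10))
lossfunc = extend(CrossEntropyLoss())
loss = lossfunc(model(X), y)

# The part outside the hunks above: run the backward pass inside the
# backpack context, then read the requested quantity off each parameter.
with backpack(Variance()):
    loss.backward()

for param in model.parameters():
    print(param.variance.shape)  # same shape as param

# The other snippets work analogously: SumGradSquared fills param.sum_grad_squared,
# DiagGGNExact fills param.diag_ggn_exact, and KFAC fills param.kfac
# (a list of Kronecker factors rather than a single tensor).
```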