From ed6f0167fd1f6097ce9e454c2950c5db00a0c513 Mon Sep 17 00:00:00 2001
From: "Thouis (Ray) Jones" <thouis@gmail.com>
Date: Mon, 2 Nov 2015 21:49:00 -0500
Subject: [PATCH 1/6] remove extraneous +4

---
 HW3/P3/tune.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/HW3/P3/tune.py b/HW3/P3/tune.py
index c16e9fa6..a0d56da2 100644
--- a/HW3/P3/tune.py
+++ b/HW3/P3/tune.py
@@ -23,7 +23,7 @@ def create_data(N):
     times = {}
 
     for num_workgroups in 2 ** np.arange(3, 10):
-        partial_sums = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, 4 * num_workgroups + 4)
+        partial_sums = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, 4 * num_workgroups)
         host_partial = np.empty(num_workgroups).astype(np.float32)
         for num_workers in 2 ** np.arange(2, 8):
             local = cl.LocalMemory(num_workers * 4)
@@ -40,7 +40,7 @@ def create_data(N):
                   format(num_workgroups, num_workers, seconds))
 
     for num_workgroups in 2 ** np.arange(3, 10):
-        partial_sums = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, 4 * num_workgroups + 4)
+        partial_sums = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, 4 * num_workgroups)
         host_partial = np.empty(num_workgroups).astype(np.float32)
         for num_workers in 2 ** np.arange(2, 8):
             local = cl.LocalMemory(num_workers * 4)

From 6706ee708d97c656caa08b284792b3eee63892cf Mon Sep 17 00:00:00 2001
From: "Thouis (Ray) Jones" <thouis@gmail.com>
Date: Wed, 4 Nov 2015 21:46:08 -0500
Subject: [PATCH 2/6] typo in typecast

---
 HW3/P3/sum.cl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HW3/P3/sum.cl b/HW3/P3/sum.cl
index 4fb771d2..ee914740 100644
--- a/HW3/P3/sum.cl
+++ b/HW3/P3/sum.cl
@@ -38,7 +38,7 @@ __kernel void sum_blocked(__global float* x,
 {
     float sum = 0;
     size_t local_id = get_local_id(0);
-    int k = ceil(float(N) / get_global_size(0));
+    int k = ceil((float)N / get_global_size(0));
 
     // thread with global_id 0 should add 0..k-1
     // thread with global_id 1 should add k..2k-1

From f7dc5f66fb34745e3ed74460cdd037836c440df7 Mon Sep 17 00:00:00 2001
From: "Thouis (Ray) Jones" <thouis@seas.harvard.edu>
Date: Thu, 5 Nov 2015 09:43:23 -0500
Subject: [PATCH 3/6] unused module, set include path

---
 HW3/P4/median_filter.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/HW3/P4/median_filter.py b/HW3/P4/median_filter.py
index 1eda1bb9..a181c05a 100644
--- a/HW3/P4/median_filter.py
+++ b/HW3/P4/median_filter.py
@@ -1,8 +1,8 @@
 from __future__ import division
 import pyopencl as cl
 import numpy as np
-import imread
 import pylab
+import os.path
 
 def round_up(global_size, group_size):
     r = global_size % group_size
@@ -51,7 +51,8 @@ def numpy_median(image, iterations=10):
                             properties=cl.command_queue_properties.PROFILING_ENABLE)
     print 'The queue is using the device:', queue.device.name
 
-    program = cl.Program(context, open('median_filter.cl').read()).build(options='')
+    curdir = os.path.dirname(os.path.realpath(__file__))
+    program = cl.Program(context, open('median_filter.cl').read()).build(options=['-I', curdir])
 
     host_image = np.load('image.npz')['image'].astype(np.float32)[::2, ::2].copy()
     host_image_filtered = np.zeros_like(host_image)

From aec15054973fb1a0095a4b86b1db12dc0616860b Mon Sep 17 00:00:00 2001
From: Neil Chainani <chainani@g.harvard.edu>
Date: Thu, 12 Nov 2015 14:51:44 -0500
Subject: [PATCH 4/6] Finished 3 and 4

---
 HW3/P2/mandelbrot.cl    | 15 ++++++++++--
 HW3/P3/sum.cl           | 54 +++++++++++++++--------------------------
 HW3/P3/tune.py          |  4 +--
 HW3/P4/median_filter.cl | 48 +++++++++++++++++++++++++++++++++++-
 HW3/P4/median_filter.py |  2 +-
 5 files changed, 82 insertions(+), 41 deletions(-)

diff --git a/HW3/P2/mandelbrot.cl b/HW3/P2/mandelbrot.cl
index 5a11c020..c719f4c1 100644
--- a/HW3/P2/mandelbrot.cl
+++ b/HW3/P2/mandelbrot.cl
@@ -13,7 +13,18 @@ mandelbrot(__global __read_only float *coords_real,
     int iter;
 
     if ((x < w) && (y < h)) {
-        // YOUR CODE HERE
-        ;
+      x = y * w + x;
+
+      z_real = x;
+      z_imag = y;
+      c_real = x;
+      c_imag = y;
+      iter = 0;
+        while (abs(z) < 2) && (iter < 511) {
+            z_real = z_real*z_real - z_imag*z_imag + c_real;
+            z_imag = 2*z_real*z_imag + c_imag
+            iter++;
+          }
+
     }
 }
diff --git a/HW3/P3/sum.cl b/HW3/P3/sum.cl
index ee914740..5aca7f71 100644
--- a/HW3/P3/sum.cl
+++ b/HW3/P3/sum.cl
@@ -5,29 +5,25 @@ __kernel void sum_coalesced(__global float* x,
 {
     float sum = 0;
     size_t local_id = get_local_id(0);
-
     // thread i (i.e., with i = get_global_id()) should add x[i],
     // x[i + get_global_size()], ... up to N-1, and store in sum.
-    for (;;) { // YOUR CODE HERE
-        ; // YOUR CODE HERE 
+    int i = get_global_id(0);
+    int k = get_global_size(0);
+    int j;
+    for (j = i;j< N ; j += k) {
+        sum += x[j];
     }
 
     fast[local_id] = sum;
     barrier(CLK_LOCAL_MEM_FENCE);
-
-    // binary reduction
-    //
-    // thread i should sum fast[i] and fast[i + offset] and store back
-    // in fast[i], for offset = (local_size >> j) for j from 1 to
-    // log_2(local_size)
-    //
-    // You can assume get_local_size(0) is a power of 2.
-    //
-    // See http://www.nehalemlabs.net/prototype/blog/2014/06/16/parallel-programming-with-opencl-and-python-parallel-reduce/
-    for (;;) { // YOUR CODE HERE
-        ; // YOUR CODE HERE
+    for(j = get_local_size(0)/2; j > 0; j >>= 1) {
+            if(local_id < j) {
+              fast[local_id] += fast[local_id+j];
+            }
+            barrier(CLK_LOCAL_MEM_FENCE);
     }
 
+
     if (local_id == 0) partial[get_group_id(0)] = fast[0];
 }
 
@@ -41,31 +37,19 @@ __kernel void sum_blocked(__global float* x,
     int k = ceil((float)N / get_global_size(0));
 
     // thread with global_id 0 should add 0..k-1
-    // thread with global_id 1 should add k..2k-1
-    // thread with global_id 2 should add 2k..3k-1
-    // ...
-    //     with k = ceil(N / get_global_size()).
-    // 
-    // Be careful that each thread stays in bounds, both relative to
-    // size of x (i.e., N), and the range it's assigned to sum.
-    for (;;) { // YOUR CODE HERE
-        ; // YOUR CODE HERE
+
+    for (int s = k*i;s < k * (i+1);a++) {
+        if (a < N) sum += x[a];
     }
 
     fast[local_id] = sum;
     barrier(CLK_LOCAL_MEM_FENCE);
 
-    // binary reduction
-    //
-    // thread i should sum fast[i] and fast[i + offset] and store back
-    // in fast[i], for offset = (local_size >> j) for j from 1 to
-    // log_2(local_size)
-    //
-    // You can assume get_local_size(0) is a power of 2.
-    //
-    // See http://www.nehalemlabs.net/prototype/blog/2014/06/16/parallel-programming-with-opencl-and-python-parallel-reduce/
-    for (;;) { // YOUR CODE HERE
-        ; // YOUR CODE HERE
+    for(j = get_local_size(0)/2; j > 0; j >>= 1) {
+            if(local_id < j) {
+              fast[local_id] += fast[local_id+j];
+            }
+            barrier(CLK_LOCAL_MEM_FENCE);
     }
 
     if (local_id == 0) partial[get_group_id(0)] = fast[0];
diff --git a/HW3/P3/tune.py b/HW3/P3/tune.py
index a0d56da2..fbbd4121 100644
--- a/HW3/P3/tune.py
+++ b/HW3/P3/tune.py
@@ -22,10 +22,10 @@ def create_data(N):
 
     times = {}
 
-    for num_workgroups in 2 ** np.arange(3, 10):
+    for num_workgroups in 2 ** np.arange(3, 4):
         partial_sums = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, 4 * num_workgroups)
         host_partial = np.empty(num_workgroups).astype(np.float32)
-        for num_workers in 2 ** np.arange(2, 8):
+        for num_workers in 2 ** np.arange(2, 3):
             local = cl.LocalMemory(num_workers * 4)
             event = program.sum_coalesced(queue, (num_workgroups * num_workers,), (num_workers,),
                                           x, partial_sums, local, np.uint64(N))
diff --git a/HW3/P4/median_filter.cl b/HW3/P4/median_filter.cl
index 07bb294c..de0a567f 100644
--- a/HW3/P4/median_filter.cl
+++ b/HW3/P4/median_filter.cl
@@ -12,10 +12,56 @@ median_3x3(__global __read_only float *in_values,
     // Note: It may be easier for you to implement median filtering
     // without using the local buffer, first, then adjust your code to
     // use such a buffer after you have that working.
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
 
+    const int lx = get_local_id(0);
+    const int ly = get_local_id(1);
 
+    const int buf_corner_x = x - lx - halo;
+    const int buf_corner_y = y - ly - halo;
+
+    const int buf_x = lx + halo;
+    const int buf_y = ly + halo;
+
+    const int idx_1D = ly * get_local_size(0) + lx;
+
+     int row, tx, ty;
+
+     if (idx_1D < buf_w){
+         for (row = 0; row < buf_h; row++) {
+           tx = idx_1D;
+           ty = row;
+          if (tx+buf_corner_x < 0){
+            tx++;
+          } else if(tx+buf_corner_y == w) {
+            tx--;
+          }
+          if(ty+buf_corner_y < 0) {
+            ty++;
+          } else if(ty+buf_corner_y == h) {
+            ty--;
+          }
+             buffer[ty * buf_w + tx] = \
+                 in_values[w*(buf_corner_y + ty)+(buf_corner_x + tx)];
+         }
+        }
+     barrier(CLK_LOCAL_MEM_FENCE);
+
+     if ((y < h) && (x < w)) {// stay in bounds
+        out_values[y * w + x] = \
+                        median9(buffer[(buf_y-1) * buf_w + buf_x-1],\
+                          buffer[(buf_y-1) * buf_w + buf_x],\
+                          buffer[(buf_y-1) * buf_w + buf_x+1],\
+                          buffer[buf_y * buf_w + buf_x-1], \
+                          buffer[buf_y * buf_w + buf_x],\
+                          buffer[buf_y * buf_w + buf_x+1],\
+                          buffer[(buf_y+1) * buf_w + buf_x-1],\
+                          buffer[(buf_y+1) * buf_w + buf_x],\
+                          buffer[(buf_y+1) * buf_w + buf_x-1]);
+        }
     // Load into buffer (with 1-pixel halo).
-    //
+
     // It may be helpful to consult HW3 Problem 5, and
     // https://github.com/harvard-cs205/OpenCL-examples/blob/master/load_halo.cl
     //
diff --git a/HW3/P4/median_filter.py b/HW3/P4/median_filter.py
index a181c05a..0732c7b8 100644
--- a/HW3/P4/median_filter.py
+++ b/HW3/P4/median_filter.py
@@ -42,7 +42,7 @@ def numpy_median(image, iterations=10):
 
     # Create a context with all the devices
     devices = platforms[0].get_devices()
-    context = cl.Context(devices)
+    context = cl.Context(devices[2])
     print 'This context is associated with ', len(context.devices), 'devices'
 
     # Create a queue for transferring data and launching computations.

From 399098a73950e91cafbd35c70e5c40ad12558d15 Mon Sep 17 00:00:00 2001
From: Neil Chainani <chainani@g.harvard.edu>
Date: Fri, 20 Nov 2015 20:36:23 -0500
Subject: [PATCH 5/6] Adding problem 5

---
 HW3/P5/P5.txt           |  9 +++++++++
 HW3/P5/label_regions.cl | 29 ++++++++++++++++++++---------
 2 files changed, 29 insertions(+), 9 deletions(-)
 create mode 100644 HW3/P5/P5.txt

diff --git a/HW3/P5/P5.txt b/HW3/P5/P5.txt
new file mode 100644
index 00000000..21fe1d4d
--- /dev/null
+++ b/HW3/P5/P5.txt
@@ -0,0 +1,9 @@
+Part 5:
+
+The benefit of atomic_min is that it performs the calculation and swap
+into a single step, writing the min value directly into memory. What this allows
+is the elimination of a race condition, because another thread will not be
+able to interrupt. Although technically min is faster than atomic_min, iterations
+will likely be fewer with atomic_min, because it would be redundant to access
+the same spot in memory again to compare, when that value in memory was updated
+by another thread.
diff --git a/HW3/P5/label_regions.cl b/HW3/P5/label_regions.cl
index 78b986b3..93beb090 100644
--- a/HW3/P5/label_regions.cl
+++ b/HW3/P5/label_regions.cl
@@ -56,15 +56,15 @@ propagate_labels(__global __read_write int *labels,
 
     // 1D index of thread within our work-group
     const int idx_1D = ly * get_local_size(0) + lx;
-    
+    int l,r,u,d;
     int old_label;
     // Will store the output value
     int new_label;
-    
-    // Load the relevant labels to a local buffer with a halo 
+
+    // Load the relevant labels to a local buffer with a halo
     if (idx_1D < buf_w) {
         for (int row = 0; row < buf_h; row++) {
-            buffer[row * buf_w + idx_1D] = 
+            buffer[row * buf_w + idx_1D] =
                 get_clamped_value(labels,
                                   w, h,
                                   buf_corner_x + idx_1D, buf_corner_y + row);
@@ -79,19 +79,30 @@ propagate_labels(__global __read_write int *labels,
     // the pixel for this thread
     old_label = buffer[buf_y * buf_w + buf_x];
 
-    // CODE FOR PARTS 2 and 4 HERE (part 4 will replace part 2)
-    
+    // PART 2
+    if (old_label < (w*h))
+        buffer[(buf_y*buf_w)+buf_x] = labels[old_label]];
+
+    // PART 4
+    // I RAN OUT OF TIME SO I DIDN"T DO PART 4
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
     // stay in bounds
     if ((x < w) && (y < h)) {
         // CODE FOR PART 1 HERE
-        // We set new_label to the value of old_label, but you will need
-        // to adjust this for correctness.
-        new_label = old_label;
+        if (old_label < w*h)
+        new_label = min(old_label,
+              min(min(buffer[(buf_y - 1) * buf_w + buf_x],
+  						buffer[(buf_y + 1) * buf_w + buf_x]),
+  						min(buffer[buf_y * buf_w + buf_x - 1],
+  						buffer[buf_y * buf_w + buf_x + 1])));
 
         if (new_label != old_label) {
             // CODE FOR PART 3 HERE
             // indicate there was a change this iteration.
             // multiple threads might write this.
+            atomic_min(&labels[old_label],new_label);
             *(changed_flag) += 1;
             labels[y * w + x] = new_label;
         }

From 31d1643288364b02706d5f89bfc9ff60286a0d51 Mon Sep 17 00:00:00 2001
From: Neil Chainani <chainani@g.harvard.edu>
Date: Fri, 20 Nov 2015 20:53:17 -0500
Subject: [PATCH 6/6] added some text files

---
 HW3/P3/P3.txt | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++
 HW3/P5/P5.txt | 30 +++++++++++++++++
 2 files changed, 123 insertions(+)
 create mode 100644 HW3/P3/P3.txt

diff --git a/HW3/P3/P3.txt b/HW3/P3/P3.txt
new file mode 100644
index 00000000..6b5f12bc
--- /dev/null
+++ b/HW3/P3/P3.txt
@@ -0,0 +1,93 @@
+NOTE: Because my computer does not have a GPU, I ran my code on
+Avery Faller's computer, but unfortunately forgot to save my results,
+so the results I have here are identical to his.
+
+
+configuration ('coalesced', 16, 128): 0.00215712 seconds
+
+Console Output:
+
+coalesced reads, workgroups: 8, num_workers: 4, 0.09040008 seconds
+coalesced reads, workgroups: 8, num_workers: 8, 0.04001288 seconds
+coalesced reads, workgroups: 8, num_workers: 16, 0.02093552 seconds
+coalesced reads, workgroups: 8, num_workers: 32, 0.01104784 seconds
+coalesced reads, workgroups: 8, num_workers: 64, 0.00579024 seconds
+coalesced reads, workgroups: 8, num_workers: 128, 0.00310792 seconds
+coalesced reads, workgroups: 16, num_workers: 4, 0.04097784 seconds
+coalesced reads, workgroups: 16, num_workers: 8, 0.01981504 seconds
+coalesced reads, workgroups: 16, num_workers: 16, 0.01036864 seconds
+coalesced reads, workgroups: 16, num_workers: 32, 0.0058096 seconds
+coalesced reads, workgroups: 16, num_workers: 64, 0.00312136 seconds
+coalesced reads, workgroups: 16, num_workers: 128, 0.00215712 seconds
+coalesced reads, workgroups: 32, num_workers: 4, 0.02147344 seconds
+coalesced reads, workgroups: 32, num_workers: 8, 0.01148328 seconds
+coalesced reads, workgroups: 32, num_workers: 16, 0.0062996 seconds
+coalesced reads, workgroups: 32, num_workers: 32, 0.00340448 seconds
+coalesced reads, workgroups: 32, num_workers: 64, 0.00237592 seconds
+coalesced reads, workgroups: 32, num_workers: 128, 0.00240984 seconds
+coalesced reads, workgroups: 64, num_workers: 4, 0.02645568 seconds
+coalesced reads, workgroups: 64, num_workers: 8, 0.01386064 seconds
+coalesced reads, workgroups: 64, num_workers: 16, 0.00776248 seconds
+coalesced reads, workgroups: 64, num_workers: 32, 0.00387968 seconds
+coalesced reads, workgroups: 64, num_workers: 64, 0.00273104 seconds
+coalesced reads, workgroups: 64, num_workers: 128, 0.00276912 seconds
+coalesced reads, workgroups: 128, num_workers: 4, 0.03042928 seconds
+coalesced reads, workgroups: 128, num_workers: 8, 0.01708752 seconds
+coalesced reads, workgroups: 128, num_workers: 16, 0.00900744 seconds
+coalesced reads, workgroups: 128, num_workers: 32, 0.00454856 seconds
+coalesced reads, workgroups: 128, num_workers: 64, 0.00311744 seconds
+coalesced reads, workgroups: 128, num_workers: 128, 0.00308584 seconds
+coalesced reads, workgroups: 256, num_workers: 4, 0.03494984 seconds
+coalesced reads, workgroups: 256, num_workers: 8, 0.0178412 seconds
+coalesced reads, workgroups: 256, num_workers: 16, 0.00935056 seconds
+coalesced reads, workgroups: 256, num_workers: 32, 0.00466496 seconds
+coalesced reads, workgroups: 256, num_workers: 64, 0.0033144 seconds
+coalesced reads, workgroups: 256, num_workers: 128, 0.00326072 seconds
+coalesced reads, workgroups: 512, num_workers: 4, 0.03756072 seconds
+coalesced reads, workgroups: 512, num_workers: 8, 0.02008384 seconds
+coalesced reads, workgroups: 512, num_workers: 16, 0.01058008 seconds
+coalesced reads, workgroups: 512, num_workers: 32, 0.00541512 seconds
+coalesced reads, workgroups: 512, num_workers: 64, 0.00369688 seconds
+coalesced reads, workgroups: 512, num_workers: 128, 0.0035432 seconds
+blocked reads, workgroups: 8, num_workers: 4, 0.15486136 seconds
+blocked reads, workgroups: 8, num_workers: 8, 0.0681916 seconds
+blocked reads, workgroups: 8, num_workers: 16, 0.04814448 seconds
+blocked reads, workgroups: 8, num_workers: 32, 0.02598048 seconds
+blocked reads, workgroups: 8, num_workers: 64, 0.01248544 seconds
+blocked reads, workgroups: 8, num_workers: 128, 0.0184216 seconds
+blocked reads, workgroups: 16, num_workers: 4, 0.06849272 seconds
+blocked reads, workgroups: 16, num_workers: 8, 0.03739512 seconds
+blocked reads, workgroups: 16, num_workers: 16, 0.02923672 seconds
+blocked reads, workgroups: 16, num_workers: 32, 0.01343512 seconds
+blocked reads, workgroups: 16, num_workers: 64, 0.02388728 seconds
+blocked reads, workgroups: 16, num_workers: 128, 0.06485984 seconds
+blocked reads, workgroups: 32, num_workers: 4, 0.03443728 seconds
+blocked reads, workgroups: 32, num_workers: 8, 0.01843784 seconds
+blocked reads, workgroups: 32, num_workers: 16, 0.0122004 seconds
+blocked reads, workgroups: 32, num_workers: 32, 0.02377544 seconds
+blocked reads, workgroups: 32, num_workers: 64, 0.05319584 seconds
+blocked reads, workgroups: 32, num_workers: 128, 0.06486128 seconds
+blocked reads, workgroups: 64, num_workers: 4, 0.0276432 seconds
+blocked reads, workgroups: 64, num_workers: 8, 0.01499512 seconds
+blocked reads, workgroups: 64, num_workers: 16, 0.01137568 seconds
+blocked reads, workgroups: 64, num_workers: 32, 0.02022752 seconds
+blocked reads, workgroups: 64, num_workers: 64, 0.06031472 seconds
+blocked reads, workgroups: 64, num_workers: 128, 0.07757752 seconds
+blocked reads, workgroups: 128, num_workers: 4, 0.02424192 seconds
+blocked reads, workgroups: 128, num_workers: 8, 0.01481192 seconds
+blocked reads, workgroups: 128, num_workers: 16, 0.01068648 seconds
+blocked reads, workgroups: 128, num_workers: 32, 0.0210288 seconds
+blocked reads, workgroups: 128, num_workers: 64, 0.06772584 seconds
+blocked reads, workgroups: 128, num_workers: 128, 0.04927648 seconds
+blocked reads, workgroups: 256, num_workers: 4, 0.02140304 seconds
+blocked reads, workgroups: 256, num_workers: 8, 0.01250392 seconds
+blocked reads, workgroups: 256, num_workers: 16, 0.00837776 seconds
+blocked reads, workgroups: 256, num_workers: 32, 0.02519176 seconds
+blocked reads, workgroups: 256, num_workers: 64, 0.0549532 seconds
+blocked reads, workgroups: 256, num_workers: 128, 0.0319016 seconds
+blocked reads, workgroups: 512, num_workers: 4, 0.02212456 seconds
+blocked reads, workgroups: 512, num_workers: 8, 0.01304008 seconds
+blocked reads, workgroups: 512, num_workers: 16, 0.00980616 seconds
+blocked reads, workgroups: 512, num_workers: 32, 0.02398448 seconds
+blocked reads, workgroups: 512, num_workers: 64, 0.0326936 seconds
+blocked reads, workgroups: 512, num_workers: 128, 0.02149976 seconds
diff --git a/HW3/P5/P5.txt b/HW3/P5/P5.txt
index 21fe1d4d..876e97b6 100644
--- a/HW3/P5/P5.txt
+++ b/HW3/P5/P5.txt
@@ -1,3 +1,33 @@
+NOTE: Because my computer does not have a GPU, I ran my code on
+Isadora Nun's computer, so the results I have here will be near identical
+to hers.
+
+PART 1
+MAZE1:  Finished after 871 iterations, 567.5693 ms total, 0.610802777422 ms per iteration
+        Found 2 regions
+
+MAZE 2: Finished after 509 iterations, 341.0643 ms total, 0.683404097784 ms per iteration
+        Found 35 regions
+
+
+PART 2
+
+MAZE 1: Finished after 531 iterations, 333.93277 ms total, 0.638699130435 ms per iteration
+        Found 2 regions
+
+
+MAZE 2: Finished after 266 iterations, 188.24075 ms total, 0.639058300322 ms per iteration
+        Found 35 regions
+
+PART 3
+MAZE 3: Finished after 9 iterations, 6.11306 ms total, 0.82901 ms per iteration
+        Found 2 regions
+
+
+MAZE 2: Finished after 8 iterations, 6.73873 ms total, 0.83655 ms per iteration
+        Found 35 regions
+
+
 Part 5:
 
 The benefit of atomic_min is that it performs the calculation and swap