diff --git a/.gitignore b/.gitignore index f891e40e..fc3baf17 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.idea code/*.pyc code/*_plots code/tmp* @@ -13,3 +14,5 @@ html *.pyc *~ *.swp +# This directory may be created by scripts from segmentation tutorials. +save_models diff --git a/.jenkins/jenkins_buildbot_dlt.sh b/.jenkins/jenkins_buildbot_dlt.sh new file mode 100755 index 00000000..fadd9f9d --- /dev/null +++ b/.jenkins/jenkins_buildbot_dlt.sh @@ -0,0 +1,88 @@ +#!/bin/bash + +# CUDA +export PATH=/usr/local/cuda/bin:$PATH +export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH +export LIBRARY_PATH=/usr/local/cuda/lib64:$LIBRARY_PATH + +# MKL +export MKL_THREADING_LAYER=GNU + +# Set OpenMP threads for stability of speedtests +export OMP_NUM_THREADS=1 + +BUILDBOT_DIR=$WORKSPACE/nightly_build + +mkdir -p ${BUILDBOT_DIR} + +date +COMPILEDIR=$HOME/.theano/lisa_theano_buildbot_deeplearning +NOSETESTS=${BUILDBOT_DIR}/Theano/bin/theano-nose +XUNIT="--with-xunit --xunit-file=" +# name test suites +SUITE="--xunit-testsuite-name=" + +FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR} +export PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/Pylearn:$PYTHONPATH + +# Install libgpuarray and pygpu +cd ${BUILDBOT_DIR} + +# Make fresh clone (with no history since we don't need it) +rm -rf libgpuarray +git clone "https://github.com/Theano/libgpuarray.git" + +(cd libgpuarray && echo "libgpuarray commit" && git rev-parse HEAD) + +# Clean up previous installs (to make sure no old files are left) +rm -rf local +mkdir local + +# Build libgpuarray and run C tests +mkdir libgpuarray/build +(cd libgpuarray/build && cmake .. -DCMAKE_BUILD_TYPE=${GPUARRAY_CONFIG} -DCMAKE_INSTALL_PREFIX=${BUILDBOT_DIR}/local && make) + +# Finally install +(cd libgpuarray/build && make install) +export LD_LIBRARY_PATH=${BUILDBOT_DIR}/local/lib:${LD_LIBRARY_PATH} +export LIBRARY_PATH=${BUILDBOT_DIR}/local/lib:${LIBRARY_PATH} +export CPATH=${BUILDBOT_DIR}/local/include:${CPATH} + +# Build the pygpu modules +(cd libgpuarray && python setup.py build_ext --inplace -I${BUILDBOT_DIR}/local/include -L${BUILDBOT_DIR}/local/lib) + +mkdir ${BUILDBOT_DIR}/local/lib/python +export PYTHONPATH=${PYTHONPATH}:${BUILDBOT_DIR}/local/lib/python +# Then install +(cd libgpuarray && python setup.py install --home=${BUILDBOT_DIR}/local) + +# Install Theano +cd ${BUILDBOT_DIR} +if [ ! 
-d ${BUILDBOT_DIR}/Theano ]; then + git clone git://github.com/Theano/Theano.git +fi +# update repo +cd ${BUILDBOT_DIR}/Theano; git pull + +cd ${WORKSPACE}/data +./download.sh + +cd ${BUILDBOT_DIR}/Theano +echo "git version for Theano:" `git rev-parse HEAD` +cd ${WORKSPACE}/code +echo "git version:" `git rev-parse HEAD` + +echo "==== Executing nosetests speed with mode=FAST_RUN" +NAME=dlt_speed +FILE=${BUILDBOT_DIR}/${NAME}_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} ${SUITE}${NAME} test.py:speed + +echo "==== Executing nosetests with mode=FAST_RUN,floatX=float32" +NAME=dlt_float32 +FILE=${BUILDBOT_DIR}/${NAME}_tests.xml +THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE} ${SUITE}${NAME} + +echo "==== Executing nosetests with mode=FAST_RUN,floatX=float32,device=cuda" +NAME=dlt_float32_cuda +FILE=${BUILDBOT_DIR}/${NAME}_tests.xml +PYTHONPATH=${BUILDBOT_DIR}/Theano:${BUILDBOT_DIR}/DeepLearningTutorials/code:${PYTHONPATH} THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32,device=cuda nosetests test.py ${XUNIT}${FILE} ${SUITE}${NAME} diff --git a/.travis.yml b/.travis.yml index ae3801c2..ad729ced 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,8 +4,8 @@ sudo: false language: python #python: -# - "2.7" -# - "3.2" +# - "2.6" +# - "3.3" # command to install dependencies before_install: - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh @@ -23,7 +23,7 @@ env: - PART="test.py:test_logistic_sgd test.py:test_logistic_cg test.py:test_mlp test.py:test_convolutional_mlp test.py:test_dA" - PART="test.py:test_SdA test.py:test_lstm" - PART="test.py:test_dbn" - - PART="test.py:test_rbm test.py:test_rnnrbm" + - PART="test.py:test_rbm test.py:test_rnnrbm test.py:test_rnnslu" - PART="-e test.py" #i7-2600K CPU @ 3.40GHz @@ -78,6 +78,7 @@ script: - pwd - ls - export THEANO_FLAGS=warn.ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise + - export MKL_THREADING_LAYER=GNU - python --version - nosetests -v $PART diff --git a/README.rst b/README.rst index 85de179c..81252fc0 100644 --- a/README.rst +++ b/README.rst @@ -37,4 +37,4 @@ Subdirectories: Build instructions ------------------ -To build the html version of the tutorials, install sphinx and run doc/Makefile +To build the html version of the tutorials, run python doc/scripts/docgen.py diff --git a/code/DBN.py b/code/DBN.py index b54ac5bc..e1bb66df 100644 --- a/code/DBN.py +++ b/code/DBN.py @@ -1,5 +1,6 @@ """ """ +from __future__ import print_function, division import os import sys import timeit @@ -61,9 +62,12 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784, theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 30)) # allocate symbolic variables for the data - self.x = T.matrix('x') # the data is presented as rasterized images - self.y = T.ivector('y') # the labels are presented as 1D vector - # of [int] labels + + # the data is presented as rasterized images + self.x = T.matrix('x') + + # the labels are presented as 1D vector of [int] labels + self.y = T.ivector('y') # end-snippet-1 # The DBN is an MLP, for which all weights of intermediate # layers are shared with a different RBM. We will first @@ -75,7 +79,7 @@ def __init__(self, numpy_rng, theano_rng=None, n_ins=784, # training the DBN by doing stochastic gradient descent on the # MLP. 
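A few hunks below, the deprecated `theano.Param` wrapper is replaced by `theano.In`, which plays the same role: it attaches a default value to a symbolic input so the compiled function can be called with or without that argument. A minimal standalone sketch of the mechanism (assumes only Theano itself; the variables are invented for illustration):

import theano
import theano.tensor as T

x = T.scalar('x')
lr = T.scalar('lr')
f = theano.function([x, theano.In(lr, value=0.1)], x * lr)

print(f(2.0))       # lr falls back to its default 0.1 -> 0.2
print(f(2.0, 0.5))  # default overridden               -> 1.0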
- for i in xrange(self.n_layers): + for i in range(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden @@ -156,8 +160,6 @@ def pretraining_functions(self, train_set_x, batch_size, k): index = T.lscalar('index') # index to a minibatch learning_rate = T.scalar('lr') # learning rate to use - # number of batches - n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # begining of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` @@ -174,7 +176,7 @@ def pretraining_functions(self, train_set_x, batch_size, k): # compile the theano function fn = theano.function( - inputs=[index, theano.Param(learning_rate, default=0.1)], + inputs=[index, theano.In(learning_rate, value=0.1)], outputs=cost, updates=updates, givens={ @@ -211,9 +213,9 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate): # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] - n_valid_batches /= batch_size + n_valid_batches //= batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] - n_test_batches /= batch_size + n_test_batches //= batch_size index = T.lscalar('index') # index to a [mini]batch @@ -267,11 +269,11 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate): # Create a function that scans the entire validation set def valid_score(): - return [valid_score_i(i) for i in xrange(n_valid_batches)] + return [valid_score_i(i) for i in range(n_valid_batches)] # Create a function that scans the entire test set def test_score(): - return [test_score_i(i) for i in xrange(n_test_batches)] + return [test_score_i(i) for i in range(n_test_batches)] return train_fn, valid_score, test_score @@ -307,11 +309,11 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size # numpy random generator numpy_rng = numpy.random.RandomState(123) - print '... building the model' + print('... building the model') # construct the Deep Belief Network dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000], @@ -321,54 +323,56 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, ######################### # PRETRAINING THE MODEL # ######################### - print '... getting the pretraining functions' + print('... getting the pretraining functions') pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, k=k) - print '... pre-training the model' + print('... 
pre-training the model') start_time = timeit.default_timer() - ## Pre-train layer-wise - for i in xrange(dbn.n_layers): + # Pre-train layer-wise + for i in range(dbn.n_layers): # go through pretraining epochs - for epoch in xrange(pretraining_epochs): + for epoch in range(pretraining_epochs): # go through the training set c = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr)) - print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), - print numpy.mean(c) + print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ') + print(numpy.mean(c, dtype='float64')) end_time = timeit.default_timer() # end-snippet-2 - print >> sys.stderr, ('The pretraining code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) / 60.)) + print('The pretraining code for file ' + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr) ######################## # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model - print '... getting the finetuning functions' + print('... getting the finetuning functions') train_fn, validate_model, test_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr ) - print '... finetuning the model' + print('... finetuning the model') # early-stopping parameters - patience = 4 * n_train_batches # look as this many examples regardless - patience_increase = 2. # wait this much longer when a new best is - # found - improvement_threshold = 0.995 # a relative improvement of this much is - # considered significant + + # look as this many examples regardless + patience = 4 * n_train_batches + + # wait this much longer when a new best is found + patience_increase = 2. + + # a relative improvement of this much is considered significant + improvement_threshold = 0.995 + + # go through this many minibatches before checking the network on + # the validation set; in this case we check every epoch validation_frequency = min(n_train_batches, patience / 2) - # go through this many - # minibatches before checking the network - # on the validation set; in this case we - # check every epoch best_validation_loss = numpy.inf test_score = 0. @@ -379,33 +383,29 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): - minibatch_avg_cost = train_fn(minibatch_index) + train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() - this_validation_loss = numpy.mean(validation_losses) - print( - 'epoch %i, minibatch %i/%i, validation error %f %%' - % ( - epoch, - minibatch_index + 1, - n_train_batches, - this_validation_loss * 100. + this_validation_loss = numpy.mean(validation_losses, dtype='float64') + print('epoch %i, minibatch %i/%i, validation error %f %%' % ( + epoch, + minibatch_index + 1, + n_train_batches, + this_validation_loss * 100. 
) ) # if we got the best validation score until now if this_validation_loss < best_validation_loss: - #improve patience if loss improvement is good enough - if ( - this_validation_loss < best_validation_loss * - improvement_threshold - ): + # improve patience if loss improvement is good enough + if (this_validation_loss < best_validation_loss * + improvement_threshold): patience = max(patience, iter * patience_increase) # save best validation score and iteration number @@ -414,28 +414,23 @@ def test_DBN(finetune_lr=0.1, pretraining_epochs=100, # test it on the test set test_losses = test_model() - test_score = numpy.mean(test_losses) + test_score = numpy.mean(test_losses, dtype='float64') print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, - test_score * 100.)) + test_score * 100.)) if patience <= iter: done_looping = True break end_time = timeit.default_timer() - print( - ( - 'Optimization complete with best validation score of %f %%, ' - 'obtained at iteration %i, ' - 'with test performance %f %%' - ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.) - ) - print >> sys.stderr, ('The fine tuning code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) - / 60.)) + print(('Optimization complete with best validation score of %f %%, ' + 'obtained at iteration %i, ' + 'with test performance %f %%' + ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) + print('The fine tuning code for file ' + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.), file=sys.stderr) if __name__ == '__main__': diff --git a/code/SdA.py b/code/SdA.py index 82660e99..8da74797 100644 --- a/code/SdA.py +++ b/code/SdA.py @@ -29,6 +29,9 @@ Systems 19, 2007 """ + +from __future__ import print_function + import os import sys import timeit @@ -37,7 +40,7 @@ import theano import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from logistic_sgd import LogisticRegression, load_data from mlp import HiddenLayer @@ -78,8 +81,8 @@ def __init__( :type n_ins: int :param n_ins: dimension of the input to the sdA - :type n_layers_sizes: list of ints - :param n_layers_sizes: intermediate layers size, must contain + :type hidden_layers_sizes: list of ints + :param hidden_layers_sizes: intermediate layers size, must contain at least one value :type n_outs: int @@ -116,7 +119,7 @@ def __init__( # stochastich gradient descent on the MLP # start-snippet-2 - for i in xrange(self.n_layers): + for i in range(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden units of @@ -214,8 +217,8 @@ def pretraining_functions(self, train_set_x, batch_size): fn = theano.function( inputs=[ index, - theano.Param(corruption_level, default=0.2), - theano.Param(learning_rate, default=0.1) + theano.In(corruption_level, value=0.2), + theano.In(learning_rate, value=0.1) ], outputs=cost, updates=updates, @@ -254,9 +257,9 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate): # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] - n_valid_batches /= batch_size + n_valid_batches //= batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] - n_test_batches /= batch_size + n_test_batches //= batch_size index = T.lscalar('index') # index to a 
[mini]batch
@@ -314,11 +317,11 @@ def build_finetune_functions(self, datasets, batch_size, learning_rate):

         # Create a function that scans the entire validation set
         def valid_score():
-            return [valid_score_i(i) for i in xrange(n_valid_batches)]
+            return [valid_score_i(i) for i in range(n_valid_batches)]

         # Create a function that scans the entire test set
         def test_score():
-            return [test_score_i(i) for i in xrange(n_test_batches)]
+            return [test_score_i(i) for i in range(n_test_batches)]

         return train_fn, valid_score, test_score

@@ -357,12 +360,12 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,

     # compute number of minibatches for training, validation and testing
     n_train_batches = train_set_x.get_value(borrow=True).shape[0]
-    n_train_batches /= batch_size
+    n_train_batches //= batch_size

     # numpy random generator
     # start-snippet-3
     numpy_rng = numpy.random.RandomState(89677)
-    print '... building the model'
+    print('... building the model')
     # construct the stacked denoising autoencoder class
     sda = SdA(
         numpy_rng=numpy_rng,
@@ -374,52 +377,51 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
     #########################
     # PRETRAINING THE MODEL #
     #########################
-    print '... getting the pretraining functions'
+    print('... getting the pretraining functions')
     pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                 batch_size=batch_size)

-    print '... pre-training the model'
+    print('... pre-training the model')
     start_time = timeit.default_timer()
     ## Pre-train layer-wise
     corruption_levels = [.1, .2, .3]
-    for i in xrange(sda.n_layers):
+    for i in range(sda.n_layers):
         # go through pretraining epochs
-        for epoch in xrange(pretraining_epochs):
+        for epoch in range(pretraining_epochs):
             # go through the training set
             c = []
-            for batch_index in xrange(n_train_batches):
+            for batch_index in range(n_train_batches):
                 c.append(pretraining_fns[i](index=batch_index,
                          corruption=corruption_levels[i],
                          lr=pretrain_lr))
-            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
-            print numpy.mean(c)
+            print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(c, dtype='float64')))

     end_time = timeit.default_timer()

-    print >> sys.stderr, ('The pretraining code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
+    print(('The pretraining code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
     # end-snippet-4
     ########################
     # FINETUNING THE MODEL #
     ########################

     # get the training, validation and testing function for the model
-    print '... getting the finetuning functions'
+    print('... getting the finetuning functions')
     train_fn, validate_model, test_model = sda.build_finetune_functions(
         datasets=datasets,
         batch_size=batch_size,
         learning_rate=finetune_lr
     )

-    print '... finetunning the model'
+    print('... finetuning the model')
     # early-stopping parameters
     patience = 10 * n_train_batches  # look as this many examples regardless
     patience_increase = 2.  # wait this much longer when a new best is
                             # found
     improvement_threshold = 0.995  # a relative improvement of this much is
                                    # considered significant
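How these three early-stopping knobs interact is easiest to see in isolation. The toy loop below (plain Python, with invented loss values, not part of the tutorial code) mimics the rule used in the training loop that follows: patience is extended only while improvements are significant, and training stops once the iteration count catches up with it.

patience = 10
patience_increase = 2.
improvement_threshold = 0.995
best = float('inf')
losses = [1.0, 0.8, 0.64, 0.635] + [0.634] * 20  # a plateau after a quick drop
for it, loss in enumerate(losses):
    if loss < best * improvement_threshold:
        # significant improvement: keep looking at least this much longer
        patience = max(patience, it * patience_increase)
    best = min(best, loss)
    if patience <= it:
        break  # patience exhausted at iteration `it`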
-    validation_frequency = min(n_train_batches, patience / 2)
+    validation_frequency = min(n_train_batches, patience // 2)
                                   # go through this many
                                   # minibatches before checking the network
                                   # on the validation set; in this case we
@@ -434,13 +436,13 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,

     while (epoch < training_epochs) and (not done_looping):
         epoch = epoch + 1
-        for minibatch_index in xrange(n_train_batches):
+        for minibatch_index in range(n_train_batches):
             minibatch_avg_cost = train_fn(minibatch_index)
             iter = (epoch - 1) * n_train_batches + minibatch_index
             if (iter + 1) % validation_frequency == 0:
                 validation_losses = validate_model()
-                this_validation_loss = numpy.mean(validation_losses)
+                this_validation_loss = numpy.mean(validation_losses, dtype='float64')
                 print('epoch %i, minibatch %i/%i, validation error %f %%' %
                       (epoch, minibatch_index + 1, n_train_batches,
                        this_validation_loss * 100.))
@@ -461,7 +463,7 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,

                     # test it on the test set
                     test_losses = test_model()
-                    test_score = numpy.mean(test_losses)
+                    test_score = numpy.mean(test_losses, dtype='float64')
                     print(('     epoch %i, minibatch %i/%i, test error of '
                            'best model %f %%') %
                           (epoch, minibatch_index + 1, n_train_batches,
@@ -480,9 +482,9 @@ def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
         ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
     )
-    print >> sys.stderr, ('The training code for file ' +
-                          os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
+    print(('The training code for file ' +
+           os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)


 if __name__ == '__main__':
diff --git a/code/cA.py b/code/cA.py
index e26a1ddf..8dc5d8b6 100644
--- a/code/cA.py
+++ b/code/cA.py
@@ -28,6 +28,9 @@
  Systems 19, 2007

 """
+
+from __future__ import print_function
+
 import os
 import sys
 import timeit
@@ -205,7 +208,7 @@ def get_cost_updates(self, contraction_level, learning_rate):
                       axis=1)

         # Compute the jacobian and average over the number of samples/minibatch
         self.L_jacob = T.sum(J ** 2) / self.n_batchsize

         # note : L is now a vector, where each element is the
         #        cross-entropy cost of the reconstruction of the
@@ -246,7 +249,7 @@ def test_cA(learning_rate=0.01, training_epochs=20,
     train_set_x, train_set_y = datasets[0]

     # compute number of minibatches for training, validation and testing
-    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

     # allocate symbolic variables for the data
     index = T.lscalar()    # index to a [mini]batch
@@ -283,22 +286,22 @@ def test_cA(learning_rate=0.01, training_epochs=20,
     ############

     # go through training epochs
-    for epoch in xrange(training_epochs):
+    for epoch in range(training_epochs):
         # go through training set
         c = []
-        for batch_index in xrange(n_train_batches):
+        for batch_index in range(n_train_batches):
             c.append(train_ca(batch_index))

         c_array = numpy.vstack(c)
-        print 'Training epoch %d, reconstruction cost ' % epoch, numpy.mean(
-            c_array[0]), ' jacobian norm ', numpy.mean(numpy.sqrt(c_array[1]))
+        print('Training epoch %d, reconstruction cost ' % epoch, numpy.mean(
+            c_array[0]), ' jacobian norm ', numpy.mean(numpy.sqrt(c_array[1])))
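For reference, the `L_jacob` term defined in the `get_cost_updates` hunk above is the squared Frobenius norm of the Jacobian of the hidden code with respect to the input, averaged over the minibatch (hence the true division by `n_batchsize`). For a sigmoid encoder it factorizes neatly; a numpy-only sketch with invented shapes:

import numpy as np

rng = np.random.RandomState(0)
W = rng.randn(6, 4) * 0.1            # n_visible x n_hidden
x = rng.rand(5, 6)                   # a minibatch of 5 samples
h = 1. / (1. + np.exp(-x.dot(W)))    # sigmoid hidden code, shape (5, 4)
# dh_j/dx_i = h_j * (1 - h_j) * W_ij, so summing J**2 over i and j factorizes:
frob_sq = (((h * (1 - h)) ** 2) * (W ** 2).sum(axis=0)).sum()
L_jacob = frob_sq / x.shape[0]       # average over the batch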
    end_time = timeit.default_timer()

     training_time = (end_time - start_time)

-    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
-                          ' ran for %.2fm' % ((training_time) / 60.))
+    print(('The code for file ' + os.path.split(__file__)[1] +
+           ' ran for %.2fm' % ((training_time) / 60.)), file=sys.stderr)
     image = Image.fromarray(tile_raster_images(
         X=ca.W.get_value(borrow=True).T,
         img_shape=(28, 28), tile_shape=(10, 10),
diff --git a/code/cnn_1D_segm/data_loader/__init__.py b/code/cnn_1D_segm/data_loader/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/code/cnn_1D_segm/data_loader/cortical_layers.py b/code/cnn_1D_segm/data_loader/cortical_layers.py
new file mode 100644
index 00000000..a0b3a2f9
--- /dev/null
+++ b/code/cnn_1D_segm/data_loader/cortical_layers.py
@@ -0,0 +1,185 @@
+from __future__ import print_function
+
+import os
+import time
+
+import numpy as np
+from PIL import Image
+import re
+import warnings
+
+from dataset_loaders.parallel_loader import ThreadedDataset
+from parallel_loader_1D import ThreadedDataset_1D
+
+floatX = 'float32'
+
+class Cortical6LayersDataset(ThreadedDataset_1D):
+    '''The Cortical Layers Dataset.
+    Parameters
+    ----------
+    which_set: string
+        A string in ['train', 'val', 'valid', 'test'], corresponding to
+        the set to be returned.
+    split: float
+        A float indicating the dataset split between training and validation.
+        For example, if split=0.85, 85% of the images will be used for
+        training, whereas 15% will be used for validation.
+    '''
+    name = 'cortical_layers'
+
+    non_void_nclasses = 7
+    GTclasses = [0, 1, 2, 3, 4, 5, 6]
+    _cmap = {
+        0: (128, 128, 128),    # padding
+        1: (128, 0, 0),        # layer 1
+        2: (128, 64, 0),       # layer 2
+        3: (128, 64, 128),     # layer 3
+        4: (0, 0, 128),        # layer 4
+        5: (0, 0, 64),         # layer 5
+        6: (64, 64, 128),      # layer 6
+    }
+    _mask_labels = {0: 'padding', 1: 'layers1', 2: 'layer2', 3: 'layer3',
+                    4: 'layer4', 5: 'layer5', 6: 'layer6'}
+    _void_labels = []
+
+    _filenames = None
+
+    @property
+    def filenames(self):
+
+        if self._filenames is None:
+            # Load filenames
+            nfiles = sum(1 for line in open(self.mask_path))
+            filenames = range(nfiles)
+            np.random.seed(1609)
+            np.random.shuffle(filenames)
+
+            if self.which_set == 'train':
+                filenames = filenames[:int(nfiles*self.split)]
+            elif self.which_set == 'val':
+                filenames = filenames[-(nfiles - int(nfiles*self.split)):]
+
+            # Save the filenames list
+            self._filenames = filenames
+
+        return self._filenames
+
+    def __init__(self,
+                 which_set="train",
+                 split=0.85,
+                 shuffle_at_each_epoch=True,
+                 smooth_or_raw='both',
+                 *args, **kwargs):
+
+        self.task = 'segmentation'
+
+        self.n_layers = 6
+        n_layers_path = str(self.n_layers) + "layers_segmentation"
+
+        self.which_set = "val" if which_set == "valid" else which_set
+        if self.which_set not in ("train", "val", 'test'):
+            raise ValueError("Unknown argument to which_set %s" %
+                             self.which_set)
+
+        self.split = split
+
+        self.image_path_raw = os.path.join(self.path, n_layers_path, "training_raw.txt")
+        self.image_path_smooth = os.path.join(self.path, n_layers_path, "training_geo.txt")
+        self.mask_path = os.path.join(self.path, n_layers_path, "training_cls.txt")
+        self.regions_path = os.path.join(self.path, n_layers_path, "training_regions.txt")
+
+        self.smooth_raw_both = smooth_or_raw
+
+        if smooth_or_raw == 'both':
+            self.data_shape = (200, 2)
+        else:
+            self.data_shape = (200, 1)
+
+        super(Cortical6LayersDataset, self).__init__(*args, **kwargs)
+
+    def get_names(self):
+        """Return a dict of names, per prefix/subset."""
+
+        return {'default': self.filenames}
+
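The `filenames` property above makes the train/validation split reproducible: line indices are shuffled once with a fixed seed, then cut at `split`, so the two subsets are disjoint across runs. A small standalone sketch of the same idea (with an invented `nfiles`):

import numpy as np

nfiles, split = 20, 0.85
idx = list(range(nfiles))
np.random.seed(1609)                        # same fixed seed as the loader
np.random.shuffle(idx)
train_idx = idx[:int(nfiles * split)]
val_idx = idx[-(nfiles - int(nfiles * split)):]
assert not set(train_idx) & set(val_idx)    # the two subsets are disjoint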
+def test_6layers():
+    train_iter = Cortical6LayersDataset(
+        which_set='train',
+        smooth_or_raw='both',
+        batch_size=500,
+        data_augm_kwargs={},
+        return_one_hot=False,
+        return_01c=False,
+        return_list=True,
+        use_threads=False)
+
+    valid_iter = Cortical6LayersDataset(
+        which_set='valid',
+        smooth_or_raw='smooth',
+        batch_size=500,
+        data_augm_kwargs={},
+        return_one_hot=False,
+        return_01c=False,
+        return_list=True,
+        use_threads=False)
+
+    valid_iter2 = Cortical6LayersDataset(
+        which_set='valid',
+        smooth_or_raw='raw',
+        batch_size=500,
+        data_augm_kwargs={},
+        return_one_hot=False,
+        return_01c=False,
+        return_list=True,
+        use_threads=False)
+
+    train_nsamples = train_iter.nsamples
+    train_nbatches = train_iter.nbatches
+    valid_nbatches = valid_iter.nbatches
+    valid_nbatches2 = valid_iter2.nbatches
+
+    # Simulate training
+    max_epochs = 1
+    print("Simulate training for", str(max_epochs), "epochs")
+    start_training = time.time()
+    for epoch in range(max_epochs):
+        print("Epoch #", str(epoch))
+
+        start_epoch = time.time()
+
+        print("Iterate on the training set", train_nbatches, "minibatches")
+        for mb in range(train_nbatches):
+            start_batch = time.time()
+            batch = train_iter.next()
+            if mb % 5 == 0:
+                print("Minibatch train {}: {} sec".format(mb, (time.time() -
+                                                               start_batch)))
+
+        print("Iterate on the validation set", valid_nbatches, "minibatches")
+        for mb in range(valid_nbatches):
+            start_batch = time.time()
+            batch = valid_iter.next()
+            if mb % 5 == 0:
+                print("Minibatch valid {}: {} sec".format(mb, (time.time() -
+                                                               start_batch)))
+
+        print("Iterate on the validation set (second time)", valid_nbatches2, "minibatches")
+        for mb in range(valid_nbatches2):
+            start_batch = time.time()
+            batch = valid_iter2.next()
+            if mb % 5 == 0:
+                print("Minibatch valid {}: {} sec".format(mb, (time.time() -
+                                                               start_batch)))
+
+        print("Epoch time: %s" % str(time.time() - start_epoch))
+    print("Training time: %s" % str(time.time() - start_training))
+
+if __name__ == '__main__':
+    print("Loading the dataset 1 batch at a time")
+    test_6layers()
+    print("Success!")
diff --git a/code/cnn_1D_segm/data_loader/parallel_loader_1D.py b/code/cnn_1D_segm/data_loader/parallel_loader_1D.py
new file mode 100644
index 00000000..272c8d00
--- /dev/null
+++ b/code/cnn_1D_segm/data_loader/parallel_loader_1D.py
@@ -0,0 +1,619 @@
+import ConfigParser
+import os
+from os.path import realpath
+try:
+    import Queue
+except ImportError:
+    import queue as Queue
+import shutil
+import sys
+from threading import Thread
+from time import sleep
+import weakref
+
+import re
+import numpy as np
+from numpy.random import RandomState
+from dataset_loaders.data_augmentation import random_transform
+from dataset_loaders.parallel_loader import ThreadedDataset
+
+import dataset_loaders
+from dataset_loaders.utils_parallel_loader import classproperty, grouper, overlap_grouper
+from dataset_loaders.parallel_loader import threaded_fetch
+
+floatX = 'float32'
+
+class ThreadedDataset_1D(ThreadedDataset):
+    _wait_time = 0.05
+    __version__ = '1'
+    """
+    Threaded dataset.
+    This is an abstract class and should not be used as is. Each
+    specific dataset class should implement its `get_names` and
+    `load_sequence` functions to load the list of filenames to be
+    loaded and define how to load the data from the dataset,
+    respectively.
+    See `example_dataset.py` for an example on how to implement a
+    specific instance of a dataset.
+    Parameters
+    ----------
+    seq_per_subset: int
+        The *maximum* number of sequences per each subset (a.k.a. 
prefix + or video). If 0, all sequences will be used. If greater than 0 + and `shuffle_at_each_epoch` is True, at each epoch a new + selection of sequences per subset will be randomly picked. Default: 0. + seq_length: int + The number of frames per sequence. If 0, 4D arrays will be + returned (not a sequence), else 5D arrays will be returned. + Default: 0. + overlap: int + The number of frames of overlap between the first frame of one + sample and the first frame of the next. Note that a negative + overlap will instead specify the number of frames that are + *skipped* between the last frame of one sample and the first + frame of the next. None is equivalent to seq_length - 1. + Default: None. + batch_size: int + The size of the batch. + queues_size: int + The size of the buffers used in the threaded case. Default: 50. + return_one_hot: bool + If True the labels will be returned in one-hot format, i.e. as + an array of `nclasses` elements all set to 0 except from the id + of the correct class which is set to 1. Default: False. + return_01c: bool + If True the last axis will be the channel axis (01c format), + else the channel axis will be the third to last (c01 format). + Default: False. + return_extended_sequences:bool + If True the first and last sequence of a batch will be extended so that + the first frame is repeated `seq_length/2` times. This is useful + to perform middle frame prediction, i.e., where the current + frame has to be the middle one and the previous and next ones + are used as context. Default:False. + return_middle_frame_only:bool + If True only the middle frame of the ground truth will be returned. + Default:False. + return_0_255: bool + If True the images will be returned in the range [0, 255] with + dtype `uint8`. Otherwise the images will be returned in the + range [0, 1] as dtype `float32`. Default: False. + use_threads: bool + If True threads will be used to fetch the data from the dataset. + Default: False. + nthreads: int + The number of threads to use when `use_threads` is True. Default: 1. + shuffle_at_each_epoch: bool + If True, at the end of each epoch a new set of batches will be + prepared and shuffled. Default: True. + infinite_iterator: bool + If False a `StopIteration` exception will be raised at the end of an + epoch. If True no exception will be raised and the dataset will + behave as an infinite iterator. Default: True. + return_list: bool + If True, each call to `next()` will return a list of two numpy arrays + containing the data and the labels respectively. If False, the + dataset will instead return a dictionary with the following + keys: + * `data`: the augmented/cropped sequence/image + * `labels`: the corresponding potentially cropped labels + * `filenames`: the filenames of the frames/images + * `subset`: the name of the subset the sequence/image belongs to + * `raw_data`: the original unprocessed sequence/image + Depending on the dataset, additional keys might be available. + Default: False. + data_augm_kwargs: dict + A dictionary of arguments to be passed to the data augmentation + function. Default: no data augmentation. See + :func:`~data_augmentation.random_transform` for a complete list + of parameters. + remove_mean: bool + If True, the statistics computed dataset-wise will be used to + remove the dataset mean from the data. Default: False. + divide_by_std: bool + If True, the statistics computed dataset-wise will be used to + divide the data by the dataset standard deviation. Default: False. 
+ remove_per_img_mean: bool + If True, each image will be processed to have zero-mean. + Default: False. + divide_by_per_img_std=False + If True, each image will be processed to have unit variance. + Default: False. + raise_IOErrors: bool + If False in case of an IOError a message will be printed on + screen but no Exception will be raised. Default: False. + rng: :class:`numpy.random.RandomState` instance + The random number generator to use. If None, one will be created. + Default: None. + Notes + ----- + The parallel loader will automatically map all non-void classes to be + sequential starting from 0 and then map all void classes to the + next class. E.g., suppose non_void_nclasses = 4 and _void_classes = [3, 5] + the non-void classes will be mapped to 0, 1, 2, 3 and the void + classes will be mapped to 4, as follows: + 0 --> 0 + 1 --> 1 + 2 --> 2 + 3 --> 4 + 4 --> 3 + 5 --> 4 + Note also that in case the original labels are not sequential, it + suffices to list all the original labels as a list in GTclasses for + parallel_loader to map the non-void classes sequentially starting + from 0 and all the void classes to the next class. E.g. suppose + non_void_nclasses = 5, GTclasses = [0, 2, 5, 9, 11, 12, 99] and + _void_labels = [2, 99], then this will be the mapping: + 0 --> 0 + 2 --> 5 + 5 --> 1 + 9 --> 2 + 11 --> 3 + 12 --> 4 + 99 --> 5 + """ + def __init__(self, + seq_per_subset=0, # if 0 all sequences (or frames, if 4D) + seq_length=0, # if 0, return 4D + overlap=None, + batch_size=1, + queues_size=20, + return_one_hot=False, + return_01c=False, + return_extended_sequences=False, + return_middle_frame_only=False, + return_0_255=False, + use_threads=False, + nthreads=1, + shuffle_at_each_epoch=True, + infinite_iterator=True, + return_list=False, # for keras, return X,Y only + data_augm_kwargs={}, + remove_mean=False, # dataset stats + divide_by_std=False, # dataset stats + remove_per_img_mean=False, # img stats + divide_by_per_img_std=False, # img stats + raise_IOErrors=False, + rng=None, + preload=False, + **kwargs): + + if len(kwargs): + print('Unknown arguments: {}'.format(kwargs.keys())) + + # Set default values for the data augmentation params if not specified + default_data_augm_kwargs = { + 'crop_size': None, + 'rotation_range': 0, + 'width_shift_range': 0, + 'height_shift_range': 0, + 'shear_range': 0, + 'zoom_range': 0, + 'channel_shift_range': 0, + 'fill_mode': 'nearest', + 'cval': 0, + 'cval_mask': 0, + 'horizontal_flip': False, + 'vertical_flip': False, + 'rescale': None, + 'spline_warp': False, + 'warp_sigma': 0.1, + 'warp_grid_size': 3, + 'gamma': 0, + 'gain': 1} + + default_data_augm_kwargs.update(data_augm_kwargs) + self.data_augm_kwargs = default_data_augm_kwargs + del(default_data_augm_kwargs, data_augm_kwargs) + + # Put crop_size into canonical form [c1, 2] + cs = self.data_augm_kwargs['crop_size'] + if cs is not None: + # Convert to list + if isinstance(cs, int): + cs = [cs, cs] + elif isinstance(cs, tuple): + cs = list(cs) + # set 0, 0 to None + if cs == [0, 0]: + cs = None + self.data_augm_kwargs['crop_size'] = cs + + # Do not support multithread without shuffling + if use_threads and nthreads > 1 and not shuffle_at_each_epoch: + raise NotImplementedError('Multiple threads are not order ' + 'preserving') + + # Check that the implementing class has all the mandatory attributes + mandatory_attrs = ['name', 'non_void_nclasses', '_void_labels'] + missing_attrs = [attr for attr in mandatory_attrs if not + hasattr(self, attr)] + if missing_attrs != []: + raise 
NameError('Mandatory argument(s) missing: {}'.format(
+                missing_attrs))
+        if hasattr(self, 'GT_classes'):
+            raise NameError('GTclasses misspelled as GT_classes')
+
+        # If variable sized dataset --> either batch_size 1 or crop
+        if (not hasattr(self, 'data_shape') and batch_size > 1 and
+                not self.data_augm_kwargs['crop_size']):
+            raise ValueError(
+                '{} has no `data_shape` attribute, this means that the '
+                'shape of the samples varies across the dataset. You '
+                'must either set `batch_size = 1` or specify a '
+                '`crop_size`'.format(self.name))
+
+        if seq_length and overlap and overlap >= seq_length:
+            raise ValueError('`overlap` should be smaller than `seq_length`')
+
+        # Copy the data to the local path if not existing
+        if not os.path.exists(self.path):
+            print('The local path {} does not exist. Copying '
+                  'the dataset...'.format(self.path))
+            shutil.copytree(self.shared_path, self.path)
+            for r,d,f in os.walk(self.path):
+                os.chmod(r,0775)
+            print('Done.')
+        else:
+            try:
+                with open(os.path.join(self.path, '__version__')) as f:
+                    if f.read() != self.__version__:
+                        raise IOError
+            except IOError:
+                print('The local path {} exists, but is outdated. I will '
+                      'replace the old files with the new ones...'.format(
+                          self.path))
+                if not os.path.exists(self.shared_path):
+                    print('The shared_path {} for {} does not exist. Please '
+                          'edit the config.ini file with a valid path, as '
+                          'specified in the README.'.format(self.shared_path,
+                                                            self.name))
+                if realpath(self.path) != realpath(self.shared_path):
+                    shutil.rmtree(self.path)
+                    shutil.copytree(self.shared_path, self.path)
+                    for r,d,f in os.walk(self.path):
+                        os.chmod(r,0775)
+                with open(os.path.join(self.path, '__version__'), 'w') as f:
+                    f.write(self.__version__)
+                print('Done.')
+
+        # Save parameters in object
+        self.seq_per_subset = seq_per_subset
+        self.return_sequence = seq_length != 0
+        self.seq_length = seq_length if seq_length else 1
+        self.overlap = overlap if overlap is not None else self.seq_length - 1
+        self.one_subset_per_batch = False
+        self.batch_size = batch_size
+        self.queues_size = queues_size
+        self.return_one_hot = return_one_hot
+        self.return_01c = return_01c
+        self.return_extended_sequences = return_extended_sequences
+        self.return_middle_frame_only = return_middle_frame_only
+        self.return_0_255 = return_0_255
+        self.use_threads = use_threads
+        self.nthreads = nthreads
+        self.shuffle_at_each_epoch = shuffle_at_each_epoch
+        self.infinite_iterator = infinite_iterator
+        self.return_list = return_list
+        self.remove_mean = remove_mean
+        self.divide_by_std = divide_by_std
+        self.remove_per_img_mean = remove_per_img_mean
+        self.divide_by_per_img_std = divide_by_per_img_std
+        self.raise_IOErrors = raise_IOErrors
+        self.rng = rng if rng is not None else RandomState(0xbeef)
+        self.preload = preload
+
+        self.set_has_GT = getattr(self, 'set_has_GT', True)
+        self.mean = getattr(self, 'mean', [])
+        self.std = getattr(self, 'std', [])
+
+        # ...01c
+        data_shape = list(getattr(self.__class__, 'data_shape',
+                                  (None, None, 3)))
+        if self.data_augm_kwargs['crop_size']:
+            data_shape[-3:-1] = self.data_augm_kwargs['crop_size']  # change 01
+        if self.return_01c:
+            self.data_shape = data_shape
+        else:
+            self.data_shape = [data_shape[i] for i in
+                               [1] + range(1) + range(2, len(data_shape))]
+
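The list comprehension above reorders the shape axes; for this loader's two-element `data_shape`, (length, channels) becomes (channels, length) when `return_01c` is False. A quick check of the index arithmetic, assuming Python 2 where `range` returns a list:

ds = [200, 2]    # (0, c): 200 points per ray, 2 channels
assert [ds[i] for i in [1] + range(1) + range(2, len(ds))] == [2, 200]

+        # Load a dict of names, per video/subset/prefix/...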
+ self.names_per_subset = self.get_names() + + # Fill the sequences/batches lists and initialize everything + self._fill_names_sequences() + if len(self.names_sequences) == 0: + raise RuntimeError('The name list cannot be empty') + self._fill_names_batches(shuffle_at_each_epoch) + + # Cache for already loaded data + if self.preload: + self.image_raw = self._preload_data( + self.image_path_raw, dtype='floatX', expand=True) + self.image_smooth = self._preload_data( + self.image_path_smooth, dtype='floatX', expand=True) + self.mask = self._preload_data(self.mask_path, dtype='int32') + self.regions = self._preload_data(self.regions_path, dtype='int32') + else: + self.image_raw = None + self.image_smooth = None + self.mask = None + self.regions = None + + if self.use_threads: + # Initialize the queues + self.names_queue = Queue.Queue(maxsize=self.queues_size) + self.data_queue = Queue.Queue(maxsize=self.queues_size) + self._init_names_queue() # Fill the names queue + + # Start the data fetcher threads + self.sentinel = object() # guaranteed unique reference + self.data_fetchers = [] + for _ in range(self.nthreads): + data_fetcher = Thread( + target=threaded_fetch, + args=(weakref.ref(self),)) + data_fetcher.setDaemon(True) # Die when main dies + data_fetcher.start() + data_fetcher = weakref.ref(data_fetcher) + self.data_fetchers.append(data_fetcher) + # Give time to the data fetcher to die, in case of errors + # sleep(1) + + # super(ThreadedDataset_1D, self).__init__(*args, **kwargs) + + def _preload_data(self, path, dtype, expand=False): + if dtype == 'floatX': + py_type = float + dtype = floatX + elif dtype == 'int32': + py_type = int + else: + raise ValueError('dtype not supported', dtype) + ret = [] + with open(path) as fp: + for i, line in enumerate(fp): + line = re.split(' ', line) + line = np.array([py_type(el) for el in line], dtype=dtype) + ret.append(line) + ret = np.vstack(ret) + if expand: + # b,0 to b,0,c + ret = np.expand_dims(ret, axis=2) + return ret + + def fetch_from_dataset(self, batch_to_load): + """ + Return *batches* of 1D data. + `batch_to_load` contains the indices of the lines to load in the batch. + `load_sequence` should return a numpy array of 2 or more + elements, the first of which 4-dimensional (frame, 0, 1, c) + or (frame, c, 0, 1) containing the data and the second 3D or 4D + containing the label. 
+ """ + batch_ret = {} + batch_to_load = [el for el in batch_to_load if el is not None] + batch_to_load = [element[1] for tupl in batch_to_load for element in tupl] + # Create batches + ret = {} + # Load data + ret['data'] = [] + + ret['indices'] = []#np.sort(batch_to_load) + + if self.smooth_raw_both=='raw' or self.smooth_raw_both=='both': + if self.preload: + raw = self.image_raw[batch_to_load] + else: + raw=[] + with open(self.image_path_raw) as fp: + for i, line in enumerate(fp): + if i in batch_to_load: + line = re.split(' ', line) + line = np.array([float(el) for el in line]) + line = line.astype(floatX) + raw.append(line) + if len(raw) == len(batch_to_load): + break + raw = np.vstack(raw) + # b,0 to b,0,c + raw = np.expand_dims(raw, axis=2) + + if self.smooth_raw_both=='smooth' or self.smooth_raw_both=='both': + if self.preload: + smooth = self.image_smooth[batch_to_load] + else: + smooth=[] + with open(self.image_path_smooth) as fp: + for i, line in enumerate(fp): + if i in batch_to_load: + line = re.split(' ', line) + line = np.array([float(el) for el in line]) + line = line.astype(floatX) + smooth.append(line) + if len(smooth) == len(batch_to_load): + break + + smooth = np.vstack(smooth) + # b,0 to b,0,c + smooth = np.expand_dims(smooth, axis=2) + + if self.smooth_raw_both=='raw': + ret['data'] = raw + elif self.smooth_raw_both == 'smooth': + ret['data'] = smooth + elif self.smooth_raw_both == 'both': + ret['data']=np.concatenate([smooth,raw],axis=2) + + + + # Load mask + ret['labels'] = [] + if self.task=='segmentation': + if self.preload: + ret['labels'] = self.mask[batch_to_load] + else: + with open(self.mask_path) as fp: + for i, line in enumerate(fp): + if i in batch_to_load: + line = re.split(' ', line) + line = np.array([int(el) for el in line]) + line = line.astype('int32') + ret['labels'].append(line) + if len(ret['labels']) == len(batch_to_load): + break + ret['labels'] = np.vstack(ret['labels']) + + elif self.task =='classification': + if self.preload: + ret['labels'] = self.mask[batch_to_load] + else: + with open(self.mask_path) as fp: + for i, line in enumerate(fp): + if i in batch_to_load: + line = re.split(' ', line) + line = np.array([int(el) for el in line]) + line = line.astype('int32') + ret['labels'].append(line) + if len(ret['labels']) == len(batch_to_load): + break + ret['labels'] = np.vstack(ret['labels']) + + + ret['filenames'] = batch_to_load + + ret['subset'] = 'default' + + assert all(el in ret.keys() + for el in ('data', 'labels', 'filenames', 'subset')), ( + 'Keys: {}'.format(ret.keys())) + assert all(isinstance(el, np.ndarray) + for el in (ret['data'], ret['labels'])) + raw_data = ret['data'].copy() + seq_x, seq_y = ret['data'], ret['labels'] + + # Per-data normalization + if self.remove_per_img_mean: + seq_x -= seq_x.mean(axis=1, keepdims=True) + if self.divide_by_per_img_std: + seq_x /= seq_x.std(axis=1, keepdims=True) + + # Dataset statistics normalization + if self.remove_mean: + seq_x -= getattr(self, 'mean', 0) + if self.divide_by_std: + seq_x /= getattr(self, 'std', 1) + + assert seq_x.ndim == 3 + assert seq_y.ndim == 2 + + # from b,0(,c) to b,0,1(,c) + seq_x = np.expand_dims(seq_x, axis=2) + seq_y = np.expand_dims(seq_y, axis=2) + + # Perform data augmentation, if needed + seq_x, seq_y = random_transform( + seq_x, seq_y, + nclasses=self.nclasses, + void_label=self.void_labels, + **self.data_augm_kwargs) + + # from b,0,1(,c) to b,0(,c) + sh = seq_x.shape + seq_x = seq_x.reshape((sh[0], sh[1], sh[3])) + + if self.task == 'segmentation': + seq_y 
= seq_y.reshape((sh[0], sh[1])) + elif self.task=='classification': + #print seq_y.shape + seq_y = seq_y.reshape((sh[0])) + #print seq_y.shape + + if self.set_has_GT and self._void_labels != []: + # Map all void classes to non_void_nclasses and shift the other + # values accordingly, so that the valid values are between 0 + # and non_void_nclasses-1 and the void_classes are all equal to + # non_void_nclasses. + void_l = self._void_labels + void_l.sort(reverse=True) + mapping = self._mapping + + # Apply the mapping + tmp_class = (-1 if not hasattr(self, 'GTclasses') else + max(self.GTclasses) + 1) + seq_y[seq_y == self.non_void_nclasses] = tmp_class + for i in sorted(mapping.keys()): + if i == self.non_void_nclasses: + continue + seq_y[seq_y == i] = mapping[i] + try: + seq_y[seq_y == tmp_class] = mapping[self.non_void_nclasses] + except KeyError: + # none of the original classes was self.non_void_nclasses + pass + elif max(self._cmap.keys()) > self.non_void_nclasses-1: + # Shift values of labels, so that the valid values are between 0 + # and non_void_nclasses-1. + mapping = self._mapping + + # Apply the mapping + tmp_class = (-1 if not hasattr(self, 'GTclasses') else + max(self.GTclasses) + 1) + seq_y[seq_y == self.non_void_nclasses] = tmp_class + for i in sorted(mapping.keys()): + if i == self.non_void_nclasses: + continue + seq_y[seq_y == i] = mapping[i] + try: + seq_y[seq_y == tmp_class] = mapping[self.non_void_nclasses] + except KeyError: + # none of the original classes was self.non_void_nclasses + pass + + # Transform targets seq_y to one hot code if return_one_hot + # is True + if self.set_has_GT and self.return_one_hot: + nc = (self.non_void_nclasses if self._void_labels == [] else + self.non_void_nclasses + 1) + sh = seq_y.shape + seq_y = seq_y.flatten() + seq_y_hot = np.zeros((seq_y.shape[0], nc), + dtype='int32') + seq_y = seq_y.astype('int32') + seq_y_hot[range(seq_y.shape[0]), seq_y] = 1 + seq_y_hot = seq_y_hot.reshape(sh + (nc,)) + seq_y = seq_y_hot + # Dimshuffle if return_01c is False + if not self.return_01c: + # b,0,c --> b,c,0 + seq_x = seq_x.transpose([0, 2, 1]) + if self.set_has_GT and self.return_one_hot: + seq_y = seq_y.transpose([0, 2, 1]) + raw_data = raw_data.transpose([0, 2, 1]) + + if self.return_0_255: + seq_x = (seq_x * 255).astype('uint8') + ret['data'], ret['labels'] = seq_x, seq_y + ret['raw_data'] = raw_data + # Append the data of this batch to the minibatch array + for k, v in ret.iteritems(): + batch_ret.setdefault(k, []).append(v) + + for k, v in batch_ret.iteritems(): + try: + batch_ret[k] = np.array(v) + except ValueError: + # Variable shape: cannot wrap with a numpy array + pass + + + batch_ret['data'] = batch_ret['data'].squeeze(0) + batch_ret['labels'] = batch_ret['labels'].squeeze(0) + + if self.seq_length > 0 and self.return_middle_frame_only: + batch_ret['labels'] = batch_ret['labels'][:, self.seq_length//2] + if self.return_list: + return [batch_ret['data'], batch_ret['labels']] + else: + return batch_ret diff --git a/code/cnn_1D_segm/fcn1D.py b/code/cnn_1D_segm/fcn1D.py new file mode 100644 index 00000000..35d50c7f --- /dev/null +++ b/code/cnn_1D_segm/fcn1D.py @@ -0,0 +1,109 @@ +import numpy as np +import theano.tensor as T +import lasagne +from lasagne.layers import InputLayer, DropoutLayer, ReshapeLayer, \ + NonlinearityLayer, DimshuffleLayer, ConcatLayer +from lasagne.layers import batch_norm, BatchNormLayer +from lasagne.layers import Pool1DLayer as PoolLayer +from lasagne.layers import Conv1DLayer as ConvLayer +from lasagne.layers import 
Upscale1DLayer as UpscaleLayer +from lasagne.layers import PadLayer +from lasagne.layers import ElemwiseSumLayer, ElemwiseMergeLayer +from lasagne.nonlinearities import softmax, linear, rectify + + +def conv_bn_relu(net, incoming_layer, depth, num_filters, filter_size, pad = 'same'): + net['conv'+str(depth)] = ConvLayer(net[incoming_layer], + num_filters = num_filters, filter_size = filter_size, + pad = pad, nonlinearity=None) + net['bn'+str(depth)] = BatchNormLayer(net['conv'+str(depth)]) + net['relu'+str(depth)] = NonlinearityLayer( net['bn'+str(depth)], nonlinearity = rectify) + incoming_layer = 'relu'+str(depth) + + return incoming_layer + +# start-snippet-bn_relu_conv +def bn_relu_conv(net, incoming_layer, depth, num_filters, filter_size, pad = 'same'): + + net['bn'+str(depth)] = BatchNormLayer(net[incoming_layer]) + net['relu'+str(depth)] = NonlinearityLayer( net['bn'+str(depth)], nonlinearity = rectify) + net['conv'+str(depth)] = ConvLayer(net['relu'+str(depth)], + num_filters = num_filters, filter_size = filter_size, + pad = pad, nonlinearity=None) + incoming_layer = 'conv'+str(depth) + + return incoming_layer +# end-snippet-bn_relu_conv + +# start-snippet-convolutions +def build_model(input_var, + n_classes = 6, + nb_in_channels = 2, + filter_size=25, + n_filters = 64, + depth = 8, + last_filter_size = 1, + block = 'bn_relu_conv', + out_nonlin = softmax): + ''' + Parameters: + ----------- + input_var : theano 3Dtensor shape(n_samples, n_in_channels, ray_length) + filter_size : odd int (to fit with same padding) + n_filters : int, number of filters for each convLayer + n_classes : int, number of classes to segment + depth : int, number of stacked convolution before concatenation + last_filter_size : int, last convolution filter size to obtain n_classes feature maps + out_nonlin : default=softmax, non linearity function + ''' + + + net = {} + + net['input'] = InputLayer((None, nb_in_channels, 200), input_var) + incoming_layer = 'input' + + #Convolution layers + for d in range(depth): + if block == 'bn_relu_conv': + incoming_layer = bn_relu_conv(net, incoming_layer, depth = d, + num_filters= n_filters, filter_size=filter_size) + # end-snippet-convolutions + elif block == 'conv_bn_relu': + incoming_layer = conv_bn_relu(net, incoming_layer, depth = d, + num_filters= n_filters, filter_size=filter_size) + # start-snippet-output + #Output layer + net['final_conv'] = ConvLayer(net[incoming_layer], + num_filters = n_classes, + filter_size = last_filter_size, + pad='same') + incoming_layer = 'final_conv' + + #DimshuffleLayer and ReshapeLayer to fit the softmax implementation + #(it needs a 1D or 2D tensor, not a 3D tensor) + net['final_dimshuffle'] = DimshuffleLayer(net[incoming_layer], (0,2,1)) + incoming_layer = 'final_dimshuffle' + + layerSize = lasagne.layers.get_output(net[incoming_layer]).shape + net['final_reshape'] = ReshapeLayer(net[incoming_layer], + (T.prod(layerSize[0:2]),layerSize[2])) + # (200*batch_size,n_classes)) + incoming_layer = 'final_reshape' + + + #This is the layer that computes the prediction + net['last_layer'] = NonlinearityLayer(net[incoming_layer], + nonlinearity = out_nonlin) + incoming_layer = 'last_layer' + + #Layers needed to visualize the prediction of the network + net['probs_reshape'] = ReshapeLayer(net[incoming_layer], + (layerSize[0], layerSize[1], n_classes)) + incoming_layer = 'probs_reshape' + + net['probs_dimshuffle'] = DimshuffleLayer(net[incoming_layer], (0,2,1)) + + + return [net[l] for l in ['last_layer']], net + # end-snippet-output diff --git 
a/code/cnn_1D_segm/train_fcn1D.py b/code/cnn_1D_segm/train_fcn1D.py new file mode 100644 index 00000000..d58c31d4 --- /dev/null +++ b/code/cnn_1D_segm/train_fcn1D.py @@ -0,0 +1,381 @@ +#!/usr/bin/env python2 +from __future__ import absolute_import, print_function, division + +import os +import argparse +import json +import time +from distutils.dir_util import copy_tree + +import lasagne +import numpy as np +import theano +import theano.tensor as T +from data_loader.cortical_layers import Cortical6LayersDataset +from fcn1D import build_model +from lasagne.objectives import categorical_crossentropy +from lasagne.regularization import regularize_network_params +from theano import config + +_FLOATX = config.floatX + + +def accuracy_metric(y_pred, y_true, void_labels, one_hot=False): + assert (y_pred.ndim == 2) or (y_pred.ndim == 1) + + # y_pred to indices + if y_pred.ndim == 2: + y_pred = T.argmax(y_pred, axis=1) + + if one_hot: + y_true = T.argmax(y_true, axis=1) + + # Compute accuracy + acc = T.eq(y_pred, y_true).astype(_FLOATX) + + # Create mask + mask = T.ones_like(y_true, dtype=_FLOATX) + for el in void_labels: + indices = T.eq(y_true, el).nonzero() + if any(indices): + mask = T.set_subtensor(mask[indices], 0.) + + # Apply mask + acc *= mask + acc = T.sum(acc) / T.sum(mask) + + return acc + + +def jaccard(y_pred, y_true, n_classes, one_hot=False): + assert (y_pred.ndim == 2) or (y_pred.ndim == 1) + + # y_pred to indices + if y_pred.ndim == 2: + y_pred = T.argmax(y_pred, axis=1) + + if one_hot: + y_true = T.argmax(y_true, axis=1) + + # Compute confusion matrix + cm = T.zeros((n_classes, n_classes)) + for i in range(n_classes): + for j in range(n_classes): + cm = T.set_subtensor( + cm[i, j], T.sum(T.eq(y_pred, i) * T.eq(y_true, j))) + + # Compute Jaccard Index + TP_perclass = T.cast(cm.diagonal(), _FLOATX) + FP_perclass = cm.sum(1) - TP_perclass + FN_perclass = cm.sum(0) - TP_perclass + + num = TP_perclass + denom = TP_perclass + FP_perclass + FN_perclass + + return T.stack([num, denom], axis=0) + + +SAVEPATH = 'save_models/' +LOADPATH = SAVEPATH +WEIGHTS_PATH = SAVEPATH + + +def train(dataset, learning_rate=0.0005, + weight_decay=0.001, num_epochs=500, + max_patience=25, data_augmentation={}, + savepath=None, loadpath=None, + batch_size=None, resume=False): + + if savepath is None: + raise ValueError('A saving directory must be specified') + + if batch_size is None: + batch_size = [1024, 1024, 1] + + # Model hyperparameters + n_filters = 64 + filter_size = 25 + depth = 8 + block = 'bn_relu_conv' + + # Hyperparameters for the dataset loader + smooth_or_raw = 'both' # use both input channels + shuffle_at_each_epoch = True + + # + # Prepare load/save directories + # + + exp_name = 'fcn1D' + exp_name += '_lrate=' + str(learning_rate) + exp_name += '_fil=' + str(n_filters) + exp_name += '_fsizes=' + str(filter_size) + exp_name += '_depth=' + str(depth) + exp_name += '_data=' + smooth_or_raw + exp_name += '_decay=' + str(weight_decay) + exp_name += '_pat=' + str(max_patience) + + savepath = os.path.join(savepath, dataset, exp_name) + loadpath = os.path.join(loadpath, dataset, exp_name) + print('Savepath : ') + print(savepath) + print('Loadpath : ') + print(loadpath) + + if not os.path.exists(savepath): + os.makedirs(savepath) + else: + print('\033[93m The following folder already exists {}. 
' + 'It will be overwritten in a few seconds...\033[0m'.format( + savepath)) + + print('Saving directory : ' + savepath) + with open(os.path.join(savepath, "config.txt"), "w") as f: + for key, value in locals().items(): + f.write('{} = {}\n'.format(key, value)) + + # + # Define symbolic variables + # + input_var = T.tensor3('input_var') # n_example*nb_in_channels*ray_size + target_var = T.ivector('target_var') # n_example*ray_size + # learning rate is defined below as a theano variable. + learn_step = theano.shared(np.array(learning_rate, dtype=theano.config.floatX)) + + # + # Build dataset iterator + # + + if smooth_or_raw == 'both': + nb_in_channels = 2 + use_threads = False + else: + nb_in_channels = 1 + use_threads = True + + train_iter = Cortical6LayersDataset( + which_set='train', + smooth_or_raw=smooth_or_raw, + batch_size=batch_size[0], + data_augm_kwargs=data_augmentation, + shuffle_at_each_epoch=True, + return_one_hot=False, + return_01c=False, + return_list=False, + use_threads=use_threads, + preload=True) + + val_iter = Cortical6LayersDataset( + which_set='valid', + smooth_or_raw=smooth_or_raw, + batch_size=batch_size[1], + shuffle_at_each_epoch=True, + return_one_hot=False, + return_01c=False, + return_list=False, + use_threads=use_threads, + preload=True) + + test_iter = None + + n_batches_train = train_iter.nbatches + n_batches_val = val_iter.nbatches + n_batches_test = test_iter.nbatches if test_iter is not None else 0 + n_classes = train_iter.non_void_nclasses + void_labels = train_iter.void_labels + + # + # Build network + # + simple_net_output, net = build_model(input_var, + filter_size=filter_size, + n_filters=n_filters, + depth=depth, + block=block, + nb_in_channels=nb_in_channels, + n_classes=n_classes) + + # + # Define and compile theano functions + # + print("Defining and compiling training functions") + + prediction = lasagne.layers.get_output(simple_net_output[0]) + loss = categorical_crossentropy(prediction, target_var) + loss = loss.mean() + + if weight_decay > 0: + weightsl2 = regularize_network_params( + simple_net_output, lasagne.regularization.l2) + loss += weight_decay * weightsl2 + + train_acc = accuracy_metric(prediction, target_var, void_labels) + + params = lasagne.layers.get_all_params(simple_net_output, trainable=True) + updates = lasagne.updates.adam(loss, params, learning_rate=learn_step) + + train_fn = theano.function([input_var, target_var], [loss, train_acc], updates=updates) + + print("Done") + + print("Defining and compiling valid functions") + valid_prediction = lasagne.layers.get_output(simple_net_output[0], deterministic=True) + valid_loss = categorical_crossentropy(valid_prediction, target_var).mean() + valid_acc = accuracy_metric(valid_prediction, target_var, void_labels) + valid_jacc = jaccard(valid_prediction, target_var, n_classes) + + valid_fn = theano.function([input_var, target_var], [valid_loss, valid_acc, valid_jacc]) + print("Done") + + # + # Train loop + # + err_train = [] + acc_train = [] + + err_valid = [] + acc_valid = [] + jacc_valid = [] + patience = 0 + + # Training main loop + print("Start training") + + for epoch in range(num_epochs): + learn_step.set_value((learn_step.get_value() * 0.99).astype(theano.config.floatX)) + + # Single epoch training and validation + start_time = time.time() + # Cost train and acc train for this epoch + cost_train_epoch = 0 + acc_train_epoch = 0 + + for i in range(n_batches_train): + # Get minibatch (comment the next line if only 1 minibatch in training) + train_batch = train_iter.next() + 
X_train_batch, L_train_batch, idx_train_batch = train_batch['data'], train_batch['labels'], \ + train_batch['filenames'][0] + L_train_batch = np.reshape(L_train_batch, np.prod(L_train_batch.shape)) + + # Training step + cost_train_batch, acc_train_batch = train_fn(X_train_batch, L_train_batch) + + # Update epoch results + cost_train_epoch += cost_train_batch + acc_train_epoch += acc_train_batch + + # Add epoch results + err_train += [cost_train_epoch / n_batches_train] + acc_train += [acc_train_epoch / n_batches_train] + + # Validation + cost_val_epoch = 0 + acc_val_epoch = 0 + jacc_val_epoch = np.zeros((2, n_classes)) + + for i in range(n_batches_val): + # Get minibatch (comment the next line if only 1 minibatch in training) + val_batch = val_iter.next() + X_val_batch, L_val_batch, idx_val_batch = val_batch['data'], val_batch['labels'], val_batch['filenames'][0] + L_val_batch = np.reshape(L_val_batch, np.prod(L_val_batch.shape)) + + # Validation step + cost_val_batch, acc_val_batch, jacc_val_batch = valid_fn(X_val_batch, L_val_batch) + + # Update epoch results + cost_val_epoch += cost_val_batch + acc_val_epoch += acc_val_batch + jacc_val_epoch += jacc_val_batch + + # Add epoch results + err_valid += [cost_val_epoch / n_batches_val] + acc_valid += [acc_val_epoch / n_batches_val] + jacc_perclass_valid = jacc_val_epoch[0, :] / jacc_val_epoch[1, :] + jacc_valid += [np.mean(jacc_perclass_valid)] + # worse_indices_valid += [worse_indices_val_epoch] + + # Print results (once per epoch) + + out_str = ("EPOCH %i: Avg cost train %f, acc train %f" + + ", cost val %f, acc val %f, jacc val per class %s, " + "jacc val %f took %f s") + out_str = out_str % (epoch, err_train[epoch], + acc_train[epoch], + err_valid[epoch], + acc_valid[epoch], + ['%d: %f' % (i, j) + for i, j in enumerate(jacc_perclass_valid)], + jacc_valid[epoch], + time.time() - start_time) + print(out_str) + + # Early stopping and saving stuff + + with open(os.path.join(savepath, "fcn1D_output.log"), "a") as f: + f.write(out_str + "\n") + + if epoch == 0: + best_jacc_val = jacc_valid[epoch] + elif epoch > 1 and jacc_valid[epoch] > best_jacc_val: + print('saving best (and last) model') + best_jacc_val = jacc_valid[epoch] + patience = 0 + np.savez(os.path.join(savepath, 'new_fcn1D_model_best.npz'), + *lasagne.layers.get_all_param_values(simple_net_output)) + np.savez(os.path.join(savepath, "fcn1D_errors_best.npz"), + err_train=err_train, acc_train=acc_train, + err_valid=err_valid, acc_valid=acc_valid, jacc_valid=jacc_valid) + else: + patience += 1 + print('saving last model') + + np.savez(os.path.join(savepath, 'new_fcn1D_model_last.npz'), + *lasagne.layers.get_all_param_values(simple_net_output)) + np.savez(os.path.join(savepath, "fcn1D_errors_last.npz"), + err_train=err_train, acc_train=acc_train, + err_valid=err_valid, acc_valid=acc_valid, jacc_valid=jacc_valid) + # Finish training if patience has expired or max nber of epochs reached + + if patience == max_patience or epoch == num_epochs - 1: + if savepath != loadpath: + print('Copying model and other training files to {}'.format(loadpath)) + copy_tree(savepath, loadpath) + break + + +def main(): + parser = argparse.ArgumentParser(description='FCN-1D model training') + parser.add_argument('-dataset', + default='cortical_layers', + help='Dataset.') + parser.add_argument('-learning_rate', + default=0.0005, + help='Learning Rate') + parser.add_argument('--num_epochs', + '-ne', + type=int, + default=500, + help='Optional. 
Int to indicate the max '
+                        'number of epochs.')
+    parser.add_argument('-max_patience',
+                        type=int,
+                        default=25,
+                        help='Max patience')
+    parser.add_argument('-batch_size',
+                        type=int,
+                        nargs='+',
+                        default=[1024, 1024, 1],
+                        help='Batch size [train, val, test]. Default: -batch_size 1024 1024 1')
+    parser.add_argument('-data_augmentation',
+                        type=json.loads,
+                        default={},
+                        help='use data augmentation')
+    args = parser.parse_args()
+
+    train(dataset=args.dataset, learning_rate=args.learning_rate,
+          num_epochs=args.num_epochs, max_patience=args.max_patience, data_augmentation=args.data_augmentation,
+          batch_size=args.batch_size, savepath=SAVEPATH, loadpath=LOADPATH)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/code/conlleval.pl b/code/conlleval.pl
new file mode 100644
index 00000000..34afe965
--- /dev/null
+++ b/code/conlleval.pl
@@ -0,0 +1,319 @@
+#!/usr/bin/perl -w
+# conlleval: evaluate result of processing CoNLL-2000 shared task
+# usage:   conlleval [-l] [-r] [-d delimiterTag] [-o oTag] < file
+# README:  http://www.clips.uantwerpen.be/conll2000/chunking/output.html
+# options: l: generate LaTeX output for tables like in
+#             https://www.clips.uantwerpen.be/conll2003/ner/example.tex
+#          r: accept raw result tags (without B- and I- prefix;
+#             assumes one word per chunk)
+#          d: alternative delimiter tag (default is single space)
+#          o: alternative outside tag (default is O)
+# note:    the file should contain lines with items separated
+#          by $delimiter characters (default space). The final
+#          two items should contain the correct tag and the
+#          guessed tag in that order. Sentences should be
+#          separated from each other by empty lines or lines
+#          with $boundary fields (default -X-).
+# url:     http://www.clips.uantwerpen.be/conll2000/chunking/
+# started: 1998-09-25
+# version: 2018-03-09
+# original author: Erik Tjong Kim Sang
+# modifications: Grégoire Mesnil for Deep Learning Tutorials
+#                https://github.com/lisa-lab/DeepLearningTutorials
+
+use strict;
+
+my $false = 0;
+my $true = 42;
+
+my $boundary = "-X-";     # sentence boundary
+my $correct;              # current corpus chunk tag (I,O,B)
+my $correctChunk = 0;     # number of correctly identified chunks
+my $correctTags = 0;      # number of correct chunk tags
+my $correctType;          # type of current corpus chunk tag (NP,VP,etc.)
+my $delimiter = " "; # field delimiter +my $FB1 = 0.0; # FB1 score (Van Rijsbergen 1979) +my $firstItem; # first feature (for sentence boundary checks) +my $foundCorrect = 0; # number of chunks in corpus +my $foundGuessed = 0; # number of identified chunks +my $guessed; # current guessed chunk tag +my $guessedType; # type of current guessed chunk tag +my $i; # miscellaneous counter +my $inCorrect = $false; # currently processed chunk is correct until now +my $lastCorrect = "O"; # previous chunk tag in corpus +my $latex = 0; # generate LaTeX formatted output +my $lastCorrectType = ""; # type of previously identified chunk tag +my $lastGuessed = "O"; # previously identified chunk tag +my $lastGuessedType = ""; # type of previous chunk tag in corpus +my $lastType; # temporary storage for detecting duplicates +my $line; # line +my $nbrOfFeatures = -1; # number of features per line +my $precision = 0.0; # precision score +my $oTag = "O"; # outside tag, default O +my $raw = 0; # raw input: add B to every token +my $recall = 0.0; # recall score +my $tokenCounter = 0; # token counter (ignores sentence breaks) + +my %correctChunk = (); # number of correctly identified chunks per type +my %foundCorrect = (); # number of chunks in corpus per type +my %foundGuessed = (); # number of identified chunks per type + +my @features; # features on line +my @sortedTypes; # sorted list of chunk type names + +# sanity check +while (@ARGV and $ARGV[0] =~ /^-/) { + if ($ARGV[0] eq "-l") { $latex = 1; shift(@ARGV); } + elsif ($ARGV[0] eq "-r") { $raw = 1; shift(@ARGV); } + elsif ($ARGV[0] eq "-d") { + shift(@ARGV); + if (not defined $ARGV[0]) { + die "conlleval: -d requires delimiter character"; + } + $delimiter = shift(@ARGV); + } elsif ($ARGV[0] eq "-o") { + shift(@ARGV); + if (not defined $ARGV[0]) { + die "conlleval: -o requires delimiter character"; + } + $oTag = shift(@ARGV); + } else { die "conlleval: unknown argument $ARGV[0]\n"; } +} +if (@ARGV) { die "conlleval: unexpected command line argument\n"; } +# process input +while () { + chomp($line = $_); + @features = split(/$delimiter/,$line); + if ($nbrOfFeatures < 0) { $nbrOfFeatures = $#features; } + elsif ($nbrOfFeatures != $#features and @features != 0) { + printf STDERR "unexpected number of features: %d (%d)\n", + $#features+1,$nbrOfFeatures+1; + exit(1); + } + if (@features == 0 or + $features[0] eq $boundary) { @features = ($boundary,"O","O"); } + if (@features < 2) { + die "conlleval: unexpected number of features in line $line\n"; + } + if ($raw) { + if ($features[$#features] eq $oTag) { $features[$#features] = "O"; } + if ($features[$#features-1] eq $oTag) { $features[$#features-1] = "O"; } + if ($features[$#features] ne "O") { + $features[$#features] = "B-$features[$#features]"; + } + if ($features[$#features-1] ne "O") { + $features[$#features-1] = "B-$features[$#features-1]"; + } + } + # 20040126 ET code which allows hyphens in the types + if ($features[$#features] =~ /^([^-]*)-(.*)$/) { + $guessed = $1; + $guessedType = $2; + } else { + $guessed = $features[$#features]; + $guessedType = ""; + } + pop(@features); + if ($features[$#features] =~ /^([^-]*)-(.*)$/) { + $correct = $1; + $correctType = $2; + } else { + $correct = $features[$#features]; + $correctType = ""; + } + pop(@features); +# ($guessed,$guessedType) = split(/-/,pop(@features)); +# ($correct,$correctType) = split(/-/,pop(@features)); + $guessedType = $guessedType ? $guessedType : ""; + $correctType = $correctType ? 
$correctType : ""; + $firstItem = shift(@features); + + # 1999-06-26 sentence breaks should always be counted as out of chunk + if ( $firstItem eq $boundary ) { $guessed = "O"; } + + if ($inCorrect) { + if ( &endOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) and + &endOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) and + $lastGuessedType eq $lastCorrectType) { + $inCorrect=$false; + $correctChunk++; + $correctChunk{$lastCorrectType} = $correctChunk{$lastCorrectType} ? + $correctChunk{$lastCorrectType}+1 : 1; + } elsif ( + &endOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) != + &endOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) or + $guessedType ne $correctType ) { + $inCorrect=$false; + } + } + + if ( &startOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) and + &startOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) and + $guessedType eq $correctType) { $inCorrect = $true; } + + if ( &startOfChunk($lastCorrect,$correct,$lastCorrectType,$correctType) ) { + $foundCorrect++; + $foundCorrect{$correctType} = $foundCorrect{$correctType} ? + $foundCorrect{$correctType}+1 : 1; + } + if ( &startOfChunk($lastGuessed,$guessed,$lastGuessedType,$guessedType) ) { + $foundGuessed++; + $foundGuessed{$guessedType} = $foundGuessed{$guessedType} ? + $foundGuessed{$guessedType}+1 : 1; + } + if ( $firstItem ne $boundary ) { + if ( $correct eq $guessed and $guessedType eq $correctType ) { + $correctTags++; + } + $tokenCounter++; + } + + $lastGuessed = $guessed; + $lastCorrect = $correct; + $lastGuessedType = $guessedType; + $lastCorrectType = $correctType; +} +if ($inCorrect) { + $correctChunk++; + $correctChunk{$lastCorrectType} = $correctChunk{$lastCorrectType} ? + $correctChunk{$lastCorrectType}+1 : 1; +} + +if (not $latex) { + # compute overall precision, recall and FB1 (default values are 0.0) + $precision = 100*$correctChunk/$foundGuessed if ($foundGuessed > 0); + $recall = 100*$correctChunk/$foundCorrect if ($foundCorrect > 0); + $FB1 = 2*$precision*$recall/($precision+$recall) + if ($precision+$recall > 0); + + # print overall performance + printf "processed $tokenCounter tokens with $foundCorrect phrases; "; + printf "found: $foundGuessed phrases; correct: $correctChunk.\n"; + if ($tokenCounter>0) { + printf "accuracy: %6.2f%%; ",100*$correctTags/$tokenCounter; + print "$correctChunk $foundCorrect $foundGuessed "; + printf "precision: %6.2f%%; ",$precision; + printf "recall: %6.2f%%; ",$recall; + printf "FB1: %6.2f\n",$FB1; + } +} + +# sort chunk type names +undef($lastType); +@sortedTypes = (); +foreach $i (sort (keys %foundCorrect,keys %foundGuessed)) { + if (not($lastType) or $lastType ne $i) { + push(@sortedTypes,($i)); + } + $lastType = $i; +} +# print performance per chunk type +if (not $latex) { + for $i (@sortedTypes) { + $correctChunk{$i} = $correctChunk{$i} ? 
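
The overall scores computed above are phrase-level: precision is correctly delimited chunks over chunks guessed, recall is over chunks present in the corpus, and FB1 is their harmonic mean. The same arithmetic as a small Python sketch, with hypothetical counts:

    def chunk_prf(correct, guessed, actual):
        precision = 100.0 * correct / guessed if guessed else 0.0
        recall = 100.0 * correct / actual if actual else 0.0
        fb1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
        return precision, recall, fb1

    # e.g. 8 chunks correct out of 10 guessed, with 12 present in the corpus:
    print(chunk_prf(8, 10, 12))   # (80.0, 66.66..., 72.72...)
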
$correctChunk{$i} : 0; + if (not($foundGuessed{$i})) { $foundGuessed{$i} = 0; $precision = 0.0; } + else { $precision = 100*$correctChunk{$i}/$foundGuessed{$i}; } + if (not($foundCorrect{$i})) { $recall = 0.0; } + else { $recall = 100*$correctChunk{$i}/$foundCorrect{$i}; } + if ($precision+$recall == 0.0) { $FB1 = 0.0; } + else { $FB1 = 2*$precision*$recall/($precision+$recall); } + printf "%17s: ",$i; + printf "% 4d % 4d % 4d ", $correctChunk{$i}, $foundCorrect{$i}, $foundGuessed{$i}; + printf "precision: %6.2f%%; ",$precision; + printf "recall: %6.2f%%; ",$recall; + printf "FB1: %6.2f %d\n",$FB1,$foundGuessed{$i}; + } +} else { + print " & Precision & Recall & F\$_{\\beta=1} \\\\\\hline"; + for $i (@sortedTypes) { + $correctChunk{$i} = $correctChunk{$i} ? $correctChunk{$i} : 0; + if (not($foundGuessed{$i})) { $precision = 0.0; } + else { $precision = 100*$correctChunk{$i}/$foundGuessed{$i}; } + if (not($foundCorrect{$i})) { $recall = 0.0; } + else { $recall = 100*$correctChunk{$i}/$foundCorrect{$i}; } + if ($precision+$recall == 0.0) { $FB1 = 0.0; } + else { $FB1 = 2*$precision*$recall/($precision+$recall); } + printf "\n%-7s & %6.2f\\%% & %6.2f\\%% & %6.2f \\\\", + $i,$precision,$recall,$FB1; + } + print "\\hline\n"; + $precision = 0.0; + $recall = 0; + $FB1 = 0.0; + $precision = 100*$correctChunk/$foundGuessed if ($foundGuessed > 0); + $recall = 100*$correctChunk/$foundCorrect if ($foundCorrect > 0); + $FB1 = 2*$precision*$recall/($precision+$recall) + if ($precision+$recall > 0); + printf "Overall & %6.2f\\%% & %6.2f\\%% & %6.2f \\\\\\hline\n", + $precision,$recall,$FB1; +} + +exit 0; + +# endOfChunk: checks if a chunk ended between the previous and current word +# arguments: previous and current chunk tags, previous and current types +# note: this code is capable of handling other chunk representations +# than the default CoNLL-2000 ones, see EACL'99 paper of Tjong +# Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006 + +sub endOfChunk { + my $prevTag = shift(@_); + my $tag = shift(@_); + my $prevType = shift(@_); + my $type = shift(@_); + my $chunkEnd = $false; + + if ( $prevTag eq "B" and $tag eq "B" ) { $chunkEnd = $true; } + if ( $prevTag eq "B" and $tag eq "O" ) { $chunkEnd = $true; } + if ( $prevTag eq "I" and $tag eq "B" ) { $chunkEnd = $true; } + if ( $prevTag eq "I" and $tag eq "O" ) { $chunkEnd = $true; } + + if ( $prevTag eq "E" and $tag eq "E" ) { $chunkEnd = $true; } + if ( $prevTag eq "E" and $tag eq "I" ) { $chunkEnd = $true; } + if ( $prevTag eq "E" and $tag eq "O" ) { $chunkEnd = $true; } + if ( $prevTag eq "I" and $tag eq "O" ) { $chunkEnd = $true; } + + if ($prevTag ne "O" and $prevTag ne "." 
and $prevType ne $type) { + $chunkEnd = $true; + } + + # corrected 1998-12-22: these chunks are assumed to have length 1 + if ( $prevTag eq "]" ) { $chunkEnd = $true; } + if ( $prevTag eq "[" ) { $chunkEnd = $true; } + + return($chunkEnd); +} + +# startOfChunk: checks if a chunk started between the previous and current word +# arguments: previous and current chunk tags, previous and current types +# note: this code is capable of handling other chunk representations +# than the default CoNLL-2000 ones, see EACL'99 paper of Tjong +# Kim Sang and Veenstra http://xxx.lanl.gov/abs/cs.CL/9907006 + +sub startOfChunk { + my $prevTag = shift(@_); + my $tag = shift(@_); + my $prevType = shift(@_); + my $type = shift(@_); + my $chunkStart = $false; + + if ( $prevTag eq "B" and $tag eq "B" ) { $chunkStart = $true; } + if ( $prevTag eq "I" and $tag eq "B" ) { $chunkStart = $true; } + if ( $prevTag eq "O" and $tag eq "B" ) { $chunkStart = $true; } + if ( $prevTag eq "O" and $tag eq "I" ) { $chunkStart = $true; } + + if ( $prevTag eq "E" and $tag eq "E" ) { $chunkStart = $true; } + if ( $prevTag eq "E" and $tag eq "I" ) { $chunkStart = $true; } + if ( $prevTag eq "O" and $tag eq "E" ) { $chunkStart = $true; } + if ( $prevTag eq "O" and $tag eq "I" ) { $chunkStart = $true; } + + if ($tag ne "O" and $tag ne "." and $prevType ne $type) { + $chunkStart = $true; + } + + # corrected 1998-12-22: these chunks are assumed to have length 1 + if ( $tag eq "[" ) { $chunkStart = $true; } + if ( $tag eq "]" ) { $chunkStart = $true; } + + return($chunkStart); +} diff --git a/code/convolutional_mlp.py b/code/convolutional_mlp.py index 64bf5e69..6bbb47a1 100644 --- a/code/convolutional_mlp.py +++ b/code/convolutional_mlp.py @@ -21,6 +21,9 @@ http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf """ + +from __future__ import print_function + import os import sys import timeit @@ -29,8 +32,8 @@ import theano import theano.tensor as T -from theano.tensor.signal import downsample -from theano.tensor.nnet import conv +from theano.tensor.signal import pool +from theano.tensor.nnet import conv2d from logistic_sgd import LogisticRegression, load_data from mlp import HiddenLayer @@ -70,7 +73,7 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)): # each unit in the lower layer receives a gradient from: # "num output feature maps * filter height * filter width" / # pooling size - fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) / + fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) // numpy.prod(poolsize)) # initialize weights with random weights W_bound = numpy.sqrt(6. 
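
The startOfChunk/endOfChunk predicates in conlleval.pl above support several chunk encodings; for plain BIO tags they reduce to: a chunk starts at a B tag, or at an I tag whose type differs from the previous token's. A hedged Python sketch that extracts (type, start, end) spans under that reduced rule:

    def bio_chunks(tags):
        # tags like ['B-NP', 'I-NP', 'O', 'B-VP']; 'O' is outside any chunk
        chunks, start, ctype = [], None, None
        for i, tag in enumerate(tags + ['O']):      # sentinel closes a final chunk
            prefix, _, ttype = tag.partition('-')
            begins = prefix == 'B' or (prefix == 'I' and ttype != ctype)
            if start is not None and (prefix == 'O' or begins):
                chunks.append((ctype, start, i))
                start, ctype = None, None
            if begins:
                start, ctype = i, ttype
        return chunks

    assert bio_chunks(['B-NP', 'I-NP', 'O', 'B-VP']) == [('NP', 0, 2), ('VP', 3, 4)]
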
/ (fan_in + fan_out)) @@ -87,15 +90,15 @@ def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)): self.b = theano.shared(value=b_values, borrow=True) # convolve input feature maps with filters - conv_out = conv.conv2d( + conv_out = conv2d( input=input, filters=self.W, filter_shape=filter_shape, - image_shape=image_shape + input_shape=image_shape ) - # downsample each feature map individually, using maxpooling - pooled_out = downsample.max_pool_2d( + # pool each feature map individually, using maxpooling + pooled_out = pool.pool_2d( input=conv_out, ds=poolsize, ignore_border=True @@ -145,9 +148,9 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] - n_train_batches /= batch_size - n_valid_batches /= batch_size - n_test_batches /= batch_size + n_train_batches //= batch_size + n_valid_batches //= batch_size + n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch @@ -160,7 +163,7 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, ###################### # BUILD ACTUAL MODEL # ###################### - print '... building the model' + print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 28 * 28) # to a 4D tensor, compatible with our LeNetConvPoolLayer @@ -261,14 +264,14 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, ############### # TRAIN MODEL # ############### - print '... training' + print('... training') # early-stopping parameters patience = 10000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min(n_train_batches, patience / 2) + validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatche before checking the network # on the validation set; in this case we @@ -284,19 +287,19 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: - print 'training @ iter = ', iter + print('training @ iter = ', iter) cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i - in xrange(n_valid_batches)] + in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, @@ -317,7 +320,7 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, # test it on the test set test_losses = [ test_model(i) - for i in xrange(n_test_batches) + for i in range(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' @@ -334,9 +337,9 @@ def evaluate_lenet5(learning_rate=0.1, n_epochs=200, print('Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) - print >> sys.stderr, ('The code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) / 
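
The convolutional_mlp.py hunks here track two Theano API renames: theano.tensor.signal.downsample.max_pool_2d became pool.pool_2d, and theano.tensor.nnet.conv.conv2d became theano.tensor.nnet.conv2d with image_shape renamed to input_shape. A standalone sketch of a graph built with the updated calls (shapes hypothetical; later Theano releases renamed pool_2d's ds argument to ws):

    import numpy
    import theano
    import theano.tensor as T
    from theano.tensor.nnet import conv2d
    from theano.tensor.signal import pool

    x = T.tensor4('x')
    W = theano.shared(numpy.zeros((2, 1, 5, 5), dtype=theano.config.floatX))
    conv_out = conv2d(input=x, filters=W, filter_shape=(2, 1, 5, 5),
                      input_shape=(None, 1, 28, 28))
    pooled = pool.pool_2d(input=conv_out, ds=(2, 2), ignore_border=True)
    f = theano.function([x], pooled)
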
60.)) + print(('The code for file ' + + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr) if __name__ == '__main__': evaluate_lenet5() diff --git a/code/dA.py b/code/dA.py index 8ea94e33..7d054b20 100644 --- a/code/dA.py +++ b/code/dA.py @@ -30,6 +30,8 @@ """ +from __future__ import print_function + import os import sys import timeit @@ -38,7 +40,7 @@ import theano import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from logistic_sgd import load_data from utils import tile_raster_images @@ -193,7 +195,7 @@ def __init__( def get_corrupted_input(self, input, corruption_level): """This function keeps ``1-corruption_level`` entries of the inputs the - same and zero-out randomly selected subset of size ``coruption_level`` + same and zero-out randomly selected subset of size ``corruption_level`` Note : first argument of theano.rng.binomial is the shape(size) of random numbers that it should produce second argument is the number of trials @@ -280,7 +282,7 @@ def test_dA(learning_rate=0.1, training_epochs=15, train_set_x, train_set_y = datasets[0] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size # start-snippet-2 # allocate symbolic variables for the data @@ -328,21 +330,21 @@ def test_dA(learning_rate=0.1, training_epochs=15, ############ # go through training epochs - for epoch in xrange(training_epochs): + for epoch in range(training_epochs): # go through trainng set c = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): c.append(train_da(batch_index)) - print 'Training epoch %d, cost ' % epoch, numpy.mean(c) + print('Training epoch %d, cost ' % epoch, numpy.mean(c, dtype='float64')) end_time = timeit.default_timer() training_time = (end_time - start_time) - print >> sys.stderr, ('The no corruption code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((training_time) / 60.)) + print(('The no corruption code for file ' + + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((training_time) / 60.)), file=sys.stderr) image = Image.fromarray( tile_raster_images(X=da.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), @@ -386,21 +388,21 @@ def test_dA(learning_rate=0.1, training_epochs=15, ############ # go through training epochs - for epoch in xrange(training_epochs): + for epoch in range(training_epochs): # go through trainng set c = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): c.append(train_da(batch_index)) - print 'Training epoch %d, cost ' % epoch, numpy.mean(c) + print('Training epoch %d, cost ' % epoch, numpy.mean(c, dtype='float64')) end_time = timeit.default_timer() training_time = (end_time - start_time) - print >> sys.stderr, ('The 30% corruption code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % (training_time / 60.)) + print(('The 30% corruption code for file ' + + os.path.split(__file__)[1] + + ' ran for %.2fm' % (training_time / 60.)), file=sys.stderr) # end-snippet-3 # start-snippet-4 diff --git a/code/fcn_2D_segm/__init__.py b/code/fcn_2D_segm/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/code/fcn_2D_segm/fcn8.py b/code/fcn_2D_segm/fcn8.py new file mode 100644 index 00000000..5b19f320 --- 
/dev/null +++ b/code/fcn_2D_segm/fcn8.py @@ -0,0 +1,152 @@ +import numpy as np +import scipy.io as sio +import theano.tensor as T +import lasagne +from lasagne.layers import InputLayer, DropoutLayer, ReshapeLayer,\ + DimshuffleLayer +from lasagne.layers import Pool2DLayer as PoolLayer +from lasagne.layers import Conv2DLayer as ConvLayer +from lasagne.layers import ElemwiseSumLayer, ElemwiseMergeLayer +from lasagne.layers import Deconv2DLayer as DeconvLayer +from lasagne.nonlinearities import softmax, linear + + + +def freezeParameters(net, single=True): + """ + Freeze parameters of a layer or a network so that they are not trainable + anymore + + Parameters + ---------- + net: a network layer + single: whether to freeze a single layer of all of the layers below as well + """ + all_layers = lasagne.layers.get_all_layers(net) + + if single: + all_layers = [all_layers[-1]] + + for layer in all_layers: + layer_params = layer.get_params() + for p in layer_params: + try: + layer.params[p].remove('trainable') + except KeyError: + pass + + +# start-snippet-1 +def buildFCN8(nb_in_channels, input_var, + path_weights='/Tmp/romerosa/itinf/models/' + + 'camvid/new_fcn8_model_best.npz', + n_classes=21, load_weights=True, + void_labels=[], trainable=False, + layer=['probs_dimshuffle'], pascal=False, + temperature=1.0, dropout=0.5): + ''' + Build fcn8 model + ''' + + net = {} + + # Contracting path + net['input'] = InputLayer((None, nb_in_channels, None, None),input_var) + + # pool 1 + net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=100, flip_filters=False) + net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad='same', flip_filters=False) + net['pool1'] = PoolLayer(net['conv1_2'], 2) + + # pool 2 + net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad='same', flip_filters=False) + net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad='same', flip_filters=False) + net['pool2'] = PoolLayer(net['conv2_2'], 2) + + # pool 3 + net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad='same', flip_filters=False) + net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad='same', flip_filters=False) + net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad='same', flip_filters=False) + net['pool3'] = PoolLayer(net['conv3_3'], 2) + + # pool 4 + net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad='same', flip_filters=False) + net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad='same', flip_filters=False) + net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad='same', flip_filters=False) + net['pool4'] = PoolLayer(net['conv4_3'], 2) + + # pool 5 + net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad='same', flip_filters=False) + net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad='same', flip_filters=False) + net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad='same', flip_filters=False) + net['pool5'] = PoolLayer(net['conv5_3'], 2) + + # fc6 + net['fc6'] = ConvLayer(net['pool5'], 4096, 7, pad='valid', flip_filters=False) + net['fc6_dropout'] = DropoutLayer(net['fc6'], p=dropout) + + # fc7 + net['fc7'] = ConvLayer(net['fc6_dropout'], 4096, 1, pad='valid', flip_filters=False) + net['fc7_dropout'] = DropoutLayer(net['fc7'], p=dropout) + + net['score_fr'] = ConvLayer(net['fc7_dropout'], n_classes, 1, pad='valid', flip_filters=False) + + # Upsampling path + + # Unpool + net['score2'] = DeconvLayer(net['score_fr'], n_classes, 4, + stride=2, crop='valid', nonlinearity=linear) + net['score_pool4'] = ConvLayer(net['pool4'], n_classes, 1,pad='same') + net['score_fused'] = 
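
In the upsampling path here, each Deconv (transposed convolution) layer with stride 2 and filter size 4 roughly doubles the score-map resolution before it is fused with the matching pool layer's scores, and the final stride-8, filter-16 layer brings the map back near input resolution; the pad=100 on conv1_1 guarantees the center crops always have margin to cut from. A back-of-the-envelope shape check, assuming a hypothetical 16x16 score_fr map and ignoring the intermediate center crops:

    def deconv_len(n_in, filter_size, stride, crop=0):
        # spatial size of a transposed convolution with 'valid' cropping
        return stride * (n_in - 1) + filter_size - 2 * crop

    score2 = deconv_len(16, 4, 2)         # 34, fused with score_pool4 after cropping
    score4 = deconv_len(score2, 4, 2)     # 70, fused with score_pool3 after cropping
    upsample = deconv_len(score4, 16, 8)  # 568, center-cropped back to the input size
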
ElemwiseSumLayer((net['score2'],net['score_pool4']), + cropping=[None, None, 'center','center']) + + # Unpool + net['score4'] = DeconvLayer(net['score_fused'], n_classes, 4, + stride=2, crop='valid', nonlinearity=linear) + net['score_pool3'] = ConvLayer(net['pool3'], n_classes, 1,pad='valid') + net['score_final'] = ElemwiseSumLayer((net['score4'],net['score_pool3']), + cropping=[None, None, 'center','center']) + # Unpool + net['upsample'] = DeconvLayer(net['score_final'], n_classes, 16, + stride=8, crop='valid', nonlinearity=linear) + upsample_shape = lasagne.layers.get_output_shape(net['upsample'])[1] + net['input_tmp'] = InputLayer((None, upsample_shape, None, None), input_var) + + net['score'] = ElemwiseMergeLayer((net['input_tmp'], net['upsample']), + merge_function=lambda input, deconv: + deconv, + cropping=[None, None, 'center', + 'center']) + + # Final dimshuffle, reshape and softmax + net['final_dimshuffle'] = \ + lasagne.layers.DimshuffleLayer(net['score'], (0, 2, 3, 1)) + laySize = lasagne.layers.get_output(net['final_dimshuffle']).shape + net['final_reshape'] = \ + lasagne.layers.ReshapeLayer(net['final_dimshuffle'], + (T.prod(laySize[0:3]), + laySize[3])) + net['probs'] = lasagne.layers.NonlinearityLayer(net['final_reshape'], + nonlinearity=softmax) + # end-snippet-1 + + + # Do not train + if not trainable: + freezeParameters(net['probs']) + + # Go back to 4D + net['probs_reshape'] = ReshapeLayer(net['probs'], (laySize[0], laySize[1], + laySize[2], n_classes)) + + net['probs_dimshuffle'] = DimshuffleLayer(net['probs_reshape'], + (0, 3, 1, 2)) + + # Apply temperature + if load_weights: + soft_value = net['upsample'].W.get_value() / temperature + net['upsample'].W.set_value(soft_value) + soft_value = net['upsample'].b.get_value() / temperature + net['upsample'].b.set_value(soft_value) + + return [net[el] for el in layer] diff --git a/code/fcn_2D_segm/train_fcn8.py b/code/fcn_2D_segm/train_fcn8.py new file mode 100644 index 00000000..d106baee --- /dev/null +++ b/code/fcn_2D_segm/train_fcn8.py @@ -0,0 +1,420 @@ +#!/usr/bin/env python2 +from __future__ import absolute_import, print_function, division +import os +import argparse +import time +import json + +import numpy as np +import theano +import theano.tensor as T +from theano import config +import lasagne +from lasagne.regularization import regularize_network_params + +from dataset_loaders.images.polyps912 import Polyps912Dataset +from fcn8 import buildFCN8 + + +_FLOATX = config.floatX +_EPSILON = 10e-7 + + +def jaccard_metric(y_pred, y_true, n_classes, one_hot=False): + + assert (y_pred.ndim == 2) or (y_pred.ndim == 1) + + # y_pred to indices + if y_pred.ndim == 2: + y_pred = T.argmax(y_pred, axis=1) + + if one_hot: + y_true = T.argmax(y_true, axis=1) + + # Compute confusion matrix + cm = T.zeros((n_classes, n_classes)) + for i in range(n_classes): + for j in range(n_classes): + cm = T.set_subtensor( + cm[i, j], T.sum(T.eq(y_pred, i) * T.eq(y_true, j))) + + # Compute Jaccard Index + TP_perclass = T.cast(cm.diagonal(), _FLOATX) + FP_perclass = cm.sum(1) - TP_perclass + FN_perclass = cm.sum(0) - TP_perclass + + num = TP_perclass + denom = TP_perclass + FP_perclass + FN_perclass + + return T.stack([num, denom], axis=0) + + +def accuracy_metric(y_pred, y_true, void_labels, one_hot=False): + + assert (y_pred.ndim == 2) or (y_pred.ndim == 1) + + # y_pred to indices + if y_pred.ndim == 2: + y_pred = T.argmax(y_pred, axis=1) + + if one_hot: + y_true = T.argmax(y_true, axis=1) + + # Compute accuracy + acc = T.eq(y_pred, 
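
The accuracy_metric used in both segmentation scripts zeroes out void-label pixels with a mask and averages only over the entries that remain. A NumPy equivalent, on hypothetical label vectors:

    import numpy as np

    def masked_accuracy(y_pred, y_true, void_labels):
        mask = ~np.isin(y_true, void_labels)   # void pixels do not count
        return (y_pred[mask] == y_true[mask]).mean()

    y_pred = np.array([0, 1, 2, 2])
    y_true = np.array([0, 1, 255, 2])          # 255 is a hypothetical void label
    print(masked_accuracy(y_pred, y_true, [255]))   # 1.0
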
y_true).astype(_FLOATX) + + # Create mask + mask = T.ones_like(y_true, dtype=_FLOATX) + for el in void_labels: + indices = T.eq(y_true, el).nonzero() + if any(indices): + mask = T.set_subtensor(mask[indices], 0.) + + # Apply mask + acc *= mask + acc = T.sum(acc) / T.sum(mask) + + return acc + + +def crossentropy_metric(y_pred, y_true, void_labels, one_hot=False): + # Clip predictions + y_pred = T.clip(y_pred, _EPSILON, 1.0 - _EPSILON) + + if one_hot: + y_true = T.argmax(y_true, axis=1) + + # Create mask + mask = T.ones_like(y_true, dtype=_FLOATX) + for el in void_labels: + mask = T.set_subtensor(mask[T.eq(y_true, el).nonzero()], 0.) + + # Modify y_true temporarily + y_true_tmp = y_true * mask + y_true_tmp = y_true_tmp.astype('int32') + + # Compute cross-entropy + loss = T.nnet.categorical_crossentropy(y_pred, y_true_tmp) + + # Compute masked mean loss + loss *= mask + loss = T.sum(loss) / T.sum(mask) + + return loss + + +SAVEPATH = 'save_models/' +LOADPATH = SAVEPATH +WEIGHTS_PATH = SAVEPATH + + +def train(dataset, learn_step=0.005, + weight_decay=1e-4, num_epochs=500, + max_patience=100, data_augmentation={}, + savepath=None, #loadpath=None, + early_stop_class=None, + batch_size=None, + resume=False, + train_from_0_255=False): + + # + # Prepare load/save directories + # + exp_name = 'fcn8_' + 'data_aug' if bool(data_augmentation) else '' + + if savepath is None: + raise ValueError('A saving directory must be specified') + + savepath = os.path.join(savepath, dataset, exp_name) + # loadpath = os.path.join(loadpath, dataset, exp_name) + print(savepath) + # print loadpath + + if not os.path.exists(savepath): + os.makedirs(savepath) + else: + print('\033[93m The following folder already exists {}. ' + 'It will be overwritten in a few seconds...\033[0m'.format( + savepath)) + + print('Saving directory : ' + savepath) + with open(os.path.join(savepath, "config.txt"), "w") as f: + for key, value in locals().items(): + f.write('{} = {}\n'.format(key, value)) + + # + # Define symbolic variables + # + input_var = T.tensor4('input_var') + target_var = T.ivector('target_var') + + # + # Build dataset iterator + # + if batch_size is not None: + bs = batch_size + else: + bs = [10, 1, 1] + train_iter = Polyps912Dataset(which_set='train', + batch_size=batch_size[0], + seq_per_subset=0, + seq_length=0, + data_augm_kwargs=data_augmentation, + return_one_hot=False, + return_01c=False, + overlap=0, + use_threads=False, + shuffle_at_each_epoch=True, + return_list=True, + return_0_255=False) + val_iter = Polyps912Dataset(which_set='val', + batch_size=batch_size[1], + seq_per_subset=0, + seq_length=0, + return_one_hot=False, + return_01c=False, + overlap=0, + use_threads=False, + shuffle_at_each_epoch=False, + return_list=True, + return_0_255=False) + test_iter = Polyps912Dataset(which_set='test', + batch_size=batch_size[2], + seq_per_subset=0, + seq_length=0, + return_one_hot=False, + return_01c=False, + overlap=0, + use_threads=False, + shuffle_at_each_epoch=False, + return_list=True, + return_0_255=False) + + + n_batches_train = train_iter.nbatches + n_batches_val = val_iter.nbatches + n_batches_test = test_iter.nbatches if test_iter is not None else 0 + n_classes = train_iter.non_void_nclasses + void_labels = train_iter.void_labels + nb_in_channels = train_iter.data_shape[0] + + print("Batch. train: %d, val %d, test %d" % (n_batches_train, n_batches_val, n_batches_test)) + print("Nb of classes: %d" % (n_classes)) + print("Nb. 
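
crossentropy_metric above clips predictions away from 0 and 1, then multiplies the targets by the mask so that void entries point at class 0; their finite loss is in turn zeroed by the same mask before the mean. The same trick in NumPy, with hypothetical inputs:

    import numpy as np

    def masked_xent(probs, y_true, void_labels, eps=1e-7):
        probs = np.clip(probs, eps, 1.0 - eps)
        mask = (~np.isin(y_true, void_labels)).astype(probs.dtype)
        y_tmp = (y_true * mask).astype(int)      # void targets remapped to class 0
        nll = -np.log(probs[np.arange(len(y_tmp)), y_tmp])
        return (nll * mask).sum() / mask.sum()   # mean over non-void entries only

    probs = np.array([[0.9, 0.1], [0.2, 0.8], [0.5, 0.5]])
    print(masked_xent(probs, np.array([0, 1, 255]), [255]))
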
of input channels: %d" % (nb_in_channels)) + + # + # Build network + # + convmodel = buildFCN8(nb_in_channels, input_var, n_classes=n_classes, + void_labels=void_labels, trainable=True, + load_weights=resume, pascal=True, layer=['probs']) + + # + # Define and compile theano functions + # + print("Defining and compiling training functions") + prediction = lasagne.layers.get_output(convmodel)[0] + loss = crossentropy_metric(prediction, target_var, void_labels) + + if weight_decay > 0: + weightsl2 = regularize_network_params( + convmodel, lasagne.regularization.l2) + loss += weight_decay * weightsl2 + + params = lasagne.layers.get_all_params(convmodel, trainable=True) + updates = lasagne.updates.adam(loss, params, learning_rate=learn_step) + + train_fn = theano.function([input_var, target_var], loss, updates=updates) + + print("Defining and compiling test functions") + test_prediction = lasagne.layers.get_output(convmodel, deterministic=True)[0] + test_loss = crossentropy_metric(test_prediction, target_var, void_labels) + test_acc = accuracy_metric(test_prediction, target_var, void_labels) + test_jacc = jaccard_metric(test_prediction, target_var, n_classes) + + val_fn = theano.function([input_var, target_var], [test_loss, test_acc, test_jacc]) + + # + # Train + # + err_train = [] + err_valid = [] + acc_valid = [] + jacc_valid = [] + patience = 0 + + ## Uncomment this to test the training + # n_batches_train = 1 + # n_batches_val = 1 + # n_batches_test = 1 + # num_epochs = 1 + + # Training main loop + print("Start training") + for epoch in range(num_epochs): + # Single epoch training and validation + start_time = time.time() + cost_train_tot = 0 + + # Train + for i in range(n_batches_train): + print('Training batch ', i) + # Get minibatch + X_train_batch, L_train_batch = train_iter.next() + L_train_batch = np.reshape(L_train_batch, np.prod(L_train_batch.shape)) + + + # Training step + cost_train = train_fn(X_train_batch, L_train_batch) + out_str = "cost %f" % (cost_train) + cost_train_tot += cost_train + + err_train += [cost_train_tot/n_batches_train] + + # Validation + cost_val_tot = 0 + acc_val_tot = 0 + jacc_val_tot = np.zeros((2, n_classes)) + for i in range(n_batches_val): + print('Valid batch ', i) + # Get minibatch + X_val_batch, L_val_batch = val_iter.next() + L_val_batch = np.reshape(L_val_batch, np.prod(L_val_batch.shape)) + + # Validation step + cost_val, acc_val, jacc_val = val_fn(X_val_batch, L_val_batch) + + acc_val_tot += acc_val + cost_val_tot += cost_val + jacc_val_tot += jacc_val + + err_valid += [cost_val_tot/n_batches_val] + acc_valid += [acc_val_tot/n_batches_val] + jacc_perclass_valid = jacc_val_tot[0, :] / jacc_val_tot[1, :] + if early_stop_class == None: + jacc_valid += [np.mean(jacc_perclass_valid)] + else: + jacc_valid += [jacc_perclass_valid[early_stop_class]] + + + out_str = "EPOCH %i: Avg epoch training cost train %f, cost val %f" +\ + ", acc val %f, jacc val class 0 %f, jacc val class 1 %f, jacc val %f took %f s" + out_str = out_str % (epoch, err_train[epoch], + err_valid[epoch], + acc_valid[epoch], + jacc_perclass_valid[0], + jacc_perclass_valid[1], + jacc_valid[epoch], + time.time()-start_time) + print(out_str) + + with open(os.path.join(savepath, "fcn8_output.log"), "a") as f: + f.write(out_str + "\n") + + # Early stopping and saving stuff + if epoch == 0: + best_jacc_val = jacc_valid[epoch] + elif epoch > 1 and jacc_valid[epoch] > best_jacc_val: + best_jacc_val = jacc_valid[epoch] + patience = 0 + np.savez(os.path.join(savepath, 'new_fcn8_model_best.npz'), 
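
Note that the elif epoch > 1 guard in this early-stopping block means an improvement at epoch 1 neither saves a best checkpoint nor resets patience. The usual patience pattern, stripped to a sketch with hypothetical per-epoch validation scores:

    scores = [0.50, 0.55, 0.54, 0.56, 0.55, 0.55, 0.55]   # hypothetical val jaccard
    max_patience = 2
    best, patience = None, 0
    for epoch, score in enumerate(scores):
        if best is None or score > best:
            best, patience = score, 0    # improvement: checkpoint here
        else:
            patience += 1
        if patience == max_patience:
            break                        # stops at epoch 5 with these numbers
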
*lasagne.layers.get_all_param_values(convmodel)) + np.savez(os.path.join(savepath, "fcn8_errors_best.npz"), err_valid, err_train, acc_valid, jacc_valid) + else: + patience += 1 + + np.savez(os.path.join(savepath, 'new_fcn8_model_last.npz'), *lasagne.layers.get_all_param_values(convmodel)) + np.savez(os.path.join(savepath, "fcn8_errors_last.npz"), err_valid, err_train, acc_valid, jacc_valid) + # Finish training if patience has expired or max nber of epochs + # reached + if patience == max_patience or epoch == num_epochs-1: + if test_iter is not None: + # Load best model weights + with np.load(os.path.join(savepath, 'new_fcn8_model_best.npz')) as f: + param_values = [f['arr_%d' % i] for i in range(len(f.files))] + nlayers = len(lasagne.layers.get_all_params(convmodel)) + lasagne.layers.set_all_param_values(convmodel, param_values[:nlayers]) + # Test + cost_test_tot = 0 + acc_test_tot = 0 + jacc_test_tot = np.zeros((2, n_classes)) + for i in range(n_batches_test): + # Get minibatch + X_test_batch, L_test_batch = test_iter.next() + L_test_batch = np.reshape(L_test_batch, np.prod(L_test_batch.shape)) + + # Test step + cost_test, acc_test, jacc_test = val_fn(X_test_batch, L_test_batch) + + acc_test_tot += acc_test + cost_test_tot += cost_test + jacc_test_tot += jacc_test + + err_test = cost_test_tot/n_batches_test + acc_test = acc_test_tot/n_batches_test + jacc_test_perclass = jacc_test_tot[0, :] / jacc_test_tot[1, :] + jacc_test = np.mean(jacc_test_perclass) + + out_str = "FINAL MODEL: err test % f, acc test %f, " + out_str += "jacc test class 0 % f, jacc test class 1 %f, jacc test %f" + out_str = out_str % (err_test, + acc_test, + jacc_test_perclass[0], + jacc_test_perclass[1], + jacc_test) + print(out_str) + # if savepath != loadpath: + # print('Copying model and other training files to {}'.format(loadpath)) + # copy_tree(savepath, loadpath) + + # End + return + + +def main(): + parser = argparse.ArgumentParser(description='FCN8 model training') + parser.add_argument('-dataset', + default='polyps', + help='Dataset.') + parser.add_argument('-learning_rate', + default=0.0001, + help='Learning Rate') + parser.add_argument('-penal_cst', + default=0.0, + help='regularization constant') + parser.add_argument('--num_epochs', + '-ne', + type=int, + default=750, + help='Optional. Int to indicate the max' + 'number of epochs.') + parser.add_argument('-max_patience', + type=int, + default=100, + help='Max patience') + parser.add_argument('-batch_size', + type=int, + nargs='+', + default=[10, 1, 1], + help='Batch size [train, val, test]. 
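
The checkpointing here relies on np.savez storing positional arrays under the keys arr_0, arr_1, ..., in the order produced by get_all_param_values, so reloading must preserve that order before calling set_all_param_values. The round-trip in miniature, with hypothetical parameters:

    import numpy as np

    params = [np.zeros((3, 3)), np.ones(3)]   # stand-ins for layer parameters
    np.savez('model.npz', *params)

    with np.load('model.npz') as f:
        restored = [f['arr_%d' % i] for i in range(len(f.files))]
    assert all((a == b).all() for a, b in zip(params, restored))
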
Default: -batch_size 10 1 1')
+    parser.add_argument('-data_augmentation',
+                        type=json.loads,
+                        default={'crop_size': (224, 224), 'horizontal_flip': True, 'fill_mode':'constant'},
+                        help='use data augmentation')
+    parser.add_argument('-early_stop_class',
+                        type=int,
+                        default=None,
+                        help='class to early stop on')
+    parser.add_argument('-train_from_0_255',
+                        type=bool,
+                        default=False,
+                        help='Whether to train from images within 0-255 range')
+    args = parser.parse_args()
+
+    train(args.dataset, float(args.learning_rate),
+          float(args.penal_cst), int(args.num_epochs), int(args.max_patience),
+          data_augmentation=args.data_augmentation, batch_size=args.batch_size,
+          early_stop_class=args.early_stop_class, savepath=SAVEPATH,
+          train_from_0_255=args.train_from_0_255)#, loadpath=LOADPATH)
+
+if __name__ == "__main__":
+    main()
diff --git a/code/guidelines_segm_tutos_with_conda.sh b/code/guidelines_segm_tutos_with_conda.sh
new file mode 100644
index 00000000..93057b38
--- /dev/null
+++ b/code/guidelines_segm_tutos_with_conda.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+### Base installation.
+
+# Create and enter main directory.
+mkdir main_directory
+cd main_directory
+# Create and activate conda environment.
+conda create --yes -n tuto python=2
+source activate tuto
+# Install theano.
+conda install --yes -c mila-udem theano
+# Install Lasagne.
+git clone https://github.com/Lasagne/Lasagne.git
+cd Lasagne/
+pip install -e .
+cd ..
+# Install dataset_loaders.
+conda install --yes matplotlib numpy Pillow scipy scikit-image seaborn h5py
+git clone https://github.com/fvisin/dataset_loaders.git
+cd dataset_loaders/
+pip install -e .
+cd ..
+# Create config.ini.
+cd dataset_loaders/dataset_loaders
+touch config.ini
+cd ../../
+# Get tutorials code.
+git clone https://github.com/lisa-lab/DeepLearningTutorials.git
+
+# NB: Don't forget to correctly set config.ini with section [general]
+# and other relevant sections for the segmentation tutorials before
+# running the following lines.
+# Field `datasets_local_path` in [general] section should indicate a working
+# directory for the dataset_loaders module. You can use a directory within
+# the main directory, for example main_directory/datasets_local_dir.
+# If the specified folder does not exist, it will be created.
+
+# NB: The following lines should be executed in the main directory created above.
+# If any problem occurs, consider deleting the folder save_models (created by
+# tutorial scripts) and the working directory you specified for dataset_loaders:
+# rm -rf save_models datasets_local_dir
+
+### Tutorial FCN 2D.
+## Get polyps_split7.zip from https://drive.google.com/file/d/0B_60jvsCt1hhZWNfcW4wbHE5N3M/view
+## Directory for [polyps912] section in config.ini should be full path to main_directory/polyps_split7
+unzip polyps_split7.zip
+THEANO_FLAGS=device=cuda,floatX=float32 python DeepLearningTutorials/code/fcn_2D_segm/train_fcn8.py --num_epochs 60
+
+### Tutorial UNET.
+## Get test-volume.tif, train-labels.tif, train-volume.tif from ISBI challenge: http://brainiac2.mit.edu/isbi_challenge/home
+## Directory for [isbi_em_stacks] section in config.ini should be full path to main_directory/isbi
+pip install simpleitk
+mkdir isbi
+mv test-volume.tif train-labels.tif train-volume.tif isbi
+THEANO_FLAGS=device=cuda,floatX=float32 python DeepLearningTutorials/code/unet/train_unet.py --num_epochs 60
+
+### Tutorial FCN 1D.
+## Get TrainingData190417.tar.gz from https://drive.google.com/file/d/0B3tbeSUS2FsVOVlIamlDdkNBQUE/edit +## Directory for [cortical_layers] section in config.ini should be full path to main_directory/cortical_layers +mkdir cortical_layers +cd cortical_layers/ +tar -xvf ../TrainingData190417.tar.gz +mv TrainingData 6layers_segmentation +cd .. +THEANO_FLAGS=device=cuda,floatX=float32 python DeepLearningTutorials/code/cnn_1D_segm/train_fcn1D.py --num_epochs 60 diff --git a/code/hmc/hmc.py b/code/hmc/hmc.py index b9c872f0..cf4d20a1 100644 --- a/code/hmc/hmc.py +++ b/code/hmc/hmc.py @@ -7,6 +7,7 @@ from theano import function, shared from theano import tensor as TT import theano +import theano.sandbox.rng_mrg sharedX = (lambda X, name: shared(numpy.asarray(X, dtype=theano.config.floatX), name=name)) @@ -128,14 +129,14 @@ def leapfrog(pos, vel, step): rval2: dictionary Dictionary of updates for the Scan Op """ - # from pos(t) and vel(t-stepsize/2), compute vel(t+stepsize/2) + # from pos(t) and vel(t-stepsize//2), compute vel(t+stepsize//2) dE_dpos = TT.grad(energy_fn(pos).sum(), pos) new_vel = vel - step * dE_dpos - # from vel(t+stepsize/2) compute pos(t+stepsize) + # from vel(t+stepsize//2) compute pos(t+stepsize) new_pos = pos + step * new_vel return [new_pos, new_vel], {} - # compute velocity at time-step: t + stepsize/2 + # compute velocity at time-step: t + stepsize//2 initial_energy = energy_fn(initial_pos) dE_dpos = TT.grad(initial_energy.sum(), initial_pos) vel_half_step = initial_vel - 0.5 * stepsize * dE_dpos @@ -275,14 +276,14 @@ def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept, """ - ## POSITION UPDATES ## + # POSITION UPDATES # # broadcast `accept` scalar to tensor with the same dimensions as # final_pos. accept_matrix = accept.dimshuffle(0, *(('x',) * (final_pos.ndim - 1))) # if accept is True, update to `final_pos` else stay put new_positions = TT.switch(accept_matrix, final_pos, positions) # end-snippet-5 start-snippet-7 - ## STEPSIZE UPDATES ## + # STEPSIZE UPDATES # # if acceptance rate is too low, our sampler is too "noisy" and we reduce # the stepsize. If it is too high, our sampler is too conservative, we can # get away with a larger stepsize (resulting in better mixing). 
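
The hmc.py hunk that follows clips the adapted stepsize and keeps an exponential moving average of the acceptance rate. Stripped of Theano, the controller is a few lines; the constants below are hypothetical stand-ins for stepsize_inc, stepsize_dec and avg_acceptance_slowness:

    stepsize, avg_accept = 1e-3, 0.9
    target, inc, dec, slowness = 0.9, 1.02, 0.98, 0.9
    for accept in [1.0, 0.0, 1.0, 1.0]:            # hypothetical accept decisions
        # too-conservative sampler (high acceptance): grow the step; else shrink
        stepsize = stepsize * (inc if avg_accept > target else dec)
        stepsize = min(max(stepsize, 1e-5), 0.5)   # clip to [stepsize_min, stepsize_max]
        avg_accept = slowness * avg_accept + (1 - slowness) * accept
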
@@ -292,7 +293,7 @@ def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept, new_stepsize = TT.clip(_new_stepsize, stepsize_min, stepsize_max) # end-snippet-7 start-snippet-6 - ## ACCEPT RATE UPDATES ## + # ACCEPT RATE UPDATES # # perform exponential moving average mean_dtype = theano.scalar.upcast(accept.dtype, avg_acceptance_rate.dtype) new_acceptance_rate = TT.add( @@ -358,7 +359,7 @@ def new_from_shared_positions( stepsize = sharedX(initial_stepsize, 'hmc_stepsize') avg_acceptance_rate = sharedX(target_acceptance_rate, 'avg_acceptance_rate') - s_rng = TT.shared_randomstreams.RandomStreams(seed) + s_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(seed) # define graph for an `n_steps` HMC simulation accept, final_pos = hmc_move( diff --git a/code/hmc/test_hmc.py b/code/hmc/test_hmc.py index f6c3b522..42dbc3a7 100644 --- a/code/hmc/test_hmc.py +++ b/code/hmc/test_hmc.py @@ -1,7 +1,15 @@ + +from __future__ import print_function + import numpy import theano -from hmc import HMC_sampler +try: + from hmc import HMC_sampler +except ImportError as e: + # python 3 compatibility + # http://stackoverflow.com/questions/3073259/python-nose-import-error + from hmc.hmc import HMC_sampler def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10): @@ -30,24 +38,24 @@ def gaussian_energy(x): initial_stepsize=1e-3, stepsize_max=0.5) # Start with a burn-in process - garbage = [sampler.draw() for r in xrange(burnin)] # burn-in Draw + garbage = [sampler.draw() for r in range(burnin)] # burn-in Draw # `n_samples`: result is a 3D tensor of dim [n_samples, batchsize, # dim] - _samples = numpy.asarray([sampler.draw() for r in xrange(n_samples)]) + _samples = numpy.asarray([sampler.draw() for r in range(n_samples)]) # Flatten to [n_samples * batchsize, dim] samples = _samples.T.reshape(dim, -1).T - print '****** TARGET VALUES ******' - print 'target mean:', mu - print 'target cov:\n', cov + print('****** TARGET VALUES ******') + print('target mean:', mu) + print('target cov:\n', cov) - print '****** EMPIRICAL MEAN/COV USING HMC ******' - print 'empirical mean: ', samples.mean(axis=0) - print 'empirical_cov:\n', numpy.cov(samples.T) + print('****** EMPIRICAL MEAN/COV USING HMC ******') + print('empirical mean: ', samples.mean(axis=0)) + print('empirical_cov:\n', numpy.cov(samples.T)) - print '****** HMC INTERNALS ******' - print 'final stepsize', sampler.stepsize.get_value() - print 'final acceptance_rate', sampler.avg_acceptance_rate.get_value() + print('****** HMC INTERNALS ******') + print('final stepsize', sampler.stepsize.get_value()) + print('final acceptance_rate', sampler.avg_acceptance_rate.get_value()) return sampler diff --git a/code/imdb.py b/code/imdb.py index 21e0e376..341be231 100644 --- a/code/imdb.py +++ b/code/imdb.py @@ -1,4 +1,7 @@ -import cPickle +from __future__ import print_function +from six.moves import xrange +import six.moves.cPickle as pickle + import gzip import os @@ -68,9 +71,11 @@ def get_dataset_file(dataset, default_dataset, origin): dataset = new_path if (not os.path.isfile(dataset)) and data_file == default_dataset: - import urllib - print 'Downloading data from %s' % origin - urllib.urlretrieve(origin, dataset) + from six.moves import urllib + print('Downloading data from %s' % origin) + urllib.request.urlretrieve(origin, dataset) + + return dataset @@ -110,8 +115,8 @@ def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1, maxlen=None, else: f = open(path, 'rb') - train_set = cPickle.load(f) - test_set = 
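
Both hmc.py here and dA.py earlier swap theano.tensor.shared_randomstreams.RandomStreams for the GPU-friendly MRG generator, which is largely a drop-in for the sampling calls these tutorials make. A minimal sketch of the binomial call dA.get_corrupted_input relies on:

    import theano
    from theano.sandbox.rng_mrg import MRG_RandomStreams

    srng = MRG_RandomStreams(seed=1234)
    noise = srng.binomial(size=(5,), n=1, p=0.7, dtype=theano.config.floatX)
    f = theano.function([], noise)
    print(f())   # e.g. a 0/1 keep-mask over five inputs
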
cPickle.load(f) + train_set = pickle.load(f) + test_set = pickle.load(f) f.close() if maxlen: new_train_set_x = [] diff --git a/code/imdb_preprocess.py b/code/imdb_preprocess.py index c20b37b6..62ebb556 100644 --- a/code/imdb_preprocess.py +++ b/code/imdb_preprocess.py @@ -8,7 +8,7 @@ 3) Then run this script. """ - +from __future__ import print_function dataset_path='/Tmp/bastienf/aclImdb/' import numpy @@ -27,12 +27,12 @@ def tokenize(sentences): - print 'Tokenizing..', + print('Tokenizing..', end=' ') text = "\n".join(sentences) tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE) tok_text, _ = tokenizer.communicate(text) toks = tok_text.split('\n')[:-1] - print 'Done' + print('Done') return toks @@ -52,7 +52,7 @@ def build_dict(path): sentences = tokenize(sentences) - print 'Building dictionary..', + print('Building dictionary..', end=' ') wordcount = dict() for ss in sentences: words = ss.strip().lower().split() @@ -72,7 +72,7 @@ def build_dict(path): for idx, ss in enumerate(sorted_idx): worddict[keys[ss]] = idx+2 # leave 0 and 1 (UNK) - print numpy.sum(counts), ' total words ', len(keys), ' unique words' + print(numpy.sum(counts), ' total words ', len(keys), ' unique words') return worddict diff --git a/code/logistic_cg.py b/code/logistic_cg.py index db9822ef..c2970d51 100644 --- a/code/logistic_cg.py +++ b/code/logistic_cg.py @@ -33,6 +33,7 @@ """ +from __future__ import print_function, division __docformat__ = 'restructedtext en' @@ -165,9 +166,9 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): batch_size = 600 # size of the minibatch - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size - n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size - n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size + n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size + n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size n_in = 28 * 28 # number of input units n_out = 10 # number of output units @@ -175,7 +176,7 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): ###################### # BUILD ACTUAL MODEL # ###################### - print '... building the model' + print('... 
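
The imdb.py changes above lean on six.moves so one import line works under both interpreters: six.moves.cPickle resolves to cPickle on Python 2 and to pickle on Python 3, and six.moves.urllib papers over the urllib/urllib2 split. A quick round-trip that runs under either version (file name hypothetical):

    import six.moves.cPickle as pickle

    with open('toy.pkl', 'wb') as f:
        pickle.dump({'vocab_size': 100000}, f)
    with open('toy.pkl', 'rb') as f:
        assert pickle.load(f) == {'vocab_size': 100000}
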
building the model') # allocate symbolic variables for the data minibatch_offset = T.lscalar() # offset to the start of a [mini]batch @@ -239,7 +240,7 @@ def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'): def train_fn(theta_value): classifier.theta.set_value(theta_value, borrow=True) train_losses = [batch_cost(i * batch_size) - for i in xrange(n_train_batches)] + for i in range(n_train_batches)] return numpy.mean(train_losses) # creates a function that computes the average gradient of cost with @@ -247,7 +248,7 @@ def train_fn(theta_value): def train_fn_grad(theta_value): classifier.theta.set_value(theta_value, borrow=True) grad = batch_grad(0) - for i in xrange(1, n_train_batches): + for i in range(1, n_train_batches): grad += batch_grad(i * batch_size) return grad / n_train_batches @@ -258,9 +259,9 @@ def callback(theta_value): classifier.theta.set_value(theta_value, borrow=True) #compute the validation loss validation_losses = [validate_model(i * batch_size) - for i in xrange(n_valid_batches)] + for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) - print('validation error %f %%' % (this_validation_loss * 100.,)) + print(('validation error %f %%' % (this_validation_loss * 100.,))) # check if it is better then best validation score got until now if this_validation_loss < validation_scores[0]: @@ -268,7 +269,7 @@ def callback(theta_value): # testing dataset validation_scores[0] = this_validation_loss test_losses = [test_model(i * batch_size) - for i in xrange(n_test_batches)] + for i in range(n_test_batches)] validation_scores[1] = numpy.mean(test_losses) ############### @@ -288,17 +289,13 @@ def callback(theta_value): maxiter=n_epochs ) end_time = timeit.default_timer() - print( - ( - 'Optimization complete with best validation score of %f %%, with ' - 'test performance %f %%' - ) - % (validation_scores[0] * 100., validation_scores[1] * 100.) + print(('Optimization complete with best validation score of %f %%, with ' + 'test performance %f %%' + ) % (validation_scores[0] * 100., validation_scores[1] * 100.) ) - print >> sys.stderr, ('The code for file ' + - os.path.split(__file__)[1] + - ' ran for %.1fs' % ((end_time - start_time))) + print('The code for file ' + os.path.split(__file__)[1] + + ' ran for %.1fs' % (end_time - start_time), file=sys.stderr) if __name__ == '__main__': diff --git a/code/logistic_sgd.py b/code/logistic_sgd.py index c944f8b3..9f4427e7 100644 --- a/code/logistic_sgd.py +++ b/code/logistic_sgd.py @@ -32,9 +32,12 @@ Christopher M. Bishop, section 4.3.2 """ + +from __future__ import print_function + __docformat__ = 'restructedtext en' -import cPickle +import six.moves.cPickle as pickle import gzip import os import sys @@ -194,25 +197,27 @@ def load_data(dataset): dataset = new_path if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz': - import urllib + from six.moves import urllib origin = ( 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz' ) - print 'Downloading data from %s' % origin - urllib.urlretrieve(origin, dataset) + print('Downloading data from %s' % origin) + urllib.request.urlretrieve(origin, dataset) - print '... loading data' + print('... loading data') # Load the dataset - f = gzip.open(dataset, 'rb') - train_set, valid_set, test_set = cPickle.load(f) - f.close() - #train_set, valid_set, test_set format: tuple(input, target) - #input is an numpy.ndarray of 2 dimensions (a matrix) - #witch row's correspond to an example. 
target is a - #numpy.ndarray of 1 dimensions (vector)) that have the same length as - #the number of rows in the input. It should give the target - #target to the example with the same index in the input. + with gzip.open(dataset, 'rb') as f: + try: + train_set, valid_set, test_set = pickle.load(f, encoding='latin1') + except: + train_set, valid_set, test_set = pickle.load(f) + # train_set, valid_set, test_set format: tuple(input, target) + # input is a numpy.ndarray of 2 dimensions (a matrix) + # where each row corresponds to an example. target is a + # numpy.ndarray of 1 dimension (vector) that has the same length as + # the number of rows in the input. It should give the target + # to the example with the same index in the input. def shared_dataset(data_xy, borrow=True): """ Function that loads the dataset into shared variables @@ -276,14 +281,14 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size - n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size - n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size + n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size + n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size ###################### # BUILD ACTUAL MODEL # ###################### - print '... building the model' + print('... building the model') # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch @@ -348,14 +353,14 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, ############### # TRAIN MODEL # ############### - print '... training the model' + print('... 
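
The try/except around pickle.load above handles a cross-version wrinkle: mnist.pkl.gz was pickled under Python 2, and the NumPy arrays inside it only unpickle on Python 3 with encoding='latin1', an argument Python 2's pickle.load does not accept. The same guard in isolation, narrowed to the TypeError that Python 2 raises for the unknown keyword:

    import gzip
    import six.moves.cPickle as pickle

    with gzip.open('mnist.pkl.gz', 'rb') as f:   # the dataset file used above
        try:
            # Python 3: latin1 decodes the Python 2 numpy payload
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            # Python 2: no encoding keyword
            train_set, valid_set, test_set = pickle.load(f)
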
training the model') # early-stopping parameters patience = 5000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min(n_train_batches, patience / 2) + validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatche before checking the network # on the validation set; in this case we @@ -369,7 +374,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, epoch = 0 while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) # iteration number @@ -378,7 +383,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) - for i in xrange(n_valid_batches)] + for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print( @@ -402,7 +407,7 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, # test it on the test set test_losses = [test_model(i) - for i in xrange(n_test_batches)] + for i in range(n_test_batches)] test_score = numpy.mean(test_losses) print( @@ -419,8 +424,8 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, ) # save the best model - with open('best_model.pkl', 'w') as f: - cPickle.dump(classifier, f) + with open('best_model.pkl', 'wb') as f: + pickle.dump(classifier, f) if patience <= iter: done_looping = True @@ -434,11 +439,11 @@ def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, ) % (best_validation_loss * 100., test_score * 100.) ) - print 'The code run for %d epochs, with %f epochs/sec' % ( - epoch, 1. * epoch / (end_time - start_time)) - print >> sys.stderr, ('The code for file ' + - os.path.split(__file__)[1] + - ' ran for %.1fs' % ((end_time - start_time))) + print('The code run for %d epochs, with %f epochs/sec' % ( + epoch, 1. * epoch / (end_time - start_time))) + print(('The code for file ' + + os.path.split(__file__)[1] + + ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr) def predict(): @@ -448,7 +453,7 @@ def predict(): """ # load the saved model - classifier = cPickle.load(open('best_model.pkl')) + classifier = pickle.load(open('best_model.pkl')) # compile a predictor function predict_model = theano.function( @@ -462,8 +467,8 @@ def predict(): test_set_x = test_set_x.get_value() predicted_values = predict_model(test_set_x[:10]) - print ("Predicted values for the first 10 examples in test set:") - print predicted_values + print("Predicted values for the first 10 examples in test set:") + print(predicted_values) if __name__ == '__main__': diff --git a/code/lstm.py b/code/lstm.py index 1d87cfb3..a3010a9f 100644 --- a/code/lstm.py +++ b/code/lstm.py @@ -1,8 +1,11 @@ ''' Build a tweet sentiment analyzer ''' + +from __future__ import print_function +import six.moves.cPickle as pickle + from collections import OrderedDict -import cPickle as pkl import sys import time @@ -56,7 +59,7 @@ def zipp(params, tparams): """ When we reload the model. Needed for the GPU stuff. """ - for kk, vv in params.iteritems(): + for kk, vv in params.items(): tparams[kk].set_value(vv) @@ -65,7 +68,7 @@ def unzip(zipped): When we pickle the model. Needed for the GPU stuff. 
""" new_params = OrderedDict() - for kk, vv in zipped.iteritems(): + for kk, vv in zipped.items(): new_params[kk] = vv.get_value() return new_params @@ -106,7 +109,7 @@ def init_params(options): def load_params(path, params): pp = numpy.load(path) - for kk, vv in params.iteritems(): + for kk, vv in params.items(): if kk not in pp: raise Warning('%s is not in the archive' % kk) params[kk] = pp[kk] @@ -116,7 +119,7 @@ def load_params(path, params): def init_tparams(params): tparams = OrderedDict() - for kk, pp in params.iteritems(): + for kk, pp in params.items(): tparams[kk] = theano.shared(params[kk], name=kk) return tparams @@ -217,7 +220,7 @@ def sgd(lr, tparams, grads, x, mask, y, cost): # New set of shared variable that will contain the gradient # for a mini-batch. gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] gsup = [(gs, g) for gs, g in zip(gshared, grads)] # Function that computes gradients for a mini-batch, but do not @@ -266,13 +269,13 @@ def adadelta(lr, tparams, grads, x, mask, y, cost): zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) @@ -329,13 +332,13 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost): zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] running_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] @@ -348,7 +351,7 @@ def rmsprop(lr, tparams, grads, x, mask, y, cost): updir = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_updir' % k) - for k, p in tparams.iteritems()] + for k, p in tparams.items()] updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, running_grads2)] @@ -418,7 +421,7 @@ def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False): n_done += len(valid_index) if verbose: - print '%d/%d samples classified' % (n_done, n_samples) + print('%d/%d samples classified' % (n_done, n_samples)) return probs @@ -470,11 +473,11 @@ def train_lstm( # Model options model_options = locals().copy() - print "model options", model_options + print("model options", model_options) load_data, prepare_data = get_dataset(dataset) - print 'Loading data' + print('Loading data') train, valid, test = load_data(n_words=n_words, valid_portion=0.05, maxlen=maxlen) if test_size > 0: @@ -490,7 +493,7 @@ def train_lstm( model_options['ydim'] = ydim - print 'Building model' + print('Building model') # This create the initial parameters as numpy ndarrays. 
# Dict name (string) -> numpy ndarray params = init_params(model_options) @@ -516,36 +519,36 @@ def train_lstm( f_cost = theano.function([x, mask, y], cost, name='f_cost') - grads = tensor.grad(cost, wrt=tparams.values()) + grads = tensor.grad(cost, wrt=list(tparams.values())) f_grad = theano.function([x, mask, y], grads, name='f_grad') lr = tensor.scalar(name='lr') f_grad_shared, f_update = optimizer(lr, tparams, grads, x, mask, y, cost) - print 'Optimization' + print('Optimization') kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size) kf_test = get_minibatches_idx(len(test[0]), valid_batch_size) - print "%d train examples" % len(train[0]) - print "%d valid examples" % len(valid[0]) - print "%d test examples" % len(test[0]) + print("%d train examples" % len(train[0])) + print("%d valid examples" % len(valid[0])) + print("%d test examples" % len(test[0])) history_errs = [] best_p = None bad_count = 0 if validFreq == -1: - validFreq = len(train[0]) / batch_size + validFreq = len(train[0]) // batch_size if saveFreq == -1: - saveFreq = len(train[0]) / batch_size + saveFreq = len(train[0]) // batch_size uidx = 0 # the number of update done estop = False # early stop start_time = time.time() try: - for eidx in xrange(max_epochs): + for eidx in range(max_epochs): n_samples = 0 # Get new shuffled index for the training set. @@ -569,22 +572,22 @@ def train_lstm( f_update(lrate) if numpy.isnan(cost) or numpy.isinf(cost): - print 'bad cost detected: ', cost + print('bad cost detected: ', cost) return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: - print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost + print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost) if saveto and numpy.mod(uidx, saveFreq) == 0: - print 'Saving...', + print('Saving...') if best_p is not None: params = best_p else: params = unzip(tparams) numpy.savez(saveto, history_errs=history_errs, **params) - pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) - print 'Done' + pickle.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) + print('Done') if numpy.mod(uidx, validFreq) == 0: use_noise.set_value(0.) @@ -602,7 +605,7 @@ def train_lstm( best_p = unzip(tparams) bad_counter = 0 - print ('Train ', train_err, 'Valid ', valid_err, + print('Train ', train_err, 'Valid ', valid_err, 'Test ', test_err) if (len(history_errs) > patience and @@ -610,17 +613,17 @@ def train_lstm( 0].min()): bad_counter += 1 if bad_counter > patience: - print 'Early Stop!' + print('Early Stop!') estop = True break - print 'Seen %d samples' % n_samples + print('Seen %d samples' % n_samples) if estop: break except KeyboardInterrupt: - print "Training interupted" + print("Training interupted") end_time = time.time() if best_p is not None: @@ -634,15 +637,15 @@ def train_lstm( valid_err = pred_error(f_pred, prepare_data, valid, kf_valid) test_err = pred_error(f_pred, prepare_data, test, kf_test) - print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err + print( 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err ) if saveto: numpy.savez(saveto, train_err=train_err, valid_err=valid_err, test_err=test_err, history_errs=history_errs, **best_p) - print 'The code run for %d epochs, with %f sec/epochs' % ( - (eidx + 1), (end_time - start_time) / (1. * (eidx + 1))) - print >> sys.stderr, ('Training took %.1fs' % - (end_time - start_time)) + print('The code run for %d epochs, with %f sec/epochs' % ( + (eidx + 1), (end_time - start_time) / (1. 
* (eidx + 1)))) + print( ('Training took %.1fs' % + (end_time - start_time)), file=sys.stderr) return train_err, valid_err, test_err diff --git a/code/mlp.py b/code/mlp.py index 18f34e7c..e865bc8f 100644 --- a/code/mlp.py +++ b/code/mlp.py @@ -18,6 +18,9 @@ Christopher M. Bishop, section 5 """ + +from __future__ import print_function + __docformat__ = 'restructedtext en' @@ -231,14 +234,14 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size - n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size - n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size + n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size + n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size ###################### # BUILD ACTUAL MODEL # ###################### - print '... building the model' + print('... building the model') # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch @@ -289,7 +292,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, ) # start-snippet-5 - # compute the gradient of cost with respect to theta (sotred in params) + # compute the gradient of cost with respect to theta (sorted in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(cost, param) for param in classifier.params] @@ -322,7 +325,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, ############### # TRAIN MODEL # ############### - print '... training' + print('... training') # early-stopping parameters patience = 10000 # look as this many examples regardless @@ -330,7 +333,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant - validation_frequency = min(n_train_batches, patience / 2) + validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatche before checking the network # on the validation set; in this case we @@ -346,7 +349,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) # iteration number @@ -355,7 +358,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i - in xrange(n_valid_batches)] + in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) print( @@ -382,7 +385,7 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, # test it on the test set test_losses = [test_model(i) for i - in xrange(n_test_batches)] + in range(n_test_batches)] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' @@ -398,9 +401,9 @@ def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, print(('Optimization complete. 
Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) - print >> sys.stderr, ('The code for file ' + - os.path.split(__file__)[1] + - ' ran for %.2fm' % ((end_time - start_time) / 60.)) + print(('The code for file ' + + os.path.split(__file__)[1] + + ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr) if __name__ == '__main__': diff --git a/code/rbm.py b/code/rbm.py index 1ba4c86d..6e4f1012 100644 --- a/code/rbm.py +++ b/code/rbm.py @@ -4,6 +4,9 @@ contain hidden variables. Restricted Boltzmann Machines further restrict BMs to those without visible-visible and hidden-hidden connections. """ + +from __future__ import print_function + import timeit try: @@ -17,7 +20,7 @@ import theano.tensor as T import os -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams from utils import tile_raster_images from logistic_sgd import load_data @@ -254,7 +257,8 @@ def get_cost_updates(self, lr=0.1, persistent=None, k=1): # chain_start is the initial state corresponding to the # 6th output outputs_info=[None, None, None, None, None, chain_start], - n_steps=k + n_steps=k, + name="gibbs_hvh" ) # start-snippet-3 # determine gradients on RBM parameters @@ -384,7 +388,7 @@ def test_rbm(learning_rate=0.1, training_epochs=15, test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing - n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size + n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch @@ -431,14 +435,14 @@ def test_rbm(learning_rate=0.1, training_epochs=15, start_time = timeit.default_timer() # go through training epochs - for epoch in xrange(training_epochs): + for epoch in range(training_epochs): # go through the training set mean_cost = [] - for batch_index in xrange(n_train_batches): + for batch_index in range(n_train_batches): mean_cost += [train_rbm(batch_index)] - print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost) + print('Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)) # Plot filters after each training epoch plotting_start = timeit.default_timer() @@ -493,7 +497,8 @@ def test_rbm(learning_rate=0.1, training_epochs=15, ) = theano.scan( rbm.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], - n_steps=plot_every + n_steps=plot_every, + name="gibbs_vhv" ) # add to updates the shared variable that takes care of our persistent @@ -518,11 +523,11 @@ def test_rbm(learning_rate=0.1, training_epochs=15, (29 * n_samples + 1, 29 * n_chains - 1), dtype='uint8' ) - for idx in xrange(n_samples): + for idx in range(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() - print ' ... plotting sample ', idx + print(' ... 
plotting sample %d' % idx) image_data[29 * idx:29 * idx + 28, :] = tile_raster_images( X=vis_mf, img_shape=(28, 28), diff --git a/code/rnnrbm.py b/code/rnnrbm.py index e1f40b5a..43bda691 100644 --- a/code/rnnrbm.py +++ b/code/rnnrbm.py @@ -3,6 +3,8 @@ # RNN-RBM deep learning tutorial # More information at http://deeplearning.net/tutorial/rnnrbm.html +from __future__ import print_function + import glob import os import sys @@ -11,15 +13,13 @@ try: import pylab except ImportError: - print ( - "pylab isn't available. If you use its functionality, it will crash." - ) - print "It can be installed with 'pip install -q Pillow'" + print ("pylab isn't available. If you use its functionality, it will crash.") + print("It can be installed with 'pip install -q Pillow'") from midi.utils import midiread, midiwrite import theano import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams #Don't use a python long as this don't work on 32 bits computers. numpy.random.seed(0xbeef) @@ -248,21 +248,21 @@ def train(self, files, batch_size=100, num_epochs=200): for f in files] try: - for epoch in xrange(num_epochs): + for epoch in range(num_epochs): numpy.random.shuffle(dataset) costs = [] for s, sequence in enumerate(dataset): - for i in xrange(0, len(sequence), batch_size): + for i in range(0, len(sequence), batch_size): cost = self.train_function(sequence[i:i + batch_size]) costs.append(cost) - print 'Epoch %i/%i' % (epoch + 1, num_epochs), - print numpy.mean(costs) + print('Epoch %i/%i' % (epoch + 1, num_epochs)) + print(numpy.mean(costs)) sys.stdout.flush() except KeyboardInterrupt: - print 'Interrupted by user.' + print('Interrupted by user.') def generate(self, filename, show=True): '''Generate a sample sequence, plot the resulting piano-roll and save @@ -288,7 +288,8 @@ def generate(self, filename, show=True): def test_rnnrbm(batch_size=100, num_epochs=200): model = RnnRbm() - re = os.path.join(os.path.split(os.path.dirname(__file__))[0], + cwd = os.path.dirname(os.path.abspath(__file__)) + re = os.path.join(os.path.split(cwd)[0], 'data', 'Nottingham', 'train', '*.mid') model.train(glob.glob(re), batch_size=batch_size, num_epochs=num_epochs) diff --git a/code/rnnslu.py b/code/rnnslu.py index 2ea55978..d020db59 100644 --- a/code/rnnslu.py +++ b/code/rnnslu.py @@ -1,6 +1,9 @@ + +from __future__ import print_function +import six.moves.cPickle as pickle + from collections import OrderedDict import copy -import cPickle import gzip import os import urllib @@ -66,7 +69,10 @@ def atisfold(fold): assert fold in range(5) filename = os.path.join(PREFIX, 'atis.fold'+str(fold)+'.pkl.gz') f = gzip.open(filename, 'rb') - train_set, valid_set, test_set, dicts = cPickle.load(f) + try: + train_set, valid_set, test_set, dicts = pickle.load(f, encoding='latin1') + except: + train_set, valid_set, test_set, dicts = pickle.load(f) return train_set, valid_set, test_set, dicts @@ -101,36 +107,27 @@ def conlleval(p, g, w, filename, script_path): return get_perf(filename, script_path) - -def download(origin, destination): - ''' - download the corresponding atis file - from http://www-etud.iro.umontreal.ca/~mesnilgr/atis/ - ''' - print 'Downloading data from %s' % origin - urllib.urlretrieve(origin, destination) - - def get_perf(filename, folder): ''' run conlleval.pl perl script to obtain precision/recall and F1 score ''' _conlleval = os.path.join(folder, 'conlleval.pl') - if not 
os.path.isfile(_conlleval): - url = 'http://www-etud.iro.umontreal.ca/~mesnilgr/atis/conlleval.pl' - download(url, _conlleval) - os.chmod(_conlleval, stat.S_IRWXU) # give the execute permissions proc = subprocess.Popen(["perl", _conlleval], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - stdout, _ = proc.communicate(''.join(open(filename).readlines())) + stdout, _ = proc.communicate(''.join(open(filename).readlines()).encode('utf-8')) + stdout = stdout.decode('utf-8') + out = None + for line in stdout.split('\n'): if 'accuracy' in line: out = line.split() break - + # To help debug + if out is None: + print(stdout.split('\n')) precision = float(out[6][:-2]) recall = float(out[8][:-2]) f1score = float(out[10]) @@ -234,7 +231,7 @@ def recurrence(x_t, h_tm1): def train(self, x, y, window_size, learning_rate): cwords = contextwin(x, window_size) - words = map(lambda x: numpy.asarray(x).astype('int32'), cwords) + words = list(map(lambda x: numpy.asarray(x).astype('int32'), cwords)) labels = y self.sentence_train(words, labels, learning_rate) @@ -271,18 +268,19 @@ def main(param=None): 'nepochs': 60, # 60 is recommended 'savemodel': False} - print param + print(param) folder_name = os.path.basename(__file__).split('.')[0] folder = os.path.join(os.path.dirname(__file__), folder_name) if not os.path.exists(folder): os.mkdir(folder) + script_path = os.path.dirname(__file__) # load the dataset train_set, valid_set, test_set, dic = atisfold(param['fold']) - idx2label = dict((k, v) for v, k in dic['labels2idx'].iteritems()) - idx2word = dict((k, v) for v, k in dic['words2idx'].iteritems()) + idx2label = dict((k, v) for v, k in dic['labels2idx'].items()) + idx2word = dict((k, v) for v, k in dic['words2idx'].items()) train_lex, train_ne, train_y = train_set valid_lex, valid_ne, valid_y = valid_set @@ -310,7 +308,7 @@ def main(param=None): # train with early stopping on validation set best_f1 = -numpy.inf param['clr'] = param['lr'] - for e in xrange(param['nepochs']): + for e in range(param['nepochs']): # shuffle shuffle([train_lex, train_ne, train_y], param['seed']) @@ -320,9 +318,9 @@ def main(param=None): for i, (x, y) in enumerate(zip(train_lex, train_y)): rnn.train(x, y, param['win'], param['clr']) - print '[learning] epoch %i >> %2.2f%%' % ( - e, (i + 1) * 100. / nsentences), - print 'completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic), + print('[learning] epoch %i >> %2.2f%%' % ( + e, (i + 1) * 100. 
/ nsentences), end=' ') + print('completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic), end='') sys.stdout.flush() # evaluation // back into the real world : idx -> words @@ -340,12 +338,12 @@ def main(param=None): groundtruth_test, words_test, folder + '/current.test.txt', - folder) + script_path) res_valid = conlleval(predictions_valid, groundtruth_valid, words_valid, folder + '/current.valid.txt', - folder) + script_path) if res_valid['f1'] > best_f1: @@ -371,7 +369,7 @@ def main(param=None): folder + '/best.valid.txt']) else: if param['verbose']: - print '' + print('') # learning rate decay if no improvement in 10 epochs if param['decay'] and abs(param['be']-param['ce']) >= 10: @@ -382,9 +380,9 @@ def main(param=None): break print('BEST RESULT: epoch', param['be'], - 'valid F1', param['vf1'], - 'best test F1', param['tf1'], - 'with the model', folder) + 'valid F1', param['vf1'], + 'best test F1', param['tf1'], + 'with the model', folder) if __name__ == '__main__': diff --git a/code/test.py b/code/test.py index 76c95b38..8768d8c1 100644 --- a/code/test.py +++ b/code/test.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import, print_function, division import sys import numpy @@ -15,10 +16,6 @@ import lstm -def test_rnnslu(): - rnnslu.main() - - def test_logistic_sgd(): logistic_sgd.sgd_optimization_mnist(n_epochs=10) @@ -62,6 +59,26 @@ def test_rnnrbm(): rnnrbm.test_rnnrbm(num_epochs=1) +def test_rnnslu(): + s = {'fold': 3, + # 5 folds 0,1,2,3,4 + 'data': 'atis', + 'lr': 0.0970806646812754, + 'verbose': 1, + 'decay': True, + # decay on the learning rate if improvement stops + 'win': 7, + # number of words in the context window + 'nhidden': 200, + # number of hidden units + 'seed': 345, + 'emb_dimension': 50, + # dimension of word embedding + 'nepochs': 1, # CHANGED + 'savemodel': False} + rnnslu.main(s) + + def test_lstm(): lstm.train_lstm(max_epochs=1, test_size=1000, saveto='') @@ -81,52 +98,16 @@ def speed(): do_gpu = True algo_executed = [s for idx, s in enumerate(algo) if to_exec[idx]] - #Timming expected are from the buildbot that have an i7-920 @ - # 2.67GHz with hyperthread enabled for the cpu, 12G of ram. An GeForce GTX - # 580 for the GPU. OS=Fedora 14, gcc=4.5.1, python/BLAS from EPD - # 7.1-2 (python 2.7.2, mkl unknow). BLAS with only 1 thread. - - expected_times_64 = numpy.asarray([9.3, 21.0, 76.1, 73.7, 116.4, - 346.9, 355.0, 510.9, 130.4, 23.2, 106]) - expected_times_32 = numpy.asarray([6.4, 14.7, 42.5, 66.5, 71, - 191.2, 199.0, 400.4, 119.5, 36.9, 67.2]) - - # Number with just 1 decimal are new value that are faster with - # the Theano version 0.5rc2 Other number are older. They are not - # updated, as we where faster in the past! - # TODO: find why and fix this! 
- -# Here is the value for the buildbot on February 3th 2012 with a GTX 285 -# sgd, cg mlp conv da -# sda dbn rbm -# gpu times[3.72957802, 9.94316864, 29.1772666, 9.13857198, 25.91144657, -# 18.30802011, 53.38651466, 285.41386175] -# expected [3.076634879, 7.555234910, 18.99226785, 9.58915591, 24.130070450, -# 24.77524018, 92.66246653, 322.340329170] -# sgd, cg mlp conv da -# sda dbn rbm -#expected/get [0.82492841, 0.75984178, 0.65092691, 1.04930573, 0.93125138 -# 1.35324519 1.7356905 1.12937868] - - expected_times_gpu = numpy.asarray([3.0, 7.55523491, 18.99226785, - 5.8, 20.0, - 11.2, 17.2, 244.3, 118.8, 34.2, 8.7]) - expected_times_64 = [s for idx, s in enumerate(expected_times_64) - if to_exec[idx]] - expected_times_32 = [s for idx, s in enumerate(expected_times_32) - if to_exec[idx]] - expected_times_gpu = [s for idx, s in enumerate(expected_times_gpu) - if to_exec[idx]] - + def time_test(m, l, idx, f, **kwargs): if not to_exec[idx]: return - print algo[idx] + print(algo[idx]) ts = m.call_time try: f(**kwargs) - except Exception, e: - print >> sys.stderr, 'test', algo[idx], 'FAILED', e + except Exception as e: + print('test', algo[idx], 'FAILED', e, file=sys.stderr) l.append(numpy.nan) return te = m.call_time @@ -171,114 +152,88 @@ def do_tests(): saveto='') return numpy.asarray(l) + # Initialize test count and results dictionnary + test_total = 0 + times_dic = {} + #test in float64 in FAST_RUN mode on the cpu import theano if do_float64: theano.config.floatX = 'float64' theano.config.mode = 'FAST_RUN' float64_times = do_tests() - print >> sys.stderr, algo_executed - print >> sys.stderr, 'float64 times', float64_times - print >> sys.stderr, 'float64 expected', expected_times_64 - print >> sys.stderr, 'float64 % expected/get', ( - expected_times_64 / float64_times) + times_dic['float64'] = float64_times + test_total += numpy.size(float64_times) + print(algo_executed, file=sys.stderr) + print('float64 times', float64_times, file=sys.stderr) #test in float32 in FAST_RUN mode on the cpu theano.config.floatX = 'float32' if do_float32: float32_times = do_tests() - print >> sys.stderr, algo_executed - print >> sys.stderr, 'float32 times', float32_times - print >> sys.stderr, 'float32 expected', expected_times_32 - print >> sys.stderr, 'float32 % expected/get', ( - expected_times_32 / float32_times) + times_dic['float32'] = float32_times + test_total += numpy.size(float32_times) + print(algo_executed, file=sys.stderr) + print('float32 times', float32_times, file=sys.stderr) if do_float64: - print >> sys.stderr, 'float64/float32', ( - float64_times / float32_times) - print >> sys.stderr - print >> sys.stderr, ('Duplicate the timing to have everything ' - 'in one place') - print >> sys.stderr, algo_executed - print >> sys.stderr, 'float64 times', float64_times - print >> sys.stderr, 'float64 expected', expected_times_64 - print >> sys.stderr, 'float64 % expected/get', ( - expected_times_64 / float64_times) - print >> sys.stderr, 'float32 times', float32_times - print >> sys.stderr, 'float32 expected', expected_times_32 - print >> sys.stderr, 'float32 % expected/get', ( - expected_times_32 / float32_times) - - print >> sys.stderr, 'float64/float32', ( - float64_times / float32_times) - print >> sys.stderr, 'expected float64/float32', ( - expected_times_64 / float32_times) + print('float64/float32', ( + float64_times / float32_times), file=sys.stderr) + print(file=sys.stderr) + print(('Duplicate the timing to have everything ' + 'in one place'), file=sys.stderr) + print(algo_executed, 
file=sys.stderr) + print('float64 times', float64_times, file=sys.stderr) + print('float32 times', float32_times, file=sys.stderr) + + print('float64/float32', ( + float64_times / float32_times), file=sys.stderr) #test in float32 in FAST_RUN mode on the gpu - import theano.sandbox.cuda + import theano.gpuarray if do_gpu: - theano.sandbox.cuda.use('gpu') + theano.gpuarray.use('cuda') gpu_times = do_tests() - print >> sys.stderr, algo_executed - print >> sys.stderr, 'gpu times', gpu_times - print >> sys.stderr, 'gpu expected', expected_times_gpu - print >> sys.stderr, 'gpu % expected/get', ( - expected_times_gpu / gpu_times) + times_dic['gpu'] = gpu_times + test_total += numpy.size(gpu_times) + print(algo_executed, file=sys.stderr) + print('gpu times', gpu_times, file=sys.stderr) if do_float64: - print >> sys.stderr, 'float64/gpu', float64_times / gpu_times + print('float64/gpu', float64_times / gpu_times, file=sys.stderr) if (do_float64 + do_float32 + do_gpu) > 1: - print >> sys.stderr - print >> sys.stderr, ('Duplicate the timing to have everything ' - 'in one place') - print >> sys.stderr, algo_executed + print(file=sys.stderr) + print(('Duplicate the timing to have everything ' + 'in one place'), file=sys.stderr) + print(algo_executed, file=sys.stderr) if do_float64: - print >> sys.stderr, 'float64 times', float64_times - print >> sys.stderr, 'float64 expected', expected_times_64 - print >> sys.stderr, 'float64 % expected/get', ( - expected_times_64 / float64_times) + print('float64 times', float64_times, file=sys.stderr) if do_float32: - print >> sys.stderr, 'float32 times', float32_times - print >> sys.stderr, 'float32 expected', expected_times_32 - print >> sys.stderr, 'float32 % expected/get', ( - expected_times_32 / float32_times) + print('float32 times', float32_times, file=sys.stderr) if do_gpu: - print >> sys.stderr, 'gpu times', gpu_times - print >> sys.stderr, 'gpu expected', expected_times_gpu - print >> sys.stderr, 'gpu % expected/get', ( - expected_times_gpu / gpu_times) + print('gpu times', gpu_times, file=sys.stderr) - print + print() if do_float64 and do_float32: - print >> sys.stderr, 'float64/float32', ( - float64_times / float32_times) - print >> sys.stderr, 'expected float64/float32', ( - expected_times_64 / float32_times) + print('float64/float32', ( + float64_times / float32_times), file=sys.stderr) if do_float64 and do_gpu: - print >> sys.stderr, 'float64/gpu', float64_times / gpu_times - print >> sys.stderr, 'expected float64/gpu', ( - expected_times_64 / gpu_times) + print('float64/gpu', float64_times / gpu_times, file=sys.stderr) if do_float32 and do_gpu: - print >> sys.stderr, 'float32/gpu', float32_times / gpu_times - print >> sys.stderr, 'expected float32/gpu', ( - expected_times_32 / gpu_times) - - def compare(x, y): - ratio = x / y - # If there is more then 5% difference between the expected - # time and the real time, we consider this an error. 
- return sum((ratio < 0.95) + (ratio > 1.05)) + print('float32/gpu', float32_times / gpu_times, file=sys.stderr) + + # Generate JUnit performance report + for label, times in times_dic.items(): + with open('speedtests_{label}.xml'.format(label=label), 'w') as f: + f.write('\n') + f.write('\n' + .format(label=label, ntests=test_total/len(times_dic))) + for algo, time in zip(algo_executed, times): + f.write(' ' + .format(label=label, algo=algo, time=time)) + f.write(' \n') + f.write('\n') - print - if do_float64: - err = compare(expected_times_64, float64_times) - print >> sys.stderr, 'speed_failure_float64=' + str(err) - if do_float32: - err = compare(expected_times_32, float32_times) - print >> sys.stderr, 'speed_failure_float32=' + str(err) if do_gpu: - err = compare(expected_times_gpu, gpu_times) - print >> sys.stderr, 'speed_failure_gpu=' + str(err) - assert not numpy.isnan(gpu_times).any() diff --git a/code/unet/Unet_lasagne_recipes.py b/code/unet/Unet_lasagne_recipes.py new file mode 100644 index 00000000..ff7a02f0 --- /dev/null +++ b/code/unet/Unet_lasagne_recipes.py @@ -0,0 +1,75 @@ +# start-snippet-1 +__author__ = 'Fabian Isensee' +from collections import OrderedDict +from lasagne.layers import (InputLayer, ConcatLayer, Pool2DLayer, ReshapeLayer, DimshuffleLayer, NonlinearityLayer, + DropoutLayer, Deconv2DLayer, batch_norm) +try: + from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer +except ImportError: + from lasagne.layers import Conv2DLayer as ConvLayer +import lasagne +from lasagne.init import HeNormal +# end-snippet-1 + +# start-snippet-downsampling +def build_UNet(n_input_channels=1, BATCH_SIZE=None, num_output_classes=2, pad='same', nonlinearity=lasagne.nonlinearities.elu, input_dim=(None, None), base_n_filters=64, do_dropout=False): + net = OrderedDict() + net['input'] = InputLayer((BATCH_SIZE, n_input_channels, input_dim[0], input_dim[1])) + + net['contr_1_1'] = batch_norm(ConvLayer(net['input'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['contr_1_2'] = batch_norm(ConvLayer(net['contr_1_1'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['pool1'] = Pool2DLayer(net['contr_1_2'], 2) + + net['contr_2_1'] = batch_norm(ConvLayer(net['pool1'], base_n_filters*2, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['contr_2_2'] = batch_norm(ConvLayer(net['contr_2_1'], base_n_filters*2, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['pool2'] = Pool2DLayer(net['contr_2_2'], 2) + + net['contr_3_1'] = batch_norm(ConvLayer(net['pool2'], base_n_filters*4, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['contr_3_2'] = batch_norm(ConvLayer(net['contr_3_1'], base_n_filters*4, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['pool3'] = Pool2DLayer(net['contr_3_2'], 2) + + net['contr_4_1'] = batch_norm(ConvLayer(net['pool3'], base_n_filters*8, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['contr_4_2'] = batch_norm(ConvLayer(net['contr_4_1'], base_n_filters*8, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + l = net['pool4'] = Pool2DLayer(net['contr_4_2'], 2) + # end-snippet-downsampling + + # start-snippet-bottleneck + # the paper does not really describe where and how dropout is added. 
Feel free to try more options + if do_dropout: + l = DropoutLayer(l, p=0.4) + + net['encode_1'] = batch_norm(ConvLayer(l, base_n_filters*16, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['encode_2'] = batch_norm(ConvLayer(net['encode_1'], base_n_filters*16, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + # end-snippet-bottleneck + + # start-snippet-upsampling + net['upscale1'] = batch_norm(Deconv2DLayer(net['encode_2'], base_n_filters*16, 2, 2, crop="valid", nonlinearity=nonlinearity, W=HeNormal(gain="relu"))) + net['concat1'] = ConcatLayer([net['upscale1'], net['contr_4_2']], cropping=(None, None, "center", "center")) + net['expand_1_1'] = batch_norm(ConvLayer(net['concat1'], base_n_filters*8, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['expand_1_2'] = batch_norm(ConvLayer(net['expand_1_1'], base_n_filters*8, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + + net['upscale2'] = batch_norm(Deconv2DLayer(net['expand_1_2'], base_n_filters*8, 2, 2, crop="valid", nonlinearity=nonlinearity, W=HeNormal(gain="relu"))) + net['concat2'] = ConcatLayer([net['upscale2'], net['contr_3_2']], cropping=(None, None, "center", "center")) + net['expand_2_1'] = batch_norm(ConvLayer(net['concat2'], base_n_filters*4, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['expand_2_2'] = batch_norm(ConvLayer(net['expand_2_1'], base_n_filters*4, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + + net['upscale3'] = batch_norm(Deconv2DLayer(net['expand_2_2'], base_n_filters*4, 2, 2, crop="valid", nonlinearity=nonlinearity, W=HeNormal(gain="relu"))) + net['concat3'] = ConcatLayer([net['upscale3'], net['contr_2_2']], cropping=(None, None, "center", "center")) + net['expand_3_1'] = batch_norm(ConvLayer(net['concat3'], base_n_filters*2, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['expand_3_2'] = batch_norm(ConvLayer(net['expand_3_1'], base_n_filters*2, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + + net['upscale4'] = batch_norm(Deconv2DLayer(net['expand_3_2'], base_n_filters*2, 2, 2, crop="valid", nonlinearity=nonlinearity, W=HeNormal(gain="relu"))) + net['concat4'] = ConcatLayer([net['upscale4'], net['contr_1_2']], cropping=(None, None, "center", "center")) + net['expand_4_1'] = batch_norm(ConvLayer(net['concat4'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['expand_4_2'] = batch_norm(ConvLayer(net['expand_4_1'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + # end-snippet-upsampling + + # start-snippet-output + net['output_segmentation'] = ConvLayer(net['expand_4_2'], num_output_classes, 1, nonlinearity=None) + net['dimshuffle'] = DimshuffleLayer(net['output_segmentation'], (1, 0, 2, 3)) + net['reshapeSeg'] = ReshapeLayer(net['dimshuffle'], (num_output_classes, -1)) + net['dimshuffle2'] = DimshuffleLayer(net['reshapeSeg'], (1, 0)) + net['output_flattened'] = NonlinearityLayer(net['dimshuffle2'], nonlinearity=lasagne.nonlinearities.softmax) + + return net +# end-snippet-output diff --git a/code/unet/train_unet.py b/code/unet/train_unet.py new file mode 100644 index 00000000..87136e27 --- /dev/null +++ b/code/unet/train_unet.py @@ -0,0 +1,419 @@ +#!/usr/bin/env python2 +from __future__ import absolute_import, print_function, division +import os +import argparse +import time +import json +from distutils.dir_util import copy_tree + +import numpy as np +import theano +import 
theano.tensor as T +from theano import config +import lasagne +from lasagne.regularization import regularize_network_params + + +from dataset_loaders.images.isbi_em_stacks import IsbiEmStacksDataset +from Unet_lasagne_recipes import build_UNet + + +_FLOATX = config.floatX +_EPSILON = 10e-7 + + +def jaccard_metric(y_pred, y_true, n_classes, one_hot=False): + + assert (y_pred.ndim == 2) or (y_pred.ndim == 1) + + # y_pred to indices + if y_pred.ndim == 2: + y_pred = T.argmax(y_pred, axis=1) + + if one_hot: + y_true = T.argmax(y_true, axis=1) + + # Compute confusion matrix + # cm = T.nnet.confusion_matrix(y_pred, y_true) + cm = T.zeros((n_classes, n_classes)) + for i in range(n_classes): + for j in range(n_classes): + cm = T.set_subtensor( + cm[i, j], T.sum(T.eq(y_pred, i) * T.eq(y_true, j))) + + # Compute Jaccard Index + TP_perclass = T.cast(cm.diagonal(), _FLOATX) + FP_perclass = cm.sum(1) - TP_perclass + FN_perclass = cm.sum(0) - TP_perclass + + num = TP_perclass + denom = TP_perclass + FP_perclass + FN_perclass + + return T.stack([num, denom], axis=0) + + +def accuracy_metric(y_pred, y_true, void_labels, one_hot=False): + + assert (y_pred.ndim == 2) or (y_pred.ndim == 1) + + # y_pred to indices + if y_pred.ndim == 2: + y_pred = T.argmax(y_pred, axis=1) + + if one_hot: + y_true = T.argmax(y_true, axis=1) + + # Compute accuracy + acc = T.eq(y_pred, y_true).astype(_FLOATX) + + # Create mask + mask = T.ones_like(y_true, dtype=_FLOATX) + for el in void_labels: + indices = T.eq(y_true, el).nonzero() + if any(indices): + mask = T.set_subtensor(mask[indices], 0.) + + # Apply mask + acc *= mask + acc = T.sum(acc) / T.sum(mask) + + return acc + + +def crossentropy_metric(y_pred, y_true, void_labels, one_hot=False): + # Clip predictions + y_pred = T.clip(y_pred, _EPSILON, 1.0 - _EPSILON) + + if one_hot: + y_true = T.argmax(y_true, axis=1) + + # Create mask + mask = T.ones_like(y_true, dtype=_FLOATX) + for el in void_labels: + mask = T.set_subtensor(mask[T.eq(y_true, el).nonzero()], 0.) + + # Modify y_true temporarily + y_true_tmp = y_true * mask + y_true_tmp = y_true_tmp.astype('int32') + + # Compute cross-entropy + loss = T.nnet.categorical_crossentropy(y_pred, y_true_tmp) + + # Compute masked mean loss + loss *= mask + loss = T.sum(loss) / T.sum(mask) + + return loss + + +SAVEPATH = 'save_models/' +LOADPATH = SAVEPATH +WEIGHTS_PATH = SAVEPATH + + +def train(dataset, learn_step=0.005, + weight_decay=1e-4, num_epochs=500, + max_patience=100, data_augmentation={}, + savepath=None, loadpath=None, + early_stop_class=None, + batch_size=None, + resume=False, + train_from_0_255=False): + + # + # Prepare load/save directories + # + exp_name = 'unet_' + 'data_aug' if bool(data_augmentation) else '' + + if savepath is None: + raise ValueError('A saving directory must be specified') + + savepath = os.path.join(savepath, dataset, exp_name) + # loadpath = os.path.join(loadpath, dataset, exp_name) + print(savepath) + # print loadpath + + if not os.path.exists(savepath): + os.makedirs(savepath) + else: + print('\033[93m The following folder already exists {}. 
' + 'It will be overwritten in a few seconds...\033[0m'.format( + savepath)) + + print('Saving directory : ' + savepath) + with open(os.path.join(savepath, "config.txt"), "w") as f: + for key, value in locals().items(): + f.write('{} = {}\n'.format(key, value)) + + # + # Define symbolic variables + # + input_var = T.tensor4('input_var') + target_var = T.ivector('target_var') + + # + # Build dataset iterator + # + if batch_size is not None: + bs = batch_size + else: + bs = [10, 1, 1] + + + train_iter = IsbiEmStacksDataset(which_set='train', + batch_size=batch_size[0], + seq_per_subset=0, + seq_length=0, + data_augm_kwargs=data_augmentation, + return_one_hot=False, + return_01c=False, + overlap=0, + use_threads=True, + shuffle_at_each_epoch=True, + return_list=True, + return_0_255=False) + + val_iter = IsbiEmStacksDataset(which_set='val', + batch_size=batch_size[1], + seq_per_subset=0, + seq_length=0, + return_one_hot=False, + return_01c=False, + use_threads=True, + shuffle_at_each_epoch=False, + return_list=True, + return_0_255=False) + test_iter = None + + batch = train_iter.next() + input_dim = (np.shape(batch[0])[2], np.shape(batch[0])[3]) #(x,y) image shape + + + n_batches_train = train_iter.nbatches + n_batches_val = val_iter.nbatches + n_batches_test = test_iter.nbatches if test_iter is not None else 0 + n_classes = train_iter.non_void_nclasses + void_labels = train_iter.void_labels + nb_in_channels = train_iter.data_shape[0] + + print("Batch. train: %d, val %d, test %d" % (n_batches_train, n_batches_val, n_batches_test)) + print("Nb of classes: %d" % (n_classes)) + print("Nb. of input channels: %d" % (nb_in_channels)) + + # + # Build network + # + + net = build_UNet(n_input_channels= nb_in_channels,# BATCH_SIZE = batch_size, + num_output_classes = n_classes, base_n_filters = 64, do_dropout=False, + input_dim = (None, None)) + + output_layer = net["output_flattened"] + # + # Define and compile theano functions + # + print("Defining and compiling training functions") + prediction = lasagne.layers.get_output(output_layer, input_var) + loss = crossentropy_metric(prediction, target_var, void_labels) + + if weight_decay > 0: + weightsl2 = regularize_network_params(output_layer, lasagne.regularization.l2) + loss += weight_decay * weightsl2 + + params = lasagne.layers.get_all_params(output_layer, trainable=True) + updates = lasagne.updates.adam(loss, params, learning_rate=learn_step) + + train_fn = theano.function([input_var, target_var], loss, updates=updates) + + print("Defining and compiling test functions") + test_prediction = lasagne.layers.get_output(output_layer, input_var,deterministic=True) + test_loss = crossentropy_metric(test_prediction, target_var, void_labels) + test_acc = accuracy_metric(test_prediction, target_var, void_labels) + test_jacc = jaccard_metric(test_prediction, target_var, n_classes) + + val_fn = theano.function([input_var, target_var], [test_loss, test_acc, test_jacc]) + + # + # Train + # + err_train = [] + err_valid = [] + acc_valid = [] + jacc_valid = [] + patience = 0 + + # Training main loop + print("Start training") + for epoch in range(num_epochs): + # Single epoch training and validation + start_time = time.time() + cost_train_tot = 0 + # Train + print('Training steps ') + for i in range(n_batches_train): + print(i) + # Get minibatch + X_train_batch, L_train_batch = train_iter.next() + L_train_batch = np.reshape(L_train_batch, np.prod(L_train_batch.shape)) + + # Training step + cost_train = train_fn(X_train_batch, L_train_batch) + out_str = "cost %f" % 
(cost_train)
+            cost_train_tot += cost_train
+
+        err_train += [cost_train_tot/n_batches_train]
+
+        # Validation
+        cost_val_tot = 0
+        acc_val_tot = 0
+        jacc_val_tot = np.zeros((2, n_classes))
+
+        print('Validation steps')
+        for i in range(n_batches_val):
+            print(i)
+            # Get minibatch
+            X_val_batch, L_val_batch = val_iter.next()
+            L_val_batch = np.reshape(L_val_batch, np.prod(L_val_batch.shape))
+
+            # Validation step
+            cost_val, acc_val, jacc_val = val_fn(X_val_batch, L_val_batch)
+
+            acc_val_tot += acc_val
+            cost_val_tot += cost_val
+            jacc_val_tot += jacc_val
+
+        err_valid += [cost_val_tot/n_batches_val]
+        acc_valid += [acc_val_tot/n_batches_val]
+        jacc_perclass_valid = jacc_val_tot[0, :] / jacc_val_tot[1, :]
+        if early_stop_class is None:
+            jacc_valid += [np.mean(jacc_perclass_valid)]
+        else:
+            jacc_valid += [jacc_perclass_valid[early_stop_class]]
+
+        out_str = "EPOCH %i: Avg epoch training cost train %f, cost val %f" +\
+            ", acc val %f, jacc val class 0 %f, jacc val class 1 %f, jacc val %f took %f s"
+        out_str = out_str % (epoch, err_train[epoch],
+                             err_valid[epoch],
+                             acc_valid[epoch],
+                             jacc_perclass_valid[0],
+                             jacc_perclass_valid[1],
+                             jacc_valid[epoch],
+                             time.time()-start_time)
+        print(out_str)
+
+        with open(os.path.join(savepath, "unet_output.log"), "a") as f:
+            f.write(out_str + "\n")
+
+        # Early stopping and saving stuff
+        if epoch == 0:
+            best_jacc_val = jacc_valid[epoch]
+        elif epoch > 1 and jacc_valid[epoch] > best_jacc_val:
+            best_jacc_val = jacc_valid[epoch]
+            patience = 0
+            np.savez(os.path.join(savepath, 'new_unet_model_best.npz'), *lasagne.layers.get_all_param_values(output_layer))
+            np.savez(os.path.join(savepath, 'unet_errors_best.npz'), err_valid, err_train, acc_valid, jacc_valid)
+        else:
+            patience += 1
+
+        np.savez(os.path.join(savepath, 'new_unet_model_last.npz'), *lasagne.layers.get_all_param_values(output_layer))
+        np.savez(os.path.join(savepath, 'unet_errors_last.npz'), err_valid, err_train, acc_valid, jacc_valid)
+        # Finish training if patience has expired or max number of epochs
+        # reached
+        if patience == max_patience or epoch == num_epochs-1:
+            if test_iter is not None:
+                # Load best model weights
+                with np.load(os.path.join(savepath, 'new_unet_model_best.npz')) as f:
+                    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
+                nlayers = len(lasagne.layers.get_all_params(output_layer))
+                lasagne.layers.set_all_param_values(output_layer, param_values[:nlayers])
+                # Test
+                cost_test_tot = 0
+                acc_test_tot = 0
+                jacc_test_tot = np.zeros((2, n_classes))
+                for i in range(n_batches_test):
+                    # Get minibatch
+                    X_test_batch, L_test_batch = test_iter.next()
+                    L_test_batch = np.reshape(L_test_batch, np.prod(L_test_batch.shape))
+
+                    # Test step
+                    cost_test, acc_test, jacc_test = val_fn(X_test_batch, L_test_batch)
+
+                    acc_test_tot += acc_test
+                    cost_test_tot += cost_test
+                    jacc_test_tot += jacc_test
+
+                err_test = cost_test_tot/n_batches_test
+                acc_test = acc_test_tot/n_batches_test
+                jacc_test_perclass = jacc_test_tot[0, :] / jacc_test_tot[1, :]
+                jacc_test = np.mean(jacc_test_perclass)
+
+                out_str = "FINAL MODEL: err test %f, acc test %f, " +\
+                    "jacc test class 0 %f, jacc test class 1 %f, jacc test %f"
+                out_str = out_str % (err_test, acc_test, jacc_test_perclass[0],
+                                     jacc_test_perclass[1], jacc_test)
+                print(out_str)
+            if savepath != loadpath:
+                print('Copying model and other training files to {}'.format(loadpath))
+                copy_tree(savepath, loadpath)
+
+            # End
+            return
+
+
+def main():
+    parser = argparse.ArgumentParser(description='U-Net model training')
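+    # Example invocation (hypothetical values, shown for illustration only;
+    # the accepted arguments are defined just below):
+    #
+    #   THEANO_FLAGS=device=cuda,floatX=float32 python train_unet.py \
+    #       -dataset em -learning_rate 0.0001 --num_epochs 750 \
+    #       -batch_size 5 5 1
+    #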
parser.add_argument('-dataset',
+                        default='em',
+                        help='Dataset.')
+    parser.add_argument('-learning_rate',
+                        default=0.0001,
+                        help='Learning Rate')
+    parser.add_argument('-penal_cst',
+                        default=0.0,
+                        help='regularization constant')
+    parser.add_argument('--num_epochs',
+                        '-ne',
+                        type=int,
+                        default=750,
+                        help='Optional. Int to indicate the max '
+                             'number of epochs.')
+    parser.add_argument('-max_patience',
+                        type=int,
+                        default=100,
+                        help='Max patience')
+    parser.add_argument('-batch_size',
+                        type=int,
+                        nargs='+',
+                        default=[5, 5, 1],
+                        help='Batch size [train, val, test]. Default: -batch_size 5 5 1')
+    parser.add_argument('-data_augmentation',
+                        type=json.loads,
+                        default={'rotation_range':25,
+                                 'shear_range':0.41,
+                                 'horizontal_flip':True,
+                                 'vertical_flip':True,
+                                 'fill_mode':'reflect',
+                                 'spline_warp':True,
+                                 'warp_sigma':10,
+                                 'warp_grid_size':3,
+                                 'crop_size': (224, 224)},
+                        help='use data augmentation')
+    parser.add_argument('-early_stop_class',
+                        type=int,
+                        default=None,
+                        help='class to early stop on')
+    parser.add_argument('-train_from_0_255',
+                        action='store_true',
+                        help='Whether to train from images within 0-255 range')
+    args = parser.parse_args()
+
+    train(args.dataset, float(args.learning_rate),
+          float(args.penal_cst), int(args.num_epochs), int(args.max_patience),
+          data_augmentation=args.data_augmentation, batch_size=args.batch_size,
+          early_stop_class=args.early_stop_class, savepath=SAVEPATH,
+          train_from_0_255=args.train_from_0_255, loadpath=LOADPATH)
+
+if __name__ == "__main__":
+    main()
diff --git a/code/utils.py b/code/utils.py
index 3b50019c..ff772ad4 100644
--- a/code/utils.py
+++ b/code/utils.py
@@ -6,7 +6,6 @@
 image from a set of samples or weights.
 """
-
 import numpy
@@ -85,7 +84,7 @@ def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
     else:
         channel_defaults = [0., 0., 0., 1.]

-    for i in xrange(4):
+    for i in range(4):
         if X[i] is None:
             # if channel is None, fill it with zeros of the correct
             # dtype
@@ -115,8 +114,8 @@ def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
         dt = 'uint8'
     out_array = numpy.zeros(out_shape, dtype=dt)

-    for tile_row in xrange(tile_shape[0]):
-        for tile_col in xrange(tile_shape[1]):
+    for tile_row in range(tile_shape[0]):
+        for tile_col in range(tile_shape[1]):
             if tile_row * tile_shape[1] + tile_col < X.shape[0]:
                 this_x = X[tile_row * tile_shape[1] + tile_col]
                 if scale_rows_to_unit_interval:
diff --git a/data/download.sh b/data/download.sh
index ed273bbb..67c5c057 100755
--- a/data/download.sh
+++ b/data/download.sh
@@ -5,7 +5,7 @@
 WGET=$?
 which curl >/dev/null 2>&1
 CURL=$?
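# Prefer wget when it is installed, otherwise fall back to curl; both
# variants of DL_CMD below resume partially downloaded files.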
if [ "$WGET" -eq 0 ]; then - DL_CMD="wget -c" + DL_CMD="wget --no-verbose -c" elif [ "$CURL" -eq 0 ]; then DL_CMD="curl -C - -O" else @@ -15,8 +15,8 @@ fi $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist_py3k.pkl.gz -$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl.gz && gunzip imdb.pkl.gz -$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.dict.pkl.gz && gunzip imdb.dict.pkl.gz +$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl.gz && gunzip -f imdb.pkl.gz +$DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/imdb.dict.pkl.gz && gunzip -f imdb.dict.pkl.gz $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/data/Nottingham.zip && unzip -u Nottingham.zip $DL_CMD http://www.iro.umontreal.ca/~lisa/deep/midi.zip && unzip -u midi.zip -d ../code && echo "extracted Modified Python MIDI package (GPL)" $DL_CMD http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold0.pkl.gz diff --git a/doc/DBN.txt b/doc/DBN.txt index bb0571eb..be7bfbdc 100644 --- a/doc/DBN.txt +++ b/doc/DBN.txt @@ -6,7 +6,7 @@ Deep Belief Networks .. note:: This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp` and :doc:`rbm`. Additionally it uses the following Theano - functions and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic + functions and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. @@ -210,7 +210,7 @@ obtained over these sets. Putting it all together +++++++++++++++++++++++ -The few lines of code below constructs the deep belief network : +The few lines of code below constructs the deep belief network: .. literalinclude:: ../code/DBN.py :start-after: # numpy random generator diff --git a/doc/SdA.txt b/doc/SdA.txt index 289a8b0a..6d9ba0da 100644 --- a/doc/SdA.txt +++ b/doc/SdA.txt @@ -6,7 +6,7 @@ Stacked Denoising Autoencoders (SdA) .. note:: This section assumes you have already read through :doc:`logreg` and :doc:`mlp`. Additionally it uses the following Theano functions - and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. + and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_. .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh diff --git a/doc/cnn_1D_segm.txt b/doc/cnn_1D_segm.txt new file mode 100644 index 00000000..f81ea164 --- /dev/null +++ b/doc/cnn_1D_segm.txt @@ -0,0 +1,243 @@ +.. _cnn_1D_segm: + +Network for 1D segmentation +*************************** + +.. note:: + This section assumes the reader has already read through :doc:`lenet` for + convolutional networks motivation and :doc:`fcn_2D_segm` for segmentation + standard network. + + +Summary ++++++++ + +The fundamental notions behind segmentation have been explained in :doc:`fcn_2D_segm`. +A particularity here is that some of these notions will be applied to 1D +segmentation. 
However, almost every Lasagne layer used for 2D segmentation have +their respective 1D layer, so the implementation would look alike if the same +model was used. + + + + +Data +++++ + +The `BigBrain `__ dataset is a 3D ultra-high resolution model of the brain reconstructed from 2D sections. +We are interested in the outer part of the brain, the cortex. +More precisely, we are interested in segmenting the 6 different layers of the cortex in 3D. +Creating an expertly labelled training dataset with each 2D section (shown in figure 1) is unfeasible. Instead of giving as input a 2D image of one section of the brain, we give as input 1D vectors with information from across the cortex, extracted from smaller portions of manually labelled cortex +as shown in Figure 2. The final dataset is not available yet, a preliminary version +is available `here `_ . + +.. figure:: images/big_brain_section.png + :align: center + :scale: 100% + + **Figure 1** : Big Brain section + +.. figure:: images/ray.png + :align: center + :scale: 50% + + **Figure 2** : Ray extraction from segmentated cortex + +We will call *rays* the vectors of size 200 going from outside the brain and +through the cortex. As the images were stained for cell bodies, the intensity of each pixel of these rays represents the cell densities +and sizes contained in the cortical layer to which the pixel belongs. Since the 6 cortical layers +have different properties (cell density and size), the intensity profile can be used to +detect boundaries of the cortical layers. + +Each ray has 2 input channels, one representing the smoothed intensity and the other, +the raw version, as shown in Figure 3. The next figure, Figure 4, shows the +ground truth segmentation map, where each different color represent +a different label. The purple color indicate that these pixels are +outside the cortex, while the 6 other colors represent the 6 cortical layers. +For example, the first layer of the cortex is between pixels ~ 35-55. The cortex +for this sample starts at pixel ~35 and ends at pixel ~170. + + +.. figure:: images/raw_smooth.png + :align: center + :scale: 100% + + **Figure 3** : Raw and smooth intensity profiles (input channels) + + +.. figure:: images/labels.png + :align: center + :scale: 100% + + **Figure 4** : Cortical layers labels for this ray + + + +Model ++++++ + +We first started our experiment with more complex models, but we finally found that +the simpler model present here had enough capacity to learn how and where the layer boundaries are. +This model (depicted in Figure 5) is composed of 8 identical blocks, followed by a +last convolution and a softmax non linearity. + +Each block is composed of : + +* Batch Normalization layer +* Rectify nonlinearity layer +* Convolution layer, with kernel size 25, with enough padding such that the convolution does not change the feature resolution, and 64 features maps + +The last convolution has kernel size 1 and *number of classes* feature maps. +The softmax is then +used to detect which of these classes is more likely for each pixel. +Note that any input image size could be used here, since the model is built from +locally connected layers exclusively. + +.. figure:: images/cortical_layers_net.png + :align: center + :scale: 100% + + **Figure 5** : Model + +Note that we didn't use any pooling, because it was not needed. However, if +pooling layers were used, an upsampling path would have been necessary to recover full +spatial size of the input ray. 
+
+
+Results
++++++++
+
+The model outputs a vector of the same size as the input (here, 200).
+There are 7 class labels, including the 6 cortical layers and the 'not in the brain yet'
+label. You can see in Figure 6 below the output of the model for one ray. The top
+of the plot represents the ground truth segmentation, while the bottom represents
+the predicted segmentation. As you can see, only a small number of pixels
+are incorrectly segmented.
+
+.. figure:: images/cortical_ray_result.png
+   :align: center
+   :scale: 100%
+
+   **Figure 6**: Ground truth (top) vs prediction (bottom) for 1 ray
+
+However, since the purpose was to do 3D segmentation by using 1D segmentation
+of the rays, we needed to put the rays back on the brain section. After interpolation
+between those rays and smoothing, we get the results shown in Figure 7. The colored
+lines are from 3D meshes based on the predictions of the model, intersected with a 2D section, and the grayscale stripes correspond to the
+ground truth. As you can see, the model achieves very good results on the small manually labelled
+sample, and these extend well to previously unsegmented cortex.
+
+
+
+.. figure:: images/cortical_valid1.png
+   :align: center
+   :scale: 40%
+
+   **Figure 7**: Results put on the brain section
+
+
+Code
+++++
+
+.. warning::
+
+    * Current code works with Python 2 only.
+    * If you use Theano with GPU backend (e.g. with Theano flag ``device=cuda``),
+      you will need at least 12GB free in your video RAM.
+
+The FCN implementation can be found in the following files:
+
+* `fcn1D.py <../code/cnn_1D_segm/fcn1D.py>`_ : Main script. Defines the model.
+* `train_fcn1D.py <../code/cnn_1D_segm/train_fcn1D.py>`_ : Training loop.
+
+Change the ``dataset_loaders/config.ini`` file and add the right path for the dataset:
+
+.. code-block:: cfg
+
+    [cortical_layers]
+    shared_path = /path/to/DeepLearningTutorials/data/cortical_layers/
+
+The folder indicated in section ``[cortical_layers]`` should contain a sub-folder named ``6layers_segmentation``
+(you can obtain it by just renaming the folder extracted from ``TrainingData190417.tar.gz``) which should
+itself contain the files:
+
+* ``training_cls_indices.txt``
+* ``training_cls.txt``
+* ``training_geo.txt``
+* ``training_raw.txt``
+* ``training_regions.txt``
+
+
+First define a *bn+relu+conv* block that returns the name of the last layer of
+the block. Since the implementation uses a dictionary variable *net* that keeps
+layer names as keys and the actual layer objects as values, the name of the
+last layer is sufficient:
+
+.. literalinclude:: ../code/cnn_1D_segm/fcn1D.py
+  :start-after: start-snippet-bn_relu_conv
+  :end-before: end-snippet-bn_relu_conv
+
+The model is composed of 8 of these blocks, as seen below. Note that the
+model implementation is very flexible, since the depth (number of blocks), the
+type of block, the filter size and the number of filters can all be changed by the user.
+However, the hyperparameters used here were:
+
+* filter_size = 25
+* n_filters = 64
+* depth = 8
+* block = bn_relu_conv
+
+.. literalinclude:: ../code/cnn_1D_segm/fcn1D.py
+  :start-after: start-snippet-convolutions
+  :end-before: end-snippet-convolutions
+
+Finally, the last convolution and the softmax are applied by:
+
+.. literalinclude:: ../code/cnn_1D_segm/fcn1D.py
+  :start-after: start-snippet-output
+  :end-before: end-snippet-output
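+
+The snippets above are pulled from ``fcn1D.py`` and are not reproduced in this
+document. To give a rough idea of the pattern, a per-pixel softmax over a 1D
+feature map can be obtained along the following lines. This is a hedged sketch
+continuing the example from the Model section, not the file's actual code; it
+assumes the ``net`` dictionary and ``last_layer`` name from that sketch, and
+``n_classes = 7``:
+
+.. code-block:: python
+
+    from lasagne.layers import (Conv1DLayer, DimshuffleLayer, NonlinearityLayer,
+                                ReshapeLayer)
+    from lasagne.nonlinearities import softmax
+
+    n_classes = 7
+    # last convolution: kernel size 1, one feature map per class
+    net['final_conv'] = Conv1DLayer(net[last_layer], num_filters=n_classes,
+                                    filter_size=1, nonlinearity=None)
+    # (batch, classes, length) -> (batch, length, classes) -> (batch*length, classes)
+    net['dimshuffle'] = DimshuffleLayer(net['final_conv'], (0, 2, 1))
+    net['flat'] = ReshapeLayer(net['dimshuffle'], (-1, n_classes))
+    # the softmax is now applied to each pixel of each ray independently
+    net['probs'] = NonlinearityLayer(net['flat'], nonlinearity=softmax)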
+
+Running ``train_fcn1D.py`` on a Titan X took around 4 hours, ending with the following:
+
+.. code-block:: text
+
+    THEANO_FLAGS=device=cuda0,floatX=float32,dnn.conv.algo_fwd=time_once,dnn.conv.algo_bwd_data=time_once,dnn.conv.algo_bwd_filter=time_once,gpuarray.preallocate=1 python train_fcn1D.py
+    [...]
+    EPOCH 412: Avg cost train 0.065615, acc train 0.993349, cost val 0.041758, acc val 0.984398, jacc val per class ['0: 0.981183', '1: 0.953546', '2: 0.945765', '3: 0.980471', '4: 0.914617', '5: 0.968710', '6: 0.971049'], jacc val 0.959335 took 31.422823 s
+    saving last model
+
+
+References
+++++++++++
+
+If you use this tutorial, please cite the following papers:
+
+* References for BigBrain:
+
+  * `[pdf] `__ Lewis, L.B. et al.: BigBrain: Initial Tissue Classification and Surface Extraction, HBM 2014.
+  * `[website] `__ Amunts, K. et al.: "BigBrain: An Ultrahigh-Resolution 3D Human Brain Model", Science (2013) 340 no. 6139 1472-1475, June 2013.
+  * `[pdf] `__ Bludau, S. et al.: Two new Cytoarchitectonic Areas of the Human Frontal Pole, OHBM 2012.
+  * `[pdf] `__ Lepage, C. et al.: Automatic Repair of Acquisition Defects in Reconstruction of Histology Sections of a Human Brain, HBM 2010.
+
+* `[GitHub Repo] `__ Francesco Visin, Adriana Romero - Dataset loaders: a Python library to load and preprocess datasets. 2017.
+
+Papers related to Theano/Lasagne:
+
+* `[pdf] `_ Theano Development Team. Theano: A Python framework for fast computation of mathematical expressions. May 2016.
+* `[website] `__ Sander Dieleman, Jan Schluter, Colin Raffel, Eben Olson, Søren Kaae Sønderby, Daniel Nouri, Daniel Maturana, Martin Thoma, Eric Battenberg, Jack Kelly, Jeffrey De Fauw, Michael Heilman, diogo149, Brian McFee, Hendrik Weideman, takacsg84, peterderivaz, Jon, instagibbs, Dr. Kashif Rasul, CongLiu, Britefury, and Jonas Degrave, "Lasagne: First release." (2015).
+
+
+Acknowledgements
+================
+
+This work was done in collaboration with Konrad Wagstyl, PhD student, University of Cambridge.
+We would like to thank Professor Alan Evans' `[MCIN lab] `_ and Professor Katrin Amunts' `[INM-1 lab] `_.
+
+Thank you!
diff --git a/doc/conf.py b/doc/conf.py
index 52631d51..0f35bb34 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -25,11 +25,14 @@
 extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo']

 try:
-    from sphinx.ext import pngmath
-    extensions.append('sphinx.ext.pngmath')
+    from sphinx.ext import imgmath
+    extensions.append('sphinx.ext.imgmath')
 except ImportError:
-    print >>sys.stderr, 'Warning: could not import sphinx.ext.pngmath'
-    pass
+    try:
+        from sphinx.ext import pngmath
+        extensions.append('sphinx.ext.pngmath')
+    except ImportError:
+        pass

 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['.templates']
@@ -155,12 +158,16 @@
 # Options for LaTeX output
 # ------------------------

+latex_elements = {
+    # The paper size ('letter' or 'a4').
+    #latex_paper_size = 'letter',

-# The paper size ('letter' or 'a4').
-#latex_paper_size = 'letter'
+    # The font size ('10pt', '11pt' or '12pt').
+    'pointsize': '11pt',

-# The font size ('10pt', '11pt' or '12pt').
-latex_font_size = '11pt'
+    # Additional stuff for the LaTeX preamble.
+    #latex_preamble = '',
+}

 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, document class [howto/manual]).
diff --git a/doc/contents.txt b/doc/contents.txt
index 3b7a16eb..3246aec1 100644
--- a/doc/contents.txt
+++ b/doc/contents.txt
@@ -24,3 +24,6 @@ Contents
     rnnrbm
     utilities
     references
+    fcn_2D_segm
+    cnn_1D_segm
+    unet
diff --git a/doc/dA.txt b/doc/dA.txt
index 8ff26354..dd05acdf 100644
--- a/doc/dA.txt
+++ b/doc/dA.txt
@@ -6,7 +6,7 @@ Denoising Autoencoders (dA)
 .. note::
   This section assumes the reader has already read through :doc:`logreg`
   and :doc:`mlp`. Additionally it uses the following Theano functions
-  and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_.
+  and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_. If you intend to run the code on GPU also read `GPU`_.

 .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh

@@ -126,7 +126,7 @@ signal:
   :pyobject: dA.get_reconstructed_input

 And using these functions we can compute the cost and the updates of
-one stochastic gradient descent step :
+one stochastic gradient descent step:

 .. literalinclude:: ../code/dA.py
   :pyobject: dA.get_cost_updates
@@ -209,7 +209,7 @@ need to do is to add a stochastic corruption step operating on the input. The in
 corrupted in many ways, but in this tutorial we will stick to the original
 corruption mechanism of randomly masking entries of the input by making
 them zero. The code below
-does just that :
+does just that:

 .. literalinclude:: ../code/dA.py
   :pyobject: dA.get_corrupted_input
@@ -221,7 +221,7 @@ For this reason, the constructor of the ``dA`` also gets Theano
 variables pointing to the shared parameters. If those parameters are left
 to ``None``, new ones will be constructed.

-The final denoising autoencoder class becomes :
+The final denoising autoencoder class becomes:

 .. literalinclude:: ../code/dA.py
   :pyobject: dA
@@ -254,7 +254,7 @@ constant (weights are converted to values between 0 and 1). To plot
 our filters we will need the help of ``tile_raster_images`` (see
 :ref:`how-to-plot`) so we urge the reader to study it. Also
 using the help of the Python Image Library, the following lines of code will
-save the filters as an image :
+save the filters as an image:

 .. literalinclude:: ../code/dA.py
   :start-after: start-snippet-4
@@ -264,20 +264,20 @@ save the filters as an image :
 Running the Code
 ++++++++++++++++

-To run the code :
+To run the code:

 .. code-block:: bash

   python dA.py

-The resulted filters when we do not use any noise are :
+The resulting filters when we do not use any noise are:

 .. figure:: images/filters_corruption_0.png
   :align: center

-The filters for 30 percent noise :
+The filters for 30 percent noise:

 .. figure:: images/filters_corruption_30.png
diff --git a/doc/fcn_2D_segm.txt b/doc/fcn_2D_segm.txt
new file mode 100644
index 00000000..379dbe39
--- /dev/null
+++ b/doc/fcn_2D_segm.txt
@@ -0,0 +1,271 @@
+.. _fcn_2D_segm:
+
+Fully Convolutional Networks (FCN) for 2D segmentation
+******************************************************
+
+.. note::
+    This section assumes the reader has already read through :doc:`lenet` for
+    convolutional networks motivation.
+
+Summary
++++++++
+
+The segmentation task is different from the classification task because it requires predicting
+a class for each pixel of the input image, instead of only one class for the whole input.
+Classification needs to understand *what* is in the input (namely, the context).
+However, in order to predict what is in the input for each pixel, segmentation needs to recover
+not only *what* is in the input, but also *where*.
+
+.. figure:: images/cat_segmentation.png
+   :align: center
+   :scale: 35%
+
+   **Figure 1**: Segmentation network (from FCN paper)
+
+**Fully Convolutional Networks** (FCNs) owe their name to their architecture, which is
+built only from locally connected layers, such as convolution, pooling and upsampling.
+Note that no dense layer is used in this kind of architecture. This reduces the number
+of parameters and the computation time. Also, the network can work regardless of the original
+image size, without requiring any fixed number of units at any stage, given that all
+connections are local. To obtain a segmentation map (output), segmentation
+networks usually have 2 parts:
+
+* Downsampling path: captures semantic/contextual information
+* Upsampling path: recovers spatial information
+
+The **downsampling path** is used to extract and interpret the context (*what*), while the
+**upsampling path** is used to enable precise localization (*where*). Furthermore, to fully
+recover the fine-grained spatial information lost in the pooling or downsampling layers, we
+often use skip connections.
+
+A skip connection is a connection that bypasses at least one layer. Here, it
+is often used to transfer local information by concatenating or summing feature
+maps from the downsampling path with feature maps from the upsampling path. Merging features
+from various resolution levels helps combine context information with spatial information.
+
+
+Data
+++++
+
+The polyps dataset can be found `here `__.
+There are a total of 912 images taken from 36 patients.
+
+* Training set: 20 patients and 547 frames
+* Validation set: 8 patients and 183 frames
+* Test set: 8 patients and 182 frames
+
+Each pixel is labelled with one of 2 classes: polyp or background.
+The sizes of the images vary. We use data augmentation for training, as specified
+in the default arguments in the code given below. Note that
+data augmentation is necessary for training with a batch size greater than 1,
+in order to obtain a common image size through random cropping. Without random cropping,
+the batch size for the training set must be set to 1, as for the validation and test
+sets (where there is no data augmentation).
+
+
+In each of the training, validation and test directories, the input images are in the
+``/images`` directory and the polyp masks (segmentation maps) are in ``/masks2``. The
+segmentation maps in the ``/masks2`` directory indicate the presence or absence
+of polyps for each pixel. The other subdirectories (``/masks3`` and ``/masks4``) are,
+respectively, for a segmentation task with 3 and 4 classes, but will not be
+presented here.
+
+
+Model
++++++
+
+There are variants of the FCN architecture, which mainly differ in the spatial precision of
+their output. For example, the figures below show the FCN-32, FCN-16 and FCN-8 variants. In the
+figures, convolutional layers are represented as vertical lines between pooling layers, which
+explicitly show the relative sizes of the feature maps.
+
+.. figure:: images/fcn.png
+   :align: center
+   :scale: 50%
+
+   **Figure 2**: FCN architecture (from FCN paper)
+
+**Difference between the 3 FCN variants**
+
+As shown below, these 3 architectures differ in the stride of the last convolution,
+and in the skip connections used to obtain the output segmentation maps. We will use the term
+*downsampling path* to refer to the network up to the *conv7* layer, and the term
+*upsampling path* to refer to the network composed of all layers after *conv7*. It is worth
+noting that the 3 FCN architectures share the same downsampling path, but differ in their
+respective upsampling paths.
+
+
+1. **FCN-32**: Directly produces the segmentation map from *conv7*, by using a
+transposed convolution layer with stride 32.
+
+2. **FCN-16**: Sums the 2x upsampled prediction from *conv7*
+(using a transposed convolution with stride 2) with *pool4* and then
+produces the segmentation map, by using a transposed convolution layer with stride 16
+on top of that.
+
+3. **FCN-8**: Sums the 2x upsampled *conv7* (with a stride 2 transposed convolution)
+with *pool4*, upsamples the result with a stride 2 transposed convolution and sums it
+with *pool3*, and applies a transposed convolution layer with stride 8 on the resulting
+feature maps to obtain the segmentation map.
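+
+To make the FCN-8 recipe concrete, here is a schematic Lasagne sketch of its
+upsampling path. This is an illustration only, not the tutorial's ``fcn8.py``;
+it assumes a ``net`` dictionary that already holds the *conv7*, *pool4* and
+*pool3* layers, ``n_classes`` output classes, and even feature-map sizes so
+that the summed maps line up:
+
+.. code-block:: python
+
+    from lasagne.layers import Conv2DLayer, ElemwiseSumLayer, TransposedConv2DLayer
+
+    n_classes = 2
+    # project each tapped layer to one feature map per class with 1x1 convolutions
+    score7 = Conv2DLayer(net['conv7'], n_classes, 1, nonlinearity=None)
+    score4 = Conv2DLayer(net['pool4'], n_classes, 1, nonlinearity=None)
+    score3 = Conv2DLayer(net['pool3'], n_classes, 1, nonlinearity=None)
+
+    # 2x upsample the conv7 scores and sum them with the pool4 scores
+    up7 = TransposedConv2DLayer(score7, n_classes, 4, stride=2, crop=1,
+                                nonlinearity=None)
+    fuse4 = ElemwiseSumLayer([up7, score4])
+
+    # 2x upsample again and sum with the pool3 scores
+    up4 = TransposedConv2DLayer(fuse4, n_classes, 4, stride=2, crop=1,
+                                nonlinearity=None)
+    fuse3 = ElemwiseSumLayer([up4, score3])
+
+    # final 8x upsampling back to the input resolution
+    segmentation = TransposedConv2DLayer(fuse3, n_classes, 16, stride=8, crop=4,
+                                         nonlinearity=None)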
+
+.. figure:: images/fcn_schema.png
+   :align: center
+   :scale: 65%
+
+   **Figure 3**: FCN architecture (from FCN paper)
+
+As explained above, the upsampling paths of the FCN variants are different, since they
+use different skip connection layers and strides for the last convolution, yielding
+different segmentations, as shown in Figure 4. Combining layers that have different
+precision helps retrieve fine-grained spatial information, as well as coarse
+contextual information.
+
+.. figure:: images/fcn32_16_8.png
+   :align: center
+   :scale: 30%
+
+   **Figure 4**: FCN results (from FCN paper)
+
+Note that the FCN-8 architecture was used on the polyps dataset below,
+since it produces more precise segmentation maps.
+
+
+Metrics
+=======
+
+**Per pixel accuracy**
+
+This metric is self-explanatory, since it outputs the class prediction accuracy
+per pixel.
+
+.. math::
+   :label: accuracy
+
+   acc(P, GT) = \frac{|\text{pixels correctly predicted}|}{|\text{total number of pixels}|}
+
+
+**Jaccard (Intersection over Union)**
+
+This evaluation metric is often used for image segmentation, since it is more robust
+to class imbalance than the per pixel accuracy. The Jaccard index is a per-class
+evaluation metric: it computes the number of pixels in
+the intersection between the
+predicted and ground truth segmentation maps for a given class, divided by the
+number of pixels in the union between those two segmentation maps,
+also for that given class.
+
+.. math::
+   :label: jaccard_equation
+
+   jacc(P(class), GT(class)) = \frac{|P(class)\cap GT(class)|}{|P(class)\cup GT(class)|}
+
+where `P` is the predicted segmentation map and `GT` is the ground
+truth segmentation map. `P(class)` is then the binary mask indicating whether each
+pixel is predicted as *class* or not. In general, the closer to 1, the better.
+
+.. figure:: images/jaccard.png
+   :align: center
+   :scale: 40%
+
+   **Figure 5**: Jaccard visualisation (from this `website `__)
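+
+As a worked example of both metrics, the following sketch (plain NumPy, not
+part of the tutorial code) computes the per pixel accuracy and the per-class
+Jaccard on two small label maps:
+
+.. code-block:: python
+
+    import numpy as np
+
+    def jaccard_per_class(pred, gt, n_classes):
+        # pred and gt are integer label maps of the same shape
+        scores = []
+        for c in range(n_classes):
+            p, g = (pred == c), (gt == c)
+            union = np.logical_or(p, g).sum()
+            inter = np.logical_and(p, g).sum()
+            scores.append(float(inter) / union if union else float('nan'))
+        return scores
+
+    pred = np.array([[0, 1, 1],
+                     [0, 1, 0]])
+    gt   = np.array([[0, 1, 0],
+                     [0, 0, 0]])
+
+    print((pred == gt).mean())            # per pixel accuracy: 4/6 ~ 0.67
+    print(jaccard_per_class(pred, gt, 2)) # [0.4, 0.3333...]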
+
+Code
+++++
+
+.. warning::
+
+    * Current code works with Python 2 only.
+    * If you use Theano with GPU backend (e.g. with Theano flag ``device=cuda``),
+      you will need at least 12GB free in your video RAM.
+
+The FCN-8 implementation can be found in the following files:
+
+* `fcn8.py <../code/fcn_2D_segm/fcn8.py>`_ : Defines the model.
+* `train_fcn8.py <../code/fcn_2D_segm/train_fcn8.py>`_ : Training loop (main script to use).
+
+
+The user must install `Lasagne `_
+and clone the GitHub repo `Dataset Loaders `_.
+
+.. code-block:: bash
+
+    ## Installation of dataset_loaders.
+
+    # dataset_loaders depends on the Python modules matplotlib, numpy, scipy, Pillow, scikit-image, seaborn, and h5py.
+    # They can all be installed via conda.
+    conda install matplotlib numpy Pillow scipy scikit-image seaborn h5py
+
+    git clone https://github.com/fvisin/dataset_loaders.git
+
+    cd dataset_loaders/
+
+    pip install -e .
+
+
+Change the ``dataset_loaders/config.ini`` file and add the right path for the dataset:
+
+.. code-block:: bash
+
+    ## Into `dataset_loaders` git folder.
+
+    # If ``config.ini`` does not yet exist, create it:
+    cd dataset_loaders
+    touch config.ini
+
+    # ``config.ini`` must have at least the section ``[general]`` which indicates a work directory.
+
+.. code-block:: cfg
+
+    [general]
+    datasets_local_path = /the/local/path/where/the/datasets/will/be/copied
+
+    [polyps912]
+    shared_path = /path/to/DeepLearningTutorials/data/polyps_split7/
+
+The folder indicated in section ``[polyps912]`` should be the unzipped dataset archive ``polyps_split7.zip``, with the sub-folders:
+
+* ``test``
+* ``train``
+* ``valid``
+
+We used Lasagne layers, as you can see in the code below.
+
+.. literalinclude:: ../code/fcn_2D_segm/fcn8.py
+  :start-after: start-snippet-1
+  :end-before: end-snippet-1
+
+Running ``train_fcn8.py`` on a Titan X took around 3.5 hours, ending with the following:
+
+.. code-block:: text
+
+    $ THEANO_FLAGS=device=cuda0,floatX=float32,dnn.conv.algo_fwd=time_on_shape_change,dnn.conv.algo_bwd_filter=time_on_shape_change,dnn.conv.algo_bwd_data=time_on_shape_change python train_fcn8.py
+    [...]
+    EPOCH 221: Avg epoch training cost train 0.031036, cost val 0.313757, acc val 0.954686, jacc val class 0 0.952469, jacc val class 1 0.335233, jacc val 0.643851 took 56.401966 s
+    FINAL MODEL: err test 0.473100, acc test 0.924871, jacc test class 0 0.941239, jacc test class 1 0.426777, jacc test 0.684008
+
+There is some variability in the training process. Another run of the same command gave the following after 6.5 hours:
+
+.. code-block:: text
+
+    EPOCH 344: Avg epoch training cost train 0.089571, cost val 0.272069, acc val 0.923673, jacc val class 0 0.926739, jacc val class 1 0.204083, jacc val 0.565411 took 56.540339 s
+    FINAL MODEL: err test 0.541459, acc test 0.846444, jacc test class 0 0.875290, jacc test class 1 0.186454, jacc test 0.530872
+
+
+References
+++++++++++
+
+If you use this tutorial, please cite the following papers.
+
+* `[pdf] `__ Long, J., Shelhamer, E., Darrell, T. Fully Convolutional Networks for Semantic Segmentation. 2014.
+* `[pdf] `__ David Vázquez, Jorge Bernal, F. Javier Sánchez, Gloria Fernández-Esparrach, Antonio M. López, Adriana Romero, Michal Drozdzal, Aaron Courville. A Benchmark for Endoluminal Scene Segmentation of Colonoscopy Images. (2016).
+* `[GitHub Repo] `__ Francesco Visin, Adriana Romero - Dataset loaders: a Python library to load and preprocess datasets. 2017.
+
+Papers related to Theano/Lasagne:
+
+* `[pdf] `__ Theano Development Team. Theano: A Python framework for fast computation of mathematical expressions. May 2016.
+* `[website] `__ Sander Dieleman, Jan Schluter, Colin Raffel, Eben Olson, Søren Kaae Sønderby, Daniel Nouri, Daniel Maturana, Martin Thoma, Eric Battenberg, Jack Kelly, Jeffrey De Fauw, Michael Heilman, diogo149, Brian McFee, Hendrik Weideman, takacsg84, peterderivaz, Jon, instagibbs, Dr. Kashif Rasul, CongLiu, Britefury, and Jonas Degrave, "Lasagne: First release." (2015).
+
+
+Thank you!
+ + diff --git a/doc/gettingstarted.txt b/doc/gettingstarted.txt index e838d706..99c7f054 100644 --- a/doc/gettingstarted.txt +++ b/doc/gettingstarted.txt @@ -22,6 +22,11 @@ On each learning algorithm page, you will be able to download the corresponding git clone https://github.com/lisa-lab/DeepLearningTutorials.git +On Linux or Mac systems, after cloning, all datasets can be downloaded at once with: + + cd DeepLearningTutorials/data + ./download.sh + .. _datasets: @@ -85,7 +90,7 @@ MNIST Dataset variables and access it based on the minibatch index, given a fixed and known batch size. The reason behind shared variables is related to using the GPU. There is a large overhead when copying data - into the GPU memory. If you would copy data on request ( each minibatch + into the GPU memory. If you would copy data on request (each minibatch individually when needed) as the code will do if you do not use shared variables, due to this overhead, the GPU code will not be much faster then the CPU code (maybe even slower). If you have your data in @@ -147,7 +152,7 @@ MNIST Dataset The data has to be stored as floats on the GPU ( the right ``dtype`` for storing on the GPU is given by ``theano.config.floatX``). -To get around this shortcomming for the labels, we store them as float, +To get around this shortcoming for the labels, we store them as float, and then cast it to int. .. note:: @@ -286,7 +291,7 @@ In this tutorial, :math:`f` is defined as: f(x) = {\rm argmax}_k P(Y=k | x, \theta) -In python, using Theano this can be written as : +In python, using Theano this can be written as: .. code-block:: python @@ -316,7 +321,7 @@ The likelihood of the correct class is not the same as the number of right predictions, but from the point of view of a randomly initialized classifier they are pretty similar. Remember that likelihood and zero-one loss are different objectives; -you should see that they are corralated on the validation set but +you should see that they are correlated on the validation set but sometimes one will rise while the other falls, or vice-versa. Since we usually speak in terms of minimizing a loss function, learning will @@ -331,7 +336,7 @@ The NLL of our classifier is a differentiable surrogate for the zero-one loss, and we use the gradient of this function over our training data as a supervised learning signal for deep learning of a classifier. -This can be computed using the following line of code : +This can be computed using the following line of code: .. code-block:: python @@ -357,7 +362,7 @@ algorithm in which we repeatedly make small steps downward on an error surface defined by a loss function of some parameters. For the purpose of ordinary gradient descent we consider that the training data is rolled into the loss function. Then the pseudocode of this -algorithm can be described as : +algorithm can be described as: .. code-block:: python @@ -421,11 +426,11 @@ but this choice is almost arbitrary (though harmless). because it controls the number of updates done to your parameters. Training the same model for 10 epochs using a batch size of 1 yields completely different results compared to training for the same 10 epochs but with a batchsize of 20. Keep this in mind when - switching between batch sizes and be prepared to tweak all the other parameters acording + switching between batch sizes and be prepared to tweak all the other parameters according to the batch size used. All code-blocks above show pseudocode of how the algorithm looks like. 
Implementing such -algorithm in Theano can be done as follows : +algorithm in Theano can be done as follows: .. code-block:: python @@ -578,7 +583,7 @@ of a strategy based on a geometrically increasing amount of patience. while (epoch < n_epochs) and (not done_looping): # Report "1" for first epoch, "n_epochs" for last epoch epoch = epoch + 1 - for minibatch_index in xrange(n_train_batches): + for minibatch_index in range(n_train_batches): d_loss_wrt_params = ... # compute gradient params -= learning_rate * d_loss_wrt_params # gradient descent diff --git a/doc/images/big_brain.png b/doc/images/big_brain.png new file mode 100644 index 00000000..5725346b Binary files /dev/null and b/doc/images/big_brain.png differ diff --git a/doc/images/big_brain_section.png b/doc/images/big_brain_section.png new file mode 100644 index 00000000..16612c0d Binary files /dev/null and b/doc/images/big_brain_section.png differ diff --git a/doc/images/cat_segmentation.png b/doc/images/cat_segmentation.png new file mode 100644 index 00000000..490a2118 Binary files /dev/null and b/doc/images/cat_segmentation.png differ diff --git a/doc/images/cortical_layers_net.png b/doc/images/cortical_layers_net.png new file mode 100644 index 00000000..50c7ea20 Binary files /dev/null and b/doc/images/cortical_layers_net.png differ diff --git a/doc/images/cortical_ray_result.png b/doc/images/cortical_ray_result.png new file mode 100644 index 00000000..31799798 Binary files /dev/null and b/doc/images/cortical_ray_result.png differ diff --git a/doc/images/cortical_valid1.png b/doc/images/cortical_valid1.png new file mode 100644 index 00000000..9f76d7b2 Binary files /dev/null and b/doc/images/cortical_valid1.png differ diff --git a/doc/images/cortical_valid2.png b/doc/images/cortical_valid2.png new file mode 100644 index 00000000..1369b757 Binary files /dev/null and b/doc/images/cortical_valid2.png differ diff --git a/doc/images/cortical_valid3_v1.png b/doc/images/cortical_valid3_v1.png new file mode 100644 index 00000000..d25a3cd2 Binary files /dev/null and b/doc/images/cortical_valid3_v1.png differ diff --git a/doc/images/cortical_valid4.png b/doc/images/cortical_valid4.png new file mode 100644 index 00000000..4276d198 Binary files /dev/null and b/doc/images/cortical_valid4.png differ diff --git a/doc/images/fcn.png b/doc/images/fcn.png new file mode 100644 index 00000000..69ec4933 Binary files /dev/null and b/doc/images/fcn.png differ diff --git a/doc/images/fcn32_16_8.png b/doc/images/fcn32_16_8.png new file mode 100644 index 00000000..bbc92b32 Binary files /dev/null and b/doc/images/fcn32_16_8.png differ diff --git a/doc/images/fcn_schema.png b/doc/images/fcn_schema.png new file mode 100644 index 00000000..fce8add9 Binary files /dev/null and b/doc/images/fcn_schema.png differ diff --git a/doc/images/jaccard.png b/doc/images/jaccard.png new file mode 100644 index 00000000..2e7d6847 Binary files /dev/null and b/doc/images/jaccard.png differ diff --git a/doc/images/labels.png b/doc/images/labels.png new file mode 100644 index 00000000..35f84e94 Binary files /dev/null and b/doc/images/labels.png differ diff --git a/doc/images/polyps_results.png b/doc/images/polyps_results.png new file mode 100644 index 00000000..19c8d3ab Binary files /dev/null and b/doc/images/polyps_results.png differ diff --git a/doc/images/raw_smooth.png b/doc/images/raw_smooth.png new file mode 100644 index 00000000..748d9ae9 Binary files /dev/null and b/doc/images/raw_smooth.png differ diff --git a/doc/images/ray.png b/doc/images/ray.png new file mode 100644 
index 00000000..c4564676 Binary files /dev/null and b/doc/images/ray.png differ diff --git a/doc/images/unet.jpg b/doc/images/unet.jpg new file mode 100644 index 00000000..49cce6ff Binary files /dev/null and b/doc/images/unet.jpg differ diff --git a/doc/index.txt b/doc/index.txt index 7c6605bf..27962583 100644 --- a/doc/index.txt +++ b/doc/index.txt @@ -10,7 +10,7 @@ and an `introduction to Deep Learning algorithms `_ (Foundations & Trends in Machine Learning, 2009). @@ -25,7 +25,9 @@ training them on a GPU. The algorithm tutorials have some prerequisites. You should know some python, and be familiar with numpy. Since this tutorial is about using Theano, you should read over the `Theano basic tutorial`_ first. Once you've done that, -read through our :ref:`gettingstarted` chapter -- it introduces the notation, and [downloadable] datasets used in the algorithm tutorials, and the way we do optimization by stochastic gradient descent. +read through our :ref:`gettingstarted` chapter -- it introduces the notation, and downloadable datasets used in the algorithm tutorials, and the way we do optimization by stochastic gradient descent. + +The code is available on the `Deep Learning Tutorial repositories `_. The purely supervised learning algorithms are meant to be read in order: @@ -58,8 +60,26 @@ LSTM network for sentiment analysis: Energy-based recurrent neural network (RNN-RBM): * :ref:`Modeling and generating sequences of polyphonic music ` +Segmentation for medical imagery (meant to be read in order): + * :ref:`Fully Convolutional Networks (FCN) for 2D segmentation ` + * :ref:`U-Net ` + * :ref:`1D segmentation ` + + .. _Theano: http://deeplearning.net/software/theano .. _Theano basic tutorial: http://deeplearning.net/software/theano/tutorial .. _Contractive auto-encoders: https://github.com/lisa-lab/DeepLearningTutorials/blob/master/code/cA.py + + + + +Note that the tutorials here are all compatible with Python 2 and 3, +with the exception of :ref:`rnnrbm` which is only available for Python 2, like +the tutorials in medical imagery segmentation. + +If you work with ``conda``, `these command-line guidelines <../code/guidelines_segm_tutos_with_conda.sh>`__ +may also help you run segmentation tutorials. + + diff --git a/doc/lenet.txt b/doc/lenet.txt index 117dfdab..84b7c3be 100644 --- a/doc/lenet.txt +++ b/doc/lenet.txt @@ -7,7 +7,7 @@ Convolutional Neural Networks (LeNet) This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp`. Additionally, it uses the following new Theano functions and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, - `floatX`_, `downsample`_ , `conv2d`_, `dimshuffle`_. If you intend to run the + `floatX`_, `pool`_ , `conv2d`_, `dimshuffle`_. If you intend to run the code on GPU also read `GPU`_. To run this example on a GPU, you need a good GPU. It needs @@ -35,7 +35,7 @@ Convolutional Neural Networks (LeNet) .. _GPU: http://deeplearning.net/software/theano/tutorial/using_gpu.html -.. _downsample: http://deeplearning.net/software/theano/library/tensor/signal/downsample.html +.. _pool: http://deeplearning.net/software/theano/library/tensor/signal/pool.html .. _conv2d: http://deeplearning.net/software/theano/library/tensor/signal/conv.html#module-conv @@ -196,7 +196,7 @@ one of Figure 1. 
The input consists of 3 features maps (an RGB color image) of s import theano from theano import tensor as T - from theano.tensor.nnet import conv + from theano.tensor.nnet import conv2d import numpy @@ -226,7 +226,7 @@ one of Figure 1. The input consists of 3 features maps (an RGB color image) of s dtype=input.dtype), name ='b') # build symbolic expression that computes the convolution of input with filters in w - conv_out = conv.conv2d(input, W) + conv_out = conv2d(input, W) # build symbolic expression to add bias and apply activation function, i.e. produce neural net layer output # A few words on ``dimshuffle`` : @@ -320,7 +320,7 @@ Max-pooling is useful in vision for two reasons: "smart" way of reducing the dimensionality of intermediate representations. Max-pooling is done in Theano by way of -``theano.tensor.signal.downsample.max_pool_2d``. This function takes as input +``theano.tensor.signal.pool.pool_2d``. This function takes as input an N dimensional tensor (where N >= 2) and a downscaling factor and performs max-pooling over the 2 trailing dimensions of the tensor. @@ -328,11 +328,11 @@ An example is worth a thousand words: .. code-block:: python - from theano.tensor.signal import downsample + from theano.tensor.signal import pool input = T.dtensor4('input') maxpool_shape = (2, 2) - pool_out = downsample.max_pool_2d(input, maxpool_shape, ignore_border=True) + pool_out = pool.pool_2d(input, maxpool_shape, ignore_border=True) f = theano.function([input],pool_out) invals = numpy.random.RandomState(1).rand(3, 2, 5, 5) @@ -340,7 +340,7 @@ An example is worth a thousand words: print 'invals[0, 0, :, :] =\n', invals[0, 0, :, :] print 'output[0, 0, :, :] =\n', f(invals)[0, 0, :, :] - pool_out = downsample.max_pool_2d(input, maxpool_shape, ignore_border=False) + pool_out = pool.pool_2d(input, maxpool_shape, ignore_border=False) f = theano.function([input],pool_out) print 'With ignore_border set to False:' print 'invals[1, 0, :, :] =\n ', invals[1, 0, :, :] @@ -404,7 +404,7 @@ to be compatible with our previous MLP implementation. Note that the term "convolution" could corresponds to different mathematical operations: 1. `theano.tensor.nnet.conv2d - `_, + `_, which is the most common one in almost all of the recent published convolutional models. In this operation, each output feature map is connected to each @@ -543,7 +543,7 @@ the task. Filter Shape ************ -Common filter shapes found in the litterature vary greatly, usually based on +Common filter shapes found in the literature vary greatly, usually based on the dataset. Best results on MNIST-sized images (28x28) are usually in the 5x5 range on the first layer, while natural image datasets (often with hundreds of pixels in each dimension) tend to use larger first-layer filters of shape 12x12 or 15x15. diff --git a/doc/logreg.txt b/doc/logreg.txt index c2979e63..b582acd4 100644 --- a/doc/logreg.txt +++ b/doc/logreg.txt @@ -246,7 +246,7 @@ within the DeepLearningTutorials folder: python code/logistic_sgd.py -The output one should expect is of the form : +The output one should expect is of the form: .. code-block:: bash diff --git a/doc/lstm.txt b/doc/lstm.txt index 828fd694..aec230ab 100644 --- a/doc/lstm.txt +++ b/doc/lstm.txt @@ -75,10 +75,10 @@ previous state, as needed. .. figure:: images/lstm_memorycell.png :align: center - **Figure 1** : Illustration of an LSTM memory cell. + **Figure 1**: Illustration of an LSTM memory cell. The equations below describe how a layer of memory cells is updated at every -timestep :math:`t`. 
In these equations : +timestep :math:`t`. In these equations: * :math:`x_t` is the input to the memory cell layer at time :math:`t` * :math:`W_i`, :math:`W_f`, :math:`W_c`, :math:`W_o`, :math:`U_i`, @@ -89,7 +89,7 @@ timestep :math:`t`. In these equations : First, we compute the values for :math:`i_t`, the input gate, and :math:`\widetilde{C_t}` the candidate value for the states of the memory -cells at time :math:`t` : +cells at time :math:`t`: .. math:: :label: 1 @@ -102,7 +102,7 @@ cells at time :math:`t` : \widetilde{C_t} = tanh(W_c x_t + U_c h_{t-1} + b_c) Second, we compute the value for :math:`f_t`, the activation of the memory -cells' forget gates at time :math:`t` : +cells' forget gates at time :math:`t`: .. math:: :label: 3 @@ -111,7 +111,7 @@ cells' forget gates at time :math:`t` : Given the value of the input gate activation :math:`i_t`, the forget gate activation :math:`f_t` and the candidate state value :math:`\widetilde{C_t}`, -we can compute :math:`C_t` the memory cells' new state at time :math:`t` : +we can compute :math:`C_t` the memory cells' new state at time :math:`t`: .. math:: :label: 4 @@ -119,7 +119,7 @@ we can compute :math:`C_t` the memory cells' new state at time :math:`t` : C_t = i_t * \widetilde{C_t} + f_t * C_{t-1} With the new state of the memory cells, we can compute the value of their -output gates and, subsequently, their outputs : +output gates and, subsequently, their outputs: .. math:: :label: 5 @@ -139,7 +139,7 @@ In this variant, the activation of a cell’s output gate does not depend on the memory cell’s state :math:`C_t`. This allows us to perform part of the computation more efficiently (see the implementation note, below, for details). This means that, in the variant we have implemented, there is no -matrix :math:`V_o` and equation :eq:`5` is replaced by equation :eq:`5-alt` : +matrix :math:`V_o` and equation :eq:`5` is replaced by equation :eq:`5-alt`: .. math:: :label: 5-alt @@ -170,11 +170,11 @@ concatenating the four matrices :math:`W_*` into a single weight matrix :math:`W` and performing the same concatenation on the weight matrices :math:`U_*` to produce the matrix :math:`U` and the bias vectors :math:`b_*` to produce the vector :math:`b`. Then, the pre-nonlinearity activations can -be computed with : +be computed with: .. math:: - z = \sigma(W x_t + U h_{t-1} + b) + z = W x_t + U h_{t-1} + b The result is then sliced to obtain the pre-nonlinearity activations for :math:`i`, :math:`f`, :math:`\widetilde{C_t}`, and :math:`o` and the @@ -187,11 +187,11 @@ Code - Citations - Contact Code ==== -The LSTM implementation can be found in the two following files : +The LSTM implementation can be found in the two following files: -* `lstm.py `_ : Main script. Defines and train the model. +* `lstm.py `_: Main script. Defines and train the model. -* `imdb.py `_ : Secondary script. Handles the loading and preprocessing of the IMDB dataset. +* `imdb.py `_: Secondary script. Handles the loading and preprocessing of the IMDB dataset. After downloading both scripts and putting both in the same folder, the user can run the code by calling: @@ -202,7 +202,7 @@ can run the code by calling: The script will automatically download the data and decompress it. -**Note** : The provided code supports the Stochastic Gradient Descent (SGD), +**Note**: The provided code supports the Stochastic Gradient Descent (SGD), AdaDelta and RMSProp optimization methods. 
You are advised to use AdaDelta or RMSProp because SGD appears to performs poorly on this task with this particular model. diff --git a/doc/mlp.txt b/doc/mlp.txt index 2a74aaad..9e59ffbf 100644 --- a/doc/mlp.txt +++ b/doc/mlp.txt @@ -90,8 +90,8 @@ The set of parameters to learn is the set :math:`\theta = \{W^{(2)},b^{(2)},W^{(1)},b^{(1)}\}`. Obtaining the gradients :math:`\partial{\ell}/\partial{\theta}` can be achieved through the **backpropagation algorithm** (a special case of the chain-rule of derivation). -Thankfully, since Theano performs automatic differentation, we will not need to -cover this in the tutorial ! +Thankfully, since Theano performs automatic differentiation, we will not need to +cover this in the tutorial! Going from logistic regression to MLP @@ -178,13 +178,13 @@ The code below shows how this can be done, in a way which is analogous to our pr .. literalinclude:: ../code/mlp.py -The user can then run the code by calling : +The user can then run the code by calling: .. code-block:: bash python code/mlp.py -The output one should expect is of the form : +The output one should expect is of the form: .. code-block:: bash diff --git a/doc/rbm.txt b/doc/rbm.txt index a8079012..7a052cc6 100644 --- a/doc/rbm.txt +++ b/doc/rbm.txt @@ -7,7 +7,7 @@ Restricted Boltzmann Machines (RBM) .. note:: This section assumes the reader has already read through :doc:`logreg` and :doc:`mlp`. Additionally it uses the following Theano functions - and concepts : `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_ and `scan`_. If you intend to run the code on GPU also read `GPU`_. + and concepts: `T.tanh`_, `shared variables`_, `basic arithmetic ops`_, `T.grad`_, `Random numbers`_, `floatX`_ and `scan`_. If you intend to run the code on GPU also read `GPU`_. .. _T.tanh: http://deeplearning.net/software/theano/tutorial/examples.html?highlight=tanh @@ -573,7 +573,7 @@ The output was the following: ... plotting sample 8 ... plotting sample 9 -The pictures below show the filters after 15 epochs : +The pictures below show the filters after 15 epochs: .. figure:: images/filters_at_epoch_14.png :align: center diff --git a/doc/rnnrbm.txt b/doc/rnnrbm.txt index d64a0c4a..75e681f8 100644 --- a/doc/rnnrbm.txt +++ b/doc/rnnrbm.txt @@ -17,7 +17,7 @@ Modeling and generating sequences of polyphonic music with the RNN-RBM The script also assumes that the content of the `Nottingham Database of folk tunes `_ has been extracted in the ``../data`` directory. Alternative MIDI datasets are available `here `_. - Note that both dependencies above can be setup automatically by running the ``download.sh`` script in the ``../data`` directory. + Note that both dependencies above can be setup automatically by running the `download.sh `_ script in the ``../data`` directory of the `Deep Learning Tutorials repository `_. .. caution:: Need Theano 0.6 or more recent. diff --git a/doc/unet.txt b/doc/unet.txt new file mode 100644 index 00000000..7f0446b6 --- /dev/null +++ b/doc/unet.txt @@ -0,0 +1,194 @@ +.. _unet: + +U-Net +********************************************** + +.. note:: + This section assumes the reader has already read through :doc:`lenet` for + convolutional networks motivation and :doc:`fcn_2D_segm` for segmentation + network. + +Summary ++++++++ + +This tutorial provides a brief explanation of the U-Net architecture as well as a way to implement +it using Theano and Lasagne. 
U-Net is a Fully Convolutional Network (FCN) that does image segmentation.
+Its goal is thus to predict each pixel's class. See :doc:`fcn_2D_segm` for the differences between
+network architectures for classification and segmentation tasks.
+
+Data
+++++
+
+The data is from the ISBI challenge and can be found `here `_.
+We use data augmentation for training, as specified
+in the default arguments in the code given below.
+
+Model
++++++
+
+The U-Net architecture is built upon the Fully Convolutional Network and modified
+in a way that yields better segmentation in medical imaging.
+Compared to FCN-8, the two main differences are that (1) U-Net is symmetric and (2) the skip
+connections between the downsampling path and the upsampling path apply a concatenation
+operator instead of a sum. These skip connections are intended to provide local information
+to the global information while upsampling.
+Because of its symmetry, the network has a large number of feature maps in the upsampling
+path, which allows it to transfer information. By comparison, the basic FCN architecture only had
+*number of classes* feature maps in its upsampling path.
+
+The U-Net owes its name to its symmetric shape, which is different from other FCN variants.
+
+The U-Net architecture is separated into 3 parts:
+
+- 1: The contracting/downsampling path
+- 2: Bottleneck
+- 3: The expanding/upsampling path
+
+.. figure:: images/unet.jpg
+   :align: center
+   :scale: 60%
+
+   **Figure 1**: Illustration of U-Net architecture (from U-Net paper)
+
+
+Contracting/downsampling path
+=============================
+
+The contracting path is composed of 4 blocks. Each block is composed of:
+
+* 3x3 Convolution layer + activation function (with batch normalization)
+* 3x3 Convolution layer + activation function (with batch normalization)
+* 2x2 Max pooling
+
+Note that the number of feature maps doubles at each pooling, starting with
+64 feature maps for the first block, 128 for the second, and so on.
+The purpose of this contracting path is to capture the context of the input image
+in order to be able to do segmentation. This coarse contextual information is
+then transferred to the upsampling path by means of skip connections.
+
+
+Bottleneck
+==========
+
+This part of the network is between the contracting and expanding paths.
+The bottleneck is built from just 2 convolutional layers (with batch
+normalization), with dropout.
+
+
+Expanding/upsampling path
+=========================
+
+The expanding path is also composed of 4 blocks. Each of these blocks is composed of:
+
+* Deconvolution layer with stride 2
+* Concatenation with the corresponding cropped feature map from the contracting path
+* 3x3 Convolution layer + activation function (with batch normalization)
+* 3x3 Convolution layer + activation function (with batch normalization)
+
+
+The purpose of this expanding path is to enable precise localization combined
+with contextual information from the contracting path.
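+
+The blocks above translate almost directly into Lasagne. The following is an
+illustrative sketch only, not the tutorial's actual model (which lives in
+``Unet_lasagne_recipes.py``, see the Code section below); the helper names are
+hypothetical, and ``pad='same'`` convolutions with even feature-map sizes are
+assumed so that the skip feature maps can be concatenated without cropping:
+
+.. code-block:: python
+
+    from lasagne.layers import (ConcatLayer, Conv2DLayer, MaxPool2DLayer,
+                                TransposedConv2DLayer, batch_norm)
+    from lasagne.nonlinearities import rectify
+
+    def contracting_block(incoming, num_filters):
+        # two 3x3 convolutions (batch normalized), then 2x2 max pooling;
+        # `conv` is kept for the skip connection
+        conv = batch_norm(Conv2DLayer(incoming, num_filters, 3, pad='same',
+                                      nonlinearity=rectify))
+        conv = batch_norm(Conv2DLayer(conv, num_filters, 3, pad='same',
+                                      nonlinearity=rectify))
+        return conv, MaxPool2DLayer(conv, 2)
+
+    def expanding_block(incoming, skip, num_filters):
+        # 2x2 deconvolution with stride 2, then concatenation with the
+        # skip feature map from the contracting path (instead of a sum)
+        up = TransposedConv2DLayer(incoming, num_filters, 2, stride=2)
+        merged = ConcatLayer([up, skip], axis=1)
+        conv = batch_norm(Conv2DLayer(merged, num_filters, 3, pad='same',
+                                      nonlinearity=rectify))
+        return batch_norm(Conv2DLayer(conv, num_filters, 3, pad='same',
+                                      nonlinearity=rectify))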
+
+Advantages
+==========
+
+* The U-Net combines the location information from the downsampling path with the contextual information in the upsampling path to finally obtain general information combining localisation and context, which is necessary to predict a good segmentation map.
+
+* There is no dense layer, so images of different sizes can be used as input (since the only parameters to learn on convolution layers are the kernels, and the size of a kernel is independent of the input image size).
+
+* The use of massive data augmentation is important in domains like biomedical segmentation, since the number of annotated samples is usually limited.
+
+
+Code
+++++
+
+.. warning::
+
+    * Current code works with Python 2 only.
+    * If you use Theano with GPU backend (e.g. with Theano flag ``device=cuda``),
+      you will need at least 12GB free in your video RAM.
+
+The U-Net implementation can be found in the following GitHub repo:
+
+* `Unet_lasagne_recipes.py <../code/unet/Unet_lasagne_recipes.py>`_, from the original main script
+  `Unet.py `_. Defines the model.
+
+* `train_unet.py <../code/unet/train_unet.py>`_ : Training loop (main script to use).
+
+
+The user must install `Lasagne `_ and
+`SimpleITK `_, and
+clone the GitHub repo `Dataset Loaders `_.
+
+Change the ``dataset_loaders/config.ini`` file to set the right path for the dataset:
+
+.. code-block:: cfg
+
+    [isbi_em_stacks]
+    shared_path = /path/to/DeepLearningTutorials/data/isbi_challenge_em_stacks/
+
+The folder indicated in section ``[isbi_em_stacks]`` should contain the files:
+
+* ``test-volume.tif``
+* ``train-labels.tif``
+* ``train-volume.tif``
+
+The user can now build a U-Net with a specified number of input channels and number of classes.
+First include the Lasagne layers needed to define the U-Net architecture:
+
+.. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
+  :start-after: start-snippet-1
+  :end-before: end-snippet-1
+
+The *net* variable will be an ordered dictionary containing layer names as keys and layer instances as values.
+This is needed to be able to concatenate the feature maps from the contracting path to the expanding path.
+
+
+First the contracting path:
+
+.. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
+  :start-after: start-snippet-downsampling
+  :end-before: end-snippet-downsampling
+
+And then the bottleneck:
+
+.. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
+  :start-after: start-snippet-bottleneck
+  :end-before: end-snippet-bottleneck
+
+Followed by the expanding path:
+
+.. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
+  :start-after: start-snippet-upsampling
+  :end-before: end-snippet-upsampling
+
+And finally the output path (to obtain *number of classes* feature maps):
+
+.. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
+  :start-after: start-snippet-output
+  :end-before: end-snippet-output
+
+Running ``train_unet.py`` on a Titan X took around 60 minutes, ending with the following:
+
+.. code-block:: text
+
+    $ THEANO_FLAGS=device=cuda0,floatX=float32,dnn.conv.algo_fwd=time_once,dnn.conv.algo_bwd_data=time_once,dnn.conv.algo_bwd_filter=time_once,gpuarray.preallocate=1 python train_unet.py
+    [...]
+    EPOCH 364: Avg epoch training cost train 0.160667, cost val 0.265909, acc val 0.888796, jacc val class 0 0.636058, jacc val class 1 0.861970, jacc val 0.749014 took 4.379772 s
+
+
+References
+++++++++++
+
+If you use this tutorial, please cite the following papers.
+
+* `[pdf] `__ Olaf Ronneberger, Philipp Fischer, Thomas Brox. U-Net: Convolutional Networks for Biomedical Image Segmentation. May 2015.
+* `[GitHub Repo] `__ Francesco Visin, Adriana Romero - Dataset loaders: a Python library to load and preprocess datasets. 2017.
+
+Papers related to Theano/Lasagne:
+
+* `[pdf] `__ Theano Development Team. Theano: A Python framework for fast computation of mathematical expressions. May 2016.
+* `[website] `__ Sander Dieleman, Jan Schluter, Colin Raffel, Eben Olson, Søren Kaae Sønderby, Daniel Nouri, Daniel Maturana, Martin Thoma, Eric Battenberg, Jack Kelly, Jeffrey De Fauw, Michael Heilman, diogo149, Brian McFee, Hendrik Weideman, takacsg84, peterderivaz, Jon, instagibbs, Dr. Kashif Rasul, CongLiu, Britefury, and Jonas Degrave, "Lasagne: First release." (2015).
+
+
+Thank you!
diff --git a/doc/utilities.txt b/doc/utilities.txt
index 0367127c..eb982ec2 100644
--- a/doc/utilities.txt
+++ b/doc/utilities.txt
@@ -112,7 +112,7 @@ Tiling minibatches together is done for us by the
         else:
             channel_defaults = [0., 0., 0., 1.]

-        for i in xrange(4):
+        for i in range(4):
             if X[i] is None:
                 # if channel is None, fill it with zeros of the correct
                 # dtype
@@ -134,8 +134,8 @@ Tiling minibatches together is done for us by the
         out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype)

-        for tile_row in xrange(tile_shape[0]):
-            for tile_col in xrange(tile_shape[1]):
+        for tile_row in range(tile_shape[0]):
+            for tile_col in range(tile_shape[1]):
                 if tile_row * tile_shape[1] + tile_col < X.shape[0]:
                     if scale_rows_to_unit_interval:
                         # if we should scale values to be between 0 and 1
diff --git a/misc/do_nightly_build b/misc/do_nightly_build
index bd703f04..ef2b8319 100755
--- a/misc/do_nightly_build
+++ b/misc/do_nightly_build
@@ -1,9 +1,18 @@
 #!/bin/bash
-#we set the compiledir to the /Tmp dir to make the test faster by bypassing the nfs network.
+
+# If not running under Jenkins, fall back to a local temporary directory
+if [ -z "$WORKSPACE" ]; then
+    if [ -z "$TMPDIR" ]; then
+        TMPDIR=/tmp
+    fi
+    WORKSPACE=$TMPDIR
+fi
+
 date
-ROOT_CWD=/Tmp/nightly_build
-COMPILEDIR=/Tmp/lisa_theano_compile_dir_deeplearning
+ROOT_CWD=$WORKSPACE/nightly_build
+COMPILEDIR=$WORKSPACE/compile/lisa_theano_compile_dir_deeplearning
 NOSETESTS=${ROOT_CWD}/Theano/bin/theano-nose
+XUNIT="--with-xunit --xunit-file="

 FLAGS=warn.ignore_bug_before=0.5,compiledir=${COMPILEDIR}
 export PYTHONPATH=${ROOT_CWD}/Theano:${ROOT_CWD}/Pylearn:$PYTHONPATH
@@ -19,14 +28,17 @@
 echo "git version:" `git rev-parse HEAD`

 #echo "executing nosetests with mode=FAST_COMPILE"
 #THEANO_FLAGS=${FLAGS},mode=FAST_COMPILE ${NOSETESTS}
 echo "executing nosetests speed with mode=FAST_RUN"
-THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} test.py:speed
+FILE=${ROOT_CWD}/dlt_tests.xml
+THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} ${XUNIT}${FILE} test.py:speed
 #echo "executing nosetests speed with mode=FAST_RUN and OMP_NUM_THREADS=2"
 #OMP_NUM_THREADS=2 THEANO_FLAGS=${FLAGS},mode=FAST_RUN ${NOSETESTS} test.py:speed
 echo "executing nosetests with mode=FAST_RUN,floatX=float32"
-THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS}
+FILE=${ROOT_CWD}/dlt_float32_tests.xml
+THEANO_FLAGS=${FLAGS},mode=FAST_RUN,floatX=float32 ${NOSETESTS} ${XUNIT}${FILE}

 #we change the seed and record it everyday to test different combination. We record it to be able to reproduce bug caused by different seed. We don't want multiple test in DEBUG_MODE each day as this take too long.
#seed=$RANDOM #echo "executing nosetests with mode=DEBUG_MODE with seed of the day $seed" -#THEANO_DEBUGMODE_CHECK_STRIDES=0 THEANO_DEBUGMODE_PATIENCE=3 THEANO_COMPILEDIR=/Tmp/lisa_theano_compile_dir_deeplearning THEANO_UNITTEST_SEED=$seed THEANO_DEFAULT_MODE=DEBUG_MODE ${NOSETESTS} +#FILE=${ROOT_CWD}/'dlt_debug_tests.xml' +#THEANO_DEBUGMODE_CHECK_STRIDES=0 THEANO_DEBUGMODE_PATIENCE=3 THEANO_COMPILEDIR=$WORKSPACE/lisa_theano_compile_dir_deeplearning THEANO_UNITTEST_SEED=$seed THEANO_DEFAULT_MODE=DEBUG_MODE ${NOSETESTS} ${XUNIT}${FILE}