
### Class which implements an RTRBM.
### NB: rnn_trbm inherits from base_mat.
### It includes functions to calculate the gradient and to sample from the model.

import rbm
import gnumpy        as gpu
import numpy         as np
import data.words    as words
from   mats.base_mat import base_mat, up, down
from   pylab         import newaxis, Rsigmoid


class rnn_trbm(base_mat):
    
    def __init__(self, VH, HH, CD_n, vis_gauss = False):
        """Assemble the model from its two weight sets.

        VH        -- visible-hidden weights; its .v and .h attributes give the layer sizes
        HH        -- hidden-hidden weights; must satisfy HH.h == HH.v == VH.h
        CD_n      -- stored as self.CD_n and handed to rbm.rbm_grad_cd by grad()
        vis_gauss -- stored as self.vis_gauss and forwarded to the rbm routines
        """
        self.vis_gauss = vis_gauss

        n_vis = VH.v
        n_hid = VH.h
        ## the recurrent weights have to map the hidden layer onto itself
        assert(HH.h == HH.v == n_hid)
        self.v = n_vis
        self.h = n_hid

        ## parameter list: [VH, HH, initial hidden biases]
        ## the initial biases live on the GPU and start at zero (no context available)
        initial_bias = gpu.zeros(n_hid)
        self.w = [VH, HH, initial_bias]

        self.CD_n = CD_n

    def soft_copy(self):
        """Return a new rnn_trbm constructed from this model's VH and HH objects.

        The VH and HH objects are passed to the constructor as they are (no
        explicit copy is made here), and the third component of the new model
        is then assigned from self[2].
        """
        vis_hid, hid_hid, _ = self
        clone = rnn_trbm(vis_hid, hid_hid, self.CD_n, self.vis_gauss)
        ## the constructor installs a fresh zero bias; overwrite it with ours
        clone[2] = self[2]
        return clone

    def grad(self, V, lamb = 1):
        """Compute the parameter gradient for a mini-batch of sequences.

        V    -- array whose shape is unpacked as (bs, T, v): batch size,
                sequence length, visible units; v must equal self.v
        lamb -- not used by the active code and returned unchanged (it belongs
                to a gradient-scaling experiment that is commented out below)

        Returns (G, dict(loss = ...), lamb). G is created as 0 * self and its
        three components are the accumulators for VH, HH and the initial hidden
        bias. The reported loss is the sum over timesteps of dict_loss['loss']
        as returned by rbm.rbm_grad_cd, divided by bs.

        NOTE(review): the accumulators are names unpacked from G and are updated
        with += / /=; the results only reach G if those objects are modified in
        place -- confirm against base_mat / gnumpy.
        """
        bs, T, v = V.shape  ## V is indexed as (bs, T, v): batch size, sequence length, visible units
        assert(v == self.v)
        h = self.h
        VH, HH, b_init = self
        G = 0 * self  ## gradient container, built by scaling self with 0
        d_VH, d_HH, d_b_init = G

        #---------------------------------## GPU
        H = gpu.zeros((bs, T, h))         ## hidden activations (H[:, t, :] -> activations at time t)
        B = gpu.zeros((bs, T, h))         ## dynamic hidden biases (B[:, t, :] -> biases at time t; B[:, 0, :] is never written)

        ### forward pass: run the network over the whole sequence
        ### first element of the sequence: b_init is NOT added here
        ### (original author's open question: should b_init always be zero during training for the first element?)
        #H[:, 0, :] = (VH * V[:, 0, :] + b_init).logistic()
        H[:, 0, :] = (VH * V[:, 0, :]).logistic()
        ### rest of the sequence:
        #grad_amplifier = gpu.ones(T)
        for t in range(1, T):
            ### first, derive the hidden biases from the previous hidden activations
            B[:, t, :] = HH * H[:, t - 1, :]
            ### then calculate activations
            H[:, t, :] = (VH * V[:, t, :] + B[:, t, :]).logistic()
            #grad_amplifier[t] *= (4**t)

        ### backward pass: compute (12) to backpropagate the error through time
        ### (equation numbers are the original author's; the source they refer to is not named in this file)
        _r_ = gpu.zeros((bs, h))     ## logistic derivatives (reassigned at every timestep below)
        dB  = gpu.zeros((bs, T, h))  ## per-timestep term _r_ * F_t
        dBL = gpu.zeros((bs, T, h))  ## per-timestep 'separate' term returned by rbm.rbm_grad_cd
        F_t = gpu.zeros((bs, h))     ## signal carried from timestep t + 1 to t; zero at the last timestep
        loss = 0
        VH_t = 1 * VH   ## VH_t is the set of weights at time t (same as normal vis-hid, except that it has dynamic biases)

        for t in reversed(range(T)):

            _r_ = H[:, t, :] * (1 - H[:, t, :])  ## derivative of the logistic function

            ## disabled experiment: adapt lamb from the mean derivative at the first/last timestep
##            if (update == True) and ((t == T-1) or (t == 0)):
##                deriv = gpu.mean(_r_)
##                if deriv < 0.08:
##                    print 'Deriv close to 0! Lambda not updated.'
##                else:
##                    lamb = np.floor(1 / (2*deriv))
##                    print 'New lambda: ', lamb

            dB[:, t, :] = _r_ * F_t  ## disabled scaling factors: lamb, grad_amplifier[T - 1 - t]

            ## hidden biases of the RBM at time t = static biases + dynamic biases
            VH_t[2] = VH[2] + B[:, t, :]
            #if self.CD_n > 0:
                ### dVH_t: contribution of the gradient from each timestep
            dVH_t, separate, dict_loss = rbm.rbm_grad_cd   (VH_t, V[:, t, :], self.CD_n, self.vis_gauss)
            #else:
            #    dVH_t, dict_loss = rbm.rbm_grad_exact(VH_t, V[:, t, :], self.vis_gauss)
            loss += dict_loss['loss']  ## sum up loss over each element of the sequence
            dBL[:, t, :] = separate
            d_VH += dVH_t ## original author's note: dVH_t[2] == sum(separate, 0)

            ## propagate both terms one step back through the transposed recurrent weights;
            ## the direction attribute is set only for the duration of this product
            HH.direction = up
            F_t = HH.T() * (dB[:, t, :] + dBL[:, t, :])
            HH.direction = None

        ### compute the gradients for the parameters using (13) and (14)
        HH.direction = up
        VH.direction = up
        for t in range(1, T):
            d_HH += HH.outp_up(H[:, t-1, :], dB[:, t, :] + dBL[:, t, :]) ## outer product of previous hiddens with the bias terms
            d_VH += VH.outp_up(V[:, t, :], dB[:, t, :])
        ## the first timestep has no predecessor, so it only contributes to d_VH
        d_VH += VH.outp_up(V[:, 0, :], dB[:, 0, :])

        ### need averaging at the end
        d_b_init   += gpu.mean(dB[:, 0, :], 0)
        d_VH[0][0] /= bs
        d_VH[1]    /= bs
        d_VH[2]    /= bs
        d_HH[0][0] /= bs
        d_HH[1]    /= bs
        d_HH[2]    /= bs

        #---------------------------------#

        ## restore the direction attributes set before the parameter-gradient loop
        HH.direction = None
        VH.direction = None
        ## loss (per the original author): cumulated average discrepancy between real vectors
        ## and generated vectors, for ALL elements and ALL sequences in the minibatch
        return G, dict(loss = loss / float(bs)), lamb

    def show_W(self):
        """Return the result of show_W() called on the first component of this model."""
        first_component = self[0]
        return first_component.show_W()

    def get_samples(self, T, g, num_samples, g0 = None):
        """Generate num_samples visible sequences of length T from the model.

        T           -- number of timesteps to generate
        g           -- value forwarded to rbm.sample for timesteps t >= 1
                       (presumably the number of Gibbs steps -- confirm in rbm.sample)
        num_samples -- number of sequences generated in parallel
        g0          -- same role as g, but for the first timestep; defaults to g

        Returns a (num_samples, T, v) GPU array. V[:, t, :] does not hold the
        visible sample itself but the visible mean recomputed from the hidden
        sample returned by rbm.sample: linear when self.vis_gauss is set,
        logistic otherwise (the same rule the other samplers of this class use;
        this method previously applied the logistic unconditionally).
        """
        if g0 is None:
            g0 = g
        v, h = self.v, self.h
        VH, HH, b_init = self
        V = gpu.zeros((num_samples, T, v))
        H = gpu.zeros((num_samples, T, h))
        B = gpu.zeros((num_samples, T, h))
        ## working copy of VH whose hidden biases are replaced at every timestep
        VH_t = 1 * VH
        for t in range(T):
            if t == 0:
                ## no previous hidden state: use the initial hidden biases
                VH_t[2] = VH[2] + b_init
                steps = g0
            else:
                ## dynamic biases come from the previous hidden activations
                B[:, t, :] = HH * H[:, t - 1, :]
                VH_t[2] = VH[2] + B[:, t, :]
                steps = g
            V[:, t, :], H_t_stoch, _ = rbm.sample(VH_t, steps, num_samples, self.vis_gauss)
            ## hidden activations are computed from the sampled visibles ...
            H[:, t, :] = (VH_t * V[:, t, :]).logistic()
            ## ... and only then are the visibles replaced by their mean values
            if self.vis_gauss:
                V[:, t, :] = VH_t.T() * H_t_stoch
            else:
                V[:, t, :] = (VH_t.T() * H_t_stoch).logistic()
        return V


    def get_samples_and_hidden(self, T, g, num_samples, real_and_bin = False, g0 = None):
        """Generate sequences and return the visible and hidden values of every timestep.

        T            -- number of timesteps to generate
        g            -- value forwarded to rbm.sample for timesteps t >= 1
        num_samples  -- number of sequences generated in parallel
        real_and_bin -- when true, the binary arrays are returned as well
        g0           -- same role as g, but for the first timestep; defaults to g

        Returns (V_r, H_r), or (V_r, H_r, V, H) when real_and_bin is true.
        Per the original author, V and H are binary while V_r and H_r are
        real-valued. The recurrence is driven by the real-valued hiddens H_r.

        NOTE(review): this method unpacks FOUR values from rbm.sample, whereas
        every other caller in this class unpacks three. Confirm which signature
        rbm.sample actually has; one of the two usages must fail at runtime.
        """
        if g0 is None:
            g0 = g
        v, h = self.v, self.h
        VH, HH, b_init = self
        V   = gpu.zeros((num_samples, T, v))
        V_r = gpu.zeros((num_samples, T, v))
        H   = gpu.zeros((num_samples, T, h))
        H_r = gpu.zeros((num_samples, T, h))
        B   = gpu.zeros((num_samples, T, h))
        ## working copy of VH whose hidden biases are replaced at every timestep
        VH_t = 1 * VH
        for t in range(T):
            if t == 0:
                ## no previous hidden state: use the initial hidden biases
                VH_t[2] = VH[2] + b_init
                steps = g0
            else:
                B[:, t, :] = HH * H_r[:, t - 1, :]
                VH_t[2]    = VH[2] + B[:, t, :]
                steps = g
            V[:, t, :], H[:, t, :], V_r[:, t, :], H_r[:, t, :] = rbm.sample(VH_t, steps, num_samples, self.vis_gauss)
        if real_and_bin:
            return V_r, H_r, V, H
        return V_r, H_r


    def get_samples_and_last_hidden(self, T, g, num_samples, g0 = None):
        """Generate sequences and capture the hidden state that precedes the terminal '$'.

        T           -- number of timesteps to generate
        g           -- value forwarded to rbm.sample for timesteps t >= 1
        num_samples -- number of sequences generated in parallel
        g0          -- same role as g, but for the first timestep; defaults to g

        Returns (V, LM). V is the (num_samples, T, v) array of visible means.
        LM[i] is the hidden activation of sequence i at the timestep BEFORE its
        first '$' was decoded, i.e. the internal representation of the sequence.
        Only timesteps t >= 1 are checked for '$'.

        Raises AssertionError if some sequence never produced a '$' within T steps.
        """
        if g0 is None:
            g0 = g
        v, h = self.v, self.h
        VH, HH, b_init = self
        V = gpu.zeros((num_samples, T, v))
        H = gpu.zeros((num_samples, T, h))
        B = gpu.zeros((num_samples, T, h))
        LM = gpu.zeros((num_samples, h))
        ## explicit one-character dtype: np.character / np.bool are no longer
        ## accepted as dtypes by recent NumPy releases
        letters = np.empty(num_samples, dtype = 'S1')
        end     = np.zeros(num_samples, dtype = bool)
        VH_t = 1 * VH
        VH_t[2] = VH[2] + b_init
        ## per the original author, the sampled V and H_t_stoch are binary valued
        V[:, 0, :], H_t_stoch, _ = rbm.sample(VH_t, g0, num_samples, self.vis_gauss)
        H[:, 0, :] = (VH_t * V[:, 0, :]).logistic()
        ## now V becomes real valued:
        if self.vis_gauss:
            V[:, 0, :] = VH_t.T() * H_t_stoch
        else:
            V[:, 0, :] = (VH_t.T() * H_t_stoch).logistic()
        for t in range(1, T):
            B[:, t, :] = HH * H[:, t - 1, :]
            VH_t[2] = VH[2] + B[:, t, :]
            V[:, t, :], H_t_stoch, _ = rbm.sample(VH_t, g, num_samples, self.vis_gauss)
            H[:, t, :] = (VH_t * V[:, t, :]).logistic()
            ## decode the sampled visibles before they are overwritten by their means
            for i in range(num_samples):
                letters[i] = words.decodify_letter(V[i, t, :])
                if (letters[i] == '$') and not end[i]:
                    end[i] = True
                    LM[i] = H[i, t - 1, :] ## NB: consider the internal representation BEFORE the $ is generated
            if self.vis_gauss:
                V[:, t, :] = VH_t.T() * H_t_stoch
            else:
                V[:, t, :] = (VH_t.T() * H_t_stoch).logistic()
        assert end.all(), 'some sequences did not generate the terminal $ within T steps'
        return V, LM























    def get_samples_and_analyse_prediction(self, T, g, num_samples, g0 = None, dictionary = None, frequencies = None, m = None, tot_freq = None, pattern = None):
        """Generate sequences and accumulate hidden statistics for the letter just generated.

        T, g, num_samples, g0 -- as in get_samples (g0 defaults to g)
        dictionary  -- mapping letter -> accumulator; H_real[i] is added for every decoded letter
        frequencies -- mapping letter -> counter of how often the letter was decoded
        m           -- indexable by timestep; receives the numpy copy of H_real[i]
                       whenever the letter decoded at that timestep equals pattern
        tot_freq    -- indexable by timestep; counts the additions made to m
        pattern     -- the letter whose occurrences are tracked in m / tot_freq

        Despite their None defaults, dictionary, frequencies, m and tot_freq are
        required: they are indexed and updated in place. Only timesteps t < 10
        are recorded in m / tot_freq. Generation stops early once every
        sequence has produced the terminal '$'.

        Returns (V, dictionary, frequencies, m, tot_freq).
        """
        if g0 is None:
            g0 = g
        v, h = self.v, self.h
        VH, HH, b_init = self
        V = gpu.zeros((num_samples, T, v))
        H = gpu.zeros((num_samples, T, h))
        B = gpu.zeros((num_samples, T, h))
        VH_t = 1 * VH
        VH_t[2] = VH[2] + b_init
        ## last decoded letter of each sequence; explicit one-character dtype because
        ## np.character is no longer accepted as a dtype by recent NumPy releases
        letters = np.empty(num_samples, dtype = 'S1')

        ## per the original author, the sampled V and H_t_stoch are binary valued
        V[:, 0, :], H_t_stoch, H_real = rbm.sample(VH_t, g0, num_samples, self.vis_gauss)
        H[:, 0, :] = (VH_t * V[:, 0, :]).logistic()

        ## now V becomes real valued:
        if self.vis_gauss:
            V[:, 0, :] = VH_t.T() * H_t_stoch
        else:
            V[:, 0, :] = (VH_t.T() * H_t_stoch).logistic()

        for i in range(num_samples):
            letters[i] = words.decodify_letter(V[i, 0, :])
            dictionary[letters[i]] += H_real[i]
            frequencies[letters[i]] += 1
            if letters[i] == pattern:
                m[0] += gpu.as_numpy_array(H_real[i])
                tot_freq[0] += 1

        for t in range(1, T):
            B[:, t, :] = HH * H[:, t - 1, :]
            VH_t[2] = VH[2] + B[:, t, :]
            V[:, t, :], H_t_stoch, H_real = rbm.sample(VH_t, g, num_samples, self.vis_gauss)
            H[:, t, :] = (VH_t * V[:, t, :]).logistic()
            if self.vis_gauss:
                V[:, t, :] = VH_t.T() * H_t_stoch
            else:
                V[:, t, :] = (VH_t.T() * H_t_stoch).logistic()
            ## number of sequences that had already terminated before this timestep
            finished = 0
            for i in range(num_samples):
                ### check if previous generated element was the terminal
                if letters[i] != '$':
                    letters[i] = words.decodify_letter(V[i, t, :])
                    dictionary[letters[i]] += H_real[i]
                    frequencies[letters[i]] += 1
                    if (letters[i] == pattern) and (t < 10):
                        m[t] += gpu.as_numpy_array(H_real[i])
                        tot_freq[t] += 1
                else:
                    ## terminated sequences keep contributing to the '$' statistics
                    dictionary[letters[i]] += H_real[i]
                    frequencies[letters[i]] += 1
                    finished += 1
            if finished == num_samples:
                break

        return V, dictionary, frequencies, m, tot_freq

    def get_samples_and_analyse_context(self, T, g, num_samples, g0 = None, dictionary = None, frequencies = None, m = None, tot_freq = None, pattern = None):
        """Generate sequences and accumulate hidden statistics keyed by the PREVIOUS letter.

        T, g, num_samples, g0 -- as in get_samples (g0 defaults to g)
        dictionary  -- mapping letter -> accumulator; at timestep t, H_real[i] is
                       added under the letter decoded at timestep t - 1
        frequencies -- mapping letter -> counter, updated alongside dictionary
        m           -- indexable by timestep; m[t - 1] receives the numpy copy of
                       H_real[i] when the previous letter equals pattern
        tot_freq    -- indexable by timestep; counts the additions made to m
        pattern     -- the letter whose occurrences are tracked in m / tot_freq

        Despite their None defaults, dictionary, frequencies, m and tot_freq are
        required: they are indexed and updated in place. Only timesteps t < 11
        (i.e. indices 0..9 of m) are recorded. Generation stops early once every
        sequence has produced the terminal '$'.

        The visible means are linear when self.vis_gauss is set and logistic
        otherwise, matching the other analysis samplers of this class (this
        method previously applied the logistic unconditionally).

        Returns (V, dictionary, frequencies, m, tot_freq).
        """
        if g0 is None:
            g0 = g
        v, h = self.v, self.h
        VH, HH, b_init = self
        V = gpu.zeros((num_samples, T, v))
        H = gpu.zeros((num_samples, T, h))
        B = gpu.zeros((num_samples, T, h))
        VH_t = 1 * VH
        VH_t[2] = VH[2] + b_init
        ## last decoded letter of each sequence; explicit one-character dtype because
        ## np.character is no longer accepted as a dtype by recent NumPy releases
        letters = np.empty(num_samples, dtype = 'S1')

        ## per the original author, the sampled V and H_t_stoch are binary valued
        V[:, 0, :], H_t_stoch, H_real = rbm.sample(VH_t, g0, num_samples, self.vis_gauss)
        H[:, 0, :] = (VH_t * V[:, 0, :]).logistic()
        ## now V becomes real valued:
        if self.vis_gauss:
            V[:, 0, :] = VH_t.T() * H_t_stoch
        else:
            V[:, 0, :] = (VH_t.T() * H_t_stoch).logistic()

        ## the first letters only provide context; no statistics are recorded for t = 0
        for i in range(num_samples):
            letters[i] = words.decodify_letter(V[i, 0, :])

        for t in range(1, T):
            B[:, t, :] = HH * H[:, t - 1, :]
            VH_t[2] = VH[2] + B[:, t, :]
            V[:, t, :], H_t_stoch, H_real = rbm.sample(VH_t, g, num_samples, self.vis_gauss)
            H[:, t, :] = (VH_t * V[:, t, :]).logistic()
            if self.vis_gauss:
                V[:, t, :] = VH_t.T() * H_t_stoch
            else:
                V[:, t, :] = (VH_t.T() * H_t_stoch).logistic()
            ## number of sequences that had already terminated before this timestep
            finished = 0
            for i in range(num_samples):
                ### check if previous generated element was the terminal
                if letters[i] != '$':
                    ## letters[i] still holds the letter of timestep t - 1 at this point
                    if (letters[i] == pattern) and (t < 11):
                        m[t - 1] += gpu.as_numpy_array(H_real[i])
                        tot_freq[t - 1] += 1

                    dictionary[letters[i]] += H_real[i]
                    frequencies[letters[i]] += 1

                    letters[i] = words.decodify_letter(V[i, t, :])
                else:
                    ## terminated sequences keep contributing to the '$' statistics
                    dictionary[letters[i]] += H_real[i]
                    frequencies[letters[i]] += 1
                    finished += 1
            if finished == num_samples:
                break

        return V, dictionary, frequencies, m, tot_freq

    def get_samples_and_analyse_rule(self, T, g, num_samples, g0 = None, dictionary = None, frequencies = None, m = None, tot_freq = None, pattern = None):
        """Generate sequences and record hidden statistics when a two-letter rule fires.

        T, g, num_samples, g0 -- as in get_samples (g0 defaults to g)
        dictionary  -- mapping letter -> accumulator; H_real[i] is added for every decoded letter
        frequencies -- mapping letter -> counter of how often the letter was decoded
        m           -- indexable by timestep; m[t] receives the numpy copy of
                       H_real[i] when the rule fires at timestep t
        tot_freq    -- indexable by timestep; counts the additions made to m
        pattern     -- indexable pair: pattern[0] is the precondition letter,
                       pattern[1] the letter that completes the rule

        Despite their None defaults, dictionary, frequencies, m, tot_freq and
        pattern are required. Only timesteps t < 10 are recorded in m / tot_freq.
        Generation stops early once every sequence has produced the terminal '$'.

        Returns (V, dictionary, frequencies, m, tot_freq, track), where track
        lists the index of the sequence for every firing of the rule (an index
        appears once per firing).

        NOTE(review): when the rule fires, precondition[i] is left True, so a
        second pattern[1] right after it fires again -- confirm this is intended.
        """
        if g0 is None:
            g0 = g
        v, h = self.v, self.h
        VH, HH, b_init = self
        V = gpu.zeros((num_samples, T, v))
        H = gpu.zeros((num_samples, T, h))
        B = gpu.zeros((num_samples, T, h))
        VH_t = 1 * VH
        VH_t[2] = VH[2] + b_init
        ## explicit dtypes: np.character / np.bool are no longer accepted
        ## as dtypes by recent NumPy releases
        letters = np.empty(num_samples, dtype = 'S1')
        precondition = np.empty(num_samples, dtype = bool)
        track = []

        ## per the original author, the sampled V and H_t_stoch are binary valued
        V[:, 0, :], H_t_stoch, H_real = rbm.sample(VH_t, g0, num_samples, self.vis_gauss)
        H[:, 0, :] = (VH_t * V[:, 0, :]).logistic()

        ## now V becomes real valued:
        if self.vis_gauss:
            V[:, 0, :] = VH_t.T() * H_t_stoch
        else:
            V[:, 0, :] = (VH_t.T() * H_t_stoch).logistic()

        for i in range(num_samples):
            letters[i] = words.decodify_letter(V[i, 0, :])
            dictionary[letters[i]] += H_real[i]
            frequencies[letters[i]] += 1
            precondition[i] = bool(letters[i] == pattern[0])

        for t in range(1, T):
            B[:, t, :] = HH * H[:, t - 1, :]
            VH_t[2] = VH[2] + B[:, t, :]
            V[:, t, :], H_t_stoch, H_real = rbm.sample(VH_t, g, num_samples, self.vis_gauss)
            H[:, t, :] = (VH_t * V[:, t, :]).logistic()
            if self.vis_gauss:
                V[:, t, :] = VH_t.T() * H_t_stoch
            else:
                V[:, t, :] = (VH_t.T() * H_t_stoch).logistic()
            ## number of sequences that had already terminated before this timestep
            finished = 0
            for i in range(num_samples):
                ### check if previous generated element was the terminal
                if letters[i] != '$':
                    letters[i] = words.decodify_letter(V[i, t, :])
                    dictionary[letters[i]] += H_real[i]
                    frequencies[letters[i]] += 1
                    if (letters[i] == pattern[1]) and precondition[i] and (t < 10):
                        m[t] += gpu.as_numpy_array(H_real[i])
                        tot_freq[t] += 1
                        ## keep track of words which contain the required rule:
                        track.append(i)
                    elif (letters[i] == pattern[0]):
                        precondition[i] = True
                    else:
                        precondition[i] = False
                else:
                    finished += 1
            if finished == num_samples:
                break

        return V, dictionary, frequencies, m, tot_freq, track

    def get_samples_and_analyse_bigram(self, T, g, num_samples, g0 = None, dictionary = None, frequencies = None, m = None, tot_freq = None, pattern = None):
        """Generate sequences and record the hidden state that FOLLOWS a completed bigram.

        T, g, num_samples, g0 -- as in get_samples (g0 defaults to g)
        dictionary  -- mapping letter -> accumulator; H_real[i] is added for every decoded letter
        frequencies -- mapping letter -> counter of how often the letter was decoded
        m           -- indexable by timestep; when the bigram completed at timestep
                       t - 1, m[t - 1] receives the numpy copy of H_real[i] sampled at t
        tot_freq    -- indexable by timestep; counts the additions made to m
        pattern     -- indexable pair: the bigram is pattern[0] followed by pattern[1]

        Despite their None defaults, dictionary, frequencies, m, tot_freq and
        pattern are required. A bigram is only flagged when it completes at a
        timestep t < 10. Nothing is recorded for a sequence whose bigram is
        immediately followed by the end of generation or whose last letter
        is '$'. Generation stops early once every sequence has produced '$'.

        Returns (V, dictionary, frequencies, m, tot_freq, track), where track
        lists the index of the sequence for every recorded bigram.
        """
        if g0 is None:
            g0 = g
        v, h = self.v, self.h
        VH, HH, b_init = self
        V = gpu.zeros((num_samples, T, v))
        H = gpu.zeros((num_samples, T, h))
        B = gpu.zeros((num_samples, T, h))
        VH_t = 1 * VH
        VH_t[2] = VH[2] + b_init
        ## explicit dtypes: np.character / np.bool are no longer accepted
        ## as dtypes by recent NumPy releases
        letters = np.empty(num_samples, dtype = 'S1')
        precondition = np.empty(num_samples, dtype = bool)
        ## match[i] is True when sequence i completed the bigram at the previous timestep
        match = np.zeros(num_samples, dtype = bool)
        track = []

        ## per the original author, the sampled V and H_t_stoch are binary valued
        V[:, 0, :], H_t_stoch, H_real = rbm.sample(VH_t, g0, num_samples, self.vis_gauss)
        H[:, 0, :] = (VH_t * V[:, 0, :]).logistic()

        ## now V becomes real valued:
        if self.vis_gauss:
            V[:, 0, :] = VH_t.T() * H_t_stoch
        else:
            V[:, 0, :] = (VH_t.T() * H_t_stoch).logistic()
        for i in range(num_samples):
            letters[i] = words.decodify_letter(V[i, 0, :])
            dictionary[letters[i]] += H_real[i]
            frequencies[letters[i]] += 1
            precondition[i] = bool(letters[i] == pattern[0])

        for t in range(1, T):
            B[:, t, :] = HH * H[:, t - 1, :]
            VH_t[2] = VH[2] + B[:, t, :]
            V[:, t, :], H_t_stoch, H_real = rbm.sample(VH_t, g, num_samples, self.vis_gauss)
            H[:, t, :] = (VH_t * V[:, t, :]).logistic()
            if self.vis_gauss:
                V[:, t, :] = VH_t.T() * H_t_stoch
            else:
                V[:, t, :] = (VH_t.T() * H_t_stoch).logistic()
            ## number of sequences that had already terminated before this timestep
            finished = 0
            for i in range(num_samples):
                ### check if previous generated element was the terminal
                if letters[i] != '$':

                    ## the bigram completed one step ago: record the state that follows it
                    if match[i]:
                        m[t - 1] += gpu.as_numpy_array(H_real[i])
                        tot_freq[t - 1] += 1
                        ## keep track of words which contain the required rule:
                        track.append(i)

                    letters[i] = words.decodify_letter(V[i, t, :])
                    dictionary[letters[i]] += H_real[i]
                    frequencies[letters[i]] += 1

                    if (letters[i] == pattern[1]) and precondition[i] and (t < 10):
                        match[i] = True
                    elif (letters[i] == pattern[0]):
                        precondition[i] = True
                        match[i] = False
                    else:
                        precondition[i] = False
                        match[i] = False
                else:
                    finished += 1
            if finished == num_samples:
                break
        return V, dictionary, frequencies, m, tot_freq, track

    def get_samples_and_analyse_avg(self, T, g, num_samples, g0 = None, dictionary = None, frequencies = None, m = None, tot_freq = None):
        """Generate sequences and accumulate per-timestep hidden statistics over all letters.

        T, g, num_samples, g0 -- as in get_samples (g0 defaults to g)
        dictionary  -- mapping letter -> accumulator; H_real[i] is added for every decoded letter
        frequencies -- mapping letter -> counter of how often the letter was decoded
        m           -- indexable by timestep; m[t] receives the numpy copy of H_real[i]
                       of every sequence that had not terminated before timestep t
        tot_freq    -- indexable by timestep; counts the additions made to m

        Despite their None defaults, dictionary, frequencies, m and tot_freq are
        required: they are indexed and updated in place. Only timesteps t < 10
        are recorded in m / tot_freq. Generation stops early once every
        sequence has produced the terminal '$'.

        Returns (V, dictionary, frequencies, m, tot_freq).
        """
        if g0 is None:
            g0 = g
        v, h = self.v, self.h
        VH, HH, b_init = self
        V = gpu.zeros((num_samples, T, v))
        H = gpu.zeros((num_samples, T, h))
        B = gpu.zeros((num_samples, T, h))
        VH_t = 1 * VH
        VH_t[2] = VH[2] + b_init
        ## last decoded letter of each sequence; explicit one-character dtype because
        ## np.character is no longer accepted as a dtype by recent NumPy releases
        letters = np.empty(num_samples, dtype = 'S1')
        ## per the original author, the sampled V and H_t_stoch are binary valued
        V[:, 0, :], H_t_stoch, H_real = rbm.sample(VH_t, g0, num_samples, self.vis_gauss)
        H[:, 0, :] = (VH_t * V[:, 0, :]).logistic()
        ## now V becomes real valued:
        if self.vis_gauss:
            V[:, 0, :] = VH_t.T() * H_t_stoch
        else:
            V[:, 0, :] = (VH_t.T() * H_t_stoch).logistic()
        for i in range(num_samples):
            letters[i] = words.decodify_letter(V[i, 0, :])
            dictionary[letters[i]] += H_real[i]
            frequencies[letters[i]] += 1
            m[0] += gpu.as_numpy_array(H_real[i])
            tot_freq[0] += 1

        for t in range(1, T):
            B[:, t, :] = HH * H[:, t - 1, :]
            VH_t[2] = VH[2] + B[:, t, :]
            V[:, t, :], H_t_stoch, H_real = rbm.sample(VH_t, g, num_samples, self.vis_gauss)
            H[:, t, :] = (VH_t * V[:, t, :]).logistic()
            if self.vis_gauss:
                V[:, t, :] = VH_t.T() * H_t_stoch
            else:
                V[:, t, :] = (VH_t.T() * H_t_stoch).logistic()
            ## number of sequences that had already terminated before this timestep
            finished = 0
            for i in range(num_samples):
                ### check if previous generated element was the terminal
                if letters[i] != '$':
                    letters[i] = words.decodify_letter(V[i, t, :])
                    dictionary[letters[i]] += H_real[i]
                    frequencies[letters[i]] += 1
                    if (t < 10):
                        m[t] += gpu.as_numpy_array(H_real[i])
                        tot_freq[t] += 1
                else:
                    ## terminated sequences keep contributing to the '$' statistics
                    dictionary[letters[i]] += H_real[i]
                    frequencies[letters[i]] += 1
                    finished += 1
            if finished == num_samples:
                break
        return V, dictionary, frequencies, m, tot_freq


    def get_samples_and_hiddens(self, T, g, num_samples, g0 = None):
        """Generate sequences and keep everything needed to reconstruct them later.

        T           -- number of timesteps to generate
        g           -- value forwarded to rbm.sample_and_store for timesteps t >= 1
        num_samples -- number of sequences generated in parallel
        g0          -- same role as g, but for the first timestep; defaults to g

        Returns (V, V_stoch, H, H_stoch_init), each indexed as
        (num_samples, T, units). Per the original author, V_stoch is
        binary-valued while V is real-valued. H holds the hidden activations
        computed from V_stoch; H_stoch_init holds the fourth value returned by
        rbm.sample_and_store at every timestep.

        NOTE(review): unlike the rbm.sample callers in this class,
        self.vis_gauss is not forwarded to rbm.sample_and_store.
        """
        if g0 is None:
            g0 = g
        v, h = self.v, self.h
        VH, HH, b_init = self
        V = gpu.zeros((num_samples, T, v))
        V_stoch = gpu.zeros((num_samples, T, v))
        H = gpu.zeros((num_samples, T, h))
        H_stoch_init = gpu.zeros((num_samples, T, h))
        B = gpu.zeros((num_samples, T, h))
        ## working copy of VH whose hidden biases are replaced at every timestep
        VH_t = 1 * VH
        for t in range(T):
            if t == 0:
                ## no previous hidden state: use the initial hidden biases
                VH_t[2] = VH[2] + b_init
                steps = g0
            else:
                B[:, t, :] = HH * H[:, t - 1, :]
                VH_t[2] = VH[2] + B[:, t, :]
                steps = g
            ## the third returned value (stochastic hiddens) is not needed here
            V[:, t, :], V_stoch[:, t, :], _, H_stoch_init[:, t, :] = rbm.sample_and_store(VH_t, steps, num_samples)
            H[:, t, :] = (VH_t * V_stoch[:, t, :]).logistic()
        return V, V_stoch, H, H_stoch_init


    def reconstruct_hiddens(self, V):
        """Recompute the hidden activations for given visible sequences.

        V is unpacked as (num_samples, T, v). Returns a (num_samples, T, h) GPU
        array in which slice t holds logistic(VH_t * V[:, t, :]); the hidden
        biases of VH_t are VH[2] + b_init at t = 0 and VH[2] + HH * H[:, t - 1, :]
        afterwards.
        """
        n_hid = self.h
        vis_hid, hid_hid, bias0 = self
        n_seq, n_steps, _ = V.shape

        hidden  = gpu.zeros((n_seq, n_steps, n_hid))
        context = gpu.zeros((n_seq, n_steps, n_hid))

        ## working copy of the vis-hid weights, with the initial biases installed first
        step_rbm = 1 * vis_hid
        step_rbm[2] = vis_hid[2] + bias0
        hidden[:, 0, :] = (step_rbm * V[:, 0, :]).logistic()

        t = 1
        while t < n_steps:
            ## dynamic biases from the previous hidden activations
            context[:, t, :] = hid_hid * hidden[:, t - 1, :]
            step_rbm[2] = vis_hid[2] + context[:, t, :]
            hidden[:, t, :] = (step_rbm * V[:, t, :]).logistic()
            t += 1
        return hidden

    def reconstruct_visibles(self, H, H_stoch_init, gibbs_steps):
        """Reconstruct visibles from stored hidden states (currently a convergence-check harness).

        H            -- hidden activations, unpacked as (num_samples, T, h)
        H_stoch_init -- per-timestep hidden states handed to
                        rbm.sample_from_previous_no_stoch as starting point
        gibbs_steps  -- forwarded to rbm.sample_from_previous_no_stoch

        Returns (V, V_stoch), both shaped (num_samples, T, v).

        NOTE(review): as written, only V[:, 0, :] and V[:, 2, :] are ever
        filled (each one is recomputed 10 times, the last result wins); every
        other timestep of V stays zero, and V_stoch is returned as all zeros
        because the calls that would fill it are commented out. The t == 2
        branch also prints the first sequence's reconstruction to stdout.
        Confirm whether callers rely on this or whether the debugging state
        was left in by accident.
        """
        v, h = self.v, self.h
        VH, HH, b_init = self
        num_samples, T, h = H.shape  ## h is overwritten with the size taken from H
        V = gpu.zeros((num_samples, T, v))
        V_stoch = gpu.zeros((num_samples, T, v))
        B = gpu.zeros((num_samples, T, h))
        VH_t = 1 * VH
        VH_t[2] = VH[2] + b_init

        ## checking Gibbs sampling convergence: the first timestep is reconstructed
        ## 10 times from the same starting point; only the last result is kept
        for i in range(10):
            ## disabled variant that also stores the stochastic visibles:
            #V[:, 0, :], V_stoch[:, 0, :] = rbm.sample_from_previous(VH_t, H_stoch_init[:, 0, :], gibbs_steps, num_samples)
            
            V[:, 0, :] = rbm.sample_from_previous_no_stoch(VH_t, H_stoch_init[:, 0, :], gibbs_steps, num_samples)
            ## disabled debug output for the first timestep:
##            print 'Reconstruction ', i, ':'
##            for i in range(v):
##                print '%.4f' % V[0, 0, i],
##                if (i == 13):
##                    print ''
##            print '\n'

        for t in range(1, T):
            ## dynamic biases are derived from the GIVEN hidden activations H
            B[:, t, :] = HH * H[:, t - 1, :]
            VH_t[2] = VH[2] + B[:, t, :]
            
            ## only the third timestep is reconstructed (10 times, with debug output)
            if t == 2:
                for r in range(10):

                    ## disabled variant that also stores the stochastic visibles:
                    #V[:, t, :], V_stoch[:, t, :] = rbm.sample_from_previous(VH_t, H_stoch_init[:, t, :], gibbs_steps, num_samples)
                    V[:, t, :] = rbm.sample_from_previous_no_stoch(VH_t, H_stoch_init[:, t, :], gibbs_steps, num_samples)
                    
                    ## print the reconstruction of the first sequence, breaking the line after unit 13
                    print 'Reconstruction ', r, ':'
                    for i in range(v):
                        print '%.4f' % V[0, t, i],
                        if (i == 13):
                            print ''
                    print '\n'

        return V, V_stoch


