import json
import os
from collections import deque
from functools import partial
from logging import getLogger
from string import Template
import matplotlib.gridspec as gridspec
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.optimize as spo
import seaborn as sns
from scipy.stats import norm
logger = getLogger(__name__)
user_logger = getLogger('user')
try:
import pycuda
import pycuda.autoinit
import pycuda.compiler
import pycuda.gpuarray
import skcuda.misc as misc
except ImportError as e:
logger.error("Could not import a CUDA component. Both pycuda and skcuda must be installed.")
raise e
plt.rcParams['image.cmap'] = 'jet'
# NOTE theta = [sigma_f^2, l_1^2, l_2^2, ..., l_D^2] # NOTE: no sigma_n^2
# Ack https://github.com/lebedov/scikit-cuda/blob/master/demos/indexing_2d_demo.py
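# For example, with D = 2 scaled inputs the hyperparameter vector has the layout
# theta = np.array([sigma2_f, l1_sq, l2_sq]), e.g. np.array([1.0, 0.1, 0.2]) (illustrative values only).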
class GPC:
def __init__(self, x_cols, y_col, training_data, param_info,
kernel_mode='RBF',
kernel_params=None,
verbose=False,
debug=False,
**kwargs
):
self.use_laplace_approximation = True
cur_dir = os.path.dirname(os.path.realpath(__file__))
self.kernel_fn = os.path.join(cur_dir, 'kernel.c')
self.training_data = training_data.copy()
self.param_info = param_info.copy()
self.x_cols = x_cols
self.y_col = y_col
self.kernel_mode = kernel_mode
self.x_cols_scaled = []
for xc in self.x_cols:
if xc not in self.training_data.columns:
user_logger.error('Cannot find column %s in the training data, which has columns: %s',
xc, self.training_data.columns.tolist())
raise Exception('Missing column %s' % xc)
xc_new = xc + ' (scaled)'
self.x_cols_scaled.append(xc_new)
self.training_data[xc_new] = (self.training_data[xc] - self.param_info.loc[xc, 'Min']) / \
(self.param_info.loc[xc, 'Max'] - self.param_info.loc[xc, 'Min'])
self.verbose = verbose
self.debug = debug
self.D = len(self.x_cols)
self.theta = None # Kernel/model hyperparameters
self.kernel_xx_gpu = None
self.kernel_xp_gpu = None
self.kernel_params = kernel_params
self.define_kernel(self.kernel_params)
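# A minimal construction sketch (illustrative only; the column names and values below are
# assumptions, not taken from any real configuration):
#
#   param_info = pd.DataFrame({'Min': [0.0, 0.0], 'Max': [1.0, 5.0]}, index=pd.Index(['x1', 'x2'], name='Name'))
#   training = pd.DataFrame({'x1': [0.1, 0.9], 'x2': [1.0, 4.0], 'y': [-1, 1]})
#   gpc = GPC(['x1', 'x2'], 'y', training, param_info, kernel_mode='RBF')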
@classmethod
def from_config(cls, config_fn):
try:
logger.debug("from_config:", config_fn)
with open(os.path.join(config_fn)) as data_file:
config = json.load(data_file)
return GPC.from_dict(config)
except EnvironmentError:
user_logger.error("Unable to load GPC from_config file", config_fn)
raise
@classmethod
def from_dict(cls, config):
return cls(
config['Xcols'],
config['Ycol'],
training_data=pd.read_json(config['Training_Data'], orient='split').set_index('Sample'),
param_info=pd.read_json(config['Param_Info'], orient='split').set_index('Name'),
kernel_mode=config['Kernel_Mode'],
kernel_params=np.array(config['Kernel_Params']),
)
def set_training_data(self, new_training_data):
self.training_data = new_training_data.copy()
self.define_kernel(self.kernel_params)
for xc in self.x_cols:
# xc_new = xc + ' (scaled)'
self.training_data[xc + ' (scaled)'] = \
(self.training_data[xc] - self.param_info.loc[xc, 'Min']) / \
(self.param_info.loc[xc, 'Max'] - self.param_info.loc[xc, 'Min'])
def save(self, save_to=None):
save_dict = {
'Xcols': self.x_cols,
'Ycol': self.y_col,
'Kernel_Mode': self.kernel_mode,
'Kernel_Params': self.theta.tolist(),
'Training_Data': self.training_data.reset_index().to_json(orient='split'), # [self.Xcols + [self.Ycol]]
'Param_Info': self.param_info.reset_index().to_json(orient='split')
}
if save_to:
with open(save_to, 'w') as fout:
json.dump(save_dict, fout, indent=4)
return save_dict
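# Round-trip sketch (assumes a trained instance `gpc` whose theta has been set, e.g. by
# optimize_hyperparameters below):
#
#   gpc.save(save_to='gpc.json')        # writes the dict above as JSON
#   gpc2 = GPC.from_config('gpc.json')  # reconstructs an equivalent model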
def define_kernel(self, params):
if self.kernel_mode == 'RBF':
nx = self.training_data.shape[0]
with open(self.kernel_fn, 'r') as f:
kernel_code_template = Template(f.read())
max_threads_per_block, max_block_dim, max_grid_dim = misc.get_dev_attrs(pycuda.autoinit.device)
block_dim, grid_dim = misc.select_block_grid_sizes(pycuda.autoinit.device, (nx, nx))
max_blocks_per_grid = max(max_grid_dim)
if self.verbose:
user_logger.info("max_threads_per_block %s", max_threads_per_block)
user_logger.info("max_block_dim %s", max_block_dim)
user_logger.info("max_grid_dim %s", max_grid_dim)
user_logger.info("max_blocks_per_grid %s", max_blocks_per_grid)
user_logger.info("block_dim %s", block_dim)
user_logger.info("grid_dim %s", grid_dim)
# Substitute in template to get kernel code
kernel_code = kernel_code_template.substitute(
max_threads_per_block=max_threads_per_block,
max_blocks_per_grid=max_blocks_per_grid,
B=nx)
# Compile the kernel
mod = pycuda.compiler.SourceModule(kernel_code)
# retrieve the kernel functions
self.kernel_xx_gpu = mod.get_function("kernel_xx")
self.kernel_xp_gpu = mod.get_function("kernel_xp")
else:
user_logger.error('Bad kernel mode, kernel_mode=%s' % self.kernel_mode)
raise ValueError('Unsupported kernel_mode: %s' % self.kernel_mode)
if params is not None:
assert (len(params) == 1 + self.D)
if isinstance(params, list):
params = np.array(params)
self.theta = params
def kernel_xx(self, x, theta):
# NOTE: Slow, use GPU acceleration instead.
sigma2_f = theta[0]
n = x.shape[0]
kxx = np.zeros([n, n], dtype=np.float32)
for i in range(n):
# Off-diagonal
for j in range(i + 1, n):
d_x = x[i, :] - x[j, :]
r2 = 0
for d in range(self.D):
r2 += d_x[d] * d_x[d] / theta[1 + d]
kxx[i, j] = sigma2_f * np.exp(-r2 / 2.)
kxx[j, i] = kxx[i, j]
# Diagonal:
kxx[i, i] = sigma2_f
return kxx
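# An equivalent vectorized form of the loop above (reference sketch only; the class itself
# relies on the CUDA kernel via kxx_gpu_wrapper):
#
#   d2 = np.sum(((x[:, None, :] - x[None, :, :]) ** 2) / theta[1:], axis=2)
#   kxx = (theta[0] * np.exp(-d2 / 2.)).astype(np.float32)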
@staticmethod
def kernel_xp(x, p, theta):
# NOTE: Slow, use GPU acceleration instead.
sigma2_f = theta[0]
nx = x.shape[0]
n_p = p.shape[0]
n_dim = x.shape[1]
kxp = np.zeros([nx, n_p])
for i in range(nx):
for j in range(n_p):
d_x = x[i, :] - p[j, :]
r2 = 0
for d in range(n_dim):
r2 += d_x[d] * d_x[d] / theta[1 + d]
kxp[i, j] = sigma2_f * np.exp(-r2 / 2.)
return kxp
def kxx_gpu_wrapper(self, x, theta, deriv=-1):
nx = x.shape[0]
# Use from before...?
block_dim, grid_dim = misc.select_block_grid_sizes(pycuda.autoinit.device, (nx, nx))
x_gpu = pycuda.gpuarray.to_gpu(x.astype(np.float32))
theta_extended = np.concatenate(
(np.array(theta[:1]), np.array([0]), np.array(theta[1:]))) # Kernel code needs sigma2_n
theta_gpu = pycuda.gpuarray.to_gpu(theta_extended.astype(np.float32))
# create empty gpu array for the result
kxx_gpu = pycuda.gpuarray.empty((nx, nx), np.float32)
# call the kernel on the card
self.kernel_xx_gpu(
kxx_gpu, # <-- Output
x_gpu, theta_gpu, # <-- Inputs
np.uint32(nx), # <-- N
np.uint32(self.D), # <-- D
np.uint32(deriv), # <- for dK/dtheta_i. Negative for no deriv.
np.uint8(x.flags.f_contiguous), # FORTRAN (column) contiguous
block=block_dim,
grid=grid_dim
)
kxx = kxx_gpu.get()
if self.debug:
kxx_cpu = self.kernel_xx(x.astype(np.float32), theta.astype(np.float32))
if not np.allclose(kxx_cpu, kxx):
user_logger.error('kxx_gpu_wrapper(CPU):\n%s', kxx_cpu)
user_logger.error('kxx_gpu_wrapper(GPU):\n%s', kxx)
raise RuntimeError('GPU and CPU kernel_xx results do not match')
return kxx
def kxp_gpu_wrapper(self, x, p, theta):
nx = x.shape[0]
n_p = p.shape[0]
block_dim, grid_dim = misc.select_block_grid_sizes(pycuda.autoinit.device, (nx, n_p))
if x.flags.f_contiguous: # Fortran column-major
# Convert to C contiguous (row major)
x_gpu = pycuda.gpuarray.to_gpu(np.ascontiguousarray(x).astype(np.float32))
else:
x_gpu = pycuda.gpuarray.to_gpu(x.astype(np.float32))
if p.flags.f_contiguous:
# Convert to C contiguous (row major)
p_gpu = pycuda.gpuarray.to_gpu(np.ascontiguousarray(p).astype(np.float32))
else:
p_gpu = pycuda.gpuarray.to_gpu(p.astype(np.float32))
theta_extended = np.concatenate(
(np.array(theta[:1]), np.array([0]), np.array(theta[1:]))) # Kernel code needs sigma2_n
theta_gpu = pycuda.gpuarray.to_gpu(theta_extended.astype(np.float32))
# create empty gpu array for the result
kxp_gpu = pycuda.gpuarray.empty((nx, n_p), np.float32)
# call the kernel on the card
self.kernel_xp_gpu(
kxp_gpu, # <-- Output
x_gpu, p_gpu, theta_gpu, # <-- Inputs
np.uint32(nx), # <-- Nx
np.uint32(n_p), # <-- Np
np.uint32(self.D), # <-- D
block=block_dim,
grid=grid_dim
)
if self.debug:
kxp_cpu = self.kernel_xp(x, p, theta)
if not np.allclose(kxp_cpu, kxp_gpu.get()):
user_logger.error('kxp_gpu_wrapper(CPU):\n%s', kxp_cpu)
user_logger.error('kxp_gpu_wrapper(GPU):\n%s', kxp_gpu.get())
raise RuntimeError('GPU and CPU kernel_xp results do not match')
return kxp_gpu.get()
def assign_rep(self, sample):
sample = sample.drop('index', axis=1).reset_index()
sample.index.name = 'Replicate'
sample.reset_index(inplace=True)
return sample
def expectation_propagation(self, theta):
# Algorithm 3.5: "Expectation Propagation for binary classification" from
# "Gaussian Process for Machine Learning", p58
# TODO: WIP!
# TODO: Pass in
mu_tol = 1e-6
sigma_tol = 1e-6
y = self.training_data[self.y_col].values
user_logger.debug('y: %s', y)
n = len(y)
x = self.training_data[self.x_cols_scaled].values
k = self.kxx_gpu_wrapper(x, theta) # This is for f, no sigma2_n
# Initialize
nu = np.zeros(n)
tau = np.zeros(n)
sigma = k.copy()
mu = np.zeros(n)
tau_minus_i_vec = np.zeros_like(tau)
mu_minus_i_vec = np.zeros_like(mu)
done = False
it = 0
stilde = None
while not done:
prev_mu = mu
prev_sigma = sigma
# prev_tau = tau
# prev_nu = nu
for i in range(n):
# logger.info(it, ' ', i,' ', '-'*80)
sigma2_i = sigma[i, i] # Not sure on this one
tau_minus_i = 1 / sigma2_i - tau[i]
nu_minus_i = mu[i] / sigma2_i - nu[i]
assert (tau_minus_i > 0)
den = tau_minus_i * (tau_minus_i + 1)
sqrt_den = np.sqrt(den)
zi = y[i] * nu_minus_i / sqrt_den
n_zi = norm.pdf(zi)
z_hat_i = norm.cdf(zi)
mu_hat_i = nu_minus_i / tau_minus_i + y[i] * n_zi / (z_hat_i * sqrt_den)
sigma2_hat_i = 1 / tau_minus_i - n_zi / (z_hat_i * den) * (zi + n_zi / z_hat_i)
delta_tau = 1 / sigma2_hat_i - tau_minus_i - tau[i] # using sigmahat2_i
tau[i] = tau[i] + delta_tau
nu[i] = mu_hat_i / sigma2_hat_i - nu_minus_i # using muhat_u and sigmahat2_i
si = sigma[:, i]
sigma = sigma - 1 / ((1 / delta_tau) + sigma[i, i]) * np.outer(si, si)
mu = np.dot(sigma, nu)
tau_minus_i_vec[i] = tau_minus_i
mu_minus_i_vec[i] = nu_minus_i / tau_minus_i
stilde = np.diag(tau)
'''
sqrtStilde = np.diag(np.sqrt(tau))
B = np.eye(N) + np.dot(sqrtStilde, np.dot(K, sqrtStilde))
L = np.linalg.cholesky(B)
V = np.linalg.solve( np.transpose(L), np.dot(sqrtStilde,K) )
logger.info('V', V)
logger.info('K', K)
Sigma = K - np.dot(np.transpose(V),V)
logger.info('Sigma after', Sigma)
t = time.time()
SigmaDIRECT = np.linalg.inv(np.linalg.inv(K) + Stilde)
logger.info('Sigma DIRECT %f'%(time.time()-t), SigmaDIRECT)
'''
# t = time.time()
sigma_mil = k - np.dot(k, np.linalg.solve(np.linalg.inv(stilde) + k, k))
# logger.info('Sigma MIL %f'%(time.time()-t), SigmaMIL)
'''
import scipy as sp
t = time.time()
a = np.linalg.inv(Stilde)+K
ainvsqrt = sp.linalg.sqrtm(np.linalg.inv(a))
Vdjk = np.dot(ainvsqrt, K)
SigmaDJK = K - np.dot(np.transpose(Vdjk),Vdjk)
logger.info('Sigma DJK %f'%(time.time()-t), SigmaDJK)
logger.info('Vdjk', Vdjk)
#Bdjk = np.linalg.inv(Stilde)+K
#Ldjk = np.linalg.cholesky(Bdjk)
#Vdjk2 = np.linalg.solve(np.transpose(Ldjk),K)
#logger.info('Vdjk2', Vdjk2)
'''
sigma = sigma_mil
mu = np.dot(sigma, nu)
d_mu = np.linalg.norm(mu - prev_mu)
d_sigma = np.linalg.norm(sigma - prev_sigma)
done = d_mu < mu_tol and d_sigma < sigma_tol # converged when both changes are below tolerance
it = it + 1
if stilde is None:
raise Exception("stilde not set")
# TEMP because long-cut in calculating Sigma above
sqrt_stilde = np.diag(np.sqrt(tau))
b = np.eye(n) + np.dot(sqrt_stilde, np.dot(k, sqrt_stilde))
l_cholesky = np.linalg.cholesky(b)
# #################################################
log_zep_terms_1_and_4 = 0.5 * sum(
np.log(1 + np.multiply(tau, np.reciprocal(tau_minus_i_vec))) - np.log(np.diag(l_cholesky)))
t = np.diag(tau_minus_i_vec)
big_matrix = k - np.dot(k, np.dot(sqrt_stilde, np.linalg.solve(b, np.dot(sqrt_stilde, k)))) - np.linalg.inv(
t + stilde)
log_zep_terms_5b_and_2 = 0.5 * np.dot(np.transpose(nu), np.dot(big_matrix, nu))
stuff = np.linalg.solve(stilde + t, np.dot(stilde, mu_minus_i_vec - 2 * nu))
log_zep_term_5a = 0.5 * np.dot(np.transpose(mu_minus_i_vec), np.dot(t, stuff))
num = np.multiply(y, mu_minus_i_vec)
den = np.sqrt(1 + tau_minus_i_vec)
log_zep_term_3 = np.sum(np.log(norm.cdf(np.multiply(num, np.reciprocal(den)))))
# logger.info('logZep_terms_1_and_4', logZep_terms_1_and_4)
# logger.info('logZep_terms_5b_and_2', logZep_terms_5b_and_2)
# logger.info('logZep_term_5a', logZep_term_5a)
# logger.info('logZep_term_3', logZep_term_3)
log_zep = log_zep_terms_1_and_4 + log_zep_terms_5b_and_2 + log_zep_term_5a + log_zep_term_3
user_logger.debug('%s --> %s', theta, -log_zep)
return -log_zep, nu, tau
def find_posterior_mode(self, theta, f_guess=None, tol_grad=1e-6, max_iter=100):
# Mode finding for Laplace GPC. Algorithm 3.1 from "Gaussian Process for Machine Learning", p46
y = self.training_data[self.y_col].values
user_logger.debug('y: %s', y)
n = len(y)
if f_guess is not None:
f_hat = f_guess
assert (isinstance(f_hat, np.ndarray))
assert (f_hat.shape[0] == y.shape[0])
else:
f_hat = np.zeros_like(y)
x = self.training_data[self.x_cols_scaled].values
k = self.kxx_gpu_wrapper(x, theta) # This is for f, no sigma2_n
l_val = w = sqrt_w = pi = d_df_log_p_y_given_f = a = log_p_y_given_f = log_q_y_given_x_theta = i = None
for i in range(max_iter):
user_logger.debug('---[ %d ]------------------------------------' % i)
user_logger.debug('f_hat: %s', f_hat)
pi = 1.0 / (1.0 + np.exp(-f_hat))
user_logger.debug('pi: %s', pi)
t = (y + 1) / 2.0
user_logger.debug('t: %s', t)
user_logger.debug('Computing W ...')
d2_df2_log_p_y_given_f = -np.multiply(pi, 1 - pi)
w = np.diag(-d2_df2_log_p_y_given_f) # W, NOTE: Using logit (3.15)
sqrt_w = np.diag(np.sqrt(-d2_df2_log_p_y_given_f))
user_logger.debug('Computing B ...')
# B = np.eye(N) + np.dot(sqrtW, np.dot(K, sqrtW))
####################
# Dan's method for B:
w_vec = np.sqrt(-d2_df2_log_p_y_given_f)
w_outer = np.outer(w_vec, w_vec)
b = np.eye(n) + np.multiply(k, w_outer)
###
user_logger.debug('Computing L ...')
l_val = np.linalg.cholesky(b)
user_logger.debug('Computing b ...')
d_df_log_p_y_given_f = t - pi
b = np.dot(w, f_hat) + d_df_log_p_y_given_f
logger.debug('b: %s', b)
user_logger.debug('Computing W12_K_b ...')
w12_k_b = np.dot(sqrt_w, np.dot(k, b))
user_logger.debug('Computing L_slash_W12_K_b ...')
l_slash_w12_k_b = np.linalg.solve(l_val, w12_k_b)
user_logger.debug('Computing Lt_slash_L_slash_W12_K_b ...')
lt_slash_l_slash_w12_k_b = np.linalg.solve(np.transpose(l_val), l_slash_w12_k_b)
user_logger.debug('Computing a ...')
a = b - np.dot(sqrt_w, lt_slash_l_slash_w12_k_b)
user_logger.debug('a: %s', a)
user_logger.debug('Computing f_hat ...')
f_hat = np.dot(k, a)
#####
# log_p_y_given_f = -np.log(1 + np.exp(-np.dot(y, f_hat)))
log_p_y_given_f = np.sum(-np.log(1 + np.exp(-np.multiply(y, f_hat))))
log_q_y_given_x_theta = -0.5 * np.dot(np.transpose(a), f_hat) + log_p_y_given_f - sum(np.log(np.diag(l_val)))
d_df_log_q_y_given_x_theta = d_df_log_p_y_given_f - np.linalg.solve(k, f_hat)
# user_logger.info('***', log_q_y_given_X_theta, np.linalg.norm(d_df_log_q_y_given_X_theta))
norm_grad = np.linalg.norm(d_df_log_q_y_given_x_theta)
if norm_grad < tol_grad:
break
user_logger.warning('WARNING: out of iterations in find_posterior_mode, |grad| = %f', norm_grad)
user_logger.debug('%s --> log_q_y_given_X_theta: %f (%d f_hat-iterations)', theta, log_q_y_given_x_theta, i)
user_logger.debug('%s Final --> log_q_y_given_X_theta: %f (%d f_hat-iterations)', theta, log_q_y_given_x_theta, i)
if l_val is None or w is None:
raise Exception("Iter loop did not complete or all expected values were not set")
return {
'f_hat': f_hat,
'log_q_y_given_X_theta': log_q_y_given_x_theta,
'L': l_val,
'K': k,
'W': w,
'sqrtW': sqrt_w,
'pi': pi,
'a': a,
'd_df_log_p_y_given_f': d_df_log_p_y_given_f,
'log_p_y_given_f': log_p_y_given_f
}
def negative_log_marginal_likelihood(self, theta):
if self.use_laplace_approximation:
log_q_y_given_x_theta = self.find_posterior_mode(theta)['log_q_y_given_X_theta']
return -log_q_y_given_x_theta
else:
log_zep, _, _ = self.expectation_propagation(theta)
return log_zep
def negative_log_marginal_likelihood_and_gradient(self, theta, f_guess=None):
# Rasmussen and Williams GPML p126 algo 5.1
# if np.any(theta < 0):
# theta = np.abs(theta)
# theta += 1e-6 # Keep away from 0
mode_results_dict = self.find_posterior_mode(theta, f_guess)
f_hat = mode_results_dict['f_hat']
log_z = mode_results_dict['log_q_y_given_X_theta']
l_val = mode_results_dict['L']
k = mode_results_dict['K']
# w = mode_results_dict['W']
sqrt_w = mode_results_dict['sqrtW']
pi = mode_results_dict['pi']
a = mode_results_dict['a']
d_df_log_p_y_given_f = mode_results_dict['d_df_log_p_y_given_f']
# log_p_y_given_f = mode_results_dict['log_p_y_given_f']
x = self.training_data[self.x_cols_scaled].values
l_slash_sqrt_w = np.linalg.solve(l_val, sqrt_w)
r = np.dot(sqrt_w, np.linalg.solve(np.transpose(l_val), l_slash_sqrt_w)) # <-- good
c = np.linalg.solve(l_val, np.dot(sqrt_w, k))
# logger.info(np.diag(K - np.dot(np.transpose(C),C)) - np.diag(K - np.dot(K,np.linalg.solve(np.linalg.inv(W)+K,K))))
# exit()
# n = f_hat.shape[0]
# L is good
s2_part1 = np.diag(np.diag(k) - np.diag(np.dot(np.transpose(c), c)))
d3_df3_log_p_y_given_f = pi * (1 - pi) * (2 * pi - 1)
s2 = -0.5 * np.dot(s2_part1, d3_df3_log_p_y_given_f)
s2 = -s2 # TEMP - OMG, RW is WRONG!!!!!
d_dtheta_log_z = np.zeros_like(theta)
for j in range(len(theta)):
# Compute dK/dtheta_j
if j == 0:
# theta_0 is sigma2_f
c = k / theta[0]
else:
c = self.kxx_gpu_wrapper(x, theta, deriv=j + 1) # +1 because no sigma2_n
s1_part_1 = 0.5 * np.dot(np.transpose(a), np.dot(c, a))
s1 = s1_part_1 - 0.5 * np.trace(
np.dot(r, c)) # <-- WARNING, INEFFICIENT! COMPUTE DIAG OF MATRIX PROD ONLY!
b = np.dot(c, d_df_log_p_y_given_f)
s3 = b - np.dot(k, np.dot(r, b))
d_dtheta_log_z[j] = s1 + np.dot(np.transpose(s2), s3) # s1 seems good, s2 is good
user_logger.debug('d_dtheta_logZ: %s', d_dtheta_log_z)
return -log_z, -d_dtheta_log_z, f_hat # Careful with sign
@staticmethod
def func_wrapper(f, cache_size=100):
# http://stackoverflow.com/questions/10712789/scipy-optimize-fmin-bfgs-single-function-computes-both-f-and-fprime
evals = {}
last_points = deque()
def get(pt, which):
s = pt.tobytes() # binary representation of the numpy array, to make it hashable
if s not in evals:
evals[s] = f(pt)
last_points.append(s)
if len(last_points) >= cache_size:
del evals[last_points.popleft()]
return evals[s][which]
return partial(get, which=0), partial(get, which=1)
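# Usage sketch: split the combined (value, gradient) objective so each piece can be handed to
# scipy.optimize separately (the commented-out TNC/BFGS calls in optimize_hyperparameters below
# pass the pair as fun/jac):
#
#   f_only, fprime_only = GPC.func_wrapper(gpc.negative_log_marginal_likelihood_and_gradient)
#   res = spo.minimize(fun=f_only, x0=x0, jac=fprime_only, method='L-BFGS-B', bounds=bounds)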
def laplace_predict(self, theta, f_hat, p):
y = self.training_data[self.y_col].values
user_logger.debug('y: %s', y)
n = len(y)
x = self.training_data[self.x_cols_scaled].values
kxx = self.kxx_gpu_wrapper(x, theta) # This is for f
user_logger.debug('---[ PREDICT ]------------------------------------')
user_logger.debug('f_hat: %s', f_hat)
pi = 1.0 / (1.0 + np.exp(-f_hat))
user_logger.debug('pi: %s', pi)
t = (y + 1) / 2.0
user_logger.debug('t: %s', t)
d2_df2_log_p_y_given_f = -np.multiply(pi, 1 - pi)
sqrt_w = np.diag(np.sqrt(-d2_df2_log_p_y_given_f))
user_logger.debug('Computing B ...')
# ## Dan's method for B:
w = np.sqrt(-d2_df2_log_p_y_given_f)
w_outer = np.outer(w, w)
b = np.eye(n) + np.multiply(kxx, w_outer)
###
# Bslow = np.eye(N) + np.dot(sqrtW, np.dot(KXX, sqrtW))
# logger.info( np.allclose(B, Bslow) )
# exit()
###
lh = np.linalg.cholesky(b)
d_df_log_p_y_given_f = t - pi
# p-specific code begins here:
ret = pd.DataFrame(columns=['Mean-Transformed', 'Var-Transformed', 'Mean', 'Var']) # , 'Trapz'
for idx, p_series in p.iterrows():
user_logger.debug('%s x_star is %s', idx, p_series['x (scaled)'])
p_row = p_series.values[np.newaxis, :] # Do not shadow the DataFrame p; its index is needed below
k_xp = self.kxp_gpu_wrapper(x, p_row, theta)
# TODO Reference this code directly from history matching. We need to make a utility function there that
# can be used in this process or others
f_bar_star = np.dot(np.transpose(k_xp), d_df_log_p_y_given_f) # MEAN (vector of length 1)
v = np.linalg.solve(lh, np.dot(sqrt_w, k_xp))
kpp = self.kxx_gpu_wrapper(p_row, theta) # For latent distribution, don't add sigma2_n
v = kpp - np.dot(np.transpose(v), v) # VARIANCE (matrix of size 1x1)
def logistic(f):
return 1.0 / (1 + np.exp(-f))
mu = f_bar_star[0]
sigma2 = v[0, 0]
# import time
# tz = time.time()
# Numerical integration ##### (Works, but need variance calculation)
fstar = np.linspace(mu - 3 * np.sqrt(sigma2), mu + 3 * np.sqrt(sigma2),
100) # <-- should choose num points (100) wisely
mean_integrand = np.multiply(logistic(fstar),
np.exp(-(fstar - mu) ** 2 / (2.0 * sigma2)) / np.sqrt(2.0 * np.pi * sigma2))
mean_trapz = np.trapz(mean_integrand, x=fstar) # Average prediction (better)
var_integrand = np.multiply((logistic(fstar) - mean_trapz) ** 2,
np.exp(-(fstar - mu) ** 2 / (2.0 * sigma2)) / np.sqrt(2.0 * np.pi * sigma2))
var_trapz = np.trapz(var_integrand, x=fstar) # Average prediction (better)
# logger.info('TRAPZ', time.time()-tz)
'''
### Monte Carlo
mc = time.time()
pts = np.random.multivariate_normal(f_bar_star, V, size=10000)
yy = logistic(pts)
mean = np.mean(yy)
var = np.var(yy)
logger.info('MC', time.time()-mc)
###
'''
# logger.info('MC: (%f, %f) VS TRAPZ: (%f, %f)' % (mean, var, mean_trapz, var_trapz))
logi = logistic(mu) # MAP prediction
# End TODO for History matching refactor out
user_logger.debug('MEAN: %s', mu)
user_logger.debug('VAR: %s', sigma2)
user_logger.debug('LOGIS: %s', logi)
# user_logger.debug('MONTE CARLO: mean=%f, var=%f' % (mean, var))
user_logger.debug('TRAPZ: mean=%f, var=%f', mean_trapz, var_trapz)
ret = pd.concat([ret, pd.DataFrame(
{'Mean-Transformed': [mu], 'Var-Transformed': [sigma2], 'Mean': [mean_trapz], 'Var': [var_trapz]})])
ret.index = p.index.copy()
return ret
def ep_predict(self, theta, p):
log_zep, nu, tau = self.expectation_propagation(theta)
y = self.training_data[self.y_col].values
user_logger.debug('y: %s', y)
n = len(y)
x = self.training_data[self.x_cols_scaled].values
kxx = self.kxx_gpu_wrapper(x, theta) # This is for f
user_logger.debug('---[ PREDICT ]------------------------------------')
user_logger.debug('Computing B ...')
sqrt_stilde = np.diag(np.sqrt(tau))
b = np.eye(n) + np.dot(sqrt_stilde, np.dot(kxx, sqrt_stilde))
l_cholesky = np.linalg.cholesky(b)
sqrt_stilde_k_nu = np.dot(sqrt_stilde, np.dot(kxx, nu))
l_slash_sqrt_stilde_k_nu = np.linalg.solve(l_cholesky, sqrt_stilde_k_nu)
lt_slash_l_slash_sqrt_stilde_k_nu = np.linalg.solve(np.transpose(l_cholesky), l_slash_sqrt_stilde_k_nu)
z = np.dot(sqrt_stilde, lt_slash_l_slash_sqrt_stilde_k_nu)
ret = pd.DataFrame(columns=['Mean-Transformed', 'Var-Transformed', 'Mean', 'Var'])
for idx, p_series in p.iterrows():
user_logger.debug('%s x_star is %s', idx, p_series['x (scaled)'])
p_row = p_series.values[np.newaxis, :] # Do not shadow the DataFrame p; its index is needed below
k_xp = self.kxp_gpu_wrapper(x, p_row, theta)
f_bar_star = np.dot(np.transpose(k_xp), nu - z) # MEAN (vector of length 1)
v = np.linalg.solve(l_cholesky, np.dot(sqrt_stilde, k_xp))
kpp = self.kxx_gpu_wrapper(p_row, theta) # For latent distribution, don't add sigma2_n
v = kpp - np.dot(np.transpose(v), v) # VARIANCE (matrix of size 1x1)
mu = f_bar_star[0]
sigma2 = v[0, 0]
mean = norm.cdf(mu / np.sqrt(1 + sigma2))
fstar = np.linspace(mu - 3 * np.sqrt(sigma2), mu + 3 * np.sqrt(sigma2),
100) # <-- should choose num points (100) wisely
'''
mean_integrand = np.multiply(norm.cdf(fstar), np.exp(-(fstar-mu)**2/(2.0*sigma2)) / np.sqrt(2.0*np.pi*sigma2) )
mean_trapz = np.trapz(mean_integrand, x=fstar) # Average prediction (better)
logger.info(mean, mean_trapz)
'''
var_integrand = np.multiply((norm.cdf(fstar) - mean) ** 2,
np.exp(-(fstar - mu) ** 2 / (2.0 * sigma2)) / np.sqrt(2.0 * np.pi * sigma2))
var_trapz = np.trapz(var_integrand, x=fstar) # TODO: Closed form!
ret = pd.concat([ret, pd.DataFrame(
{'Mean-Transformed': [mu], 'Var-Transformed': [sigma2], 'Mean': [mean], 'Var': [var_trapz]})])
ret.index = p.index.copy()
return ret
def optimize_hyperparameters(self, x0, bounds=(), k=-1, eps=1e-2, disp=True, maxiter=15000):
# x0 like np.array([2, 0.10, 0.14641288665436947, 0.12166006573919039, 0.05, 0.05, 0.08055223671416605,
# 7.026854485434267 ])
# bounds like ((0.005,10),)+((0.01,10),) + tuple((5e-5,10) for i in range(self.D))
# K=None is leave one out cross validation, otherwise make K groups
idx = self.training_data.index.names # Save index
self.training_data.reset_index(inplace=True)
if self.use_laplace_approximation:
f_, fprime = GPC.func_wrapper(self.negative_log_marginal_likelihood_and_gradient)
else:
f_ = self.negative_log_marginal_likelihood
fprime = None
user_logger.debug(f"F Prime: {fprime}")
'''
# Truncated Newton Conjugate-Gradient
ret = spo.minimize(
fun = f_,
x0 = x0,
#args=(X,Y,P),
jac = fprime,
method='TNC',
bounds = bounds, # Constrain values
hess=None, hessp=None,
constraints=(), tol=None, callback=None,
options= {
'maxiter': maxiter,
'disp': disp,
'eps': eps
}
)
'''
'''
# BFGS
ret = spo.minimize(
fun = f_,
#args=(X,Y,P),
x0 = x0,
method='L-BFGS-B',
bounds = bounds, # Constrain values
jac = fprime,
hess=None, hessp=None,
constraints=(), tol=None, callback=None,
options= {
'maxiter': maxiter,
'disp': disp,
'ftol': 1e-12,
'gtol': 1e-12,
'factr': 0.01,
'eps': eps
}
)
'''
attempts = 0
done = False
ret = None
while attempts < 3 and not done:
# No jacobian
ret = spo.minimize(
fun=f_, # self.negative_log_marginal_likelihood,
# args=(X,Y,P),
x0=x0,
method='L-BFGS-B',
bounds=bounds, # Constrain values
jac=None, hess=None, hessp=None,
constraints={}, tol=None, callback=None,
options={
'maxiter': maxiter,
'disp': disp,
'eps': eps # eps: Step size used for numerical approximation of the jacobian (1e-3).
}
)
user_logger.info('OPTIMIZATION RETURNED:\n%s', ret)
done = bool(ret.success)
if not done:
user_logger.error('OPTIMIZATION FAILED, trying AGAIN!!!')
x0 = 1.1 * x0
attempts = attempts + 1
self.theta = ret.x # Length scales now on 0-1 range
# self.theta = np.abs(ret.x) + 1e-6 # Length scales now on 0-1 range
f_hat = self.find_posterior_mode(self.theta)['f_hat']
np.savetxt('f_hat.csv', f_hat, delimiter=',') # X is an array
user_logger.info('OPTIMIZATION RETURNED:\n%s', ret)
# Restore original index
if idx[0] is not None:
self.training_data.set_index(idx, inplace=True)
return ret
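# Example call (illustrative values only; theta has length 1 + D as noted at the top of this module):
#
#   x0 = np.array([1.0] + [0.1] * gpc.D)
#   bounds = ((0.005, 10),) + tuple((5e-5, 10) for _ in range(gpc.D))
#   gpc.optimize_hyperparameters(x0, bounds=bounds)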
def evaluate(self, data):
# Predict at test and training points, store mean and variance in self.data
# Normalize data
for xc in self.x_cols:
# xc_new = xc + ' (scaled)'
data[xc + ' (scaled)'] = \
(data[xc] - self.param_info.loc[xc, 'Min']) / \
(self.param_info.loc[xc, 'Max'] - self.param_info.loc[xc, 'Min'])
# PREDICT:
if self.use_laplace_approximation:
f_hat = self.find_posterior_mode(self.theta)['f_hat']
np.savetxt('f_hat.csv', f_hat, delimiter=',') # X is an array
ret = self.laplace_predict(self.theta, f_hat, data[self.x_cols_scaled])
else:
ret = self.ep_predict(self.theta, data[self.x_cols_scaled])
# ret = data.merge(ret, left_index=True, right_index=True)
return ret
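# e.g. predictions = gpc.evaluate(test_df.copy()); the returned frame has one row per input with
# 'Mean'/'Var' on the probability scale and 'Mean-Transformed'/'Var-Transformed' for the latent f.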
def plot_data(self, samples_to_circle=None):
if samples_to_circle is None:
samples_to_circle = []
scaled = \
5 + 45 * \
(self.training_data[self.y_col] - self.training_data[self.y_col].min()) / \
(self.training_data[self.y_col].max() - self.training_data[self.y_col].min())
figs = {}
for row in range(self.D):
for col in range(self.D):
if col > row:
# gs = gridspec.GridSpec(self.D-1, self.D-1)
# ax = fig.add_subplot(gs[col-1,row])
fn = '%s-%s.pdf' % (self.x_cols[row], self.x_cols[col])
figs[fn] = plt.figure(figsize=(6, 6)) # GPy.plotting.plotting_library().figure()
x = self.training_data[self.x_cols[row]]
y = self.training_data[self.x_cols[col]]
plt.scatter(x, y, s=scaled, c=self.training_data[self.y_col], cmap='jet', lw=0.1, alpha=0.5,
edgecolors='k') # , s=area, c=colors, alpha=0.5)
# Circle some interesting samples
for s in samples_to_circle:
plt.scatter(self.training_data.loc[s][self.x_cols[row]],
self.training_data.loc[s][self.x_cols[col]], s=10 + scaled.loc[s], alpha=1, lw=1.0,
facecolors="None", edgecolors='k') # , s=area, c=colors, alpha=0.5)
plt.autoscale(tight=True)
plt.xlabel(self.x_cols[row])
plt.ylabel(self.x_cols[col])
plt.tight_layout()
return figs
def plot_histogram(self):
fig, ax = plt.subplots(nrows=1, ncols=1) # , figsize=(5,5), sharex='col', sharey='row')
sns.distplot(self.training_data[self.y_col], rug=True, ax=ax)
return fig
def plot(self, x_center, res=10):
x_mu = np.repeat(np.array([x_center]), res * res, axis=0)
fig = plt.figure(figsize=(4 * (self.D - 1), 4 * (self.D - 1)))
fig_std_latent = plt.figure(figsize=(4 * (self.D - 1), 4 * (self.D - 1)))
for row in range(self.D):
for col in range(self.D):
if col > row:
gs = gridspec.GridSpec(self.D - 1, self.D - 1)
ax = fig.add_subplot(gs[col - 1, row]) # , projection='3d'
ax_std_latent = fig_std_latent.add_subplot(gs[col - 1, row]) # , projection='3d'
fixed_inputs = [(x, mean) for (i, (x, mean)) in enumerate(zip(range(self.D), x_center)) if
row != i and col != i]
user_logger.info('%s %s %s %s', row, col, row * self.D + col, fixed_inputs)
# TODO: Real parameter ranges here, not just 0-1
(row_min, row_max) = (
self.training_data[self.x_cols[row]].min(), self.training_data[self.x_cols[row]].max())
(col_min, col_max) = (
self.training_data[self.x_cols[col]].min(), self.training_data[self.x_cols[col]].max())
# sim_cases_range = data.reset_index().groupby('Sample')['Sim_Cases'].agg(
# {'Min':np.min, 'Max':np.max, 'Mean':np.mean}
# )
x1 = np.linspace(row_min, row_max, res)
x2 = np.linspace(col_min, col_max, res)
x1, x2 = np.meshgrid(x1, x2)
x = x_mu.copy()
x[:, row] = x1.flatten()
x[:, col] = x2.flatten()
xdf = pd.DataFrame(x, columns=self.x_cols)
self.debug = False
# user_logger.warning('WARNING: DEBUG!\n')
self.verbose = False
ret = self.evaluate(xdf)
y_mean = np.reshape(ret['Mean'], [res, res])
y_std_latent = np.reshape(np.sqrt(ret['Var-Transformed']), [res, res]) # latent-f standard deviation
# Y_std_predictive = np.reshape( np.sqrt(ret['Var_Predictive']), [res, res])
try:
cs = ax.contour(x1, x2, y_mean, zorder=100)
ax.clabel(cs, inline=1, fontsize=10, zorder=100)
except Exception as ex:
logger.exception(ex)
user_logger.info('Unable to plot mean contour')
pass
ax.scatter(self.training_data[self.x_cols[row]], self.training_data[self.x_cols[col]],
c=self.training_data[self.y_col], s=25)
try:
cs = ax_std_latent.contour(x1, x2, y_std_latent, zorder=100)
ax_std_latent.clabel(cs, inline=1, fontsize=10, zorder=100)
except Exception as ex:
logger.exception(ex)
user_logger.info('Unable to plot std contour')
pass
if col == self.D - 1:
ax.set_xlabel(self.x_cols[row])
if row == 0:
ax.set_ylabel(self.x_cols[col])
# plt.tight_layout()
return fig, fig_std_latent
def plot_errors(self, train, test, mean_col, var_predictive_col, truth_col=None, figsize=(16, 10)):
if train is not None:
train['Z'] = (train[self.y_col] + 1) / 2
if test is not None:
test['Z'] = (test[self.y_col] + 1) / 2
ycol = 'Z'
if truth_col:
if train is not None:
train['ZTrue_Predictive'] = (train[truth_col] - train[mean_col]) / np.sqrt(train[var_predictive_col])
train['Truth-Logit'] = np.log(train[truth_col] / (1 - train[truth_col]))
train['ZTrue_Predictive_Logit'] = (train['Truth-Logit'] - train['Mean-Transformed']) / np.sqrt(
train['Var-Transformed'])
if test is not None:
test['ZTrue_Predictive'] = (test[truth_col] - test[mean_col]) / np.sqrt(test[var_predictive_col])
test['Truth-Logit'] = np.log(test[truth_col] / (1 - test[truth_col]))
test['ZTrue_Predictive_Logit'] = (test['Truth-Logit'] - test['Mean-Transformed']) / np.sqrt(
test['Var-Transformed'])
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex='col',
figsize=figsize) # , sharex='col', sharey='row')
ax = ax1
if train is not None:
train['Z_Predictive'] = (train[ycol] - train[mean_col]) / np.sqrt(train[var_predictive_col])
ax.scatter(x=train['Sample'], y=train[ycol], c='c', marker='_', s=25, alpha=1, linewidths=1, zorder=50)
ax.errorbar(x=train['Sample'], y=train[mean_col], yerr=2 * np.sqrt(train[var_predictive_col]), fmt='.',
ms=5, linewidth=1, c='k')
if truth_col and truth_col in train:
ax.plot(train['Sample'], train[truth_col], 'c.')
if test is not None:
test['Z_Predictive'] = (test[ycol] - test[mean_col]) / np.sqrt(test[var_predictive_col])
ax.scatter(x=test['Sample'], y=test[ycol], c='m', marker='_', s=25, alpha=1, linewidths=1, zorder=50)
ax.errorbar(x=test['Sample'], y=test[mean_col], yerr=2 * np.sqrt(test[var_predictive_col]), fmt='.', ms=5,
linewidth=1, c='k')
if truth_col and truth_col in test:
ax.plot(test['Sample'], test[truth_col], 'm.')
ax.margins(x=0, y=0.05)
ax.set_xlabel('Sample Index')
ax.set_ylabel(ycol)
a = 0.05
ax = ax2
if train is not None:
ax.scatter(x=train['Sample'], y=train['Z_Predictive'], c='c', marker='_', alpha=0.5, linewidth=1)
if truth_col and truth_col in train:
ax.scatter(x=train['Sample'], y=train['ZTrue_Predictive'], c='c', marker='.', alpha=0.5, linewidth=1)
if test is not None:
ax.scatter(x=test['Sample'], y=test['Z_Predictive'], c='m', marker='_', alpha=0.5, linewidth=1)
if truth_col and truth_col in test:
ax.scatter(x=test['Sample'], y=test['ZTrue_Predictive'], c='m', marker='.', alpha=0.5, linewidth=1)
ax.margins(x=0, y=0.05)
xlim = ax.get_xlim()
ylim = ax.get_ylim()
ax.add_patch(patches.Rectangle((0, -2), xlim[1], 4, alpha=a, color='g'))
ax.add_patch(patches.Rectangle((0, -5), xlim[1], 3, alpha=a, color='#FFA500'))
ax.add_patch(patches.Rectangle((0, 2), xlim[1], 3, alpha=a, color='#FFA500'))
ax.add_patch(patches.Rectangle((0, ylim[0]), xlim[1], abs(ylim[0]) - 5, alpha=a, color='r'))
ax.add_patch(patches.Rectangle((0, 5), xlim[1], abs(ylim[1]) - 5, alpha=a, color='r'))
ax.set_xlabel('Sample Index')
ax.set_ylabel('Z-Score')
plt.tight_layout()
return fig