Source code for coniii.utils._correlations

"""Correlation and observable utilities.

Internal module split out of the original coniii.utils. Public names
are re-exported by :mod:`coniii.utils`.
"""
import warnings
import numpy as np
from numba import njit
from itertools import combinations
from scipy.special import logsumexp

from ._indexing import sub_to_ind, ind_to_sub, bin_states, unique_rows


[docs]
def calc_overlap(sample,ignore_zeros=False):
    """<si_a si_b> between all pairs of replicas a and b

    Params:
    -------
    sample
    ignore_zeros (bool=False)
        Instead of normalizing by the number of spins, normalize by the minimum number of
        nonzero spins.
    """
    overlap = sample.dot(sample.T)
    if ignore_zeros:
        countZeros = np.zeros((len(sample),len(sample),2))
        countZeros[:,:,0] = (sample==0).sum(1)[:,None]
        countZeros[:,:,1] = (sample==0).sum(1)[None,:]
        return overlap / (sample.shape[1]-countZeros.max(2))
    return overlap / sample.shape[1]




[docs]
def pair_corr(X,
              weights=None,
              concat=False,
              exclude_empty=False,
              subtract_mean=False,
              laplace_count=False):
    """Calculate averages and pairwise correlations of spins.

    Parameters
    ----------
    X : ndarray
        Dimensions (n_samples,n_dim).
    weights : float or np.ndarray or twople, None
        If an array is passed, it must be the length of the data and each data point will
        be given the corresponding weight. Otherwise, the two element tuple should contain
        the normalization for each mean and each pairwise correlation, in that order. In
        other words, the first array should be length {s_i} and the second length
        {si*s_j}.
    concat : bool, False
        Return means concatenated with the pairwise correlations into one array.
    exclude_empty : bool, False
        When using with {-1,1} basis, you can leave entries with 0 and those will not be
        counted for any pair. If True, the weights option doesn't do anything.
    subtract_mean : bool, False
        If True, return pairwise correlations with product of individual means subtracted.
    laplace_count : 


    Returns
    -------
    twople
        (si,sisj) or np.concatenate((si,sisj))
    """

    assert frozenset(np.unique(X))<=frozenset([-1,0,1])
    S, N = X.shape
    
    if exclude_empty and not laplace_count:
        # count all nonzero entries for every pair
        weights = 1/(X!=0).sum(0), 1./( (X!=0).astype(int).T.dot(X!=0)[np.triu_indices(X.shape[1],k=1)] )
    elif exclude_empty and laplace_count:
        weights = ( 1/((X!=0).sum(0)+2),
                    1./( (X!=0).astype(int).T.dot(X!=0)[np.triu_indices(X.shape[1],k=1)] + 4 ) )
    elif weights is None:
        # for taking simple average
        weights = np.ones(len(X))/len(X)
    elif type(weights) is tuple:
        assert len(weights[0])==X.shape[1]
        assert len(weights[1])==(X.shape[1]*(X.shape[1]-1)//2)
    elif type(weights) is np.ndarray:
        assert len(weights)==len(X)
    else:
        weights = np.zeros(len(X))+weights
    
    # Calculate pairwise correlations depending on whether or not exclude_empty was set or not.
    if type(weights) is tuple:
        si = X.sum(0) * weights[0]
        sisj = (X.T.dot(X))[np.triu_indices(X.shape[1],k=1)] * weights[1]
    else:
        si = (X*weights[:,None]).sum(0)
        sisj = (X.T.dot(X*weights[:,None]))[np.triu_indices(X.shape[1],k=1)]

    if subtract_mean:
        sisj = np.array([sisj[i]-si[ix[0]]*si[ix[1]] for i,ix in enumerate(combinations(list(range(N)),2))])
    
    if concat:
        return np.concatenate((si,sisj))
    return si, sisj




[docs]
def k_corr(X, k,
           weights=None,
           exclude_empty=False):
    """Calculate kth order correlations of spins.

    Parameters
    ----------
    X : ndarray
        Dimensions (n_samples, n_dim).
    k : int
        Order of correlation function <s_{i_1} * s_{i_2} * ... * s_{i_k}>.
    weights : np.ndarray, None : 
        Calculate single and pairwise means given fractional weights for each state in
        the data such that a state only appears with some weight, typically less than
        one.
    exclude_empty : bool, False
        When using with {-1,1} basis, you can leave entries with 0 and those will not be
        counted for any pair. If True, the weights option doesn't do anything.

    Returns
    -------
    ndarray
        Kth order correlations <s_{i_1} * s_{i_2} * ... * s_{i_k}>.
    """
    
    assert frozenset(np.unique(X))<=frozenset([-1,0,1])
    S, N = X.shape
    kcorr = np.zeros(int(binom(N,k)))
    
    if exclude_empty:
        for counter,ijk in enumerate(combinations(range(N), k)):
            p = np.prod(X[:,ijk], axis=1)
            kcorr[counter] = p[p!=0].mean()
    
    if weights is None:
        weights = np.ones(S)/S
    for counter,ijk in enumerate(combinations(range(N), k)):
        kcorr[counter] = np.prod(X[:,ijk], axis=1).dot(weights)
    return kcorr




[docs]
def convert_corr(si, sisj, convert_to, concat=False):
    """Convert single spin means and pairwise correlations between {0,1} and {-1,1}
    formulations.

    Parameters
    ----------
    si : ndarray
        Individual means.
    sisj : ndarray
        Pairwise correlations.
    convert_to : str
        '11' will convert {0,1} formulation to +/-1 and '01' will convert +/-1 formulation
        to {0,1}
    concat : bool, False
        If True, return concatenation of means and pairwise correlations.

    Returns
    -------
    ndarray
        Averages <si>. Converted to appropriate basis. Returns concatenated vector <si>
        and <sisj> if concat is True.
    ndarray, optional
        Pairwise correlations <si*sj>. Converted to appropriate basis.
    """

    if convert_to=='11':
        newsisj = np.zeros(sisj.shape)
        k = 0
        for i in range(len(si)-1):
            for j in range(i+1,len(si)):
                newsisj[k] = 4*sisj[k] - 2*si[i] - 2*si[j] + 1
                k += 1
        newsi = si*2-1
    elif convert_to=='01':
        newsisj = np.zeros(sisj.shape)
        k = 0
        for i in range(len(si)-1):
            for j in range(i+1,len(si)):
                newsisj[k] = ( sisj[k] + si[i] + si[j] + 1 )/4.
                k += 1
        newsi = (si+1)/2
    else:
        raise Exception("Invalid value for convert_to. Must be either '01' or '11'.")

    if concat:
        return np.concatenate((newsi,newsisj))
    return newsi, newsisj




[docs]
def state_probs(v, allstates=None, weights=None, normalized=True):
    """Get probability of unique states. There is an option to allow for weighted
    counting.
    
    Parameters
    ----------
    states : ndarray
        Sample of states on which to extract probabilities of unique configurations with
        dimensions (n_samples,n_dimension).
    allstates : ndarray, None
        Unique configurations to look for with dimensions (n_samples, n_dimension).
    weights : vector, None
        For weighted counting of each state given in allstate kwarg.
    normalized : bool, True
        If True, return probability distribution instead of frequency count.
    
    Returns
    -------
    ndarray
        Vector of the probabilities of each state.
    ndarray
        All unique states found in the data. Each state is a row. Only returned if
        allstates kwarg is not provided.
    """

    if v.ndim==1:
        v = v[:,None]
    n = v.shape[1]
    j = 0
    return_all_states = False  # switch to keep track of whether or not allstates were given

    if allstates is None:
        allstates = v[unique_rows(v)]
        uniqIx = unique_rows(v, return_inverse=True)
        freq = np.bincount( uniqIx )
        return_all_states = True
    else:
        if weights is None:
            weights = np.ones((v.shape[0]))
        
        freq = np.zeros(allstates.shape[0])
        for vote in allstates:
            ix = ( vote==v ).sum(1)==n
            freq[j] = (ix*weights).sum()
            j+=1
        if np.isclose(np.sum(freq),np.sum(weights))==0:
            import warnings
            warnings.warn("States not found in given list of all states.")
    if normalized:
        freq = freq.astype(float)/np.sum(freq)

    if return_all_states:
        return freq, allstates
    return freq




[docs]
def calc_de(s, i):
    """Calculate the derivative of the energy wrt parameters given the state and index of
    the parameter. In this case, the parameters are the concatenated vector of {h_i,J_ij}.

    Parameters
    ----------
    s : ndarray
         Two-dimensional vector of spins where each row is a state.
    i : int

    Returns
    -------
    dE : float
        Derivative of hamiltonian with respect to ith parameter, i.e. the corresponding
        observable.
    """
    
    assert s.ndim==2
    if i<s.shape[1]:
        return -s[:,i]
    else:
        i -= s.shape[1]
        i, j = ind_to_sub(s.shape[1], i)
        return -s[:,i] * s[:,j]