Source code for coniii.utils._correlations

"""Correlation and observable utilities.

Internal module split out of the original coniii.utils. Public names
are re-exported by :mod:`coniii.utils`.
"""
import warnings
import numpy as np
from numba import njit
from itertools import combinations
from scipy.special import logsumexp

from ._indexing import sub_to_ind, ind_to_sub, bin_states, unique_rows

[docs] def calc_overlap(sample,ignore_zeros=False): """<si_a si_b> between all pairs of replicas a and b Params: ------- sample ignore_zeros (bool=False) Instead of normalizing by the number of spins, normalize by the minimum number of nonzero spins. """ overlap = sample.dot(sample.T) if ignore_zeros: countZeros = np.zeros((len(sample),len(sample),2)) countZeros[:,:,0] = (sample==0).sum(1)[:,None] countZeros[:,:,1] = (sample==0).sum(1)[None,:] return overlap / (sample.shape[1]-countZeros.max(2)) return overlap / sample.shape[1]
[docs] def pair_corr(X, weights=None, concat=False, exclude_empty=False, subtract_mean=False, laplace_count=False): """Calculate averages and pairwise correlations of spins. Parameters ---------- X : ndarray Dimensions (n_samples,n_dim). weights : float or np.ndarray or twople, None If an array is passed, it must be the length of the data and each data point will be given the corresponding weight. Otherwise, the two element tuple should contain the normalization for each mean and each pairwise correlation, in that order. In other words, the first array should be length {s_i} and the second length {si*s_j}. concat : bool, False Return means concatenated with the pairwise correlations into one array. exclude_empty : bool, False When using with {-1,1} basis, you can leave entries with 0 and those will not be counted for any pair. If True, the weights option doesn't do anything. subtract_mean : bool, False If True, return pairwise correlations with product of individual means subtracted. laplace_count : Returns ------- twople (si,sisj) or np.concatenate((si,sisj)) """ assert frozenset(np.unique(X))<=frozenset([-1,0,1]) S, N = X.shape if exclude_empty and not laplace_count: # count all nonzero entries for every pair weights = 1/(X!=0).sum(0), 1./( (X!=0).astype(int).T.dot(X!=0)[np.triu_indices(X.shape[1],k=1)] ) elif exclude_empty and laplace_count: weights = ( 1/((X!=0).sum(0)+2), 1./( (X!=0).astype(int).T.dot(X!=0)[np.triu_indices(X.shape[1],k=1)] + 4 ) ) elif weights is None: # for taking simple average weights = np.ones(len(X))/len(X) elif type(weights) is tuple: assert len(weights[0])==X.shape[1] assert len(weights[1])==(X.shape[1]*(X.shape[1]-1)//2) elif type(weights) is np.ndarray: assert len(weights)==len(X) else: weights = np.zeros(len(X))+weights # Calculate pairwise correlations depending on whether or not exclude_empty was set or not. if type(weights) is tuple: si = X.sum(0) * weights[0] sisj = (X.T.dot(X))[np.triu_indices(X.shape[1],k=1)] * weights[1] else: si = (X*weights[:,None]).sum(0) sisj = (X.T.dot(X*weights[:,None]))[np.triu_indices(X.shape[1],k=1)] if subtract_mean: sisj = np.array([sisj[i]-si[ix[0]]*si[ix[1]] for i,ix in enumerate(combinations(list(range(N)),2))]) if concat: return np.concatenate((si,sisj)) return si, sisj
[docs] def k_corr(X, k, weights=None, exclude_empty=False): """Calculate kth order correlations of spins. Parameters ---------- X : ndarray Dimensions (n_samples, n_dim). k : int Order of correlation function <s_{i_1} * s_{i_2} * ... * s_{i_k}>. weights : np.ndarray, None : Calculate single and pairwise means given fractional weights for each state in the data such that a state only appears with some weight, typically less than one. exclude_empty : bool, False When using with {-1,1} basis, you can leave entries with 0 and those will not be counted for any pair. If True, the weights option doesn't do anything. Returns ------- ndarray Kth order correlations <s_{i_1} * s_{i_2} * ... * s_{i_k}>. """ assert frozenset(np.unique(X))<=frozenset([-1,0,1]) S, N = X.shape kcorr = np.zeros(int(binom(N,k))) if exclude_empty: for counter,ijk in enumerate(combinations(range(N), k)): p = np.prod(X[:,ijk], axis=1) kcorr[counter] = p[p!=0].mean() if weights is None: weights = np.ones(S)/S for counter,ijk in enumerate(combinations(range(N), k)): kcorr[counter] = np.prod(X[:,ijk], axis=1).dot(weights) return kcorr
[docs] def convert_corr(si, sisj, convert_to, concat=False): """Convert single spin means and pairwise correlations between {0,1} and {-1,1} formulations. Parameters ---------- si : ndarray Individual means. sisj : ndarray Pairwise correlations. convert_to : str '11' will convert {0,1} formulation to +/-1 and '01' will convert +/-1 formulation to {0,1} concat : bool, False If True, return concatenation of means and pairwise correlations. Returns ------- ndarray Averages <si>. Converted to appropriate basis. Returns concatenated vector <si> and <sisj> if concat is True. ndarray, optional Pairwise correlations <si*sj>. Converted to appropriate basis. """ if convert_to=='11': newsisj = np.zeros(sisj.shape) k = 0 for i in range(len(si)-1): for j in range(i+1,len(si)): newsisj[k] = 4*sisj[k] - 2*si[i] - 2*si[j] + 1 k += 1 newsi = si*2-1 elif convert_to=='01': newsisj = np.zeros(sisj.shape) k = 0 for i in range(len(si)-1): for j in range(i+1,len(si)): newsisj[k] = ( sisj[k] + si[i] + si[j] + 1 )/4. k += 1 newsi = (si+1)/2 else: raise Exception("Invalid value for convert_to. Must be either '01' or '11'.") if concat: return np.concatenate((newsi,newsisj)) return newsi, newsisj
[docs] def state_probs(v, allstates=None, weights=None, normalized=True): """Get probability of unique states. There is an option to allow for weighted counting. Parameters ---------- states : ndarray Sample of states on which to extract probabilities of unique configurations with dimensions (n_samples,n_dimension). allstates : ndarray, None Unique configurations to look for with dimensions (n_samples, n_dimension). weights : vector, None For weighted counting of each state given in allstate kwarg. normalized : bool, True If True, return probability distribution instead of frequency count. Returns ------- ndarray Vector of the probabilities of each state. ndarray All unique states found in the data. Each state is a row. Only returned if allstates kwarg is not provided. """ if v.ndim==1: v = v[:,None] n = v.shape[1] j = 0 return_all_states = False # switch to keep track of whether or not allstates were given if allstates is None: allstates = v[unique_rows(v)] uniqIx = unique_rows(v, return_inverse=True) freq = np.bincount( uniqIx ) return_all_states = True else: if weights is None: weights = np.ones((v.shape[0])) freq = np.zeros(allstates.shape[0]) for vote in allstates: ix = ( vote==v ).sum(1)==n freq[j] = (ix*weights).sum() j+=1 if np.isclose(np.sum(freq),np.sum(weights))==0: import warnings warnings.warn("States not found in given list of all states.") if normalized: freq = freq.astype(float)/np.sum(freq) if return_all_states: return freq, allstates return freq
[docs] def calc_de(s, i): """Calculate the derivative of the energy wrt parameters given the state and index of the parameter. In this case, the parameters are the concatenated vector of {h_i,J_ij}. Parameters ---------- s : ndarray Two-dimensional vector of spins where each row is a state. i : int Returns ------- dE : float Derivative of hamiltonian with respect to ith parameter, i.e. the corresponding observable. """ assert s.ndim==2 if i<s.shape[1]: return -s[:,i] else: i -= s.shape[1] i, j = ind_to_sub(s.shape[1], i) return -s[:,i] * s[:,j]