import random
import igraph
import numpy as np
import scipy as sp
import math
from sklearn import metrics
from sklearn import svm
from sklearn import manifold
from sklearn.datasets import *
from sklearn.neighbors import NearestNeighbors
import matplotlib as mpl
import matplotlib.pyplot as plt
import time
import seaborn as sns
%matplotlib inline
c = %config InlineBackend.rc
c['savefig.dpi'] = 144
%config InlineBackend.figure_format='png'
%config InlineBackend.rc = c
%matplotlib inline
sns.set_style("darkgrid", {"legend.frameon": 1})
def abline():
gca = plt.gca()
gca.set_autoscale_on(False)
gca.plot(gca.get_xlim(),gca.get_ylim())
import scipy.weave as sw
from scipy.weave import converters
a = 1
# test weave is working
sw.inline('printf("%d\\n",a);',['a'])
def weight_hit_time(source, maxt, nrun, klist, kweight = None):
assert(type(source) == type(1))
assert(type(maxt) == type(1))
assert(type(nrun) == type(1))
# construct flattened list of neighborhood indices
klens=[0]+[len(item) for item in klist]
# kind[i] points to the start of neighborhood for vertex [i]
kind = np.cumsum(np.array(klens))
kind.astype(int)
# kwflat is the flattened weight matrix, used for weighted sampling.
if kweight is None:
kweight=[np.ones(len(sublist))/float(len(sublist)) for sublist in klist]
kwtemp = [np.cumsum(nbweight) for nbweight in kweight]
kwflat = np.array([val for sublist in kwtemp for val in sublist])
# kflat[kind[i] - kind[i+1]] are the neighbors of i.
kflat = np.array([val for sublist in klist for val in sublist])
kflat.astype(int)
hittimes = np.zeros((len(klist),maxt))
hittemp = np.zeros(len(klist))
code = """\
for(int i=0; i < nrun; i++){
for(int k=0; k < Nhittemp[0]; k++)
hittemp[k] = -1;
int j = source;
HITTIMES2(j,0)++;
int t = 1;
while(t < maxt){
int kl = kind[j+1]-kind[j];
// draw random sample from weighted neighborhood
float rv = drand48();
int jn = 0;
while(rv > kwflat[kind[j] + jn])
jn++;
// now maintain the importance weights and list of nodes
j = kflat[kind[j] + jn];
if(hittemp[j] < 0){
hittemp[j] = t;
HITTIMES2(j,t)++;
}
t++;
}
}
"""
sw.inline(code,['maxt','nrun','kflat','kwflat','kind','hittimes','source','hittemp'],verbose=1, headers=["<math.h>"], compiler="gcc")
return (hittimes)
klist = [[0, 1],[0, 1]]
kweight = [np.array([0.5,0.5]),np.array([0.5,0.5])]
weight_hit_time(1, 10000, 1000, klist, kweight=kweight)
def fast_hit_time(jinit, nrun, maxt, klist):
assert(type(jinit) == type(1))
assert(type(nrun) == type(1))
assert(type(maxt) == type(1))
# construct flattened list of neighborhood indices
klens=[0]+[len(item) for item in klist]
# kind[i] points to the start of neighborhood for vertex [i]
kind = np.cumsum(np.array(klens))
kind.astype(int)
# kflat[kind[i] - kind[i+1]] are the neighbors of i.
kflat = np.array([val for sublist in klist for val in sublist])
kflat.astype(int)
j=jinit
t=maxt
hittimes = np.zeros((len(klist), maxt))
hittemp = np.zeros(len(klist))
code = """\
for(int i=0; i < nrun; i++){
int j = jinit;
for(int k=0; k < Nhittemp[0]; k++)
hittemp[k] = -1;
for(int t=0; t < maxt; t++){
int kl = kind[j+1]-kind[j];
int jn = rand() % kl;
j = kflat[kind[j] + jn];
if(hittemp[j] < 0){
hittemp[j] = t;
HITTIMES2(j,t)++;
}
}
}
"""
sw.inline(code,['jinit','maxt','nrun','kflat','kind','hittemp','hittimes'],verbose=1, headers=["<math.h>"], compiler="gcc")
return hittimes
klist = [[0, 1],[0, 1]]
fast_hit_time(0, 200, 10, klist)
def fast_list_stationary(jinit, nrun, klist, kweight = None):
assert(type(jinit) == type(1))
assert(type(nrun) == type(1))
# construct flattened list of neighborhood indices
klens=[0]+[len(item) for item in klist]
# kind[i] points to the start of neighborhood for vertex [i]
kind = np.cumsum(np.array(klens))
kind.astype(int)
# kwflat is the flattened weight matrix, used for weighted sampling.
if kweight is None:
kweight=[np.ones(len(sublist))/float(len(sublist)) for sublist in klist]
kwtemp = [np.cumsum(nbweight) for nbweight in kweight]
kwflat = np.array([val for sublist in kwtemp for val in sublist])
# kflat[kind[i] - kind[i+1]] are the neighbors of i.
kflat = np.array([val for sublist in klist for val in sublist])
kflat.astype(int)
csamp = np.ones(len(klist))
j=jinit
code = """\
for(int i=0; i < nrun; i++){
int kl = kind[j+1]-kind[j];
float rv = drand48();
int jn = 0;
while(rv > kwflat[kind[j] + jn])
jn++;
j = kflat[kind[j] + jn];
csamp[j]++;
}
"""
sw.inline(code,['j','nrun','kflat','kind','kwflat','csamp'],verbose=1, headers=["<math.h>"], compiler="gcc")
return csamp/np.sum(csamp)
jinit = 0
nrun = 100000
klist = [[0, 1],[0, 1]]
kweight = [np.array([0.7,0.3]),np.array([0.5,0.5])]
fast_list_stationary(jinit, nrun, klist, kweight)
def slow_hitting_time(adj, i, j, kmax, nbs=False):
if nbs:
jset = np.nonzero(adj[j,:])[0]
else:
jset = j
nnode = adj.shape[0]
vec = np.zeros(nnode)
vec[i]=1
prs = np.zeros(kmax+1)
adj2 = sp.sparse.csr_matrix(adj.T)
for k in xrange(kmax):
vec = adj2.dot(vec)
prs[k] = np.sum(vec[jset])
vec[jset] = 0
prs[kmax] = np.sum(vec)
return prs
def get_stationary(n_trunc, n_run, adj):
pi = (np.zeros(n_trunc)+1.0)/float(n_trunc)
adj2 = sp.sparse.csr_matrix(adj.T)
for i in xrange(n_run):
pi = adj2.dot(pi)
pi = pi + 1.0/( n_run * n_trunc )
pi = pi / sum(pi)
return pi
def reweighted_walk(dest, epsest, x, nbrs):
jumpp=1/pow(epsest,2.0)
jumpp=jumpp/max(jumpp)
rwmat = np.zeros((len(dest), len(dest)))
knind = np.delete(nbrs.kneighbors(x)[1],0,1)
for i in xrange(knind.shape[0]):
inds=knind[i,]
djump=1.0/dest[inds]
rwmat[i,inds]=djump/sum(djump)*jumpp[i]
rwmat[i,i]=1.0-jumpp[i]
return rwmat
Calculate $\varepsilon$ via the relation $\varepsilon(x)^d p(x) \propto k$ which implies, $\varepsilon(x) = k/p(x)^{1/d}$
def fit_graph(adj, n_run = 1000, d = 1):
pi = get_stationary(adj.shape[0], n_run, adj)
degree = np.sum(adj > 0,1)
dest = pow(pi,d/(d+2.0))*pow(degree,2/(d+2.0))
dest = dest/sum(dest)
epsest = pow(pi,-1.0/(d+2.0))*pow(degree,1.0/(d+2.0))
return (pi, dest, epsest)
def fit_knn(x, k):
nbrs = NearestNeighbors(n_neighbors=k).fit(x)
adj = get_graph(nbrs, x, k)
return (nbrs, adj)
def ltht(hittimes, beta, ofs=1):
nsamples=np.sum(hittimes,1)
runtime=hittimes.shape[1]
return -np.log(np.dot(hittimes,np.exp(-beta*(np.arange(runtime)+ofs)))/nsamples + np.exp(-beta*runtime) * (nsamples-np.sum(hittimes,1))/nsamples)
def adj_to_nblist(adj):
return [np.nonzero(adj[i,:])[0] for i in xrange(adj.shape[0])]
def get_graph(nbrs, x, k):
return nbrs.kneighbors_graph(x).toarray()*1.0/k
def get_dist(adj, eps):
distmat=np.dot(np.diag(eps),adj)
np.fill_diagonal(distmat,0)
return distmat
The directed Laplacian is defined by Yanhua Li and Zhi-Li Zhang in 'Random Walks on Digraphs, The Generalized Digraph Laplacian, and The Degree of Asymmetry'
Their definition is: $$\tilde{L} = \Pi^{1/2} (I-P) \Pi^{-1/2}$$ Where $\Pi^{1/2} = \text{diag}(\sqrt{\pi})$.
def make_laplace(mat):
return np.diag(np.sum(mat,1)) - mat
def make_directed_laplace_rw(mat, pi):
lapone = np.diag(np.ones(mat.shape[0])) - mat
return np.dot(np.dot(np.diag(np.sqrt(pi)),lapone),np.diag(1.0/np.sqrt(pi)))
def make_directed_laplace(mat):
pi_tmp = get_stationary(mat.shape[0], 1000, mat)
return make_directed_laplace(mat, pi_tmp)
The diffusion kernel is defined in Kondor et al for a undirected laplacian as:
$$e^{\beta L}$$This should generalize very directly to the directed case via the SVD.
def make_diffkern(lap, beta):
u, d, v = np.linalg.svd(-1 * lap)
gram=np.dot(np.dot(u,np.diag(np.exp(d*beta))),v)
return gram
The hitting time between two points on a directed graph is defined by Li-Zhang as:
$$H_{ij} = \frac{\tilde{L}^+_{jj}}{\pi_j} - \frac{\tilde{L}^+_{ij}}{\sqrt{\pi_i\pi_j}}$$The commute time is obviously: $$C_{ij} = H_{ij} + H_{ji}$$
def get_htime(lap, pi):
lapinv = np.linalg.pinv(lap)
isqrt = np.sqrt(1.0/pi)
selfterm = np.tile(np.diag(lapinv)/pi, (lapinv.shape[0], 1))
crossterm = np.dot(np.dot(np.diag(isqrt),lapinv),np.diag(isqrt))
hittime = selfterm - crossterm
return hittime
def get_ctime(lap, pi):
lapinv = np.linalg.pinv(lap)
isqrt = np.sqrt(1.0/pi)
selfterm = np.tile(np.diag(lapinv)/pi, (lapinv.shape[0], 1))
crossterm = np.dot(np.dot(np.diag(isqrt),lapinv),np.diag(isqrt))
hittime = selfterm - crossterm
commtime = hittime + np.transpose(hittime)
commtime[commtime < 0] = 0
return commtime
def get_sps(adj, eps):
sps=sp.sparse.csgraph.shortest_path(get_dist(adj,eps))
sps = sps + np.transpose(sps)
return sps
def get_histo(din, nbin, xvec):
minx = min(xvec)
maxx = max(xvec)
step = (maxx - minx)/nbin
nzs = np.zeros(nbin)
for i in xrange(nbin):
lb = i*step+minx
ub = (i+1)*step+minx
isel = np.squeeze((xvec > lb) & (xvec < ub))
nzs[i]= sum(din[isel])
return nzs
def get_mix():
xc=random.uniform(0,0.75)
if xc > 0.5:
return xc*2-0.5
return xc
def dens_mix(xc):
xr = np.ones(len(np.squeeze(xc)))
xr[np.squeeze(xc) > 0.5] = 0.5
return xr