{"nbformat_minor": 0, "cells": [{"source": "# Graph random walks functions", "cell_type": "markdown", "metadata": {}}, {"source": "## Import packages", "cell_type": "markdown", "metadata": {}}, {"execution_count": 2, "cell_type": "code", "source": "import random\nimport igraph\nimport numpy as np\nimport scipy as sp\nimport math\nfrom sklearn import metrics\nfrom sklearn import svm\nfrom sklearn import manifold\nfrom sklearn.datasets import *\nfrom sklearn.neighbors import NearestNeighbors\nimport matplotlib as mpl\nimport matplotlib.pyplot as plt\nimport time\nimport seaborn as sns\n%matplotlib inline", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": null, "cell_type": "code", "source": "c = %config InlineBackend.rc\nc['savefig.dpi'] = 144\n%config InlineBackend.figure_format='png'\n%config InlineBackend.rc = c\n%matplotlib inline\nsns.set_style(\"darkgrid\", {\"legend.frameon\": 1})", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"source": "## Plotting related", "cell_type": "markdown", "metadata": {}}, {"execution_count": 3, "cell_type": "code", "source": "def abline():\n    gca = plt.gca()\n    gca.set_autoscale_on(False)\n    gca.plot(gca.get_xlim(),gca.get_ylim())", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"source": "## Random walk related", "cell_type": "markdown", "metadata": {}}, {"execution_count": 4, "cell_type": "code", "source": "import scipy.weave as sw\nfrom scipy.weave import converters\na  = 1\n# test weave is working\nsw.inline('printf(\"%d\\\\n\",a);',['a'])", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 25, "cell_type": "code", "source": "def weight_hit_time(source, maxt, nrun, klist, kweight = None):\n    assert(type(source) == type(1))\n    assert(type(maxt) == type(1))\n    assert(type(nrun) == type(1))\n    # construct flattened list of neighborhood indices\n    klens=[0]+[len(item) for item in klist]\n    # kind[i] points to the start of neighborhood for vertex [i]\n    kind = np.cumsum(np.array(klens))\n    kind.astype(int)\n    # kwflat is the flattened weight matrix, used for weighted sampling.\n    if kweight is None:\n        kweight=[np.ones(len(sublist))/float(len(sublist)) for sublist in klist]\n    kwtemp = [np.cumsum(nbweight) for nbweight in kweight]\n    kwflat = np.array([val for sublist in kwtemp for val in sublist])\n    # kflat[kind[i] - kind[i+1]] are the neighbors of i.\n    kflat = np.array([val for sublist in klist for val in sublist])\n    kflat.astype(int)\n    \n    hittimes = np.zeros((len(klist),maxt))\n    hittemp = np.zeros(len(klist))\n        \n    code = \"\"\"\\\n    for(int i=0; i < nrun; i++){\n        for(int k=0; k < Nhittemp[0]; k++)\n            hittemp[k] = -1;\n        int j = source;\n        HITTIMES2(j,0)++;\n        int t = 1;\n        while(t < maxt){\n            int kl = kind[j+1]-kind[j];\n            // draw random sample from weighted neighborhood\n            float rv = drand48();\n            int jn = 0;\n            while(rv > kwflat[kind[j] + jn])\n                jn++;\n            // now maintain the importance weights and list of nodes\n            j = kflat[kind[j] + jn];\n            if(hittemp[j] < 0){\n                hittemp[j] = t;\n                HITTIMES2(j,t)++;\n            }\n            t++;\n        }\n    }\n    \"\"\"\n    sw.inline(code,['maxt','nrun','kflat','kwflat','kind','hittimes','source','hittemp'],verbose=1, headers=[\"<math.h>\"], compiler=\"gcc\")\n    return (hittimes)\n\nklist = [[0, 1],[0, 1]]\nkweight = [np.array([0.5,0.5]),np.array([0.5,0.5])]\nweight_hit_time(1, 10000, 1000, klist, kweight=kweight)\n", "outputs": [{"execution_count": 25, "output_type": "execute_result", "data": {"text/plain": "array([[    0.,   511.,   243., ...,     0.,     0.,     0.],\n       [ 1000.,   489.,   273., ...,     0.,     0.,     0.]])"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 11, "cell_type": "code", "source": "def fast_hit_time(jinit, nrun, maxt, klist):\n    assert(type(jinit) == type(1))\n    assert(type(nrun) == type(1))\n    assert(type(maxt) == type(1))\n    # construct flattened list of neighborhood indices\n    klens=[0]+[len(item) for item in klist]\n    # kind[i] points to the start of neighborhood for vertex [i]\n    kind = np.cumsum(np.array(klens))\n    kind.astype(int)\n    # kflat[kind[i] - kind[i+1]] are the neighbors of i.\n    kflat = np.array([val for sublist in klist for val in sublist])\n    kflat.astype(int)\n    j=jinit\n    t=maxt\n    \n    hittimes = np.zeros((len(klist), maxt))\n    hittemp = np.zeros(len(klist))\n    \n    code = \"\"\"\\\n    for(int i=0; i < nrun; i++){\n        int j = jinit;\n        for(int k=0; k < Nhittemp[0]; k++)\n            hittemp[k] = -1;\n        for(int t=0; t < maxt; t++){\n           int kl = kind[j+1]-kind[j];\n           int jn = rand() % kl;\n           j = kflat[kind[j] + jn];\n           if(hittemp[j] < 0){\n               hittemp[j] = t;\n               HITTIMES2(j,t)++;\n            }\n        }\n    }\n    \"\"\"\n    sw.inline(code,['jinit','maxt','nrun','kflat','kind','hittemp','hittimes'],verbose=1, headers=[\"<math.h>\"], compiler=\"gcc\")\n    return hittimes\nklist = [[0, 1],[0, 1]]\nfast_hit_time(0, 200, 10, klist)", "outputs": [{"execution_count": 11, "output_type": "execute_result", "data": {"text/plain": "array([[ 115.,   45.,   17.,   16.,    3.,    2.,    1.,    1.,    0.,\n           0.],\n       [  85.,   58.,   35.,   14.,    6.,    1.,    0.,    0.,    1.,\n           0.]])"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 9, "cell_type": "code", "source": "def fast_list_stationary(jinit, nrun, klist, kweight = None):\n    assert(type(jinit) == type(1))\n    assert(type(nrun) == type(1))\n    # construct flattened list of neighborhood indices\n    klens=[0]+[len(item) for item in klist]\n    # kind[i] points to the start of neighborhood for vertex [i]\n    kind = np.cumsum(np.array(klens))\n    kind.astype(int)\n    # kwflat is the flattened weight matrix, used for weighted sampling.\n    if kweight is None:\n        kweight=[np.ones(len(sublist))/float(len(sublist)) for sublist in klist]\n    kwtemp = [np.cumsum(nbweight) for nbweight in kweight]\n    kwflat = np.array([val for sublist in kwtemp for val in sublist])\n    # kflat[kind[i] - kind[i+1]] are the neighbors of i.\n    kflat = np.array([val for sublist in klist for val in sublist])\n    kflat.astype(int)\n    csamp = np.ones(len(klist))\n    j=jinit\n    code = \"\"\"\\\n    for(int i=0; i < nrun; i++){\n       int kl = kind[j+1]-kind[j];\n       float rv = drand48();\n       int jn = 0;\n       while(rv > kwflat[kind[j] + jn])\n           jn++;\n       j = kflat[kind[j] + jn];\n       csamp[j]++;\n    }\n    \"\"\"\n    sw.inline(code,['j','nrun','kflat','kind','kwflat','csamp'],verbose=1, headers=[\"<math.h>\"], compiler=\"gcc\")\n    return csamp/np.sum(csamp)\njinit = 0\nnrun = 100000\nklist = [[0, 1],[0, 1]]\nkweight = [np.array([0.7,0.3]),np.array([0.5,0.5])]\nfast_list_stationary(jinit, nrun, klist, kweight)", "outputs": [{"execution_count": 9, "output_type": "execute_result", "data": {"text/plain": "array([ 0.62639747,  0.37360253])"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 6, "cell_type": "code", "source": "def slow_hitting_time(adj, i, j, kmax, nbs=False):\n    if nbs:\n        jset = np.nonzero(adj[j,:])[0]\n    else:\n        jset = j\n    nnode = adj.shape[0]\n    vec = np.zeros(nnode)\n    vec[i]=1\n    prs = np.zeros(kmax+1)\n    adj2 = sp.sparse.csr_matrix(adj.T)\n    for k in xrange(kmax):\n        vec = adj2.dot(vec)\n        prs[k] = np.sum(vec[jset])\n        vec[jset] = 0\n    prs[kmax] = np.sum(vec)\n    return prs", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 4, "cell_type": "code", "source": "def get_stationary(n_trunc, n_run, adj):\n    pi = (np.zeros(n_trunc)+1.0)/float(n_trunc)\n    adj2 = sp.sparse.csr_matrix(adj.T)\n    for i in xrange(n_run):\n        pi = adj2.dot(pi)\n    pi = pi + 1.0/( n_run * n_trunc )\n    pi = pi / sum(pi)\n    return pi\n\ndef reweighted_walk(dest, epsest, x, nbrs):\n    jumpp=1/pow(epsest,2.0)\n    jumpp=jumpp/max(jumpp)\n    rwmat = np.zeros((len(dest), len(dest)))\n    knind = np.delete(nbrs.kneighbors(x)[1],0,1)\n    for i in xrange(knind.shape[0]):\n        inds=knind[i,]\n        djump=1.0/dest[inds]\n        rwmat[i,inds]=djump/sum(djump)*jumpp[i]\n        rwmat[i,i]=1.0-jumpp[i]\n    return rwmat", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"source": "Calculate $\\varepsilon$ via the relation $\\varepsilon(x)^d p(x) \\propto k$ which implies, $\\varepsilon(x) = k/p(x)^{1/d}$", "cell_type": "markdown", "metadata": {}}, {"execution_count": 7, "cell_type": "code", "source": "def fit_graph(adj, n_run = 1000, d = 1):\n    pi = get_stationary(adj.shape[0], n_run, adj)\n    degree = np.sum(adj > 0,1)\n    dest = pow(pi,d/(d+2.0))*pow(degree,2/(d+2.0))\n    dest = dest/sum(dest)\n    epsest = pow(pi,-1.0/(d+2.0))*pow(degree,1.0/(d+2.0))\n    return (pi, dest, epsest)\n    \ndef fit_knn(x, k):\n    nbrs = NearestNeighbors(n_neighbors=k).fit(x)\n    adj = get_graph(nbrs, x, k)\n    return (nbrs, adj)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 21, "cell_type": "code", "source": "def ltht(hittimes, beta, ofs=1):\n    nsamples=np.sum(hittimes,1)\n    runtime=hittimes.shape[1]\n    return -np.log(np.dot(hittimes,np.exp(-beta*(np.arange(runtime)+ofs)))/nsamples + np.exp(-beta*runtime) * (nsamples-np.sum(hittimes,1))/nsamples)", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"source": "## Graph related", "cell_type": "markdown", "metadata": {}}, {"execution_count": 32, "cell_type": "code", "source": "def adj_to_nblist(adj):\n    return [np.nonzero(adj[i,:])[0] for i in xrange(adj.shape[0])]", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 4, "cell_type": "code", "source": "def get_graph(nbrs, x, k):\n    return nbrs.kneighbors_graph(x).toarray()*1.0/k\n\ndef get_dist(adj, eps):\n    distmat=np.dot(np.diag(eps),adj)\n    np.fill_diagonal(distmat,0)\n    return distmat", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"source": "The directed Laplacian is defined by Yanhua Li and Zhi-Li Zhang in 'Random Walks on Digraphs, The Generalized Digraph Laplacian, and\nThe Degree of Asymmetry'\n\nTheir definition is:\n$$\\tilde{L} = \\Pi^{1/2} (I-P) \\Pi^{-1/2}$$\nWhere $\\Pi^{1/2} = \\text{diag}(\\sqrt{\\pi})$.\n", "cell_type": "markdown", "metadata": {}}, {"execution_count": 5, "cell_type": "code", "source": "\ndef make_laplace(mat):\n    return np.diag(np.sum(mat,1)) - mat\n\ndef make_directed_laplace_rw(mat, pi):\n    lapone = np.diag(np.ones(mat.shape[0])) - mat\n    return np.dot(np.dot(np.diag(np.sqrt(pi)),lapone),np.diag(1.0/np.sqrt(pi)))\n\ndef make_directed_laplace(mat):\n    pi_tmp = get_stationary(mat.shape[0], 1000, mat)\n    return make_directed_laplace(mat, pi_tmp)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"source": "The diffusion kernel is defined in Kondor et al for a undirected laplacian as:\n\n$$e^{\\beta L}$$\n\nThis should generalize very directly to the directed case via the SVD.", "cell_type": "markdown", "metadata": {}}, {"execution_count": null, "cell_type": "code", "source": "def make_diffkern(lap, beta):\n    u, d, v = np.linalg.svd(-1 * lap)\n    gram=np.dot(np.dot(u,np.diag(np.exp(d*beta))),v)\n    return gram", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"source": "The hitting time between two points on a directed graph is defined by Li-Zhang as:\n\n$$H_{ij} = \\frac{\\tilde{L}^+_{jj}}{\\pi_j} - \\frac{\\tilde{L}^+_{ij}}{\\sqrt{\\pi_i\\pi_j}}$$\n\nThe commute time is obviously:\n$$C_{ij} = H_{ij} + H_{ji}$$", "cell_type": "markdown", "metadata": {}}, {"execution_count": null, "cell_type": "code", "source": "def get_htime(lap, pi):\n    lapinv = np.linalg.pinv(lap)\n    isqrt = np.sqrt(1.0/pi)\n    selfterm = np.tile(np.diag(lapinv)/pi, (lapinv.shape[0], 1))\n    crossterm = np.dot(np.dot(np.diag(isqrt),lapinv),np.diag(isqrt))\n    hittime = selfterm - crossterm\n    return hittime", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": null, "cell_type": "code", "source": "def get_ctime(lap, pi):\n    lapinv = np.linalg.pinv(lap)\n    isqrt = np.sqrt(1.0/pi)\n    selfterm = np.tile(np.diag(lapinv)/pi, (lapinv.shape[0], 1))\n    crossterm = np.dot(np.dot(np.diag(isqrt),lapinv),np.diag(isqrt))\n    hittime = selfterm - crossterm\n    commtime = hittime + np.transpose(hittime)\n    commtime[commtime < 0] = 0\n    return commtime", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": null, "cell_type": "code", "source": "def get_sps(adj, eps):\n    sps=sp.sparse.csgraph.shortest_path(get_dist(adj,eps))\n    sps = sps + np.transpose(sps)\n    return sps", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"source": "## Plotting and data generation", "cell_type": "markdown", "metadata": {}}, {"execution_count": 6, "cell_type": "code", "source": "def get_histo(din, nbin, xvec):\n    minx = min(xvec)\n    maxx = max(xvec)\n    step = (maxx - minx)/nbin\n    nzs = np.zeros(nbin)\n    for i in xrange(nbin):\n        lb = i*step+minx\n        ub = (i+1)*step+minx\n        isel = np.squeeze((xvec > lb) & (xvec < ub))\n        nzs[i]= sum(din[isel])\n    return nzs\n", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 7, "cell_type": "code", "source": "def get_mix():\n    xc=random.uniform(0,0.75)\n    if xc > 0.5:\n        return xc*2-0.5\n    return xc\n\ndef dens_mix(xc):\n    xr = np.ones(len(np.squeeze(xc)))\n    xr[np.squeeze(xc) > 0.5] = 0.5\n    return xr", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}], "nbformat": 4, "metadata": {"kernelspec": {"display_name": "IPython (Python 2)", "name": "python2"}, "language_info": {"mimetype": "text/x-python", "nbconvert_exporter": "python", "name": "python", "file_extension": ".py", "pygments_lexer": "ipython2", "codemirror_mode": {"version": 2, "name": "ipython"}}, "signature": "sha256:ca07d83984a513e5b5d7539cebbd47ca8eec807afe54140d463f62b015a1e64e"}}