Source code for modbp.ModularityBP

from __future__ import absolute_import
import numpy as np
import igraph as ig
from future.utils import iteritems,iterkeys
try:
    from collections.abc import Hashable
except ImportError:  # Python 2 fallback
    from collections import Hashable
from .GenerateGraphs import MultilayerGraph
import sklearn.metrics as skm
from .bp import BP_Modularity,PairVector,IntArray,IntMatrix,DoubleArray
import itertools
import pandas as pd
import scipy.optimize as sciopt
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sbn
from time import time
import warnings
import os,pickle,gzip
# import logging
# logging.basicConfig(format=':%(asctime)s:%(levelname)s:%(message)s', level=logging.DEBUG)
# logging.basicConfig(format=':%(asctime)s:%(levelname)s:%(message)s', level=logging.INFO)

class ModularityBP():
    """
    This is python interface class for the mulitlayer modularity BP.
    """

    def __init__(self, mlgraph=None, interlayer_edgelist=None,
                 intralayer_edgelist=None, layer_vec=None,
                 accuracy_off=True, use_effective=False, comm_vec=None,
                 align_communities_across_layers=True,min_com_size=5):

        assert mlgraph is not None or (intralayer_edgelist is not None and layer_vec is not None), \
            "Must provide either a multilayer graph object or both an intralayer edgelist and a layer_vec"


        if mlgraph is not None:
            # a single-layer igraph was passed in; create a MultilayerGraph with empty interlayer edges
            if hasattr(mlgraph, 'get_edgelist'):
                self.graph = MultilayerGraph (intralayer_edges=np.array(mlgraph.get_edgelist()),
                                              interlayer_edges=np.zeros((0,2),dtype='int'),
                                              layer_vec=[0 for _ in range(mlgraph.vcount())])

            else:
                self.graph=mlgraph

        else:
            if interlayer_edgelist is None:
                interlayer_edgelist=np.zeros((0,2),dtype='int')
            self.graph = MultilayerGraph(intralayer_edges=intralayer_edgelist,
                                         interlayer_edges=interlayer_edgelist,layer_vec=layer_vec)

        if comm_vec is not None:
            self.graph.comm_vec = comm_vec
        self.n=self.graph.N
        self.nlayers=self.graph.nlayers
        self.totaledgeweight=self.graph.totaledgeweight
        self.intralayer_edges=self.graph.intralayer_edges
        self.interlayer_edges=self.graph.interlayer_edges
        self._cpp_intra_weights=self._get_cpp_intra_weights()
        self.layer_vec=np.array(self.graph.layer_vec)
        self._layer_vec_ia=IntArray([int(i) for i in self.layer_vec])#must be integers!
        self.layers_unique=sorted(np.unique(self.layer_vec))
        self._accuracy_off=accuracy_off # calculating permuted accuracy can be expensive for large q
        self._align_communities_across_layers=align_communities_across_layers
        self.marginals={}
        self.partitions={} # max of marginals
        self.niters={}
        self.marginal_index_to_close_marginals={} # maps each marginal index to its group of close marginals (i.e. marginals that carry the same information across nodes)
        self.marginal_to_comm_number={} # maps each marginal index to its effective community number
        self.group_distances={}
        self.use_effective=use_effective
        self.nruns=0 # how many times the BP algorithm has been run; also serves as the index for outputs
        self._permutation_vectors={} # keeps track of which communities are switched in each layer after a sweep
        # DataFrame storing one row of summary statistics per run
        self.retrieval_modularities=pd.DataFrame(columns=['q','beta','resgamma','omega',
                                                          'retrieval_modularity','niters'],dtype=float)


        self._intraedgelistpv= self._get_edgelistpv()
        self._interedgelistpv= self._get_edgelistpv(inter=True)
        self.min_community_size = min_com_size  # minimum number of nodes a community needs to count toward the true number of communities
        self._bpmod=None
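
    # Typical call sequence (see the __main__ sketch at the bottom of this
    # module): construct the object, estimate a starting inverse temperature
    # with get_bstar(q, omega), call run_modbp(beta, q, ...), and then inspect
    # self.partitions[run_index] and self.retrieval_modularities.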

    def run_modbp(self, beta, q, niter=100, resgamma=1.0, omega=1.0, reset=False):
        """
        :param beta: The inverse temperature at which to run the modularity belief
            propagation algorithm.  Must be specified each time BP is run.
        :param q: The number of marginals used for the run of modbp.  Note that if
            self.use_effective is True, the final number of reported communities can be lower.
        :param niter: Maximum number of iterations allowed.  If
            self._align_communities_across_layers is True, the actual number of
            iterations can exceed this bound, up to at most 2*niter.
        :param resgamma: The resolution parameter at which to run modbp.  Default is 1.0.
        :param omega: The interlayer coupling strength used in running multimodbp.
            This represents how strongly the algorithm tries to assign nodes connected
            by an interlayer edge to the same community.
        :param reset: If True, the marginals are rerandomized when this method is called.
            Otherwise the state is maintained from previous runs, if any (assuming q
            hasn't changed).
        :return: None
        """
        if beta == 0:
            warnings.warn("beta cannot be zero. Using 1e-16")
            beta = np.power(10.0, -16)
        assert q > 0, "q must be > 0"

        q_orig = q  # before collapsing
        self.retrieval_modularities.loc[self.nruns, 'q'] = q_orig
        self.retrieval_modularities.loc[self.nruns, 'beta'] = beta
        self.retrieval_modularities.loc[self.nruns, 'omega'] = omega
        self.retrieval_modularities.loc[self.nruns, 'resgamma'] = resgamma

        # create or update the c++ modbp object
        if self._bpmod is None:
            self._bpmod = BP_Modularity(layer_membership=self._layer_vec_ia,
                                        intra_edgelist=self._intraedgelistpv,
                                        intra_edgeweight=self._cpp_intra_weights,
                                        inter_edgelist=self._interedgelistpv,
                                        _n=self.n, _nt=self.nlayers, q=q, beta=beta,
                                        resgamma=resgamma, omega=omega,
                                        transform=False, verbose=False)
        else:
            if self._bpmod.getBeta() != beta or reset:
                self._bpmod.setBeta(beta)
            if self._bpmod.getq() != q:
                self._bpmod.setq(q)
            if self._bpmod.getResgamma() != resgamma:
                self._bpmod.setResgamma(resgamma)
            if self._bpmod.getOmega() != omega:
                self._bpmod.setOmega(omega)

        if self._align_communities_across_layers:
            iters_per_run = niter // 2  # somewhat arbitrary divisor
        else:
            iters_per_run = niter  # run as many iterations as possible on the first run

        converged = False
        iters = self._bpmod.run(iters_per_run)
        cmargs = np.array(self._bpmod.return_marginals())
        if iters < iters_per_run:
            converged = True
        self.marginals[self.nruns] = cmargs

        # calculate effective group sizes and get the partition
        self._get_community_distances(self.nruns)  # sets values in method
        cpartition = self._get_partition(self.nruns, self.use_effective)
        self.partitions[self.nruns] = cpartition
        if self.use_effective:
            # perform the merger on the BP side as well
            q = self._merge_communities_bp(self.nruns)

        if self._align_communities_across_layers and iters < niter:
            nsweeps = self._perform_permuation_sweep(self.nruns)  # modifies partition directly
            cnt = 0
            while not (nsweeps == 0 and converged) and iters <= niter:
                self._switch_beliefs_bp(self.nruns)
                # can't go beyond the allotted number of iterations
                citers = self._bpmod.run(iters_per_run)
                iters += citers
                if citers < iters_per_run:  # it converged
                    converged = True
                cmargs = np.array(self._bpmod.return_marginals())
                self.marginals[self.nruns] = cmargs
                # recalculate effective group sizes and get the partition
                self._get_community_distances(self.nruns)
                cpartition = self._get_partition(self.nruns, self.use_effective)
                self.partitions[self.nruns] = cpartition
                if self.use_effective:
                    q = self._merge_communities_bp(self.nruns)
                cnt += 1
                nsweeps = self._perform_permuation_sweep(self.nruns)  # modifies partition directly

        if iters >= niter:
            # modularity BP did not converge within the allotted iterations
            pass

        self.retrieval_modularities.loc[self.nruns, 'niters'] = iters
        self.retrieval_modularities.loc[self.nruns, 'converged'] = converged
        retmod = self._get_retrieval_modularity(self.nruns)
        bethe_energy = self._get_bethe_free_energy()
        self.retrieval_modularities.loc[self.nruns, 'retrieval_modularity'] = retmod
        self.retrieval_modularities.loc[self.nruns, 'bethe_free_energy'] = bethe_energy
        self.retrieval_modularities.loc[self.nruns, 'avg_entropy'] = _get_avg_entropy(cmargs)
        _, com_cnts = np.unique(self.partitions[self.nruns], return_counts=True)
        self.retrieval_modularities.loc[self.nruns, 'num_coms'] = np.sum(com_cnts >= self.min_community_size)
        self.retrieval_modularities.loc[self.nruns, 'bstar'] = self.get_bstar(q, omega)

        if self.graph.comm_vec is not None:
            self.retrieval_modularities.loc[self.nruns, 'AMI_layer_avg'] = \
                self.graph.get_AMI_layer_avg_with_communities(cpartition)
            self.retrieval_modularities.loc[self.nruns, 'AMI'] = \
                self.graph.get_AMI_with_communities(cpartition)
            if not self._accuracy_off:  # can be expensive for large q
                self.retrieval_modularities.loc[self.nruns, 'Accuracy_layer_avg'] = \
                    self.graph.get_accuracy_layer_averaged_with_communities(cpartition)
                self.retrieval_modularities.loc[self.nruns, 'Accuracy'] = \
                    self.graph.get_accuracy_with_communities(cpartition)

        self.retrieval_modularities.loc[self.nruns, 'is_trivial'] = self._is_trivial(self.nruns)
        self.nruns += 1
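
    # Note: each call to run_modbp appends one row to self.retrieval_modularities
    # with (among others) the columns 'retrieval_modularity', 'bethe_free_energy',
    # 'niters', 'converged', 'num_coms', 'avg_entropy', 'bstar', and 'is_trivial',
    # plus the AMI/accuracy columns when a ground-truth comm_vec is available.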

    def _get_bethe_free_energy(self):
        if self._bpmod is None:
            raise AssertionError("cannot calculate the Bethe free energy without running first. "
                                 "Please call run_modbp.")
        return self._bpmod.compute_bethe_free_energy()

    def _get_cpp_intra_weights(self):
        if self.graph.unweighted:
            return DoubleArray([])
        else:
            assert len(self.graph.intralayer_weights) == len(self.graph.intralayer_edges), \
                "length of weights must match number of edges"
            return DoubleArray(self.graph.intralayer_weights)

    def _get_edgelistpv(self, inter=False):
        """Return the PairVector SWIG-wrapped version of the edgelist."""
        if inter:
            try:  # found that type numpy.int doesn't work
                _edgelistpv = PairVector(self.interlayer_edges)  # cpp wrapper for list
            except Exception:
                self.interlayer_edges = [(int(e[0]), int(e[1])) for e in self.interlayer_edges]
                _edgelistpv = PairVector(self.interlayer_edges)
        else:
            try:
                _edgelistpv = PairVector(self.intralayer_edges)
            except Exception:
                self.intralayer_edges = [(int(e[0]), int(e[1])) for e in self.intralayer_edges]
                _edgelistpv = PairVector(self.intralayer_edges)
        return _edgelistpv

    def _get_partition(self, ind, use_effective=True):
        """
        Take the argmax over the marginals, with randomly broken ties.

        :param ind: index of the marginal to use
        :return: array of community assignments
        """
        # thanks to SO 42071597
        marginal = self.marginals[ind]

        def argmax_breakties(x):
            return np.random.choice(np.flatnonzero(np.abs(x - x.max()) < np.power(10.0, -6)))

        parts = np.apply_along_axis(func1d=argmax_breakties, arr=marginal, axis=1)
        if use_effective:
            # map each marginal onto its effective community (marginals that are
            # very close together are treated as a single community)
            parts = np.array([self.marginal_to_comm_number[ind][x] for x in parts])
        return parts

    def get_bstar(self, q, omega=0):
        """Estimate b*, the critical inverse temperature, for the given q and omega."""
        # an earlier pure-python estimate used the average excess degree
        # c = <d^2>/<d> - 1 and returned log(q / (sqrt(c) - 1) + 1);
        # this is now computed on the c++ side.
        if self._bpmod is None:
            self._bpmod = BP_Modularity(layer_membership=self._layer_vec_ia,
                                        intra_edgelist=self._intraedgelistpv,
                                        intra_edgeweight=self._cpp_intra_weights,
                                        inter_edgelist=self._interedgelistpv,
                                        _n=self.n, _nt=self.nlayers, q=q,
                                        beta=1.0,  # beta doesn't matter here
                                        omega=omega, transform=False)
        return self._bpmod.compute_bstar(omega, int(q))  # q must be an int

    def _get_retrieval_modularity(self, nrun=None):
        """
        Return the multilayer modularity of the partition from the given run.

        :param nrun: run index; defaults to the most recent run
        :return: retrieval modularity
        """
        if nrun is None:
            nrun = self.nruns  # get the last one
        resgamma, omega = self.retrieval_modularities.loc[nrun, ['resgamma', 'omega']]
        cpartition = self.partitions[nrun]  # must have already been run
        return calc_modularity(self.graph, partition=cpartition, resgamma=resgamma, omega=omega)

    def _get_community_distances(self, ind, thresh=np.power(10.0, -3)):
        """
        Calculate the average distance between the marginals of each pair of \
        communities, defined by:

        :math:`d_{l,k}=\\frac{1}{N}\\sum_{i}(\\psi_{i}^{l}-\\psi_{i}^{k})^2`

        Communities whose distance is below the threshold are close enough to be \
        considered a single community.

        :return: None
        """
        try:
            cmarginal = self.marginals[ind]
        except KeyError:
            raise KeyError("Cannot find partition with index {}".format(ind))

        q = cmarginal.shape[1]  # get q directly from the marginals
        distmat = np.zeros((q, q))
        # everyone starts out in their own group; two groups are merged
        # whenever their pairwise distance falls below the threshold
        groups = dict(zip(range(q), [{i} for i in range(q)]))
        for k, l in itertools.combinations(range(q), 2):
            dist_kl = np.mean(np.power(cmarginal[:, k] - cmarginal[:, l], 2.0))
            distmat[k, l] = dist_kl
            distmat[l, k] = dist_kl
            if dist_kl <= thresh:
                comb = groups[l].union(groups[k])
                for val in comb:  # have to update everyone's groups
                    groups[val] = comb

        self.marginal_index_to_close_marginals[ind] = groups
        commsets = list(set([frozenset(s) for s in groups.values()]))
        revmap = {}
        for i, comset in enumerate(commsets):
            for val in comset:
                # use the minimum community in each set so that most of the
                # community labels retain their mapping
                revmap[val] = np.min(list(comset))
        # remap labels into the range 0..len(commsets)-1
        available = set(np.arange(len(commsets))).difference(set(revmap.values()))
        valsremap = {}  # ensures that all values are remapped consistently
        for k, val in list(set(revmap.items())):
            if val >= len(commsets):
                if val not in valsremap:
                    valsremap[val] = available.pop()
                revmap[k] = valsremap[val]
        if np.max(list(revmap.values())) >= len(set(revmap.values())):
            raise AssertionError
        self.marginal_to_comm_number[ind] = revmap

    def _groupmap_to_permutation_vector(self, ind):
        """
        Create a vector denoting which communities should be collapsed.  Each \
        element of the array is the new community label for that index. \
        E.g. [0, 0, 1, 2] denotes a collapse from 4 communities to 3 in which \
        the 0th and 1st old communities are merged into a single set of marginals.

        :param ind:
        :return: numpy array of new labels
        """
        revgroupmap = self.marginal_to_comm_number[ind]
        outarray = np.arange(np.max(list(revgroupmap.keys())) + 1)
        for k, val in revgroupmap.items():
            outarray[k] = val
        return outarray

    def _get_true_number_of_communities(self, ind):
        """
        Number of distinct communities after merging close marginals, counting \
        only groups with at least self.min_community_size members.

        :param ind:
        :return: int
        """
        if ind not in self.marginal_index_to_close_marginals.keys():
            self._get_community_distances(ind)
        groupmap = self.marginal_index_to_close_marginals[ind]
        # create a set of (immutable) frozensets and take its length
        if self.min_community_size == 0:
            return len(set([frozenset(s) for s in groupmap.values()]))
        else:
            return len(set([frozenset(s) for s in groupmap.values()
                            if len(s) >= self.min_community_size]))

    def _is_trivial(self, ind, thresh=np.power(10.0, -3)):
        """
        Use the same metric as _get_community_distances to decide whether the \
        marginals are close to the trivial (uniform) distribution.

        :param ind: index of marginal to examine
        :return: True if the partition is close enough to trivial, False if it \
        is sufficiently different
        """
        cmarginal = self.marginals[ind]
        trivial = np.ones(cmarginal.shape) / cmarginal.shape[1]
        return bool(np.mean(np.power(cmarginal - trivial, 2.0)) < thresh)

    def _get_number_switched_single_layer(self, layer, partition, percent=False):
        layers = self.layers_unique
        if layer == 0:
            return 0
        # the interlayer edges are identity connections across layers;
        # we check which of these identities switch communities
        prev_layer = layers[np.where(layers == layer)[0][0] - 1]
        interedges = self.graph.interedgesbylayers[(layer, prev_layer)]  # use the previous layer
        num_switched = 0
        for e in interedges:
            ei, ej = e[0], e[1]
            if partition[ei] != partition[ej]:
                num_switched += 1
        if percent:
            num_switched /= float(len(interedges))
        return num_switched

    def _initialize_final_permutation_dict_all_layers(self, ind):
        """
        Keep track of which community every community is mapped to in each layer \
        during the permutation sweep that aligns the layers.

        :param ind:
        :return: list with one dictionary for each layer
        """
        final_permutation_dict = []
        for layer in self.layers_unique:
            # all the possible community labels, each initially mapped to itself
            partvals = np.unique(list(self.marginal_to_comm_number[ind].values()))
            final_permutation_dict.append(dict(zip(partvals, partvals)))
        return final_permutation_dict

    def _perform_permuation_sweep(self, ind):
        """
        Find the layer with the largest difference from the preceding layer, \
        then permute the labels of that layer and every layer after it. \
        Repeat until no more flips are performed.

        :param ind: partition to perform the permutation on
        :return: number of sweeps performed, to keep track of whether any \
        layers were actually shuffled
        """
        max_iters = 100
        niters = 0
        # for each sweep we keep track of which communities are switched within
        # each layer; we start out with each community mapped to itself
        self._permutation_vectors[ind] = self._initialize_final_permutation_dict_all_layers(ind=ind)
        while niters < max_iters:
            number_switched = self.get_number_nodes_switched_all_layers(ind=ind, percent=True)
            max_layer_switched = np.argmax(number_switched)
            permdict = self._create_layer_permutation_single_layer(ind, max_layer_switched)
            if all([k == v for k, v in permdict.items()]):
                break  # nothing changed
            for layer in range(max_layer_switched, self.layers_unique[-1] + 1):
                # permute this layer and all layers after it
                self._permute_layer_with_dict(ind, layer=layer, permutation=permdict)
            niters += 1
        return niters

    def get_number_nodes_switched_all_layers(self, ind, percent=False):
        """
        For each layer, how many nodes switched communities relative to the \
        previous layer (0 for the first layer).

        :param ind: index of the partition to check
        :param percent: return the percentage switched instead of the count
        :return: array
        """
        cpart = self.partitions[ind]
        layers = self.layers_unique
        switched = np.zeros(len(layers))
        for i, layer in enumerate(layers):
            switched[i] = self._get_number_switched_single_layer(layer, cpart, percent=percent)
        return switched

    def _get_previous_layer_inds_dict(self, layer):
        """
        Return two dictionaries mapping node indices between the previous layer \
        and the current layer.  These define the adjacent layer of nodes.

        :return: cur2prev = dict mapping current-layer ids to previous-layer ids \
        based on the interlayer edges; prev2cur = dict mapping previous-layer ids \
        to current-layer ids.  Both are based on the interlayer edges and neither \
        is strictly one-to-one.
        """
        if layer == 0:  # nothing in the previous layer
            return {}, {}
        if "interlayer_edge_dict" not in self.__dict__:
            # interlayer_edge_dict = {node: [nodes with interlayer connections to it], ...}
            self.interlayer_edge_dict = {}
            for e in self.interlayer_edges:
                ei, ej = e[0], e[1]
                self.interlayer_edge_dict[ei] = self.interlayer_edge_dict.get(ei, []) + [ej]
                self.interlayer_edge_dict[ej] = self.interlayer_edge_dict.get(ej, []) + [ei]
        cur_inds = np.where(self.layer_vec == layer)[0]
        prev_inds = np.where(self.layer_vec == (layer - 1))[0]
        prev2cur = {}
        cur2prev = {}
        for ind in prev_inds:
            if ind in self.interlayer_edge_dict:
                prev2cur[ind] = list(set(self.interlayer_edge_dict[ind]).intersection(cur_inds))
        # construct the mapping in the other direction
        for prevind in prev2cur.keys():
            for cind in prev2cur.get(prevind, []):
                cur2prev[cind] = list(set(cur2prev.get(cind, []) + [prevind]))
        return cur2prev, prev2cur

    def _create_layer_permutation_single_layer(self, ind, layer):
        """
        Identify the permutation of community labels that minimizes the number \
        of nodes switched at the specified layer.

        :param ind:
        :return: dict mapping current-layer community labels to previous-layer labels
        """
        # we switch only the communities in that layer
        cind = np.where(self.layer_vec == layer)[0]
        layers = self.layers_unique
        prev_layer = layers[np.where(layers == layer)[0][0] - 1]
        prevind = np.where(self.layer_vec == prev_layer)[0]
        cur2prev_inds, prev2cur_inds = self._get_previous_layer_inds_dict(layer)

        curpart = self.partitions[ind][cind]
        prevpart = self.partitions[ind][prevind]
        curcoms = np.unique(curpart)
        prevcoms = np.unique(prevpart)
        distmat = np.zeros((len(prevcoms), len(curcoms)))
        prevcoms2_i = dict(zip(prevcoms, range(len(prevcoms))))
        curcoms2_j = dict(zip(curcoms, range(len(curcoms))))

        # accumulate, for each (previous community, current community) pair, the
        # normalized number of interlayer identities that would disagree
        for prev_ind in prev2cur_inds.keys():
            pre_com = self.partitions[ind][prev_ind]
            i = prevcoms2_i[pre_com]
            for cur_ind in prev2cur_inds[prev_ind]:
                cur_com = self.partitions[ind][cur_ind]
                j = curcoms2_j[cur_com]
                distmat[i, :] += (1.0 / len(prev2cur_inds[prev_ind]))
                distmat[i, j] -= (1.0 / len(prev2cur_inds[prev_ind]))
        for cur_ind in cur2prev_inds.keys():
            cur_com = self.partitions[ind][cur_ind]
            j = curcoms2_j[cur_com]
            for prev_ind in cur2prev_inds[cur_ind]:
                prev_com = self.partitions[ind][prev_ind]
                i = prevcoms2_i[prev_com]
                distmat[:, j] += (1.0 / len(cur2prev_inds[cur_ind]))
                distmat[i, j] -= (1.0 / len(cur2prev_inds[cur_ind]))
        # (a simpler mismatch count works for multiplex networks, but not for
        # more general multilayer topologies)

        # solve the bipartite minimum-cost matching with the Munkres algorithm
        row_ind, col_ind = sciopt.linear_sum_assignment(distmat)
        colcoms = list(map(lambda x: curcoms[x], col_ind))
        rwcoms = list(map(lambda x: prevcoms[x], row_ind))
        com_map_dict = dict(zip(colcoms, rwcoms))  # map current-layer coms to previous ones
        # the mapping needs to be one-to-one, so fill in communities that weren't mapped
        coms_remaining = set(curcoms).difference(list(com_map_dict.values()))
        comsnotmapped = set(curcoms).difference(list(com_map_dict.keys()))
        # things that are in both get mapped to themselves first
        for com in coms_remaining.intersection(comsnotmapped):
            com_map_dict[com] = com
            coms_remaining.remove(com)
            comsnotmapped.remove(com)
        for com in comsnotmapped:
            com_map_dict[com] = coms_remaining.pop()
        return com_map_dict

    def _permute_layer_with_dict(self, ind, layer, permutation):
        """
        Permute the labels of a given layer with the given dictionary.  Any \
        community not present in the dictionary is mapped to itself.

        :param ind: which partition to permute
        :param layer: the layer to permute
        :param permutation: dictionary mapping current community values to new \
        permuted community values
        :return: None
        """
        lay_inds = np.where(self.layer_vec == layer)[0]
        old_layer = self.partitions[ind][lay_inds]
        curcoms = np.unique(self.partitions[ind])  # use all communities here
        curpermutation = {k: v for k, v in permutation.items()}
        # ensure that communities don't get mapped to targets that aren't
        # themselves mapped to some other community
        comsremain = set(list(curcoms) + list(permutation.keys())).difference(permutation.values())
        coms2match = set(curcoms).difference(curpermutation.keys())  # communities that still need a target
        for com in comsremain.intersection(coms2match):  # these map to themselves
            curpermutation[com] = com
            comsremain.remove(com)
            coms2match.remove(com)
        for com in coms2match:
            curpermutation[com] = comsremain.pop()

        self.partitions[ind][lay_inds] = \
            list(map(lambda x: curpermutation[x], self.partitions[ind][lay_inds]))

        # also apply the map to the final permutation dictionary; no communities
        # should be merged or destroyed in this mapping, and each value (community
        # from the previous permutation) should still be in the dictionary
        for k, val in self._permutation_vectors[ind][layer].items():
            if val in curpermutation:  # otherwise not affected
                self._permutation_vectors[ind][layer][k] = curpermutation[val]
        assert len(set(self._permutation_vectors[ind][layer].values())) == \
            len(self._permutation_vectors[ind][layer].values()), 'community lost in permutation'
        # sanity check: communities within the layer shouldn't change
        assert np.abs(skm.adjusted_mutual_info_score(
            old_layer, self.partitions[ind][lay_inds]) - 1) < np.power(10.0, -6)

    def _create_all_layers_permuation_vector(self, ind):
        """
        Use the dictionary representation to create the permutation vector that \
        is passed into the modularity BP object.  Each row represents a layer, \
        and each position holds the new position it maps to, i.e. [1, ...] \
        denotes that the zeroth belief becomes the first belief (marginal).

        :param ind:
        :return: layers-by-communities array
        """
        layers = self.layers_unique
        numcoms = len(set(self.marginal_to_comm_number[ind].values()))
        outarray = np.zeros((len(layers), numcoms))  # layers by communities (after combining)
        for i, layer in enumerate(layers):
            currow = range(numcoms)
            # use the final mapping dictionary to map each community in this layer
            currow = list(map(lambda x: self._permutation_vectors[ind][layer][x], currow))
            outarray[i, :] = currow
        return outarray

    def _merge_communities_bp(self, ind):
        """
        Merge communities that have the same marginals across all of the nodes.

        :param ind:
        :return: the new number of communities
        """
        # type cast to int for the SWIG wrapper
        merge_vec = IntArray([int(x) for x in self._groupmap_to_permutation_vector(ind)])
        self._bpmod.merge_communities(merge_vec)
        return len(set(self.marginal_to_comm_number[ind].values()))  # new number of communities

    def _switch_beliefs_bp(self, ind):
        """
        Permute the beliefs (marginals) on the BP side.  Should only be called \
        after _perform_permuation_sweep.

        :param ind: modbp run index
        """
        perm_vec_c = self._create_all_layers_permuation_vector(ind).astype(int)
        # type cast each entry to int for the SWIG wrapper
        perm_vec_c = IntMatrix([[int(x) for x in y] for y in perm_vec_c])
        self._bpmod.permute_beliefs(perm_vec_c)

    def plot_communities(self, ind=None, layers=None, ax=None, cmap=None):
        """
        Plot the community assignments as a nodes-by-layers heatmap.

        :param ind: run index to plot; if None, the graph's ground-truth comm_vec is used
        :param layers: layers to include (default: all)
        :return: matplotlib axis
        """
        if layers is None:
            layers = self.layers_unique

        def get_partition_matrix(partition, layer_vec):
            # assumes the partition has the same node ordering in each layer
            vals = np.unique(layer_vec)
            nodeperlayer = len(layer_vec) // len(vals)
            com_matrix = np.zeros((nodeperlayer, len(vals)))
            for i, val in enumerate(vals):
                cind = np.where(layer_vec == val)[0]
                ccoms = partition[cind]
                com_matrix[:, i] = ccoms
            return com_matrix

        cinds = np.where(np.isin(self.layer_vec, layers))[0]
        if ind is None:  # use the ground truth as a baseline
            assert self.graph.comm_vec is not None, "Must specify ground truth comm_vec for graph"
            cpart = self.graph.comm_vec
        else:
            cpart = self.partitions[ind][cinds]
        vmin = np.min(cpart)
        vmax = np.max(cpart)
        clayer_vec = np.array(self.layer_vec)[cinds]
        part_mat = get_partition_matrix(cpart, clayer_vec)

        if ax is None:
            ax = plt.axes()
        if cmap is None:
            cmap = sbn.cubehelix_palette(as_cmap=True)
        ax.grid(False)
        ax.pcolormesh(part_mat, cmap=cmap, vmin=vmin, vmax=vmax)
        ax.set_xticks(range(0, len(layers)))
        ax.set_xticklabels(layers)
        return ax


def _get_avg_entropy(marginal):
    """Calculate the average normalized entropy of the marginals."""
    entropies = []
    for i in range(marginal.shape[0]):
        entropies.append(stats.entropy(marginal[i]) / np.log(marginal.shape[1]))
    return np.mean(entropies)


def calc_modularity(graph, partition, resgamma, omega):
    """
    Calculate the multilayer modularity of the graph for the given partition, \
    resolution, and omega.

    :param graph: GenerateGraphs.MultilayerGraph object
    :param partition:
    :param resgamma:
    :param omega:
    :return: modularity value
    """
    # we sort indices into groups with like community labels
    com_inddict = {}
    allcoms = sorted(list(set(partition)))
    Ahat = 0
    Chat = 0

    def part_equal(x):
        if partition[int(x[0])] == partition[int(x[1])]:
            if len(x) > 2:
                return x[2]  # the edge weight should be x[2]
            else:
                return 1.0
        else:
            return 0.0

    intra_edges = graph.intralayer_edges
    inter_edges = graph.interlayer_edges
    # zip the weights into the edges
    if graph.intralayer_weights is not None:
        intra_edges = [(e[0], e[1], graph.intralayer_weights[i])
                       for i, e in enumerate(intra_edges)]
    if graph.interlayer_weights is not None:
        inter_edges = [(e[0], e[1], graph.interlayer_weights[i])
                       for i, e in enumerate(inter_edges)]

    # for Ahat and Chat we simply iterate over the edges and count internal ones
    if len(intra_edges) > 0:
        Ahat = np.sum(np.apply_along_axis(func1d=part_equal, arr=intra_edges, axis=1))
    else:
        Ahat = 0
    if not graph.is_directed:
        Ahat = Ahat * 2.0
    if len(inter_edges) > 0:
        Chat = np.sum(np.apply_along_axis(func1d=part_equal, arr=inter_edges, axis=1))
    else:
        Chat = 0
    if not graph.is_directed:
        Chat = Chat * 2.0

    # We calculate Phat a little differently since it requires the degrees of
    # all members of each group.  Store the indices of each community together.
    for i, val in enumerate(partition):
        try:
            com_inddict[val] = com_inddict.get(val, []) + [i]
        except TypeError:
            raise TypeError("Community labels must be hashable- isinstance(%s,Hashable): %s"
                            % (str(val), str(isinstance(val, Hashable))))
    # convert the indices stored together to numpy arrays
    for k, val in iteritems(com_inddict):
        com_inddict[k] = np.array(val)

    Phat = 0
    degrees = graph.get_intralayer_degrees()  # degrees/strengths for all layers
    for i in range(graph.nlayers):
        layersum = 0
        c_layer_inds = np.where(graph.layer_vec == i)[0]
        for com in allcoms:
            cind = com_inddict[com]
            # get only the inds in this layer
            cind_com = cind[np.where(np.isin(cind, c_layer_inds))[0]]
            cdeg = degrees[cind_com]
            if cind_com.shape[0] == 1:
                continue  # contribution is 0
            else:
                cPmat = np.outer(cdeg, cdeg.T)
                layersum += np.sum(cPmat)
        layersum /= (2.0 * graph.intra_edge_counts[i])
        Phat += layersum

    mu = 2.0 * graph.totaledgeweight if not graph.is_directed else graph.totaledgeweight
    return (1.0 / mu) * (Ahat - resgamma * Phat + omega * Chat)
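

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only).  The toy edge lists below are
    # made up for demonstration: a two-layer graph with two planted communities
    # per layer, coupled by identity interlayer edges.  Running this requires
    # the compiled bp extension that ships with modbp.
    intra = np.array([[0, 1], [1, 2], [3, 4], [4, 5],      # layer 0
                      [6, 7], [7, 8], [9, 10], [10, 11]])  # layer 1
    inter = np.array([[i, i + 6] for i in range(6)])       # identity coupling
    layer_vec = [0] * 6 + [1] * 6

    mbp = ModularityBP(intralayer_edgelist=intra,
                       interlayer_edgelist=inter,
                       layer_vec=layer_vec,
                       use_effective=True)
    beta = mbp.get_bstar(q=2, omega=1.0)  # heuristic starting inverse temperature
    mbp.run_modbp(beta=beta, q=2, niter=100, resgamma=1.0, omega=1.0)
    print(mbp.retrieval_modularities[['q', 'beta', 'retrieval_modularity',
                                      'niters', 'num_coms']])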