# Source code for netsim.generate

"""
This module contains all the functions needed to set up a network simulation.

"""

import numpy as np
import pandas as pd
import geopandas as gpd
from collections import OrderedDict
from itertools import permutations, product
from math import factorial


# constants used in generate

#: number of randomized samples drawn for a (non-single) group that is iterated
#: by sampling; a group is only fully permuted while its number of permutations
#: stays below this value.
NSAMPLES = 100

#: (non-single) groups with more points than this are always sampled, never permuted.
MAX_PERMUTATION_NUM = 7

#: (non-single) groups with at most this many points are always permuted, never sampled.
MIN_NUM_SAMPLE = 3

#: maximum total number of iterations allowed in the simulation.
#: NOTE(review): not referenced by the functions visible in this part of the
#: module - confirm where (or whether) it is enforced.
MAX_ITERATIONS = 5000



def check(df):
    '''
    Checks and corrects input geo/pandas dataframe.

    Parameters
    ----------
    df: geo/dataframe
        contains locations used in the simulation

    Returns
    -------
    c_df: dataframe
        corrected geo/dataframe ready for netsim. Column headers are
        lower-cased and missing *id*, *group* and *seq* columns are added.

    Raises
    ------
    ValueError
        if the *id* values are not unique, or if the *seq* values of any
        group are neither all equal to 1 nor all distinct.

    Notes
    -----
    This function checks for the existence of the columns needed in the
    *netsim* simulation. Columns must have the appropriate header (as shown
    below, case-insensitive). Columns that do not exist are generated and
    populated with default values. The validity of values in existing columns
    is also checked. Minor corrections are printed; major errors are printed
    and then raise an exception.

    The following columns need to be present:

    - *id*: exclusive identifier for each location. Created as 0..n-1 when
      missing.

    - *group*: identifies a location as being part of a specific group.
      Groups can be of any size. A column with a single group affiliation
      (default value 1) is created in case this column does not exist.

    - *seq*: identifies rank/ordering of a location within a group. There are
      two accepted scenarios:

        - *No ordering/ranking (default)*. All locations of the group carry
          the value 1.

        - *Ordering/ranking*. Every location of the group carries a distinct
          value (no repetitions).
    '''
    # work on a renamed copy so that column lookups are case-insensitive
    c_df = df.rename(str.lower, axis='columns')

    nrows = len(c_df)
    colnames = list(c_df)

    # messages collected along the way; errors are reported together at the end
    msg = []
    error_flag = False

    # id
    if 'id' not in colnames:
        # int16 is kept for small inputs (historical dtype) but it cannot
        # represent ids above 32767, so widen when the row count requires it
        id_dtype = np.int16 if nrows <= np.iinfo(np.int16).max + 1 else np.int64
        c_df['id'] = pd.Series(np.arange(nrows, dtype=id_dtype), index=c_df.index)
    elif nrows != len(c_df['id'].unique()):
        msg.append('\n ERROR: id column - ids are not unique !!!')
        error_flag = True

    # group
    if 'group' not in colnames:
        # default: every location belongs to the same group (1)
        c_df['group'] = pd.Series(np.full(nrows, int(1)), index=c_df.index)
        msg.append('group column - created group column with single group !')
    groups = c_df['group'].unique()

    # seq
    if 'seq' not in colnames:
        # default: no ordering within any group
        c_df['seq'] = pd.Series(np.full(nrows, int(1)), index=c_df.index)
        msg.append('seq column - created sequence with no sequence (default 1.) !')
    else:
        for g in groups:
            seq = c_df.loc[c_df['group'] == g, 'seq']
            all_one = (seq == 1).all()
            all_distinct = len(seq.unique()) == len(seq)
            if not (all_one or all_distinct):
                error_flag = True
                msg.append('\n ERROR: seq column - sequence for group '+str(g)+' is not 1 or sequential!')

    # report
    if msg != []:
        for m in msg:
            print('\n'+m)
        if error_flag:
            # a bare string cannot be raised in Python 3; use a real exception
            raise ValueError('\nCheck errors !!! Network simulation ABORTED!! ')
    else:
        print('\n No corrections or errors !! ')

    return c_df
def __shuffle(arr, nsamples):
    '''
    Generator function used to create randomized samples.

    Parameters
    ----------
    arr: list or 1D numpy array
        sequence to be randomized. It is not modified: shuffling is done on a
        private copy.
    nsamples: int
        number of randomized samples we want the generator to produce

    Yields
    ------
    sample: tuple
        the elements of *arr* in a random order. The same ordering may be
        produced more than once.

    Notes
    -----
    Internal function called by create_network_generator()
    '''
    # copy first so the caller's sequence keeps its original order
    pool = arr.copy()
    for _ in range(nsamples):
        np.random.shuffle(pool)
        yield tuple(pool)
def _plan_group_iteration(ids, num_distinct_seq):
    '''
    Chooses how the locations of a single group are iterated.

    Parameters
    ----------
    ids: list
        identifiers of the locations in the group
    num_distinct_seq: int
        number of distinct *seq* values found in the group

    Returns
    -------
    iter_type: str
        'single', 'permutation' or 'sample'
    num_iter: int
        number of orderings the group contributes
    orderings: iterable of tuples
        iterable producing the orderings of *ids*
    '''
    num_pts = len(ids)

    # every location has its own seq value: one fixed ordering
    # NOTE(review): ids are kept in dataframe row order here; the seq values
    # themselves are not used to sort them - confirm rows are expected to be
    # pre-sorted by seq.
    if num_pts == num_distinct_seq:
        return 'single', 1, [tuple(ids)]

    if num_pts <= MIN_NUM_SAMPLE:
        # too small to sample so permute
        enumerate_all = True
    else:
        # permute only while the group is small enough and the number of
        # permutations stays below the number of samples we would draw;
        # factorial() is only evaluated for small groups
        enumerate_all = (num_pts <= MAX_PERMUTATION_NUM
                         and factorial(num_pts) < NSAMPLES)

    if enumerate_all:
        return 'permutation', factorial(num_pts), permutations(ids)
    return 'sample', NSAMPLES, __shuffle(ids, NSAMPLES)


def create_network_generator(df):
    '''
    Generates network generator.

    Parameters
    ----------
    df: geo/dataframe
        contains locations, group membership and parameters used in netsim.
        The columns *id*, *group* and *seq* are read.

    Returns
    -------
    netgentor: iterator
        network generator. Each item is a tuple holding one tuple of location
        ids per group (groups in sorted order).
    df_net_info: dataframe
        contains information about each group (see *Notes* below)
    total_num_iter: int
        total number of iterations

    Notes
    -----
    This function returns three different outputs:

    1. A network generator that results from the cartesian product of
       separate generators (one per group of locations as identified thru the
       *group* column). Each *group* generator is of one of three types:

       - *single*: returns always the same combination of locations. Used
         when every location in the group has a distinct *seq* value.
       - *sample*: returns a shuffled version of the locations. *n.b.*
         repetition can occur.
       - *permutation*: returns permutations of the locations (no repetition).

    2. A dataframe, *df_net_info*, containing generator information for each
       group (group, number of locations, number of iterations, generator
       type).

    3. Total number of iterations that results from the combination of all
       group generators.

    The per-group summary and the total are also printed.
    '''
    groups = df['group'].sort_values().unique()

    netgentor = []                                    # one iterable of orderings per group
    net_info = {'group': [], 'num_loc': [],
                'num_iter': [], 'iter_type': []}      # generator summary
    total_num_iter = 1

    for grp in groups:
        df_grp = df.loc[df['group'] == grp]           # points with the same group id
        indxs = list(df_grp['id'])                    # their ids, in row order
        num_distinct_seq = len(df_grp['seq'].unique())

        iter_type, num_iter_grp, orderings = _plan_group_iteration(indxs, num_distinct_seq)

        net_info['group'].append(grp)
        net_info['num_loc'].append(len(indxs))
        net_info['num_iter'].append(num_iter_grp)
        net_info['iter_type'].append(iter_type)
        netgentor.append(orderings)

        total_num_iter = total_num_iter * num_iter_grp

    df_net_info = pd.DataFrame(net_info)

    print('\n iteration broken per group....\n')
    print(df_net_info)
    print('\n total number of iterations....',total_num_iter)

    return product(*netgentor), df_net_info, total_num_iter
def _ring_paths(ids, both_ways):
    '''Yields (origin, destination) pairs linking *ids* in order and closing back on the first one.'''
    for origin, destination in zip(ids, ids[1:] + ids[:1]):
        yield origin, destination
        if both_ways:
            yield destination, origin


def _cross_paths(origins, destinations, both_ways):
    '''Yields (origin, destination) pairs from every origin to every destination.'''
    for origin in origins:
        for destination in destinations:
            yield origin, destination
            if both_ways:
                yield destination, origin


def _mesh_paths(ids, both_ways):
    '''Yields one (origin, destination) pair for every unordered pair of *ids*.'''
    for pos, origin in enumerate(ids[:-1]):
        for destination in ids[pos + 1:]:
            yield origin, destination
            if both_ways:
                yield destination, origin


def network_layout(df, iteration, iter_num, df_net= None, twoway= False, opt= 'close'):
    '''
    Creates a dataframe with information for each path in network.

    Parameters
    ----------
    df: dataframe
        contains list of locations and group membership used to generate a
        network. The columns *id* and *group* are read.
    iteration: tuple of tuples
        a tuple containing one or several tuples, one per group, representing
        a single network iteration
    iter_num: int
        iteration identifier
    df_net: dataframe, optional
        contains the identifiers of the origin and destination of each path
        plus the iteration identifier. When given, new paths are appended to
        it in place; when omitted, an empty dataframe is created.
    twoway: boolean
        if **True** two-way paths are generated for each pair of locations in
        a network. *Default*: **False**.
    opt: string
        type of network to generate. The options are as follows:

        - *close*: This option defines an independent closed network of paths
          for each group. The locations are connected in order so that the
          first location is connected to the second one and so on until the
          last location is connected to the first. No path is defined for a
          group made of a single location (a warning is printed).

        - *central*: This option defines a centralized set of paths from the
          locations of every remaining group to all of the locations of the
          first group.

        - *decentral*: This option defines a network of paths so that the
          locations of each group are connected to the locations of the
          preceding (higher level) group.

        - *distributed*: Similar to *decentral* and, in addition, locations
          within each group after the first are interconnected.

        - *all*: This option defines a network of paths amongst all
          locations.

        For *central*, *decentral* and *distributed* the locations of the
        first group are connected as in *close* when the group has more than
        one location. When there is only one group, every option other than
        *close* behaves as *all*.

    Returns
    -------
    df_net: dataframe
        contains the identifiers of the origin and destination of each path
        plus the iteration identifier (columns *origin*, *destination*,
        *iteration*)

    Raises
    ------
    ValueError
        if *opt* is not one of the valid options.

    Notes
    -----
    This function takes a dataframe with locations, an ordering of these
    locations obtained after running ``create_network_generator()`` and an
    iteration identifier number. It generates, or updates, the *df_net*
    dataframe with the identifiers of the origin and destination of each path
    that make up the path network for this specific iteration.

    A closed chain of exactly two locations already contains both directions,
    so *twoway* adds reverse paths to a chain only when the group has more
    than two locations.
    '''
    # option valid?
    options = ['close', 'central', 'decentral','distributed', 'all']
    if opt not in options:
        raise ValueError("'opt' not valid!!! Choose from {}".format(options))

    # create df_net?
    if df_net is None:
        net = {'origin': 'int32', 'destination': 'int32', 'iteration': 'int32'}
        df_net = pd.DataFrame(columns=list(net.keys())).astype(net)

    n_groups = len(df['group'].unique())

    # number of ids per group, ordered by sorted group label
    size_by_group = df.groupby('group')['id'].count()
    grp_labels = size_by_group.index
    grp_sizes = size_by_group.to_numpy()

    iteration = [list(g) for g in iteration]

    def chain(i):
        # reverse links are only added for more than two locations, otherwise
        # the closing link would be duplicated
        return _ring_paths(iteration[i], twoway and grp_sizes[i] > 2)

    paths = []
    if opt == 'close':
        for i in range(n_groups):
            if grp_sizes[i] > 1:
                paths.extend(chain(i))
            else:
                print('\nWARNING: No path calculated for group {} (size = {})'.format(
                    grp_labels[i], grp_sizes[i]))

    elif opt == 'all' or n_groups == 1:
        # with a single group all remaining options are treated as 'all'
        if n_groups == 1:
            everyone = iteration[0]
        else:
            everyone = [loc for grp in iteration for loc in grp]
        paths.extend(_mesh_paths(everyone, twoway))

    else:
        # 'central', 'decentral' and 'distributed' share the same skeleton
        if grp_sizes[0] > 1:
            paths.extend(chain(0))

        upper = iteration[0]
        for level in iteration[1:n_groups]:
            # paths from this level to the level it hangs from
            paths.extend(_cross_paths(level, upper, twoway))
            if opt == 'distributed':
                # plus paths between locations of the same level
                paths.extend(_mesh_paths(level, twoway))
            if opt != 'central':
                # next level hangs from this one instead of the first group
                upper = level

    # append one row per path, labelled with the next positional index
    for origin, destination in paths:
        df_net.loc[len(df_net.index)] = [origin, destination, iter_num]

    return df_net