# Source code for synthpops.contact_networks

"""
This module generates the household, school, and workplace contact networks.
"""

import sciris as sc
import numpy as np
import pandas as pd
import networkx as nx
from . import data_distributions as spdata
from . import schools as spsch
from .config import logger as log, checkmem


def make_contacts(pop, age_by_uid, homes_by_uids, students_by_uid_lists=None, teachers_by_uid_lists=None,
                  non_teaching_staff_uid_lists=None, workplace_by_uid_lists=None, facilities_by_uid_lists=None,
                  facilities_staff_uid_lists=None, use_two_group_reduction=False, average_LTCF_degree=20,
                  with_school_types=False, school_mixing_type='random', average_class_size=20,
                  inter_grade_mixing=0.1, average_student_teacher_ratio=20, average_teacher_teacher_degree=3,
                  average_student_all_staff_ratio=15, average_additional_staff_degree=20, school_type_by_age=None,
                  workplaces_by_industry_codes=None, max_contacts=None):
    """
    From microstructure objects (dictionary mapping ID to age, lists of lists in
    different settings, etc.), create a dictionary of individuals.

    Each key is the ID of an individual which maps to a dictionary for that
    individual with attributes such as their age, household ID (hhid), school ID
    (scid), workplace ID (wpid), workplace industry code (wpindcode) if
    available, and contacts in different layers.

    Args:
        pop                                  : population object; this function sets ``pop.average_class_size`` and ``pop.schools_in_groups`` on it
        age_by_uid (dict)                    : dictionary mapping id to age for all individuals in the population
        homes_by_uids (list)                 : A list of lists where each sublist is a household and the IDs of the household members
        students_by_uid_lists (list)         : A list of lists, where each sublist represents a school and the ids of the students within it. None is treated as no schools
        teachers_by_uid_lists (list)         : A list of lists, where each sublist represents a school and the ids of the teachers within it (indexed in parallel with students_by_uid_lists)
        non_teaching_staff_uid_lists (list)  : None, an empty list, or a list of lists, where each sublist represents a school and the ids of the non teaching staff within it
        workplace_by_uid_lists (list)        : A list of lists, where each sublist represents a workplace and the ids of the workers within it. None is treated as no workplaces
        facilities_by_uid_lists (list)       : None or a list of lists, where each sublist represents a long term care facility and the ids of the residents living within it. If None, no 'LTCF' layer or ltcf attributes are created
        facilities_staff_uid_lists (list)    : A list of lists, where each sublist represents a long term care facility and the ids of the staff working within it (indexed in parallel with facilities_by_uid_lists)
        use_two_group_reduction (bool)       : If True, create long term care facilities with reduced contacts across both groups; otherwise each facility is fully connected
        average_LTCF_degree (int)            : average degree in long term care facilities when use_two_group_reduction is True
        with_school_types (bool)             : If True, creates explicit school types
        school_mixing_type (str or dict)     : The mixing type for schools, 'random', 'age_clustered', or 'age_and_class_clustered' if string, and a dictionary of these by school type otherwise. School types missing from the dictionary default to 'random'
        average_class_size (float or dict)   : The average classroom size, or a dictionary of these keyed by school mixing type
        inter_grade_mixing (float)           : The average fraction of mixing between grades in the same school for clustered school mixing types
        average_student_teacher_ratio (float): The average number of students per teacher
        average_teacher_teacher_degree (float): The average number of contacts per teacher with other teachers
        average_student_all_staff_ratio (float): The average number of students per staff member at school. Not read by this function
        average_additional_staff_degree (float): The average number of contacts per additional non teaching staff in schools
        school_type_by_age (dict)            : A dictionary of the school type for each age; merged over the defaults from data_distributions
        workplaces_by_industry_codes (np.ndarray or None): array with workplace industry code for each workplace
        max_contacts (dict or None)          : If None, workplaces are fully connected. Otherwise workplace contacts are drawn from a random graph whose average degree is max_contacts['W'] (20 if the key is absent)

    Returns:
        A popdict of people with attributes. Dictionary keys are the IDs of
        individuals in the population and the values are a dictionary for each
        individual with their attributes, such as age, household ID (hhid),
        school ID (scid), workplace ID (wpid), workplace industry code
        (wpindcode) if available, and the IDs of their contacts in different
        layers. Layer keys created are 'H', 'S', 'W', 'C', and 'LTCF' when
        facilities are supplied; the 'C' layer is left empty by this function.
        All household members are contacts of each other. If
        use_two_group_reduction is True, then contacts within 'LTCF' are
        reduced from fully connected.

    Notes:
        If with_school_types==False, random schools are generated with respect
        to average_class_size, and other school parameters such as
        average_additional_staff_degree are not used.
    """
    log.debug('make_contacts_from_microstructure_objects()')

    # The signature defaults these to None; treat that as "no groups" instead of
    # failing later on enumerate(None).
    if students_by_uid_lists is None:
        students_by_uid_lists = []
    if workplace_by_uid_lists is None:
        workplace_by_uid_lists = []

    grade_age_mapping = {i: i + 5 for i in range(13)}
    age_grade_mapping = {i + 5: i for i in range(13)}
    age_grade_mapping[3] = 0
    age_grade_mapping[4] = 0

    # what are the school types by age
    school_type_by_age = sc.mergedicts(spdata.get_default_school_types_by_age_single(), school_type_by_age)
    school_types = list(set(school_type_by_age.values()))  # get the location specific school types whatever they may be

    # check school mixing type
    if isinstance(school_mixing_type, str):
        school_mixing_type_dic = dict.fromkeys(school_types, school_mixing_type)
    elif isinstance(school_mixing_type, dict):
        school_mixing_type_dic = sc.dcp(school_mixing_type)
        # if the dictionary given doesn't specify the mixing type for an expected school type,
        # set the mixing type for that school type to random by default
        school_mixing_type_dic = sc.mergedicts(dict.fromkeys(school_types, 'random'), school_mixing_type_dic)

    mixing_types_in_use = set(school_mixing_type_dic.values())
    age_and_class_clustered_flag = 'age_and_class_clustered' in mixing_types_in_use

    if not isinstance(average_class_size, dict):
        average_class_size_by_mixing_type = dict.fromkeys(mixing_types_in_use, average_class_size)
    else:
        average_class_size_by_mixing_type = sc.dcp(average_class_size)
        average_class_size_by_mixing_type = sc.mergedicts(dict.fromkeys(mixing_types_in_use), average_class_size_by_mixing_type)

    if age_and_class_clustered_flag:
        # Compare the class size that applies to this mixing type, so that a dict-valued
        # average_class_size no longer raises TypeError on `dict < number`.
        clustered_class_size = average_class_size_by_mixing_type['age_and_class_clustered']
        if clustered_class_size is not None and clustered_class_size < average_student_teacher_ratio:
            actual_classroom_size = max(clustered_class_size, average_student_teacher_ratio)
            average_class_size_by_mixing_type['age_and_class_clustered'] = actual_classroom_size
            warning_msg = f"average_class_size: {clustered_class_size} < average_student_teacher_ratio: {average_student_teacher_ratio}. \n In schools with mixing type 'age_and_class_clustered', synthpops will use the larger of the two to define the classroom sizes."
            log.warning(warning_msg)

    if len(average_class_size_by_mixing_type) > 1:
        pop.average_class_size = average_class_size_by_mixing_type
    else:
        pop.average_class_size = list(average_class_size_by_mixing_type.values())[0]

    popdict = {}

    # also need to return schools as well and not just school contacts
    schools = {}

    # Handle trimming
    do_trim = max_contacts is not None
    max_contacts = sc.mergedicts({'W': 20}, max_contacts)
    trim_keys = max_contacts.keys()

    # Handle LTCF
    use_ltcf = facilities_by_uid_lists is not None
    if use_ltcf:
        layer_keys = ['H', 'S', 'W', 'C', 'LTCF']
    else:
        layer_keys = ['H', 'S', 'W', 'C']

    log.debug(' starting...' + checkmem())

    # TODO: include age-based sex ratios
    sexes = np.random.randint(2, size=len(age_by_uid))

    for u, uid in enumerate(age_by_uid):
        popdict[uid] = {}
        popdict[uid]['age'] = int(age_by_uid[uid])
        popdict[uid]['sex'] = sexes[u]
        popdict[uid]['loc'] = None
        popdict[uid]['contacts'] = {}
        if use_ltcf:
            popdict[uid]['ltcf_res'] = None
            popdict[uid]['ltcf_staff'] = None
        popdict[uid]['hhid'] = None
        popdict[uid]['scid'] = None
        popdict[uid]['sc_student'] = None
        popdict[uid]['sc_teacher'] = None
        popdict[uid]['sc_staff'] = None
        popdict[uid]['sc_type'] = None
        popdict[uid]['sc_mixing_type'] = None
        popdict[uid]['wpid'] = None
        popdict[uid]['wpindcode'] = None
        if use_ltcf:
            popdict[uid]['ltcfid'] = None
        for k in layer_keys:
            popdict[uid]['contacts'][k] = set()

    # read in facility residents and staff
    if use_ltcf:
        for nf, facility in enumerate(facilities_by_uid_lists):
            facility_staff = facilities_staff_uid_lists[nf]

            for u in facility:
                popdict[u]['ltcf_res'] = 1
                popdict[u]['ltcfid'] = nf

            for u in facility_staff:
                popdict[u]['ltcf_staff'] = 1
                popdict[u]['ltcfid'] = nf

            if use_two_group_reduction:
                popdict = create_reduced_contacts_with_group_types(popdict, facility, facility_staff, 'LTCF',
                                                                   average_degree=average_LTCF_degree,
                                                                   force_cross_edges=True)

            else:
                # fully connected facility: everyone (residents and staff) contacts everyone else
                log.debug('...LTCFs ' + checkmem())
                for uid in facility:
                    popdict[uid]['contacts']['LTCF'] = set(facility)
                    popdict[uid]['contacts']['LTCF'] = popdict[uid]['contacts']['LTCF'].union(set(facility_staff))
                    popdict[uid]['contacts']['LTCF'].remove(uid)

                for uid in facility_staff:
                    popdict[uid]['contacts']['LTCF'] = set(facility)
                    popdict[uid]['contacts']['LTCF'] = popdict[uid]['contacts']['LTCF'].union(set(facility_staff))
                    popdict[uid]['contacts']['LTCF'].remove(uid)

    log.debug('...households ' + checkmem())
    for nh, household in enumerate(homes_by_uids):
        for uid in household:
            popdict[uid]['contacts']['H'] = set(household)
            popdict[uid]['contacts']['H'].remove(uid)
            popdict[uid]['hhid'] = nh

    log.debug('...students ' + checkmem())
    for ns, students in enumerate(students_by_uid_lists):
        schools[ns] = {}
        teachers = teachers_by_uid_lists[ns]
        if non_teaching_staff_uid_lists is None:
            non_teaching_staff = []
        elif non_teaching_staff_uid_lists == []:
            non_teaching_staff = []
        else:
            non_teaching_staff = non_teaching_staff_uid_lists[ns]

        this_school_type = None
        this_school_mixing_type = None

        if with_school_types:
            # the school type is looked up from the age of the youngest student
            student_ages = [age_by_uid[i] for i in students]
            min_age = min(student_ages)
            this_school_type = school_type_by_age[min_age]
            this_school_mixing_type = school_mixing_type_dic[this_school_type]

            popdict, student_groups, teacher_groups = spsch.add_school_edges(popdict,
                                                                             students,
                                                                             student_ages,
                                                                             teachers,
                                                                             non_teaching_staff,
                                                                             age_by_uid,
                                                                             grade_age_mapping,
                                                                             age_grade_mapping,
                                                                             average_class_size_by_mixing_type[this_school_mixing_type],
                                                                             inter_grade_mixing,
                                                                             average_student_teacher_ratio,
                                                                             average_teacher_teacher_degree,
                                                                             average_additional_staff_degree,
                                                                             this_school_mixing_type)

        else:
            school = students.copy() + teachers.copy() + non_teaching_staff.copy()
            school_edges = spsch.generate_random_contacts_across_school(school, average_class_size)
            popdict = spsch.add_contacts_from_edgelist(popdict, school_edges, 'S')
            student_groups = [students]
            teacher_groups = [teachers]

        schools[ns]['sc_type'] = this_school_type
        schools[ns]['school_mixing_type'] = this_school_mixing_type
        schools[ns]['student_groups'] = student_groups
        schools[ns]['teacher_groups'] = teacher_groups

        for uid in students:
            popdict[uid]['scid'] = ns
            popdict[uid]['sc_student'] = 1
            popdict[uid]['sc_type'] = this_school_type
            popdict[uid]['sc_mixing_type'] = this_school_mixing_type

        for uid in teachers:
            popdict[uid]['scid'] = ns
            popdict[uid]['sc_teacher'] = 1
            popdict[uid]['sc_type'] = this_school_type
            popdict[uid]['sc_mixing_type'] = this_school_mixing_type

        for uid in non_teaching_staff:
            popdict[uid]['scid'] = ns
            popdict[uid]['sc_staff'] = 1
            popdict[uid]['sc_type'] = this_school_type
            popdict[uid]['sc_mixing_type'] = this_school_mixing_type

    pop.schools_in_groups = schools

    log.debug('...workplaces ' + checkmem())
    if do_trim and 'W' in trim_keys:

        average_degree = max_contacts['W']
        for nw, workplace in enumerate(workplace_by_uid_lists):
            workplace_uids = np.array(workplace)
            G = random_graph_model(workplace_uids, average_degree)  # undirected graph
            for u, uid in enumerate(workplace):
                # graph nodes are positions within the workplace; map them back to uids
                v = list(G.neighbors(u))
                popdict[uid]['contacts']['W'] = set(workplace_uids[v])
                popdict[uid]['contacts']['W'].discard(uid)  # this shouldn't be needed
                popdict[uid]['wpid'] = nw
                if workplaces_by_industry_codes is not None:  # pragma: no cover
                    popdict[uid]['wpindcode'] = int(workplaces_by_industry_codes[nw])

    else:  # pragma: no cover
        for nw, workplace in enumerate(workplace_by_uid_lists):
            for uid in workplace:
                popdict[uid]['contacts']['W'] = set(workplace)
                popdict[uid]['contacts']['W'].remove(uid)
                popdict[uid]['wpid'] = nw
                if workplaces_by_industry_codes is not None:
                    popdict[uid]['wpindcode'] = int(workplaces_by_industry_codes[nw])

    log.debug('...done ' + checkmem())
    return popdict
def create_reduced_contacts_with_group_types(popdict, group_1, group_2, setting, average_degree=20, p_matrix=None, force_cross_edges=True):
    """
    Create contacts between members of group 1 and group 2, fixing the average
    degree, and the probability of an edge between any two groups controlled by
    p_matrix if provided. Each individual in group 1 ends up with at least one
    contact in group 2; the reverse is not guaranteed, so not everyone in
    group 2 will have a contact with group 1.

    Args:
        popdict (dict)        : popdict of people; every person's 'contacts' dictionary gets a `setting` entry (an empty set if it had none), and edges are added to it in place
        group_1 (list)        : list of ids for group 1
        group_2 (list)        : list of ids for group 2
        setting (str)         : key of the contact layer in each person's 'contacts' dictionary that receives the edges
        average_degree (int)  : average degree across group 1 and 2
        p_matrix (np.ndarray) : probability matrix for edges between any two groups; if None, every entry is average_degree divided by the combined group size
        force_cross_edges (bool) : accepted for interface compatibility; NOTE(review): this argument is not read anywhere in the body, so the cross-group edges for group 1 are always created

    Returns:
        Popdict with edges added for nodes in the two groups (the same object
        that was passed in).

    Raises:
        ValueError: if either group is empty, or if average_degree < 2.

    Notes:
        This method uses the Stochastic Block Model algorithm to generate
        contacts both between nodes in different groups and for nodes within the
        same group. In the current version, fixing the average degree and
        p_matrix, the matrix of probabilities for edges between any two groups
        is not supported. Future versions may add support for this.
    """
    if len(group_1) == 0 or len(group_2) == 0:
        errormsg = f'This method requires that both groups are populated. If one of the two groups has size 0, then consider using the synthpops.trim_contacts() method, or checking that the groups provided to this method are correct.'
        raise ValueError(errormsg)

    if average_degree < 2:
        errormsg = f'This method is likely to create disconnected graphs with average_degree < 2. In order to keep the group connected, use a higher average_degree for nodes across the two groups.'
        raise ValueError(errormsg)

    # r1/r2 hold the person ids; n1/n2 hold the matching graph node indices,
    # with group 1 occupying 0..len(r1)-1 and group 2 following directly after
    r1 = [int(i) for i in group_1]
    r2 = [int(i) for i in group_2]

    n1 = list(np.arange(len(r1)).astype(int))
    n2 = list(np.arange(len(r1), len(r1) + len(r2)).astype(int))

    # group[node_index] maps a graph node back to its person id
    group = r1 + r2
    sizes = [len(r1), len(r2)]

    # make sure every person in the popdict (not only the two groups) has this layer
    for i in popdict:
        popdict[i]['contacts'].setdefault(setting, set())

    # group is less than the average degree, so return a fully connected graph instead
    if len(group) <= average_degree:
        G = nx.complete_graph(len(group))

    # group 2 is less than 2 people so everyone in group 1 must be connected to that lone group 2 individual, create a fully connected graph then remove some edges at random to preserve the degree distribution
    elif len(group_2) < 2:
        G = nx.complete_graph(len(group))
        for i in n1:
            group_1_neighbors = [j for j in G.neighbors(i) if j in n1]

            # if the person's degree is too high, cut out some contacts
            if len(group_1_neighbors) > average_degree:
                ncut = len(group_1_neighbors) - average_degree  # rough number to cut
                for k in range(ncut):
                    # only group 1 edges are removed, so the link to group 2 is kept
                    j = np.random.choice(group_1_neighbors)
                    G.remove_edge(i, j)
                    group_1_neighbors.remove(j)

    else:
        # default edge probability: the same value for every within- and between-group block
        share_k_matrix = np.ones((2, 2))
        share_k_matrix *= average_degree / np.sum(sizes)

        if p_matrix is None:
            p_matrix = share_k_matrix.copy()

        # create a graph with edges within each groups and between members of different groups using the probability matrix
        G = nx.stochastic_block_model(sizes, p_matrix)

        # how many people in group 2 have connections they could cut to preserve the degree distribution
        group_2_to_group_2_connections = []
        for i in n2:
            group_2_neighbors = [j for j in G.neighbors(i) if j in n2]
            if len(group_2_neighbors) > 0:
                group_2_to_group_2_connections.append(i)

        # there are no people in group 2 who can remove edges to other group 2 people, so instead, just add edges
        if len(group_2_to_group_2_connections) == 0:
            for i in n1:
                group_2_neighbors = [j for j in G.neighbors(i) if j in n2]

                # need to add a contact in group 2
                if len(group_2_neighbors) == 0:
                    random_group_2_j = np.random.choice(n2)
                    G.add_edge(i, random_group_2_j)

        # some in group 2 have contacts to remove to preserve the degree distribution
        else:
            for i in n1:
                group_2_neighbors = [j for j in G.neighbors(i) if j in n2]

                # increase the degree of the node in group 1, while decreasing the degree of a member of group 2 at random
                if len(group_2_neighbors) == 0:
                    random_group_2_j = np.random.choice(n2)
                    random_group_2_neighbors = [ii for ii in G.neighbors(random_group_2_j) if ii in n2]

                    # add an edge to random_group_2_j
                    G.add_edge(i, random_group_2_j)

                    # if the group 2 person has an edge they can cut to their own group, remove it
                    if len(random_group_2_neighbors) > 0:
                        random_group_2_neighbor_cut = np.random.choice(random_group_2_neighbors)
                        G.remove_edge(random_group_2_j, random_group_2_neighbor_cut)

    # translate graph edges (node indices) into symmetric contacts between person ids
    E = G.edges()
    for e in E:
        i, j = e

        id_i = group[i]
        id_j = group[j]
        popdict[id_i]['contacts'][setting].add(id_j)
        popdict[id_j]['contacts'][setting].add(id_i)

    return popdict
def get_contact_counts_by_layer(popdict, layer='S', with_layer_ids=False):
    """
    Count the number of contacts for individuals in the population based on
    their role in a layer and the role of their contacts.

    For the school layer ('S') the counts are broken down by the role of the
    contact: for every sc_student, sc_teacher, and sc_staff, the number of
    contacts that are sc_student, sc_teacher, sc_staff, all_staff (sc_teacher
    plus sc_staff), and all. For every other supported layer only the 'all'
    count is recorded; the role-specific lists for 'LTCF' are present in the
    output but stay empty.

    Only the contacts of the requested layer are read, so the popdict does not
    need to contain school or long term care facility information in order to
    count household or workplace contacts (and vice versa).

    Args:
        popdict (dict)        : popdict of a Pop object, Dictionary keys are the IDs of individuals in the population and the values are a dictionary
        layer (str)           : name of the physical contact layer: H for households, S for schools, W for workplaces, LTCF for long term care facilities (case insensitive)
        with_layer_ids (bool) : If True, return additional dictionary on contacts by layer group id

    Returns:
        If with_layer_ids is False: A dictionary with keys = people_types
        (['sc_student', 'sc_teacher', 'sc_staff'] for 'S', ['ltcf_res',
        'ltcf_staff'] for 'LTCF', the layer id key for 'W' and 'H') and each
        value is a dictionary which stores the list of counts for each type of
        contact. For example, contact_counter['sc_teacher']['sc_teacher'] stores
        the counts of each teacher's contacts to other teachers.

        If with_layer_ids is True: additionally return a dictionary with keys =
        layer id (for example: scid, wpid...), and value the list of total
        contact counts of the people in that group.

    Raises:
        NotImplementedError: if layer is not one of 'S', 'LTCF', 'W', 'H'.
    """
    layer = layer.upper()

    # layer keys are used to identify the people in that layer
    layer_keys = {"S": "scid", "W": "wpid", "H": "hhid", "LTCF": "ltcfid"}

    # for all layers, 'all' contact_types will store counts for all contacts but
    # based on each different layer, there can be more contact_types, for example
    # in the school layer there is sc_student, sc_staff etc
    if layer == 'S':
        people_types = ['sc_student', 'sc_teacher', 'sc_staff']
        contact_types = people_types + ['all_staff', 'all']
    elif layer == "LTCF":
        people_types = ['ltcf_res', 'ltcf_staff']
        contact_types = people_types + ['all']
    elif layer in ["W", "H"]:
        people_types = [layer_keys[layer]]
        contact_types = ['all']
    else:
        raise NotImplementedError(f"layer {layer} not supported.")

    layer_id_key = layer_keys[layer]

    # one independent list of counts per (person type, contact type) pair
    contact_counter = {ptype: {ctype: [] for ctype in contact_types} for ptype in people_types}

    # also store all contacts count per layer id in contacts_counter_by_id
    contacts_counter_by_id = {}

    for person in popdict.values():
        layer_id = person[layer_id_key]
        if layer_id is None:
            continue  # this person is not part of the layer

        contacts = person["contacts"][layer]
        n_all = len(contacts)

        # Role-specific counts are only recorded for schools, so only compute them there;
        # looking them up unconditionally fails for popdicts without 'S'/'LTCF' data.
        role_counts = {}
        if layer == "S":
            for role in people_types:
                role_counts[role] = sum(1 for c in contacts if popdict[c][role])
            role_counts['all_staff'] = role_counts['sc_teacher'] + role_counts['sc_staff']

        layer_counts = contacts_counter_by_id.setdefault(layer_id, [])

        for ptype in people_types:
            # if this person does not belong to a particular type, nothing is stored under it
            if person.get(ptype) is None:
                continue

            for ctype, count in role_counts.items():
                contact_counter[ptype][ctype].append(count)

            # for other layers, only all contacts are stored
            contact_counter[ptype]["all"].append(n_all)

            if with_layer_ids:
                layer_counts.append(n_all)

    if with_layer_ids:
        return contact_counter, contacts_counter_by_id
    return contact_counter
def filter_people(pop, ages=None, uids=None):
    """
    Select the ids of people in the population, optionally restricted by uid
    and by age.

    Args:
        pop (sp.Pop)          : population; ``pop.n`` gives the number of people and ``pop.age_by_uid`` their ages
        ages (list or array)  : ages of people to include; None means no age restriction
        uids (list or array)  : ids of people to include; None means no id restriction

    Returns:
        array: An array of the ids of people to include for further analysis.
    """
    selected = np.arange(pop.n)

    # Filters are compared against None rather than tested for truthiness, so
    # that a request for only uid 0 is still honoured.
    if uids is not None:
        selected = np.intersect1d(selected, uids)

    # Same reasoning for a request for only age 0.
    if ages is not None:
        has_requested_age = np.isin(pop.age_by_uid, ages)
        selected = np.intersect1d(selected, sc.findinds(has_requested_age))

    return selected
def count_layer_degree(pop, layer='H', ages=None, uids=None, uids_included=None):
    """
    Create a dataframe from the population of people in the layer, including
    their uid, age, degree, and the ages of contacts in the layer.

    Args:
        pop (sp.Pop)                 : population; ``pop.popdict`` and ``pop.age_by_uid`` are read
        layer (str)                  : name of the physical contact layer: H for households, S for schools, W for workplaces, LTCF for long term care facilities
        ages (list or array)         : ages of people to include
        uids (list or array)         : ids of people to include
        uids_included (list or None) : pre-calculated ids of people to include; when given, ages and uids are ignored

    Returns:
        pandas.DataFrame: A pandas DataFrame of people in the layer with columns
        'uid', 'age', 'degree', and 'contact_ages'. People who are not part of
        the layer (their layer id is None) are left out. The columns are present
        even when nobody qualifies, so downstream group-by operations keep
        working on an empty result.
    """
    if uids_included is None:
        uids_included = filter_people(pop, ages=ages, uids=uids)

    layerid_mapping = {'H': 'hhid', 'LTCF': 'ltcfid', 'S': 'scid', 'W': 'wpid'}
    columns = ['uid', 'age', 'degree', 'contact_ages']

    degree_dicts = []
    for i in uids_included:
        age = pop.age_by_uid[i]
        person = pop.popdict[i]

        # skip people who are not members of this layer
        if person[layerid_mapping[layer]] is None:
            continue

        contacts = person['contacts'][layer]
        degree_dicts.append({
            'uid': i,
            'age': age,
            'degree': len(contacts),
            'contact_ages': [pop.age_by_uid[j] for j in contacts],
        })

    # Fix the columns explicitly: a DataFrame built from an empty list would otherwise have none.
    return pd.DataFrame(degree_dicts, columns=columns)
def compute_layer_degree_description(pop, layer='H', ages=None, uids=None, uids_included=None, degree_df=None, percentiles=None):
    """
    Summarize the degree distribution by age for a layer of the population
    contact network. See pandas.DataFrame.describe() for details on the
    statistics included by default.

    Args:
        pop (sp.Pop)                 : population
        layer (str)                  : name of the physical contact layer: H for households, S for schools, W for workplaces, LTCF for long term care facilities
        ages (list or array)         : ages of people to include
        uids (list or array)         : ids of people to include
        uids_included (list or None) : pre-calculated ids of people to include
        degree_df (dataframe)        : pandas dataframe of people in the layer and their uid, age, degree, and ages of their contacts in the layer; computed with count_layer_degree() from the arguments above when None
        percentiles (list)           : list of the percentiles to include as statistics; defaults to the 5th, 25th, 50th, 75th, and 95th

    Returns:
        pandas.DataFrame: A pandas DataFrame of the statistics for the layer
        degree distribution by age.
    """
    if percentiles is None:
        percentiles = [0.05, 0.25, 0.5, 0.75, 0.95]

    if degree_df is None:
        degree_df = count_layer_degree(pop, layer, ages, uids, uids_included)

    degree_by_age = degree_df.groupby('age')['degree']
    return degree_by_age.describe(percentiles=percentiles)
def random_graph_model(uids, average_degree, seed=None):
    """
    Build an Erdos-Renyi random graph over a group of individuals, with the
    edge probability set from the requested average degree.

    The graph nodes are the positions 0..len(uids)-1, not the ids themselves.

    Args:
        uids (list, np.ndarray) : a list or array of the ids of people in the graph
        average_degree (float)  : the average degree in the generated graph
        seed                    : passed through to networkx as the random seed of the graph generator

    Returns:
        nx.Graph : A random graph from the fast Erdos-Renyi implementation, or a
        fully connected graph when average_degree is at least the number of
        nodes.

    Raises:
        ValueError: if uids is empty.
    """
    n_nodes = len(uids)

    if n_nodes == 0:
        raise ValueError(f"Expected uids to a non-empty list or array. Instead, the length of uids is {n_nodes}.")

    # Too few nodes to reach the requested degree without multi-edges
    if average_degree >= n_nodes:
        log.debug(f"Desired average degree is greater than or equal to the number of nodes. This method does not support multi-edges; returning a fully connected graph.")
        return nx.complete_graph(n_nodes)

    edge_probability = average_degree / n_nodes
    return nx.fast_gnp_random_graph(n_nodes, edge_probability, seed=seed)
def get_expected_density(average_degree, n_nodes):
    """
    Calculate the expected density of an undirected graph with no self-loops
    given graph properties. The expected density of an undirected graph with
    no self-loops is defined as the number of edges as a fraction of the
    number of maximal edges possible.

    Reference: Newman, M. E. J. (2010). Networks: An Introduction (pp 134-135).
    Oxford University Press.

    Args:
        average_degree (float) : average expected degree
        n_nodes (int)          : number of nodes in the graph

    Returns:
        float: The expected graph density, capped at 1. Graphs with fewer than
        two nodes cannot hold any edge, so their density is 0.
    """
    # No edge is possible with fewer than two nodes; returning 0 here avoids dividing by Emax == 0.
    if n_nodes < 2:
        return 0.0

    E = n_nodes * average_degree / 2
    Emax = n_nodes * (n_nodes - 1) / 2
    density = min(E / Emax, 1)  # capture when the average degree is greater than the number of nodes - 1
    return density