CS7280 Assignment 2 Degree Distributions and Network Top

Assignment 2: Degree Distributions and Network Topology
import collections
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
from typing import Callable, List, Tuple, Dict

from powerlaw import *
# parse the “blog.txt” into graph
G = nx.read_edgelist(“blog.txt”, create_using=nx.DiGraph)
# Create an undirected version
G_undirected = G.to_undirected()
Part 1 – Assortativity and The Friendship Paradox
# Compute the Pearson correlation coefficieient for
# the degrees of adjacent nodes.
# use a one sample t-test to evaluate whether your
# answer is statistically significant.

def calculate_pvalue_and_pearson(G: nx.Graph) -> (float, float, str):
G: NetworkX graph object

p_value (float): p-value of one sample t-test of the correlation coefficient
r (float): the Pearson correlation coefficieient for the degrees of adjacent nodes
network_type[*0-2*]: return one string coresponding to the network type. It has to be “assortative”, “disassortative”, or “neutral.”

# This is a placeholder
r = 7280.00
p_value = 7280.00
network_type = [“assortative”, “disassortative”, “neutral”]

return p_value, r, network_type[0]
# Do not modify
p_value, r , network_type= calculate_pvalue_and_pearson(G_undirected)
print(“>>Part 1.1 result<<") print(p_value, r, network_type) def get_average_neighbor_degrees(G: nx.Graph) -> (List, List):
Given graph G, this function calculates the average node degrees for nodes with degree k.

G: NetworkX graph object

k: a sequence representing the unique node degrees of nodes in G
knn: a sequence representing the corresponding average neighbor degrees for each degree
in `k`. In other words, knn[i] is the average neighbor degree for nodes with degree k[i].
# This line is a placeholder

dictionary = dict()
k = list(dictionary.keys())
knn = list(dictionary.values())

return k, knn
def plot_average_neighbor_degree(k: List, knn: List):
k: dictionary keys
knn: dictionary values

Nothing, but draws the visualizations of the graphs.

# This line is a placeholder

# Do not modify
k, knn = get_average_neighbor_degrees(G_undirected)
plot_average_neighbor_degree(k, knn)
def average_neighbor_degree_test(k: List, knn: List) -> (float, float, str):
k: dictionary keys
knn: dictionary values

p_value (float): p-value of one sample t-test of the correlation coefficient
r (float): the Pearson correlation coefficieient
network_type[*0-2*]: return one string coresponding to the network type

# This line is a placeholder
r = 7280.00
p_value = 7280.00
network_type = [“assortative”, “disassortative”, “neutral”]

return p_value, r, network_type[1]

# Do not modify
print(“>>Part 1.2 result<<") average_neighbor_degree_test(k , knn) Provide an explaination for how you determined the network type Part 2 - Power-Law Distributions # Plot the out-degree distribution def out_degrees(G: nx.Graph) -> List:
G: NetworkX graph object

out_degrees: a list of out degrees
# This line is a placeholder
out_degrees = []

return out_degrees

def in_degrees(G: nx.Graph) -> List:
G: NetworkX graph object

in_degrees: a list of in degrees
# This line is a placeholder
in_degrees = []

return in_degrees
def plot_degrees(degrees: List):
Degrees: list of degrees

Nothing, but draws the visualizations of the graphs.
# This line is a placeholder
# Plot the out-degree distribution
# Do not modify
print(“>>Part 2.1 out-degree plot<<") l_out_degrees = out_degrees(G) plot_degrees(l_out_degrees) # Plot the in-degree distribution # Do not modify print(">>Part 2.1 in-degree plot<<") l_in_degrees = in_degrees(G) plot_degrees(l_in_degrees) #fit the out-degree distributon without xmax def fit_outdegrees_without_xmax(out_degrees: List) -> (float, float):
out_degrees: list of out degrees

alpha (float): alpha value from the fit function
xmin (float): xmin from the fit function

# This is a placeholder
alpha = 7280.00
xmin = 7280.00

return alpha, xmin
#fit the out-degree distributon with xmax = 200
def fit_outdegrees_with_xmax(out_degrees: List) -> (float, float):
out_degrees: list of out degrees

alpha (float): alpha value from the fit function
xmin (float): xmin from the fit function

# This is a placeholder
alpha = 7280.00
xmin = 7280.00

return alpha, xmin
# Fit the in-degree distributon without xmax
def fit_indegrees_without_xmax(in_degrees: List) -> (float, float):
in_degrees: list of in degrees

alpha (float): alpha value from the fit function
xmin (float): xmin from the fit function

# This is a placeholder
alpha = 7280.00
xmin = 7280.00

return alpha, xmin
# Fit the in-degree distributon with xmax = 300
def fit_indegrees_with_xmax(in_degrees: List) -> (float, float):
in_degrees: list of in degrees

alpha (float): alpha value from the fit function
xmin (float): xmin from the fit function

# This is a placeholder
alpha = 7280.00
xmin = 7280.00

return alpha, xmin
# Do not modify
a1, x1 = fit_outdegrees_without_xmax(l_out_degrees)
a2, x2 = fit_outdegrees_with_xmax(l_out_degrees)
a3, x3 = fit_indegrees_without_xmax(l_in_degrees)
a4, x4 = fit_indegrees_with_xmax(l_in_degrees)

print(“>>Part 2.2 results<<") print("Out degrees no xmax", "alpha: " + str(a1), "xmin: " + str(x1)) print("Out degrees with xmax", "alpha: " + str(a2), "xmin: " + str(x2)) print("In degrees no xmax", "alpha: " + str(a3), "xmin: " + str(x3)) print("In degrees with xmax", "alpha: " + str(a4), "xmin: " + str(x4)) Part 3 - The Small-World Property Clustering Coefficient and Transitivity # Find the largest strongly connected component of the directed network def largest_SCC(G: nx.Graph) -> nx.Graph:
G: NetworkX graph object

G_0: NetworkX graph object, the LCC undirected

# This is a placeholder
G_0 = nx.Graph()

return G_0
def create_random_network(G: nx.Graph) -> (nx.Graph, int, int):
G: NetworkX graph object

R: A random NetworkX graph object
n: n of the G(n,p)
p: p of the G(n,p)

# This is a placeholder
R = nx.Graph()

return R, n, p

# Plot the clustering coefficient of the LCC and a G(n,p) network in CCDF format
def get_ccdf(values: List) -> (Tuple, List):
values: clustering coefficient values

unique_value: tuple
ccdf: array or list

# This is a placeholder
unique_value = ()

return unique_value, ccdf

def get_clustering_coefficients(G: nx.Graph, R: nx.Graph) -> (Dict, Dict):
Compute the clustering coefficients for graphs G and R.
The results are put into two dicts: `clustering_coefficient` and `clustering_coefficient_r`.

G: NetworkX graph object
R: A random NetworkX graph object

clustering_coefficient: dict – holds the clustering coefficients for graph G. The keys
are the nodes and the values are the corresponding clustering
coefficients.
clustering_coefficient_r: dict – holds the clustering coefficients for random graph R.

clustering_coefficient = {}
clustering_coefficient_r = {}

return clustering_coefficient, clustering_coefficient_r

def plot_ccdf(G: nx.Graph,
get_ccdf: Callable, # function get_ccdf
clustering_coefficient: Dict,
clustering_coefficient_r: Dict):
G: NetworkX graph object
get_ccdf: ccdf function previously implemented
clustering_coefficient: dict
clustering_coefficient_r:dict of random

Nothing, but draws the visualizations of the graphs.

# This is a placeholder
# Do not modify
print(“>>Part 3.1 and 3.2 results<<") G_0 = largest_SCC(G) R, n, p = create_random_network(G_0) clustering_coefficient, clustering_coefficient_r = get_clustering_coefficients(G_0, R) plot_ccdf(G_0, get_ccdf, clustering_coefficient, clustering_coefficient_r) def KS_test(clustering_coefficient: Dict, clustering_coefficient_r: Dict) -> (scipy.stats.kstest):
clustering_coefficient: dict
clustering_coefficient_r:dict of random

ks: KstestResults object

# This is a placeholder
ks = scipy.stats.kstest(list(clustering_coefficient.values()), list(clustering_coefficient_r.values()))

# do not modify
print(“>>Part 3.3 results<<") KS_test(clustering_coefficient, clustering_coefficient_r) # Plot the average clustering coefficient as a function of the node degree def average_metric_on_degree(G: nx.Graph, metric: Dict) -> Dict:
Given a graph G and a dict `metric` that contains some metric value by node, compute the
average metric value by node degree.

G: NetworkX graph object
metric: A `metric` dict, where the keys are the nodes and the values are the metric values of the nodes.
An example of the metric is the cluster coefficient.

k_coef: dict – the keys are the node degrees and the values are the corresponding average metric values

k_coef = {}
return k_coef

def plot_average_clustering_coefficient(G_0: nx.Graph,
average_metric_on_degree: Callable,
R: nx.Graph):
G: NetworkX graph object
average_metric_on_degree: function you implemented
R: Random NetworkX graph object

Nothing, but draws the visualizations of the graphs.

# Do not modify
print(“>>Part 3.4 results<<") plot_average_clustering_coefficient(G_0, average_metric_on_degree, R) # Plot the transitivity coefficient of the overall network of the LCC and the 100 G(n,p) networks def get_transitivities_and_random_graphs(n: int, p: int, N: int) -> (List, List):
n: n of the G(n,p)
p: p of the G(n,p)
N: number of random graphs to generate

transitivities: list of transitivities corresponding to the random graphs in R_graphs
R_graphs: list of random graphs

# This is a placeholder
transitivities = []
R_graphs = []

return transitivities, R_graphs
def plot_transitivity_coefficients(G: nx.Graph, transitivities: List):
G: NetworkX graph object
transitivities: list of transitivities

Nothing, but draws the visualizations of the graphs.

# do not modify

print(“>>Part 3.5 results<<") R, n, p = create_random_network(G_0) transitivities, R_graphs = get_transitivities_and_random_graphs(n, p, 100) plot_transitivity_coefficients(G_0, transitivities) CPL/ASPL and Diameter def get_diameters_and_aspls(R_graphs: List) -> (List, List):
R_graphs: list of random graphs

diameters: list of corresponding diameters
aspls: list of coresponding aspls

# This is a placeholder
diameters = []
aspls = []

return diameters, aspls
def plot_diameters(G: nx.Graph, diameters: List):
G: NetworkX graph object
diameters: list of diameters

Nothing, but draws the visualizations of the graphs.

# Plot the average shortest path length of the LCC
# and the 100 G(n,p) networks
def plot_aspls(G: nx.Graph, aspls: List):
G: NetworkX graph object
aspls: list of aspls

Nothing, but draws the visualizations of the graphs.

# Do not modify
print(“>>Part 3.6 results<<") diameters, aspls = get_diameters_and_aspls(R_graphs) plot_diameters(G_0, diameters) plot_aspls(G_0, aspls) def diameter_ttest(G: nx.Graph, diameters: List) -> scipy.stats.ttest_ind:
G: NetworkX graph object
diameters: list of diameters

ttest: TtestResult object

# This is a placeholder
ttest = ()
return ttest
def aspls_ttest(G: nx.Graph, aspls: List) -> scipy.stats.ttest_ind:
G: NetworkX graph object
aspls: list of aspls

ttest: TtestResult object

# This is a placeholder
ttest = ()
return ttest
print(“>>Part 3.7 results<<") ttest_diameter = diameter_ttest(G_0, diameters) ttest_aspls = aspls_ttest(G_0, aspls) print(ttest_diameter,"\n", ttest_aspls) Are the values statistically different? Are they within the same order of magnitude? Small World Property Based on your findings in 3.1-3.7, can you conclude that G0 is a small-world network? Explain which metrics you used and how they support your conclusion. Part 4 - Motifs def get_LCC(G: nx.Graph) -> nx.Graph:
G: NetworkX graph object

lcc: Largest Connected Component (Directed) of G

# This is a placeholder
lcc = nx.Graph()

return lcc
def get_motif_59(G: nx.Graph) -> (set, set):
G: NetworkX graph object

type_5_motif: set of type 5 motifs
type_9_motif: set of type motifs

# This is a placeholder
type_5_motif = set()
type_9_motif = set()

return type_5_motif, type_9_motif
# Number of motifs in empirical networks
def get_emperical_motifs(get_motif_59: Callable,
lcc: nx.Graph) -> (int, int):
get_motif_59: function you already implemented
lcc: NetworkX graph object

type_5_motif: int – number of type 5 motifs
type_9_motif: int – number of type 9 motifs

# This is a placeholder
num_5_motif, num_9_motif = 0, 0

return num_5_motif, num_9_motif

# Number of motifs in random networks
def get_random_motifs(get_motif_59: Callable,
lcc: nx.Graph) -> (List, List):
get_motif_59: function you already implemented
lcc: NetworkX graph object

type_5_motifs: list – the list is of size N, where N is the number of
randomly generated graphs. The elements are the number
of type 5 motifs for the N generated random graphs.
type_9_motifs: list – same as above for type 9 motifs

# This is a placeholder
num_5_motifs, num_9_motifs = [], []

return num_5_motifs, num_9_motifs
def plot_5_motifs(num_5_motifs: List, num_5_motif: List):
num_5_motifs:
num_5_motif:

Nothing, but draws the visualizations of the graphs.

def plot_9_motifs(num_9_motifs: List, num_9_motif: List):
num_9_motifs:
num_9_motif:

Nothing, but draws the visualizations of the graphs.

# do not modify
print(“>>Part 4.1 results<<") lcc = get_LCC(G) num_5_motif, num_9_motif = get_emperical_motifs(get_motif_59, lcc) num_5_motifs, num_9_motifs = get_random_motifs(get_motif_59, lcc) plot_5_motifs(num_5_motifs, num_5_motif) plot_9_motifs(num_9_motifs, num_9_motif) def motif5_ttest(num_5_motifs: List, num_5_motif: List): num_5_motifs: num_5_motif: ttest: TtestResult object # This is a placeholder ttest = () return ttest def motif9_ttest(num_9_motifs: List, num_9_motif: List): num_9_motifs: num_9_motif: ttest: TtestResult object # This is a placeholder ttest = () return ttest print(">>Part 4.2 results<<") motif5_tstat = motif5_ttest(num_5_motifs, num_5_motif) motif9_tstat = motif9_ttest(num_9_motifs, num_9_motif) print(motif5_tstat, motif9_tstat) Which of the previous triplet types are statistically more common (or less common) in the network of the LCC compared to what we would expense based on chance? Part 5 - Transitivity and the Average Clustering Coefficient