Social Network Analytics

Social Network Analytics

Social Network Analytics (with a Case Study in Python)

import networkx as nx

import matplotlib.pyplot as plt

%matplotlib notebook

# Build a small undirected example network. Every edge carries two
# attributes: a numeric ``weight`` and a ``relation`` label.
# (String literals use plain ASCII quotes; typographic quotes are not valid
# Python string delimiters.)
G = nx.Graph()
G.add_edges_from([
    ('A', 'B', {'weight': 13, 'relation': 'friend'}),
    ('B', 'C', {'weight': 9, 'relation': 'family'}),
    ('B', 'D', {'weight': 7, 'relation': 'friend'}),
    ('E', 'B', {'weight': 10, 'relation': 'friend'}),
    ('E', 'A', {'weight': 1, 'relation': 'enemy'}),
    ('F', 'B', {'weight': 13, 'relation': 'family'}),
])

# Notebook-style inspection: list every edge together with its attribute dict.
G.edges(data=True)

[('C', 'B', {'relation': 'family', 'weight': 9}),

('E', 'B', {'relation': 'friend', 'weight': 10}),

('E', 'A', {'relation': 'enemy', 'weight': 1}),

('B', 'F', {'relation': 'family', 'weight': 13}),

('B', 'A', {'relation': 'friend', 'weight': 13}),

('B', 'D', {'relation': 'friend', 'weight': 7})]

# Attach attributes to nodes. add_edge() creates missing endpoints, and
# add_node() on an existing node only updates its attribute dict.
# NOTE(review): the recorded output that follows lists only nodes A, B and C,
# which suggests this snippet was run on a fresh graph rather than on the
# six-node graph built earlier -- confirm whether `G = nx.Graph()` belongs here.
G.add_edge('A', 'B', weight=13, relation='friend')
G.add_edge('B', 'C', weight=9, relation='family')

for name, role in (('A', 'Trader'), ('B', 'Analyst'), ('C', 'Manager')):
    G.add_node(name, role=role)

# Notebook-style inspection: list every node together with its attribute dict.
G.nodes(data=True)

[('C', {'role': 'Manager'}),

('B', {'role': 'Analyst'}),

('A', {'role': 'Trader'})]

from networkx.algorithms import bipartite

# Build a bipartite graph: letters form one node set (bipartite=0) and the
# integers form the other (bipartite=1).
B = nx.Graph()
B.add_nodes_from(['A', 'B', 'C', 'D', 'E'], bipartite=0)
B.add_nodes_from([1, 2, 3, 4], bipartite=1)

# The edges belong to B. Adding them to G would leave B edgeless (making the
# bipartiteness check trivially true) and would pollute the earlier graph.
B.add_edges_from([
    ('A', 1), ('B', 1), ('C', 1), ('C', 3),
    ('D', 4), ('E', 1), ('A', 2), ('E', 2),
])

# Every edge links a letter to a number, so this evaluates to True.
bipartite.is_bipartite(B)

True

import networkx as nx

import pandas as pd

import numpy as np

import pickle

# Load the case-study network from a pickled NetworkX graph.
# SECURITY: read_gpickle unpickles the file, and unpickling can execute
# arbitrary code -- only load files from a trusted source.
# NOTE: read_gpickle() and info() are legacy NetworkX helpers (both were
# removed in NetworkX 3.0); this script targets the older API throughout.
G = nx.read_gpickle('Social_Network.txt')

# Print a summary of the graph (type, node/edge counts, average degree).
print(nx.info(G))

Type: Graph

Number of nodes: 1005

Number of edges: 16706

Average degree: 33.2458

from sklearn.neural_network import MLPClassifier

from sklearn.preprocessing import MinMaxScaler

# --- Link-prediction features for every unconnected node pair ---------------

# cn_soundarajan_hopcroft looks up each node's community under the
# 'community' attribute by default, so expose the department under that key.
# NOTE: `G.node` is the legacy NetworkX accessor used by this script; on
# NetworkX >= 2.4 the equivalent is `G.nodes[node]`.
for node in G.nodes():
    G.node[node]['community'] = G.node[node]['Department']

# Preferential-attachment score, indexed by the (u, v) node pair.
preferential_attachment = list(nx.preferential_attachment(G))
df = pd.DataFrame(index=[(x[0], x[1]) for x in preferential_attachment])
df['preferential_attachment'] = [x[2] for x in preferential_attachment]

# Soundarajan-Hopcroft common-neighbour score, joined on the node pair.
SH = list(nx.cn_soundarajan_hopcroft(G))
df_SH = pd.DataFrame(index=[(x[0], x[1]) for x in SH])
df_SH['soundarajan_hopcroft'] = [x[2] for x in SH]
df = df.join(df_SH, how='outer')

# Pairs without a Soundarajan-Hopcroft score get the column mean.
# (The bare name `mean` is not defined; use the Series method instead.)
M = df['soundarajan_hopcroft'].mean()
df['soundarajan_hopcroft'] = df['soundarajan_hopcroft'].fillna(value=M)

# Remaining scores. Each one is attached as a Series keyed by node pair so
# the values are aligned on the index instead of relying on the row order
# surviving the outer join above.
for column, scorer in (
    ('resource_allocation_index', nx.resource_allocation_index),
    ('jaccard_coefficient', nx.jaccard_coefficient),
):
    scored = list(scorer(G))
    df[column] = pd.Series(
        [x[2] for x in scored],
        index=[(x[0], x[1]) for x in scored],
    )

# Attach the labels. `future_connections` is defined outside this section;
# presumably it is indexed by node pair with a 'Future Connection' column
# that is NaN for the pairs to be predicted -- verify against where it is
# loaded.
df = future_connections.join(df, how='outer')

# Labelled rows train the model; unlabelled rows are the prediction targets.
df_train = df[df['Future Connection'].notnull()]
df_test = df[df['Future Connection'].isnull()]

# Must match the column names created above (the Soundarajan-Hopcroft column
# is 'soundarajan_hopcroft', not 'cn_soundarajan_hopcroft').
features = [
    'soundarajan_hopcroft',
    'preferential_attachment',
    'resource_allocation_index',
    'jaccard_coefficient',
]

# Creating an MLPClassifier model for predicting the future links

# Split the labelled / unlabelled frames into model inputs and the target.
X_train = df_train[features]
Y_train = df_train['Future Connection']
X_test = df_test[features]

# Fit the scaler on the training rows only, then apply the same scaling to
# the test rows so no information from the test set leaks into training.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Small two-hidden-layer network; random_state is fixed for reproducibility.
clf = MLPClassifier(
    hidden_layer_sizes=[10, 5],
    alpha=5,
    random_state=0,
    solver='lbfgs',
    verbose=0,
)
clf.fit(X_train_scaled, Y_train)

# Column 1 of predict_proba is the probability of the second entry of
# clf.classes_ -- presumably the "connection forms" class; verify the labels.
test_proba = clf.predict_proba(X_test_scaled)[:, 1]
predictions = pd.Series(test_proba, X_test.index)

# Work on an explicit copy: adding a column to a filtered slice of
# `future_connections` would trigger pandas' SettingWithCopyWarning.
target = future_connections[pd.isnull(future_connections['Future Connection'])].copy()
target['prob'] = [predictions[x] for x in target.index]