tutorial.Rmd

---
title: "Gnorm: A new centrality index designed for multilayer networks"
output: html_document
date: "2023-06-15"
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# TUTORIAL

This tutorial is part of the supplement to the paper: Lotfi N, Requejo HS, Rodrigues F, Mello MAR. (2023). A new centrality index designed for multilayer networks. Methods in Ecology and Evolution: *in press*.

Authors: Nastaran Lotfi, Henrique S. Requejo, Francisco A. Rodrigues & Marco A. R. Mello.

[Ecological Synthesis Lab](https://marcomellolab.wordpress.com) (SintECO), University of São Paulo, Brazil.

Contact: [*nas.naslot\@gmail.com*](mailto:nas.naslot@gmail.com){.email} OR [*marmello\@usp.br*](mailto:marmello@usp.br){.email}

See [README](https://github.com/Nastaranlotfi/Gnorm-new-centrality-index-multilayer-networks/blob/main/README.md) for further info.

## Objective

This tutorial, prepared as an RMD notebook, aims to help you calculate `Gnorm` for the example data used in our paper. You can adapt it to calculate `Gnorm` for your own data.

## Summary

1.  [Setting the stage](#data)
2.  [Building the network](#network)
3.  [Calculating Gnorm](#Gnorm)
4.  [Summarizing Gnorm results](#Summarize)
5.  [Plotting Gnorm curves](#plot)
6.  [Exporting Gnorm results](#export)

## 1. Setting the stage {#data}

Set the working directory:

```{r}
# Send a form-feed character to the console (clears the console pane in
# RStudio; harmless elsewhere).
cat("\014")

# When the notebook is run interactively inside RStudio, switch to the folder
# that contains this document so the relative paths used below resolve.
# The RStudio API is not available in other front ends or when knitting, and
# the document path is empty for an unsaved file, so both cases are skipped
# instead of raising an error.
if (interactive() &&
    requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  doc_path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(doc_path)) {
    setwd(dirname(doc_path))
  }
}
```

Delete all previous objects:

```{r}
# Remove every object from the current environment so the tutorial starts
# from a clean workspace. NOTE: this also deletes any objects you created
# before running the notebook.
rm(list= ls())
```

Load the required packages:

```{r}
library(akima)
library(plyr)
library(dplyr)
library(ggplot2)
library(igraph)
library(multinet)
library(png)
library(RColorBrewer)
library(pheatmap)
library(plot3D)
```

Create the output folders:

```{r}

# Start from an empty workspace.
rm(list = ls())

# Make sure every working folder exists; report the ones already present.
invisible(lapply(
  c("data", "input", "figures", "results"),
  function(folder) {
    if (dir.exists(folder)) {
      print("Dir already exists!")
    } else {
      dir.create(folder)
    }
  }
))

# Timestamp marking the start of the run.
currentTime_start <- Sys.time()
```

Source the auxiliary functions:

```{r}

# Run Aux_functions.R (read as UTF-8) in this session. The helper functions
# called later in this notebook but not defined in it (e.g.
# Convert_to_Multinet, Create_seq_G_Merged) are presumably defined there.
source("Aux_functions.R", encoding="utf-8")
```

Load the data:

```{r}
# Read the interaction table; the first line holds the column names and
# text columns are kept as character vectors.
data <- read.csv(file = "input/links_clean.csv", header = TRUE, as.is = TRUE)
```

Inspect the data:

```{r}
# Preview the first and last rows of the interaction table to check that it
# was read as expected.
head(data)
tail(data)

# Timestamp taken once the data have been loaded and inspected.
currentTime_prep <- Sys.time()
```

Define the edge list of the network:

```{r}
# Build the edge list of the multilayer network.
#
# Columns of `data` are addressed by position: columns 1 and 2 hold the two
# endpoints of each interaction and column 3 holds the interaction type.
# Each interaction type becomes one layer ("Frugivory" = layer 1,
# "Nectarivory" = layer 2); rows with any other (or missing) type are dropped.
# The result is a plain data frame with one row per link.

# Extract the links of one layer as a data frame.
extract_layer_links <- function(interactions, layer_name, layer_id) {
  # which() ignores NA, so rows with a missing interaction type are skipped
  keep <- which(interactions[[3]] == layer_name)
  data.frame(
    from = interactions[[1]][keep],
    to = interactions[[2]][keep],
    layer_num = rep(layer_id, length(keep)),
    layer = as.character(interactions[[3]][keep]),
    stringsAsFactors = FALSE
  )
}

Fruit <- extract_layer_links(data, "Frugivory", 1)
Nectar <- extract_layer_links(data, "Nectarivory", 2)

# Frugivory links first, then nectarivory links
Links <- rbind(Fruit, Nectar)
colnames(Links) <- c("from", "to", "layer_num", "layer")

dim(Links)
head(Links)
tail(Links)

currentTime_link <- Sys.time()
cat("end_link_construction", "\n")

```

Define the node list of the network:

```{r}
# Build the node list: one row per species, bats first and plants second.
# Every value ends up as text because the rows are assembled as a
# character matrix.

# Alphabetically ordered unique species names of each group
bat_species <- unique(data$CurrentBatSpecies)
bat_species <- bat_species[order(bat_species)]

plant_species <- unique(data$CurrentPlantSpecies)
plant_species <- plant_species[order(plant_species)]

# name | taxon | taxon.label (1 = bats, 2 = plants) | species.size (always 1)
bat_rows <- cbind(
  bat_species,
  rep("Bats", length(bat_species)),
  rep(1, length(bat_species)),
  rep(1, length(bat_species))
)
plant_rows <- cbind(
  plant_species,
  rep("Plants", length(plant_species)),
  rep(2, length(plant_species)),
  rep(1, length(plant_species))
)

Nodes <- rbind(bat_rows, plant_rows)
colnames(Nodes) <- c("name", "taxon", "taxon.label", "species.size")

dim(Nodes)
head(Nodes)
tail(Nodes)
```

Save the nodes and links:

```{r}
# Write the complete node and edge tables to disk without a row-name column;
# the following sections read them back from these files.
write.csv(x = Nodes, file = "data/nodes1.csv", row.names = FALSE)
write.csv(x = Links, file = "data/links1.csv", row.names = FALSE)

currentTime_node <- Sys.time()
cat("end_node_construction", "\n")

```

## 2. Building the network {#network}

Find the giant component:

```{r}

# Extract the giant (largest connected) component of the network and save
# its nodes and links as data/nodes2.csv and data/links2.csv.

nodes1 <- read.csv("data/nodes1.csv", header = TRUE, as.is = TRUE)
links1 <- read.csv("data/links1.csv", header = TRUE, as.is = TRUE)

# Single-layer (undirected) view of the network, used only to find components
net_mono1 <- graph_from_data_frame(d = links1, vertices = nodes1,
                                   directed = FALSE)

# Connected components and the index of the largest one
comps <- igraph::components(net_mono1, mode = "weak")
giant_id <- which.max(comps$csize)

# Names of the nodes that do NOT belong to the giant component.
# This is an empty vector when the whole network is connected, in which
# case nothing is removed below.
outside_giant <- V(net_mono1)$name[comps$membership != giant_id]

# Drop those nodes, and every link that touches one of them
df1 <- nodes1[!nodes1$name %in% outside_giant, , drop = FALSE]
df2 <- links1[!(links1$from %in% outside_giant |
                  links1$to %in% outside_giant), , drop = FALSE]

write.csv(df1, "data/nodes2.csv", row.names = FALSE)
write.csv(df2, "data/links2.csv", row.names = FALSE)

currentTime_compo <- Sys.time()
cat("end_names_filtering-by-max-component", "\n")

```

Build multilayer networks for both the complete empirical network and its giant component:

```{r}

# Network #1: the complete empirical network.
# Nodes are sorted by name before being handed to Convert_to_Multinet
# (a helper that is not defined in this notebook).
nodes1 <- read.csv(file = "data/nodes1.csv", header = TRUE, as.is = TRUE)
links1 <- read.csv(file = "data/links1.csv", header = TRUE, as.is = TRUE)
nodes1 <- nodes1[order(nodes1$name), ]
net_multinet1 <- Convert_to_Multinet(nodes1, links1)

# Network #2: the giant component only, prepared the same way.
nodes2 <- read.csv(file = "data/nodes2.csv", header = TRUE, as.is = TRUE)
links2 <- read.csv(file = "data/links2.csv", header = TRUE, as.is = TRUE)
nodes2 <- nodes2[order(nodes2$name), ]
net_multinet2 <- Convert_to_Multinet(nodes2, links2)

# Print both objects to compare the complete network with its giant component
net_multinet1
net_multinet2

currentTime_netcons <- Sys.time()
cat("end_network_construction", "\n")
```

Inspect the complete empirical network and its giant component:

```{r}

# Draw the complete network and its giant component, both to PNG files and
# inline in the notebook.
#
# The layout is computed on a copy of the links that keeps only the first
# row of each from/to pair. A logical mask is used for this because
# `x[-which(cond), ]` returns ZERO rows whenever `cond` is never TRUE
# (i.e. when there are no duplicated pairs).

# Complete network #1
links_no_dupl1 <- links1[!duplicated(links1[, c("from", "to")]), ]
net_layout1 <- graph_from_data_frame(d = links_no_dupl1,
                                     vertices = nodes1, directed = FALSE)
layout1 <- layout_nicely(net_layout1)

png(filename = "figures/network_visualization_complete.png",
    res = 300, width = 4000, height = 2200)
Custom_plot2D(links1, nodes1, layout1, vertex_label_cex = NULL, vertex_size = 3)
dev.off()


# The giant component of the network #2
links_no_dupl2 <- links2[!duplicated(links2[, c("from", "to")]), ]
net_layout2 <- graph_from_data_frame(d = links_no_dupl2,
                                     vertices = nodes2, directed = FALSE)
layout2 <- layout_nicely(net_layout2)

png(filename = "figures/network_visualization_component.png",
    res = 300, width = 4000, height = 2200)
Custom_plot2D(links2, nodes2, layout2, vertex_label_cex = NULL, vertex_size = 3)
dev.off()


# Repeat both plots on the default device so they show up in the notebook
Custom_plot2D(links1, nodes1, layout1, vertex_label_cex = NULL, vertex_size = 3)
Custom_plot2D(links2, nodes2, layout2, vertex_label_cex = NULL, vertex_size = 3)

currentTime_netvis <- Sys.time()
cat("end_network_visualization", "\n")
```

## 3. Calculating Gnorm {#Gnorm}

In the subsequent analyses, we'll work only with the giant component. Set the main parameters used to calculate Gnorm:

```{r}

# Calculate Gnorm for the giant component.
#
# For every gamma in `gammas`, Create_seq_G_Merged() is run `iterations`
# times and the runs are averaged; the per-gamma results are then averaged
# over all gammas to obtain `G_norm_mean`. The helpers Create_seq_G_Merged,
# Create_vec_W, StdDev_list_of_seq_G and Sort_Nodes_by_Total_G are not
# defined in this notebook.

# Partitioning, omega, gamma, and number of iterations (for getting the mean)
partitions_of_omega <- 10 # Number of partitions
seq_G <- Create_seq_G_Merged(net_multinet2, partitions_of_omega)
vec_W <- Create_vec_W(partitions_of_omega)
gamma_min <- 0.25
gamma_max <- 4
gamma_spacing <- 0.25
gammas <- seq(from = gamma_min, to = gamma_max, by = gamma_spacing)
iterations <- 2 # Increasing the number of iterations increases the computing
                # time considerably. But for stable, reliable results use at
                # least 100.

# One slot per gamma for the averaged results
Seq_G_Mean_gamma_list <- vector("list", length(gammas))
G_norm_list <- vector("list", length(gammas))

# Progress bookkeeping
total_runs <- iterations * length(gammas)
cont_perc <- 1

for (gamma_index in seq_along(gammas)) {
  start_time <- round(as.numeric(Sys.time()))

  seq_G_list <- vector("list", iterations)
  for (i in seq_len(iterations)) {
    seq_G_list[[i]] <- Create_seq_G_Merged(net_multinet2,
                                           partitions_of_omega,
                                           gamma = gammas[gamma_index])

    # Run-time approximation: extrapolate the duration of the very first run
    if (cont_perc == 1) {
      end_time <- round(as.numeric(Sys.time()))
      time_taken <- round(end_time - start_time, 2)
      print(time_taken)
      cat("Estimated time needed for run (secs): ",
          time_taken * total_runs, "\n")
    }

    cat(cont_perc * 100 / total_runs, "%  ") # print the run progress
    cont_perc <- cont_perc + 1
  } # end of iterations

  # Overwrite the first column (restored from `$actor` below) with a number
  # so that the tables can be added together
  seq_G_list_no_names <- lapply(seq_G_list, function(seq_G_run) {
    seq_G_run[, 1] <- 1
    seq_G_run
  })

  # Sum of the G values over the iterations. Reduce() also handles a single
  # iteration, where a `for (i in 2:length(...))` loop would run backwards
  # and fail.
  seq_G_sum <- Reduce(`+`, seq_G_list_no_names)

  # Mean G value over the iterations
  seq_G_mean <- seq_G_sum / iterations

  # Put the names back into the first column
  seq_G_mean[, 1] <- seq_G_list[[1]]$actor

  # Standard deviation over the iterations
  seq_G_StdDev <- StdDev_list_of_seq_G(seq_G_list)

  # Gnorm per node, unsorted and sorted
  nodes_G_norm <- Sort_Nodes_by_Total_G(seq_G_mean, ordered = FALSE)
  nodes_G_norm_Ordered <- Sort_Nodes_by_Total_G(seq_G_mean, ordered = TRUE)

  # Store the results of this gamma
  Seq_G_Mean_gamma_list[[gamma_index]] <- cbind(seq_G_mean, gammas[gamma_index])
  G_norm_list[[gamma_index]] <- nodes_G_norm
} # end of gamma

# Mean over gamma (works for a single gamma as well)
G_norm_sum <- Reduce(`+`, G_norm_list)
G_norm_mean <- G_norm_sum / length(G_norm_list)

# Sorting G_norm_mean
G_norm_mean_ordered <- sort(G_norm_mean, decreasing = TRUE)

save(gammas, vec_W, iterations, partitions_of_omega, links2, nodes2,
     Seq_G_Mean_gamma_list, G_norm_mean, G_norm_mean_ordered,
     file = "results/Bat_Net.RData")

currentTime_Gnorm <- Sys.time()
cat("end_Gnorm", "\n")

```

## 4. Summarizing Gnorm results {#Summarize}

Inspect the distribution of Gnorm values in the network:

```{r}
# Parameters passed to Plot_number_modularity() (a helper that is not
# defined in this notebook); a coarser grid than the one used for Gnorm.
partitions_of_omega1 <- 4
gamma_min1 <- 0.5
gamma_max1 <- 3.5
gamma_spacing1 <- 0.5

plots <- Plot_number_modularity(partitions_of_omega1,
                                gamma_min1,
                                gamma_max1,
                                gamma_spacing1,
                                net_multinet2)

currentTime_modularity <- Sys.time()
cat("end_modularity", "\n")


################### G-NORM FREQUENCY ###########################################

# Reload the results saved in the previous section
load("results/Bat_Net.RData")

# Mean Gnorm values without their names
gnorm_values <- unlist(unname(G_norm_mean))

# Histogram of the mean Gnorm values. Defined once so that the PNG and the
# inline figure cannot drift apart.
plot_gnorm_hist <- function(values) {
  hist(values, breaks = 5, col = "darkmagenta", xlim = c(1, 2),
       main = "Distribution of Gnorm", xlab = "G_norm", cex = 40, pch = 190,
       cex.lab = 1.6, cex.main = 2, col.main = "#515357")
}

# Save the histogram as a PNG file ...
png(filename = "figures/hist_Gnorm.png",
    res = 500, width = 4000, height = 3000)
plot_gnorm_hist(gnorm_values)
dev.off()

# ... and draw it again on the default device so it shows up in the notebook
plot_gnorm_hist(gnorm_values)

currentTime_gnormfreq <- Sys.time()
cat("end_gnormfreq", "\n")


```

## 5. Plotting Gnorm curves {#plot}

Inspect the Gnorm curves produced for each node of the network. In this tutorial, we've selected only the most important species pointed out by a previous study, based on other centrality metrics (see [Mello et al. 2019 NatEcoEvo](https://doi.org/10.1038/s41559-019-1002-3)).

In the following, we produce two major kinds of plots: (1) 2D line plots and (2) 2D heatmaps. (1) The 2D line plots show how Gnorm varies with omega for the selected species. Different lines indicate different values of gamma. (2) The 2D heatmaps show how Gnorm varies with omega and gamma. The blue-red gradient represents the mean Gnorm values calculated for the selected species.

```{r}
# For each selected species, save a 2D line plot and a heatmap of its Gnorm
# values. The plotting helpers (Unite_list_of_dataframes,
# G_curves_for_different_gammas, Plot_G_gamma_omega_heat_3D) are not defined
# in this notebook.
load("results/Bat_Net.RData")
seq_Gnorm_gamma_mean <- Unite_list_of_dataframes(Seq_G_Mean_gamma_list)

# Names of the selected species, in alphabetical order. The `name` column is
# extracted explicitly so that this also works if the CSV file has more than
# one column.
selection <- read.csv("input/Names_impo.csv", as.is = 1)
selection <- selection$name[order(selection$name)]

for (chosen_node in selection) {
  # (1) Gnorm against omega, one line per gamma
  png_name <- paste0("figures//important_", chosen_node, "_2d.png")
  png(png_name, width = 700, height = 700)
  plots <- G_curves_for_different_gammas(seq_Gnorm_gamma_mean, chosen_node,
                                         vec_W, gammas)
  plot(plots)
  dev.off()

  # (2) Heatmap of Gnorm against omega and gamma
  png_name <- paste0("figures//important_", chosen_node, "_heat.png")
  png(png_name, width = 700, height = 700)
  Plot_G_gamma_omega_heat_3D(seq_Gnorm_gamma_mean, chosen_node, vec_W, gammas)
  dev.off()
} # end for

```

## 6. Exporting Gnorm results {#export}

Export the Gnorm values as a TXT file, sorted by taxon and then by species name.

```{r}
# Export the mean Gnorm of every node of the giant component.
#
# NOTE(review): G_norm_mean is matched to nodes2 by position; this assumes
# both are in the same node order - confirm against Sort_Nodes_by_Total_G().
# Fail early instead of silently recycling values if the lengths differ.
stopifnot(NROW(G_norm_mean) == nrow(nodes2))

# The column name "taxon.lable" is kept as is so that the layout of the
# output file does not change.
nodes3 <- cbind.data.frame(name = nodes2$name,
                           taxon = nodes2$taxon,
                           taxon.lable = nodes2$taxon.label,
                           G_norm_mean = G_norm_mean)

# Sort by taxon, then by name, and renumber the rows
nodes3 <- nodes3[order(nodes3$taxon, nodes3$name), ]
rownames(nodes3) <- seq_len(nrow(nodes3))

write.table(nodes3, file = "results/names_Gnorm.txt")

```