forked from EDJNet/internet_speed
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1-data_analysis.R
113 lines (79 loc) · 3.06 KB
/
1-data_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
## Libraries
if (!require("pacman")) install.packages("pacman")
pacman::p_load_gh("teamookla/ooklaOpenDataR") # Source for Speedtest data
pacman::p_load_gh("giocomai/latlon2map") # Source for European geometries
pacman::p_load(tidyverse,
sf,
here,
lubridate)
## Geometries
# Administrative boundary layers (LAU plus NUTS levels 3, 2 and 0), each
# reduced to two harmonised attribute columns: `id` and `name`.
# A renaming select() both renames and drops the other attributes in one
# step. The layers are presumably sf objects, in which case the geometry
# column is carried along automatically.
lau <- ll_get_lau_eu() %>%
  select(id = GISCO_ID, name = LAU_NAME)

nuts_3 <- ll_get_nuts_eu(level = 3, resolution = 1) %>%
  select(id = NUTS_ID, name = NAME_LATN)

nuts_2 <- ll_get_nuts_eu(level = 2, resolution = 1) %>%
  select(id = NUTS_ID, name = NAME_LATN)

nuts_0 <- ll_get_nuts_eu(level = 0, resolution = 1) %>%
  select(id = NUTS_ID, name = NAME_LATN)
## Vectors to loop
# Years and quarters of Speedtest data to process, and the names of the
# geometry objects to aggregate over (each entry of `level` is resolved to
# the object of the same name with get() in the processing loop).
year <- c(2019, 2020, 2021, 2022)
quarter <- c(1, 2, 3, 4)
level <- c("lau", "nuts_3", "nuts_2", "nuts_0")

## Folders
# recursive = TRUE also creates the parent "data" folder;
# showWarnings = FALSE keeps re-runs quiet when the folders already exist.
dir.create(here("data", "raw_data"), recursive = TRUE, showWarnings = FALSE)

## Turn off spherical geometries
sf_use_s2(FALSE)
## Loop to download raw data/analyse/write outputs
# For every year/quarter combination:
#   1. load the fixed-line performance tiles, from the local .rds cache when
#      it exists, otherwise download them and write the cache;
#   2. for every geometry level, spatially join the tiles to the geometries
#      and write the test-weighted averages to
#      data/<level>/<level>_<year>_<quarter>.csv.
# Both the cached and the freshly downloaded path go through the same
# aggregation, so the output columns (id, name, quarter, avg_*) do not depend
# on whether the raw file was already on disk.

# Output folders are loop-invariant: create them once, quietly if present.
for (k in level) {
  dir.create(here("data", k), recursive = TRUE, showWarnings = FALSE)
}

for (i in year) {
  for (j in quarter) {
    year_quarter <- paste(i, j, sep = "_")
    raw_path <- here("data", "raw_data", paste0(year_quarter, ".rds"))

    if (file.exists(raw_path)) {
      df <- read_rds(raw_path)
    } else {
      df <- get_performance_tiles("fixed", year = i, quarter = j, sf = TRUE)
      write_rds(df, raw_path)
    }

    for (k in level) {
      st_join(get(k), df) %>%
        st_set_geometry(NULL) %>%
        group_by(id, name) %>%
        summarise(
          # Weight each tile's average by its number of tests. The lambda
          # form replaces passing `w`/`na.rm` through across()'s `...`.
          across(
            contains("avg"),
            ~ weighted.mean(.x, w = tests, na.rm = TRUE)
          ),
          .groups = "drop"
        ) %>%
        mutate(
          quarter = yq(year_quarter),
          avg_d = round(avg_d_kbps / 1000, 2),
          avg_u = round(avg_u_kbps / 1000, 2),
          avg_l = round(avg_lat_ms, 2),
          # Drop the source columns consumed by the expressions above.
          .keep = "unused"
        ) %>%
        write_csv(here("data", k, paste0(k, "_", year_quarter, ".csv")))
    }
  }
}
## Timeseries
# Stack all per-quarter CSVs of each level into a single file,
# data/timeseries/timeseries_<level>.csv, sorted by area id and quarter.
dir.create(here("data", "timeseries"), recursive = TRUE, showWarnings = FALSE)

for (k in level) {
  # Restrict to .csv files so stray files in the folder are not parsed.
  list.files(here("data", k), pattern = "\\.csv$", full.names = TRUE) %>%
    map(read_csv) %>%
    bind_rows() %>%
    arrange(id, quarter) %>%
    write_csv(here("data", "timeseries", paste0("timeseries_", k, ".csv")))
}