-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcuckoo Movebank data tidying source_new data 2014.R
176 lines (128 loc) · 6.77 KB
/
cuckoo Movebank data tidying source_new data 2014.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
##########################################################
#
# Cuckoo ORIGINAL data - tidying source file, adding new variables (distance moved between points, bearing, movement groups, and movement types)
#
# Samantha Franks
# 15 Nov 2013
# 4 Dec 2013
# 23 Dec 2013
# 2 Sep 2014 - re-run with Argos Movebank data (pre-filtered) and new birds
#
#
##########################################################
######### NOTES ##########
# code requires a data file object called d, which is a .csv file downloaded straight from Movebank
# outputs an object called fulloriginal
library(rgdal)
library(maptools)
library(shape)
library(splancs)
library(geosphere)
library(adehabitat) #
library(sp) # converts R dataframes into spatially explicit dta
library(rgeos)
library(raster)
library(adehabitatHR)
library(chron)
library(plyr)
###-----------------------------------------------###
# Load cuckoo data & tidying
###-----------------------------------------------###
# cuckoo coordinate locations are in Lat-Long and UTM coordinates and WGS84 datum
# setwd("C:/Users/samf/Documents/Git/cuckoos/data")
############## USES ARGOS MOVEBANK PRE-FILTERED DATA
# d <- read.csv("BTO Cuckoo migration study_Movebank_20141128.csv.csv", header=T)
d2 <- with(d, data.frame(name=individual.local.identifier, id=event.id, timestamp, long=location.long, lat=location.lat, loc.class=argos.lc, error.major=argos.semi.major, error.minor=argos.semi.minor))
d2$long <- as.numeric(as.character(d2$long))
# d2$name <- revalue(d2$name, c("lloyd"="Lloyd", "Rob Roy"="Roy", "Jake"="Patch"))
x <- do.call(rbind,(strsplit(as.character(d2$timestamp), " ")))
dat.datetime <- chron(x[,1], x[,2], format=c(dates="y-m-d", times="h:m:s"))
d2 <- data.frame(d2, datetime=dat.datetime, year=as.numeric(as.character(years(dat.datetime))), month=as.numeric(months(dat.datetime)), day=as.numeric(days(dat.datetime)))
d2 <- d2[order(d2$name, d2$datetime),]
# removes locations outside Afro-Palearctic (reference points in North America plus any really weird points in the Atlantic)
d3 <- subset(d2, long > -30)
# d4 <- subset(d3, loc.class=="1" | loc.class=="2" | loc.class=="3") # subset by best location classes
if (length(rm.names) > 0) {
d5 <- d3[-grep(paste(rm.names, collapse="|"), d3$name),]
} else {
d5 <- d3
}
d5 <- droplevels(d5)
# remove first Waller point which is given as in the North Sea
d5 <- d5[-which(d5$name=="Waller" & d5$id==192215226),]
###-----------------------------------------------###
# Add distance, bearing, mgroup and mtype to original data
###-----------------------------------------------###
newdat <- split(d5, list(d5$name)) # split dataset by individual bird
trimdata <- function(dat) { # distance, bearing, etc function
# for each element of data, do the following
# order dat according to location_date - not needed if dataset has already been ordered in Excel by location_date
### --- DISTANCE & BEARING CALCULATION --- ###
# calculate distances and bearings
# distbearmvmt <- function(input){ # requires 2 variables called newlongs & newlats or long & lat or centroidlong & centroidlat
# use newlongs & newlats to calculate distances based on resampled points
# use long & lat to calculate distances based on original measured location point
input <- dat
dist.cuckoo <- input[,c("long","lat")]
coordinates(dist.cuckoo) <- c("long", "lat")
proj4string(dist.cuckoo) <- CRS("+init=epsg:4326")
d1 <- dist.cuckoo@coords
d2 <- dist.cuckoo@coords[-1,] # create new matrix minus the first row so that d2 starts at the second observation
last.d2 <- matrix(c(0,0), nrow=1, dimnames=list(1, c("long","lat"))) # create a placeholder last row in the second distance matrix
d2 <- rbind(d2, c(0,0))
dist <- distCosine(d1,d2)/1000 # distance between points, in km
bear <- bearing(d1,d2) # bearing between points
dist <- c(NA,dist[-length(dist)])
bear <- c(NA,bear[-length(bear)])
distbear <- data.frame(input, distance=dist, bearing=bear)
### --- MOVEMENT GROUPS --- ###
mgroup <- rep(NA,nrow(distbear))
for (n in 1:nrow(distbear)){
if (is.na(distbear$distance[n])) {
mgroup[n] <- 1
} else if (distbear$distance[n] <= 30) {
mgroup[n] <- mgroup[n-1]
} else {mgroup[n] <- mgroup[n-1] + 1}
}
### --- MOVEMENT TYPES --- ###
mtype <- c("C", rep(NA, nrow(distbear)-1))
for (n in 2:(nrow(distbear)-1)){
if (mgroup[n] != mgroup[n-1] & mgroup[n] != mgroup[n+1]) {
mtype[n] <- "M"
} else {
mtype[n] <- "S"
}
}
completedata <- data.frame(distbear,mgroup,mtype)
return(completedata)
}
original <- lapply(newdat, trimdata) # apply function to calculate distance, bearing, mgroup, mtype across all elements of dataset (ie. across each individual bird's data)
fulloriginal <- do.call(rbind,original)
# setwd("C:/Users/samf/Documents/Git/cuckoos/data")
# write.csv(fulloriginal, "allbirdfinal - Movebank original data new birds 20140903.csv", row.names=F)
#
# setwd("C:/Users/samf/Documents/Git/cuckoos/data/Movebank original data new birds + distance, movement groups, etc")
# for (i in 1:length(original)){
# write.csv(original[[i]], paste(original[[i]]$name[1],".csv", sep=""), row.names=FALSE)
# }
# # ### Histogram of cuckoo distance movements
# #
# # hist(fulloriginal$distance)
# #
# par(mfrow=c(2,2))
# hist(subset(fulloriginal, distance > 20)$distance, main=c("d > 20km"), xlab=c("Distance (km)"))
# hist(subset(fulloriginal, distance > 200 & distance <= 500)$distance, main=c("100km < d < 500km"), xlab=c("Distance (km)"))
# hist(subset(fulloriginal, distance > 500)$distance, main=c("d > 500km"), xlab=c("Distance (km)"))
# hist(subset(fulloriginal, distance > 20 & distance <= 200)$distance, main=c("20km < d < 200km"), breaks=20, xlab=c("Distance (km)"))
#
# par(mfrow=c(2,2), mar=c(4,2,2,1))
# hist(subset(fulloriginal, distance > 0 & distance <= 100)$distance, main=c("0km < d < 100km"), xlab=c("Distance (km)"))
# hist(subset(fulloriginal, distance > 10 & distance <= 100)$distance, main=c("10km < d < 100km"), xlab=c("Distance (km)"))
# hist(subset(fulloriginal, distance > 20 & distance <= 50)$distance, main=c("20km < d < 100km"), xlab=c("Distance (km)"))
# hist(subset(fulloriginal, distance < 10)$distance, main=c("d < 10km"), xlab=c("Distance (km)"))
#
# par(mfrow=c(2,2))
# hist(subset(fulloriginal, distance > 20 & distance <= 200)$distance, main=c("20km < d < 200km"), xlab=c("Distance (km)"), breaks=10)
# hist(subset(fulloriginal, distance > 20 & distance <= 200)$distance, main=c("20km < d < 200km"), xlab=c("Distance (km)"), breaks=20)
# hist(subset(fulloriginal, distance > 20 & distance <= 100)$distance, main=c("20km < d < 100km"), xlab=c("Distance (km)"), breaks=10)
# hist(subset(fulloriginal, distance > 20 & distance <= 100)$distance, main=c("20km < d < 100km"), xlab=c("Distance (km)"), breaks=20)