-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcuckoo original data tidying source.R
179 lines (128 loc) · 7.65 KB
/
cuckoo original data tidying source.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
##########################################################
#
# Cuckoo ORIGINAL data - tidying source file, adding new variables (distance moved between points, bearing, movement groups, and movement types)
#
# Samantha Franks
# 15 Nov 2013
# 4 Dec 2013
# 23 Dec 2013
#
#
##########################################################
library(NCStats)
library(rgdal)
library(maptools)
library(shape)
library(splancs)
library(geosphere)
library(adehabitat) #
library(sp) # converts R dataframes into spatially explicit dta
library(rgeos)
library(raster)
library(adehabitatHR)
###-----------------------------------------------###
# Load cuckoo data & tidying
###-----------------------------------------------###
# cuckoo coordinate locations are in Lat-Long and UTM coordinates and WGS84 datum
setwd("C:/Users/samf/Documents/Git/cuckoos/data")
#d <- read.csv("BTO Cuckoo migration study - raw data + new variables 15112013.csv", header=T) # cuckoo movement
d <- read.csv("cuckoos raw data 20131118_reformatted_20131213.csv", header=T)
# cuckoo <- read.csv("cuckoo movements.csv", header=T) # cuckoo movement
#
# reqcuckoo <- with(cuckoo, data.frame(timestamp, julian=julian.date, year, month, day, id, lat, long, duration, days.since.tagging, name, country, continent, movement.number))
# $filter refers to the values identified as best in the transmission cycle by the Douglas filter applied in Movebank: TRUE values are the best locations
# $loc.class refers to the Argos location class: from best to worst, 3,2,1,0,A,B,Z. Alpha location classes have no associated errors, so remove. Location class=0 have gigantic major axis errors, so also remove these. Use only location classes 1:3
# error.major = error radius along the major axis of the ellipse
# error.minor = error radius along the minor axis of the ellipse
# test location refers to points that were used as either reference points or tested outside Chris Hewson's house in Cambridge (long/lat = 0.125/52.135)
# d2 <- with(d, data.frame(name=individual.local.identifier, id=event.id, filter=visible, tcycle=Transmission.cycle, timestamp, julian, year, month, day, long=location.long, lat=location.lat, loc.class=argos.lc, error.major=argos.semi.major, error.minor=argos.semi.minor))
d2 <- with(d, data.frame(name, id, tcycle=transmission.cycle, timestamp, location_date, julian, year, month, day, hour, long=longitude, lat=latitude, loc.class=location_class, error.major=semi_major_axis, error.minor=semi_minor_axis, ell.orientation=ellipse_orientation, sensor1=X_1, sensor2=X_2, sensor3=X_3, sensor4=X_4,test_location))
# d2$filter <- as.factor(d2$filter) # define the filter as a factor rather than logical
d2$tcycle <- as.factor(d2$tcycle)
# removes locations outside Afro-Palearctic (reference points in North America plus any really weird points in the Atlantic)
d3 <- Subset(d2, long > -30)
d4 <- Subset(d3, loc.class=="1" | loc.class=="2" | loc.class=="3") # subset by best location classes
d5 <- Subset(d4, test_location=="N")
d6 <- Subset(d5, name!="" & name!="115592" & name!="115601" & name!="121792")
d7 <- Subset(d6, error.major>0 & error.minor>0)
unique_locdates <- which(duplicated(d7$location_date)==FALSE)
d8 <- d7[unique_locdates,]
d9 <- na.omit(d8) # complete cases only
###-----------------------------------------------###
# Add distance, bearing, mgroup and mtype to original data
###-----------------------------------------------###
byname <- d9$name
newdat <- split(d9, list(byname)) # split dataset by individual bird
trimdata <- function(dat) { # distance, bearing, etc function
# for each element of data, do the following
# order dat according to location_date - not needed if dataset has already been ordered in Excel by location_date
# datorder <- dat[order(dat$location_date),]
smallerror <- Subset(dat, error.major <= 5000)
### --- DISTANCE & BEARING CALCULATION --- ###
# calculate distances and bearings
# distbearmvmt <- function(input){ # requires 2 variables called newlongs & newlats or long & lat or centroidlong & centroidlat
# use newlongs & newlats to calculate distances based on resampled points
# use long & lat to calculate distances based on original measured location point
input <- smallerror
dist.cuckoo <- input[,c("long","lat")]
coordinates(dist.cuckoo) <- c("long", "lat")
proj4string(dist.cuckoo) <- CRS("+proj=longlat +datum=WGS84")
d1 <- dist.cuckoo@coords
d2 <- dist.cuckoo@coords[-1,] # create new matrix minus the first row so that d2 starts at the second observation
last.d2 <- matrix(c(0,0), nrow=1, dimnames=list(1, c("long","lat"))) # create a placeholder last row in the second distance matrix
d2 <- rbind(d2, c(0,0))
dist <- distCosine(d1,d2)/1000 # distance between points, in km
bear <- bearing(d1,d2) # bearing between points
dist <- c(NA,dist[-length(dist)])
bear <- c(NA,bear[-length(bear)])
distbear <- data.frame(input, distance=dist, bearing=bear)
### --- MOVEMENT GROUPS --- ###
mgroup <- rep(NA,nrow(distbear))
for (n in 1:nrow(distbear)){
if (is.na(distbear$distance[n])) {
mgroup[n] <- 1
} else if (distbear$distance[n] <= 30) {
mgroup[n] <- mgroup[n-1]
} else {mgroup[n] <- mgroup[n-1] + 1}
}
### --- MOVEMENT TYPES --- ###
mtype <- c("C", rep(NA, nrow(distbear)-1))
for (n in 2:(nrow(distbear)-1)){
if (mgroup[n] != mgroup[n-1] & mgroup[n] != mgroup[n+1]) {
mtype[n] <- "M"
} else {
mtype[n] <- "S"
}
}
completedata <- data.frame(distbear,mgroup,mtype)
return(completedata)
}
original <- lapply(newdat, trimdata) # apply function to calculate distance, bearing, mgroup, mtype across all elements of dataset (ie. across each individual bird's data)
fulloriginal <- do.call(rbind,original)
setwd("C:/Users/samf/Documents/Git/cuckoos/data")
write.csv(fulloriginal, "allbirdfinal - original data.csv", row.names=F)
setwd("C:/Users/samf/Documents/Git/cuckoos/data/original data + distance, movement groups, etc")
for (i in 1:length(original)){
write.csv(original[[i]], paste(original[[i]]$name[1],".csv", sep=""), row.names=FALSE)
}
# # ### Histogram of cuckoo distance movements
# #
# # hist(fulloriginal$distance)
# #
# par(mfrow=c(2,2))
# hist(subset(fulloriginal, distance > 20)$distance, main=c("d > 20km"), xlab=c("Distance (km)"))
# hist(subset(fulloriginal, distance > 200 & distance <= 500)$distance, main=c("100km < d < 500km"), xlab=c("Distance (km)"))
# hist(subset(fulloriginal, distance > 500)$distance, main=c("d > 500km"), xlab=c("Distance (km)"))
# hist(subset(fulloriginal, distance > 20 & distance <= 200)$distance, main=c("20km < d < 200km"), breaks=20, xlab=c("Distance (km)"))
#
# par(mfrow=c(2,2), mar=c(4,2,2,1))
# hist(subset(fulloriginal, distance > 0 & distance <= 100)$distance, main=c("0km < d < 100km"), xlab=c("Distance (km)"))
# hist(subset(fulloriginal, distance > 10 & distance <= 100)$distance, main=c("10km < d < 100km"), xlab=c("Distance (km)"))
# hist(subset(fulloriginal, distance > 20 & distance <= 50)$distance, main=c("20km < d < 100km"), xlab=c("Distance (km)"))
# hist(subset(fulloriginal, distance < 10)$distance, main=c("d < 10km"), xlab=c("Distance (km)"))
#
# par(mfrow=c(2,2))
# hist(subset(fulloriginal, distance > 20 & distance <= 200)$distance, main=c("20km < d < 200km"), xlab=c("Distance (km)"), breaks=10)
# hist(subset(fulloriginal, distance > 20 & distance <= 200)$distance, main=c("20km < d < 200km"), xlab=c("Distance (km)"), breaks=20)
# hist(subset(fulloriginal, distance > 20 & distance <= 100)$distance, main=c("20km < d < 100km"), xlab=c("Distance (km)"), breaks=10)
# hist(subset(fulloriginal, distance > 20 & distance <= 100)$distance, main=c("20km < d < 100km"), xlab=c("Distance (km)"), breaks=20)