forked from BrunaLab/HeliconiaREU-Andrew
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCalc_Dist_to_Edge_20210607.R
353 lines (261 loc) · 12.4 KB
/
Calc_Dist_to_Edge_20210607.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
# Overview ------------------------------------------------------------------
rm(list=ls())
# Andrew Mercadante
# REU Project Summer 2021
#
# This code measures the linear distance of a point in a 1-ha fragment
# (e.g., the xy coordinates of a plant) to the nearest edge of that fragment.
# loading packages --------------------------------------------------------
if(!"Rfast" %in% installed.packages()) {
install.packages("Rfast")
} # Need this package for the nth() function
library(Rfast)
library(tidyverse)
library(here)
if(!"car" %in% installed.packages()) {
install.packages("car")
}
library(car)
if(!"broom" %in% installed.packages()) {
install.packages("broom")
}
library(broom)
if(!"lme4" %in% installed.packages()){
install.packages("lme4")
}
library(lme4)
# loading datasets --------------------------------------------------------
data <- read_rds(here("data","one_ha_coords_updated.rds")) # reading in .rds file
# data organization -------------------------------------------------------
# create a dataframe for each fragment
# COLOSSO 1-HA
EST_1_ha <- data %>%
filter(habitat == "1-ha" & ranch == "Esteio-Colosso") %>% #filtering data
filter(!is.na(x_final) & !is.na(y_final))
# Simplify to one location per plant, since plants don't move
EST_simple <- EST_1_ha %>%
group_by(plot, ha_id_number,row,column) %>%
summarize(x = unique(x_final), y = unique(y_final))
str(EST_simple)
EST_simple$column <- as.character(EST_simple$column)
str(EST_simple)
length(EST_simple$x) # 192 length, index for for loop
# DIMONA 1-HA
Dimona_2107_1ha <- data %>%
filter(bdffp_reserve_no == "2107" & habitat == "1-ha") %>%
filter(!is.na(x_final) & !is.na(y_final))
Dimona_2107_1ha_simple <- Dimona_2107_1ha %>%
group_by(plot, ha_id_number,row,column) %>%
summarize(plot = unique(plot), x=unique(x_final), y=unique(y_final))
# PORTO ALEGRE 1-HA
Porto_Alegre_1ha <- data %>%
filter(habitat == "1-ha" & ranch == "PortoAlegre") %>%
filter(!is.na(x_final) & !is.na(y_final))
Porto_Alegre_simple <- Porto_Alegre_1ha %>%
group_by(plot, ha_id_number,row,column) %>%
summarize(plot = unique(plot), x= unique(x_final),y=unique(y_final))
# A generic way to check if they have the same number of plants is
# to ask TRUE/FALSE if the number of unique id's in each df are equal
# that way you don't have to know the exact number (in this case 210).
# be sure to use the == (double equals).
length(unique(Porto_Alegre_1ha$ha_id_number)) ==
length(unique(Porto_Alegre_simple$ha_id_number))
length(unique(Dimona_2107_1ha$ha_id_number)) ==
length(unique(Dimona_2107_1ha_simple$ha_id_number))
length(unique(EST_1_ha$ha_id_number)) ==
length(unique(EST_simple$ha_id_number))
# ----------------------------- New Method, exhaustive --------------
EST_simple
distance_to_N_edge <- NULL # set up variables to store distances to all edges
distance_to_S_edge <- NULL
distance_to_E_edge <- NULL
distance_to_W_edge <- NULL
# ERS: you might consider shorter variable names. Maybe `dist_to_N` or even just `dist_N`?
#doesn't need to be a for-loop. Addition is vectorized.
distance_to_N_edge <- EST_simple$y + 20
distance_to_E_edge <- EST_simple$x
distance_to_W_edge <- 100 - EST_simple$x
distance_to_S_edge <- EST_simple$y + 50
ha_id_number <- EST_simple$ha_id_number # need to make this a variable so I can add it
# into a new dataframe to allow for a join later.
distances <- data.frame(ha_id_number,distance_to_N_edge,distance_to_E_edge,
distance_to_W_edge,distance_to_S_edge)
Colosso_1ha <- left_join(EST_simple,distances) # join dataframes
#distances$dist_to_nearest_edge <- apply(distances,1,FUN=min) this is how to get first edge
# How to get second closest edge?
distance_to_nearest_edge <- NULL # set up variables to store the two lowest values from the 4 distances
distance_to_next_nearest_edge <- NULL #ERS or maybe just `dist_nearest` and `dist_next`?
# This is awesome! I could not think of how to get next-nearest distance. Sorting and using the nth() function is a perfect solution!
for(i in 1:length(EST_simple$x)){
distances$distance_to_nearest_edge[i] <-apply(distances[i,2:5],1,FUN=min) # apply min() to 2nd-5th column of ith row
distance.temp <- as.matrix(distances[i,2:5]) # convert to matrix so nth() can work
distances$distance_to_next_nearest_edge[i] <- Rfast::nth(distance.temp,2,descending=F) # run nth() to select 2nd value
# in a vector of increasing distances
}
Colosso_1ha <- left_join(EST_simple,distances) # join the previous Colosso data with newly aquired distances df
# Porto Alegre 1-ha fragment -----------------------------------------------
distance_to_N_edge <- NULL # setup distance variables
distance_to_E_edge <- NULL
distance_to_W_edge <- NULL
distance_to_S_edge <- NULL
distance_to_N_edge <- 50-Porto_Alegre_simple$y
distance_to_E_edge <- Porto_Alegre_simple$x
distance_to_W_edge <- 100- Porto_Alegre_simple$x
distance_to_S_edge <- Porto_Alegre_simple$y + 50
ha_id_number <- Porto_Alegre_simple$ha_id_number
distances_Alegre <- data.frame(ha_id_number,distance_to_N_edge,distance_to_E_edge, # form df of distances w/ ha_id_number
distance_to_W_edge,distance_to_S_edge)
## Tidyverse version (I think):
# distances_Alegre <-
# Porto_Alegre_simple %>%
# mutate(distance_to_N_edge = 50 - y,
# distance_to_E_edge = x,
# distance_to_W_edge = 100 - x,
# distance_to_S_edge = y + 50)
distance_to_nearest_edge <- NULL
distance_to_next_nearest_edge <- NULL
for(i in 1:210){
distances_Alegre$distance_to_nearest_edge[i] <- apply(distances_Alegre[i,2:5],1,FUN=min)
distance.temp <- as.matrix(distances_Alegre[i,2:5])
distances_Alegre$distance_to_next_nearest_edge[i] <- Rfast::nth(distance.temp,2,descending=F)
}
## Tidyverse version (I think):
# distances_Alegre %>%
# rowwise() %>%
# mutate(nearest_dist = min(c_across(ends_with("_edge"))),
# next_nearest_dist = nth(sort(c_across(ends_with("_edge"))), 2))
# The tidyverse version is maybe a bit less readable for this one
Porto_Alegre_1ha_join <- left_join(Porto_Alegre_simple,distances_Alegre)
# Dimona 1-ha fragment 2107 ------------------------------------------
distance_to_N_edge <- NULL # same as above
distance_to_E_edge <- NULL
distance_to_W_edge <- NULL
distance_to_S_edge <- NULL
distance_to_nearest_edge <- NULL # same as above
distance_to_next_nearest_edge <- NULL
distance_to_N_edge <- Dimona_2107_1ha_simple$y + 50
distance_to_E_edge <- Dimona_2107_1ha_simple$x
distance_to_W_edge <- 100-Dimona_2107_1ha_simple$x
distance_to_S_edge <- 50-Dimona_2107_1ha_simple$y
ha_id_number <- Dimona_2107_1ha_simple$ha_id_number
distances_Dimona_2107 <- data.frame(ha_id_number,distance_to_N_edge,distance_to_E_edge, # form df containing distances and ha_id_number
distance_to_W_edge,distance_to_S_edge)
for (i in 1:length(Dimona_2107_1ha_simple$x)) {
distances_Dimona_2107$distance_to_nearest_edge[i] <- apply(distances_Dimona_2107
[i,2:5],1,FUN=min)
distance.temp <- as.matrix(distances_Dimona_2107[i,2:5])
distances_Dimona_2107$distance_to_next_nearest_edge[i] <-Rfast::nth(distance.temp,2,descending=F)
}
Dimona_2107_1ha_join <- left_join(Dimona_2107_1ha_simple,distances_Dimona_2107) # join dfs
# plot data to check for weirdness
ggplot(Dimona_2107_1ha_join, aes(x = x, y = y, color = distance_to_N_edge, size = distance_to_E_edge)) + geom_point(alpha = 0.7)
# Check that y=0 is on the north edge and x=0 is the east edge
# re-combine plots --------------------------------------------------------
Colosso_1ha
Dimona_2107_1ha_join
Porto_Alegre_1ha_join
Dimona_2107_1ha_join$column <- as.double(Dimona_2107_1ha_join$column) # type error, wouldn't let me bind rows due to conflict
Porto_Alegre_1ha_join$column <- as.double(Porto_Alegre_1ha_join$column)
Colosso_1ha$column <- as.double(Colosso_1ha$column)
xy_dist <-
bind_rows(
Dimona_2107_1ha_join,
Porto_Alegre_1ha_join,
Colosso_1ha
)
full <- right_join(data %>% select(-x, -y), #get rid of old, incorrect coords which don't join up.
xy_dist)
range(full$distance_to_nearest_edge)
nrow(full)
# data validation ---------------------------------------------------------
# A place for tests and plots of the data to double check that you are getting sensible data.
# plot data:
p <- ggplot(full, aes(x = x, y = y)) + facet_wrap(~plot)
p + geom_point(aes(color = distance_to_nearest_edge)) + scale_color_viridis_c()
p + geom_point(aes(color = distance_to_next_nearest_edge)) + scale_color_viridis_c()
# do we have growth data for all ?
ggplot(full, aes(x = year, y = ht, group = ha_id_number)) + geom_line() + facet_wrap(~plot)
# simple logistic regression --------------------------------------------------------
survival.fit <- glm(surv ~ distance_to_nearest_edge, data=full, family=binomial)
summary(survival.fit) #not the p-values you're looking for!
# back transform log-odds to a probability
log_odds <-survival.fit$coef[2]
probability <- plogis(log_odds)
?plogis()
library(car)
car::Anova(survival.fit) #these are the p-values you want.
confint(survival.fit)
# Not significant results given this data. SE is fairly large, 95% confidence interval overlaps zero
#plot fitted line from model with confidence bands:
library(broom)
plotdf <-
broom::augment(survival.fit,
se_fit = TRUE, #yes, calculate confidence bands
type.predict = "response") #back-transform calculations to probability
?augment()
ggplot(plotdf, aes(x = distance_to_nearest_edge)) +
geom_line(aes(y = .fitted)) +
# confidence bands:
geom_ribbon(aes(ymin = .fitted - .se.fit, ymax = .fitted + .se.fit), alpha =0.4) +
# shows x-values of points along bottom:
geom_rug()
#survival is *higher* near the edge. Interesting!
# Logistic regression for flowering probability --------------------------------
flower.fit <- glm(flwr~distance_to_nearest_edge,data=full,family=binomial)
summary(flower.fit)
log_odds_flwr <- flower.fit$coef[2]
flwr_probability <- plogis(log_odds_flwr)
car::Anova(flower.fit)
confint(flower.fit)
flowerdf <- broom::augment(flower.fit,
se_fit = TRUE,
type.predict = "response")
ggplot(flowerdf, aes(x = distance_to_nearest_edge)) +
geom_line(aes(y = .fitted)) +
geom_ribbon(aes(ymin = .fitted - .se.fit, ymax = .fitted + .se.fit), alpha = 0.4) # alpha modulates transparency
# Some exploratory data visualization, practicing with ggplot ------------------
ggplot(data=full, aes(x=year , y= ht, color=shts))+
geom_point()+
facet_wrap(~plot)
ggplot(data=full, aes(x=year))+
geom_boxplot(aes(y=ht))+
facet_wrap(~plot)
# look at how height distributions changes over time ----6-25-2021--------------
full$year <- as.factor(full$year) # had to do this to plot year as linetype or color..
ggplot(data=full, aes(x=ht, color= year))+
geom_density()+
facet_wrap(~plot)
# easier to see how year-to-year plant height changed within plots
# seems like height distributions are right-skewed, not normal.
ggplot(data=full, aes(x=ht, color=plot))+
geom_density()+
facet_wrap(~year)
# Helps me see how plant height distributions vary among plots within a given year.
# Look at survival using lme4 package and glmer function -----------------------
survival2.fit <- glmer(surv~size_prev+distance_to_nearest_edge+(1|year),data=full,
family=binomial,nAGQ=25) #uses GHQ method, 25 iterations
summary(survival2.fit)
# Trying to find a survival probability for CF habitat
CF_surv <- data %>%
filter(!is.na(surv)) %>%
filter(habitat=="CF") %>%
select(ranch,habitat,ha_id_number,year,plot,surv)
CF.survival <- glm(surv~1,data = CF_surv,family=binomial)
summary(CF.survival)
log_odds_CF <- CF.survival$coef[1]
surv_transform <- plogis(log_odds_CF)
Ha.survival <- glm(surv~1, data=full, family = binomial)
log_odds_1ha <-Ha.survival$coef[1]
surv_1ha <- plogis(log_odds_1ha)
# Unexpected result: 1-ha fragment survival seems to be higher than CF based on
# this intercept-only model.
plot_CF <-
broom::augment(CF.survival,
se_fit = TRUE, #yes, calculate confidence bands
type.predict = "response")
# Visual comparison of the two survival rates
ggplot(plotdf, aes(x = distance_to_nearest_edge)) +
geom_line(aes(y = .fitted)) +
# confidence bands:
geom_ribbon(aes(ymin = .fitted - .se.fit, ymax = .fitted + .se.fit), alpha =0.4) +
geom_hline(yintercept=0.955,color="red")