diff --git a/Day 3/CSC_D3S1 - Raster data in R.Rmd b/Day 3/CSC_D3S1 - Raster data in R.Rmd index 59ff79f..c2f93de 100644 --- a/Day 3/CSC_D3S1 - Raster data in R.Rmd +++ b/Day 3/CSC_D3S1 - Raster data in R.Rmd @@ -33,16 +33,18 @@ library(paletteer) side <- 15 empty_mat <- matrix(0, nrow = side, ncol = side) +empty_mat mat1 <- empty_mat for(i in 1:side) { mat1[,i] <- i } - # look at the matrix, convert it to a raster and plot the raster mat1 -r1 <- terra::rast(mat1) +r1 <- terra::rast(t(mat1)) +r1 terra::ext(r1) <- c(0,side,0,side) # set extent +r1 plot(r1) ``` @@ -53,7 +55,7 @@ Let's do another kind of raster, this time filled with random numbers. mat2 <- matrix(runif(length(empty_mat),0,10), nrow=side, ncol=side) r2 <- terra::rast(mat2) terra::ext(r2) <- c(0,side,0,side) # set extent -plot(r2, col = paletteer::paletteer_c("grDevices::Viridis", 10)) +plot(r2, col = paletteer::paletteer_c("grDevices::Viridis", 100)) ``` @@ -81,6 +83,7 @@ r1^2 1 / r1 stack <- c(r1, r1+5, r1^2, 1/r1) +stack names(stack) <- c("add 5", "square root", "square", "division") plot(stack) @@ -98,11 +101,12 @@ Let's read in the elevation model of Finland included in the data and inspect it elevation <- terra::rast("../Data/raster/Elevation_10km.tif") summary(elevation) -ext(elevation) # this is the boundinx box of the raster +ext(elevation) # this is the bounding box of the raster plot(elevation, col = paletteer::paletteer_c("viridis::viridis", 100)) elevation[elevation == 0] <- NA plot(elevation, col = paletteer::paletteer_c("viridis::viridis", 100)) +summary(elevation) ``` We can compute statistics from the elevation layer using *global()* function. @@ -114,6 +118,7 @@ global(elevation, median, na.rm=TRUE) global(elevation, range, na.rm=TRUE) global(elevation, sd, na.rm=TRUE) +plot(stack) global(stack, mean) ``` @@ -131,6 +136,15 @@ remember: for CSC notebooks replace ".." with "/home/rstudio/r-spatial-course" * plot a histogram using *hist()* ```{r} +t01 <- rast("../Data/raster/Tmean_Monthly_2005-2014/Tmon_20050101.tif") +t07 <- rast("../Data/raster/Tmean_Monthly_2005-2014/Tmon_20050701.tif") +t10 <- rast("../Data/raster/Tmean_Monthly_2005-2014/Tmon_20051001.tif") + +t <- c(t01, t07, t10) + +global(t, mean, na.rm=TRUE) + + ``` @@ -142,6 +156,13 @@ remember: for CSC notebooks replace ".." with "/home/rstudio/r-spatial-course" ```{r} +plot(t) + +brk <- seq(-11,20, length.out = 10) +brk + +plot(t, col = paletteer_c("scico::turku", 100), breaks = brk) + ``` diff --git a/Day 3/CSC_D3S2 - Raster_data_manipulation.Rmd b/Day 3/CSC_D3S2 - Raster_data_manipulation.Rmd index 0feed39..7e3afc4 100644 --- a/Day 3/CSC_D3S2 - Raster_data_manipulation.Rmd +++ b/Day 3/CSC_D3S2 - Raster_data_manipulation.Rmd @@ -19,9 +19,13 @@ library(tmap) # for CSC notebooks replace ".." with "/home/rstudio/r-spatial-course" admin <- sf::read_sf("../Data/raster/SuomenKuntajako_2021_100k.shp") +plot(admin) + helsinki <- dplyr::filter(admin, NAMEFIN == "Helsinki") plot(admin) +plot(helsinki) + ``` @@ -31,24 +35,58 @@ Read in a list of files. # for CSC notebooks replace ".." with "/home/rstudio/r-spatial-course" files <- list.files("../Data/raster/Tmean_Monthly_2005-2014/", full.names = TRUE) +files temperature <- lapply(files, terra::rast) #read in all files, output is a list + +length(temperature) +temperature[[1]] +temperature[1:5] + + temperature <- do.call("c", temperature) # stack all rasters together +temperature plot(temperature) + + +temperature <- terra::rast(files) +plot(temperature) + ``` Before we do anything, let's make sure both datasets are in the same coordinate reference system. We'll use the CRS of temperature raster stack, because tranforming a raster changes its values. ```{r} -sf::st_crs(helsinki) +( sfcrs <- sf::st_crs(helsinki) ) terra::crs(temperature) +class(sfcrs) +str(sfcrs) +sfcrs$wkt helsinki <- sf::st_transform(helsinki, terra::crs(temperature)) +st_crs(helsinki) + + # we could project the raster with: -# temperature <- terra::project(temperature, sf::st_crs(helsinki)) +temperature <- terra::project(temperature, sf::st_crs(helsinki)$wkt) + +temp <- temperature[[1]] +temp +plot(temp) + +temp_projected <- project(temp, "epsg:4326" ) +temp_reprojected <- project(temp_projected, temp ) + +plot( c(temp, temp_reprojected) ) +c(temp, temp_reprojected) + +all.equal(temp, temp_reprojected) + +plot(temp_projected) +plot(temp_reprojected) ``` @@ -59,6 +97,8 @@ Now we can find out all the values in the raster stack for Helsinki! to do this ```{r} helsinki_terra <- terra::vect(helsinki) helsinki_temps <- terra::extract(temperature, helsinki_terra) +helsinki_temps + helsinki_temps <- dplyr::summarise_all(helsinki_temps, mean, na.rm=TRUE) %>% tidyr::gather(date, temperature, -ID) %>% @@ -67,6 +107,7 @@ helsinki_temps <- dplyr::summarise_all(helsinki_temps, mean, na.rm=TRUE) %>% helsinki_temps plot(helsinki_temps[,3]) lines(helsinki_temps[,3]) + ``` @@ -75,8 +116,10 @@ lines(helsinki_temps[,3]) Aggregation is done by providing the function a factor - how many cells in each direction are used to aggregate. The temperature has a resolution of 10km. Aggregating by factor 4 yields a raster with resolution of 40km (by 40km). ```{r} -temp_40km <- terra::aggregate(temperature, fact = 4, fun = mean) +temp_40km <- terra::aggregate(temperature, fact = 4, fun = mean, na.rm=TRUE) plot(temp_40km) +temp_40km + ``` We can also disaggregate - but the original values cannot be returned! @@ -97,6 +140,8 @@ Resampling can achieve both. But are they the same - no! temp_40km_resampled <- terra::resample(temperature, temp_40km, method = "bilinear") plot(temp_40km_resampled) +temp_40km_resampled +temp_40km plot(temp_40km - temp_40km_resampled) ``` @@ -129,9 +174,8 @@ However, it only runs for a single layer # ntice the message saying it only used the first layer temperature_smoothed <- terra::focal(temperature, - w = 3, - fun = mean, - na.rm=TRUE) + w = 21, + fun = max) plot(temperature_smoothed) plot(temperature[[1]]) @@ -141,9 +185,8 @@ plot(temperature[[1]]) We can also define the neighbourhood in other ways: ```{r} temperature_smoothed <- terra::focal(temperature[[1]], - w = c(3,ncol(temperature)-1), - fun = mean, - na.rm=TRUE) + w = c(nrow(temperature)-1, 3), + fun = mean) plot(temperature_smoothed) # remove values from the sea @@ -158,6 +201,7 @@ or with matrices: ```{r} weight_matrix <- matrix(1, nrow = nrow(temperature)-1, ncol = 1) +weight_matrix temperature_smoothed <- terra::focal(temperature[[1]], w = weight_matrix, @@ -180,15 +224,24 @@ The final part of this lesson shows reclassification. Here we divide Finland int But before that, lets compute the mean temeprature at each cell over the 10-year period 2005-2014, and find "optimal" class intervals. ```{r} +plot(temperature) temperature_mean <- mean(temperature) +temperature_mean +plot(temperature_mean) + + library(classInt) -intervals <- classIntervals(terra::values(temperature_mean), +intervals <- classInt::classIntervals(terra::values(temperature_mean), n = 4, style = "quantile") +# quantile(values(temperature_mean), probs = c(0.2, 0.4, 0.6, 0.8), na.rm=TRUE) + +intervals + reclass_matrix <- matrix(c(-Inf, intervals$brks[1], 1, intervals$brks[1], intervals$brks[2], 2, intervals$brks[2], intervals$brks[3], 3, @@ -196,6 +249,7 @@ reclass_matrix <- matrix(c(-Inf, intervals$brks[1], 1, intervals$brks[4], Inf, 5), ncol = 3, byrow = TRUE) +reclass_matrix temperature_classified <- terra::classify(temperature_mean, reclass_matrix) @@ -209,7 +263,8 @@ And finally, let's save that raster for later use. ```{r} # for CSC notebooks replace ".." with "/home/rstudio/r-spatial-course" terra::writeRaster(temperature_classified, - filename = "../Data/raster/temperature_classified.tif") + filename = "../Data/raster/temperature_classified.tif", + overwrite = TRUE) ``` @@ -225,6 +280,18 @@ terra::writeRaster(temperature_classified, * find the mean, min, and max temperatures for that municipality over the period 2005-2014 (the full raster stack) ```{r} +kolari <- admin %>% + filter(NAMEFIN == "Kolari") + +temp_kolari <- crop(temperature, kolari) + +values <- global(temp_kolari, mean) +mean_temp <- mean(values$mean) + + +values <- terra::extract(temp_kolari, vect(kolari)) +mean(as.matrix(values[,-1]),na.rm=TRUE) + ``` @@ -237,6 +304,15 @@ terra::writeRaster(temperature_classified, ```{r} +focal_mean <- focal(temperature_mean, + w = matrix(c(0,1,0,1,-4,1,0,1,0), nrow=3)) + +plot(focal_mean) + +focal_mean <- crop(focal_mean, kolari) +plot(c(focal_mean, mean(temp_kolari))) + +plot(focal_mean - mean(temp_kolari)) ``` @@ -248,6 +324,18 @@ terra::writeRaster(temperature_classified, ```{r} +reclass_matrix <- matrix( c(-Inf, mean(values(temp_kolari), na.rm=TRUE), factor("cold", levels = c("hot", "cold")), + mean(values(temp_kolari), na.rm=TRUE), Inf, factor("hot", levels = c("hot", "cold"))), + ncol = 3, + byrow = TRUE) + +reclass_matrix + +temp_kolari_classified <- classify(mean(temp_kolari), + reclass_matrix) + +plot(mean(temp_kolari)) +plot(temp_kolari_classified) ``` diff --git a/Day 3/CSC_D3S3 - Map algebra.Rmd b/Day 3/CSC_D3S3 - Map algebra.Rmd index 5e9249d..516a826 100644 --- a/Day 3/CSC_D3S3 - Map algebra.Rmd +++ b/Day 3/CSC_D3S3 - Map algebra.Rmd @@ -16,10 +16,14 @@ library(dplyr) files <- list.files("../Data/raster/RRmon_Monthly_2005-2014/", full.names = TRUE) -rainfall <- lapply(files, terra::rast) #read in all files, output is a list -rainfall <- do.call("c", rainfall) # stack all rasters together +# rainfall <- lapply(files, terra::rast) #read in all files, output is a list +# rainfall <- do.call("c", rainfall) # stack all rasters together + +rainfall <- terra::rast(files) plot(rainfall) +rainfall + ``` @@ -31,10 +35,13 @@ Local functions are those which deal with single cells at the time, without cons ```{r} rainfall / 30 # approximate per-day rainfall, assuming 30-day months +plot(rainfall / 30) mean(rainfall) # average per-cell rainfall +plot(mean(rainfall)) max(rainfall) # maximum per-cell rainfall +plot(max(rainfall)) rainfall_quantiles <- quantile(rainfall, probs = c(0.05, 0.5, 0.95)) # 5th, 50th, and 95th percentiles @@ -52,7 +59,7 @@ cv <- function(x, na.rm=FALSE) { return(stdev/mn) } -# cv(rainfall) this fails! +# cv(rainfall) #this fails! rainfall_cv <- terra::app(rainfall, fun = cv) @@ -74,19 +81,19 @@ rainfall_cv_50km <- terra::focal(rainfall, plot(rainfall_cv_50km) - - ``` Focal does not (yet) support multiple layers, but we can overcome this issue with some tricks: ```{r} - -rainfall_cv_50km <- lapply(1:12, function(i) { +nlayers <- terra::nlyr(rainfall) +rainfall_cv_50km <- lapply(1:nlayers, + function(i) { cv_50km <- terra::focal(rainfall[[i]], w = 5, fun = cv) return(cv_50km) }) %>% do.call("c", .) + plot(rainfall_cv_50km) ``` @@ -99,7 +106,7 @@ Zonal functions have not been addressed yet. In zonal operations, some function ```{r} # for CSC notebooks replace ".." with "/home/rstudio/r-spatial-course" temperature_zones <- rast("../Data/raster/temperature_classified.tif") - +plot(temperature_zones) ( zonal_rainfall <- terra::zonal(rainfall, temperature_zones, fun = mean) ) @@ -120,14 +127,18 @@ What id we'd like to know the mean rainfall in each of Finland's municipalities? ```{r} # for CSC notebooks replace ".." with "/home/rstudio/r-spatial-course" -admin <- sf::read_sf("../Data/raster/SuomenKuntajako_2021_100k.shp") +admin <- sf::read_sf("../Data/raster/SuomenKuntajako_2021_100k.shp") %>% + dplyr::mutate(NATCODE = as.numeric(NATCODE)) admin_raster <- terra::rasterize(terra::vect(admin), rainfall, # this is the target raster which is used for rasterization field = "NATCODE") # which attribute we're rasterizing +plot(admin_raster) +table(values(admin_raster) == 604) # check how many cells have NATCODE == 604 -rainfall_admin <- terra::zonal(mean(rainfall), admin_raster, fun = mean) +rainfall_admin <- terra::zonal(mean(rainfall), admin_raster, fun = mean, na.rm=TRUE) +rainfall_admin rainfall_admin <- dplyr::left_join(dplyr::select(admin, NATCODE, NAMEFIN), rainfall_admin, @@ -166,16 +177,29 @@ remember: for CSC notebooks replace ".." with "/home/rstudio/r-spatial-course" * Convert the temperatures in that layer from Celcius to Kelvin. ```{r} +r <- rast("/home/rstudio/r-spatial-course/Data/raster/Tmean_Monthly_2005-2014/Tmon_20051001.tif") + +r + 273.15 +plot(r + 273.15) ``` 2. -* Create a zone (rasterize) for a munucipality of your choosing (bigger ones better - use only that municipality polygon when rasterizing) +* Create a zone (rasterize) for a municipality of your choosing (bigger ones better - use only that municipality polygon when rasterizing) * Compute some zonal statistics for that municipality. ```{r} +kajaani <- dplyr::filter(admin, NAMEFIN == "Kajaani") + +kajaani_r <- rasterize(vect(kajaani), r) +kajaani_r[is.na(kajaani_r)] <- 0 + +zonal_stats <- terra::zonal(r, kajaani_r, fun = mean, na.rm=TRUE) +zonal_stats + + ``` diff --git a/Day 3/CSC_D3S4 - Spatial modelling with rasters.Rmd b/Day 3/CSC_D3S4 - Spatial modelling with rasters.Rmd index b0944d0..94252fa 100644 --- a/Day 3/CSC_D3S4 - Spatial modelling with rasters.Rmd +++ b/Day 3/CSC_D3S4 - Spatial modelling with rasters.Rmd @@ -46,6 +46,7 @@ Let's run a linear model looking at the changes in temperature and rainfall in 2 ### Define functions to fit a model and extract relevant information ### # For obtaining estimated model coefficients fun_model <- function(x) { + if(all(is.na(x))) return(c(NA,NA,NA,NA)) n <- 1:length(x) @@ -58,6 +59,7 @@ fun_model <- function(x) { } rainfall_coef <- terra::app(rainfall, fun = fun_model) +rainfall_coef names(rainfall_coef) <- c("Intercept", "Slope", "p_int", "p_slope") plot(rainfall_coef) @@ -73,11 +75,11 @@ Let's look at the trend in a single month, February. ```{r} - - subset_vect <- seq(2, by = 12, length.out = 10) -feb_temp <- subset(temperature, subset = subset_vect) -feb_rain <- subset(rainfall, subset = subset_vect) +subset_vect + +feb_temp <- terra::subset(temperature, subset = subset_vect) +feb_rain <- terra::subset(rainfall, subset = subset_vect) rainfall_coef <- terra::app(feb_rain, fun = fun_model) @@ -103,7 +105,7 @@ model_data <- as.data.frame(temperature_coef, dplyr::as_tibble() summary(model_data) - +model_data ``` @@ -119,10 +121,13 @@ july2011 <- temperature[["Tmon_20110701"]] july2011 <- terra::project(july2011, elevation, method = "near") stack <- c(july2011, elevation) +plot(stack) data <- terra::as.data.frame(stack, xy = TRUE) -trainset <- sample(1:nrow(data), floor(nrow(data)*0.75)) %>% +data +trainset <- sample(1:nrow(data), floor(nrow(data)*0.50)) %>% sort() +trainset model_fit <- lm(Tmon_20110701 ~ x + y + Elevation_10km, data = data[trainset, ]) @@ -132,30 +137,53 @@ summary(model_fit) # very good! # Evaluate with the trainig data prediction <- predict(model_fit, newdata = data[-trainset, ]) -cor(prediction, data$Tmon_20110701[-trainset]) +cor(prediction, data$Tmon_20110701[-trainset])^2 ``` With a model fitted, we can apply this to a raster stack. But before that, we need to get x and y coordinate rasters in the stack. We can *rasterize* a point data set for that. ```{r} -xy_raster <- data %>% - dplyr::mutate(x_coord = x, - y_coord = y) %>% - terra::vect(geom = c("x_coord", "y_coord")) %>% - terra::rasterize(july2011, field = c("x", "y")) + +# xy_raster <- data %>% +# dplyr::mutate(x_coord = x, +# y_coord = y) %>% +# terra::vect(geom = c("x_coord", "y_coord")) %>% +# terra::rasterize(july2011, field = c("x", "y")) + +temp <- stack +values(temp) <- 1 +coords <- terra::crds(temp) +head(coords) + +x_raster <- matrix(coords[,1], ncol = ncol(temp), byrow=TRUE) %>% + terra::rast() +plot(x_raster) + +y_raster <- matrix(coords[,2], ncol = ncol(temp), byrow=TRUE) %>% + terra::rast() +plot(y_raster) +x_raster + +crs(x_raster) <- crs(stack) +crs(y_raster) <- crs(stack) + +ext(x_raster) <- ext(stack) +ext(y_raster) <- ext(stack) +x_raster + +xy_raster <- c(x_raster, y_raster) names(xy_raster) <- c("x", "y") prediction_stack <- c(xy_raster, stack) +prediction_stack prediction_raster <- terra::predict(prediction_stack, model_fit) plot(c(prediction_raster, july2011)) - - ``` @@ -182,10 +210,12 @@ points(random_points) # extract data and fit a model fitting_data <- terra::extract(stack, random_points, xy = TRUE) %>% dplyr::filter(!is.nan(Tmon_20110701)) +fitting_data model_fit_points <- lm(Tmon_20110701 ~ x + y + Elevation_10km, data = fitting_data) - +summary(model_fit_points) +stack prediction_interpolated <- terra::interpolate(stack, model_fit_points) @@ -193,7 +223,7 @@ plot(prediction_interpolated) points(random_points) -plot(c(prediction_raster, prediction_interpolated, july2011)) +plot(c(prediction_interpolated, july2011)) ``` @@ -208,11 +238,39 @@ plot(c(prediction_raster, prediction_interpolated, july2011)) * **predict** * plot +### CTRL + ALT + i +```{r} + + +lakes <- project(lakes, prediction_stack, method = "near") + +prediction_stack <- c(lakes, prediction_stack) + +data_update <- as.data.frame(prediction_stack, xy = TRUE) + +trainset <- sample(1:nrow(data_update), floor(nrow(data_update)*0.75)) %>% sort() + +model_fit <- lm(Tmon_20110701 ~ x + y + Elevation_10km + Lake_10km, data = data_update[trainset, ]) + +predicted <- predict(prediction_stack, model_fit) + +plot(predicted) +``` + 2. * Extract values from prediction raster, and july2011 * compute correlation and mean error (*pred - obs*) -* plot a scatterplot with july2011 values on x-axis, and mean error on y-axis. +* plot a scatterplot with july2011 values on x-axis, and error on y-axis. + +```{r} +cor( values(predicted), values(july2011), use = "complete.obs") + +mean(values(predicted) - values(july2011) , na.rm=TRUE) + +plot( values(july2011), values(predicted) - values(july2011) ) +abline(a=0, b=0, col="red") +```