Annex I: Compendium of R scripts
This annex contains the complete set of R scripts used to run the process of mapping soil nutrients and associated soil properties.
Script 0: Introduction to R
# Introduction to R
# 0. Playground =================================================
# Learn important keyboard shortcuts:
# Ctrl + Enter to run code
# Tab after typing the first three characters of a function
# name to autocomplete
# F1 to access the help
# Explore the use of <-, $, [], ==, !=, c(), :,
# data.frame(), list(), as.factor()
a <- 10:15
a
a[2]
a[2:3]
b <- c("1", "a", a)
b
length(b)
df <- data.frame(column_a = 1:8, column_b = b)
df
df[, 1]
df$column_b
as.numeric(df$column_b)
plot(df)
df[1:3, ]
df[, 1]
as.factor(b)
d <- list(a, b, df)
d
names(d)
names(d) <- c("numeric_vector", "character_vector", "dataframe")
d[[1]]
d$numeric_vector
d
a == b
a != b
# 1. Set working directory ======================================
setwd("C:/GIT/Digital-Soil-Mapping/")
# 2. Install and load packages ==================================
# Install and load the readxl, tidyverse, and data.table packages
install.packages("tidyverse")
install.packages("readxl")
install.packages("data.table")
library(tidyverse)
library(readxl)
library(data.table)
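# Optional (a small sketch, not part of the original exercise):
# install only the packages that are still missing
pkgs <- c("tidyverse", "readxl", "data.table")
new_pkgs <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
if (length(new_pkgs) > 0) install.packages(new_pkgs)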
# 3. Import a spreadsheet =======================================
## 3.1 Read the MS Excel file -----------------------------------
# Read the soil_data.xlsx file, spreadsheet 2, using read_excel
read_excel(path = "01-Data/soil_data.xlsx", sheet = 2)
## 3.2 Read the csv file with the native function ---------------
# 01-Data/soil_profile_data.csv
read.csv("01-Data/soil_profile_data.csv")
## 3.3 Read the csv file with the tidyverse function ------------
read_csv("01-Data/soil_profile_data.csv")
## 3.4 Read the csv file with the data.table function -----------
fread("01-Data/soil_profile_data.csv")
## 3.5 Assign the dataframe to an object called dat -------------
dat <- read_csv("01-Data/soil_profile_data.csv")
dat
# 4. Tidyverse functions ========================================
## 4.1 Select id_prof, id_hor, top, bottom, ph_h2o, cec ---------
dat_1 <- dat %>%
  select(id_prof, id_hor, top, bottom, ph_h2o, cec)
## 4.2 Filter: pick observations by their values ----------------
# filter observations with cec > 30 cmolc/kg
dat_2 <- dat_1 %>%
  filter(cec > 30)
dat_2
## 4.3 Mutate: create a new variable ----------------------------
# thickness = bottom - top
dat_3 <- dat_2 %>%
  mutate(thickness = bottom - top)
## 4.4 Group_by and summarise -----------------------------------
# group by variable id_prof
# summarise taking the mean of ph_h2o and cec
dat_4 <- dat_3 %>%
  group_by(id_prof) %>%
  summarise(mean_ph = mean(ph_h2o),
            mean_cec = mean(cec))
## 4.5 Reshape the table using pivot_longer ---------------------
# use dat_3
# put the names of the variables ph_h2o, cec and thickness in the
# column soil_property and keep the rest of the table.
# Save in dat_5
dat_5 <- dat_3 %>%
  pivot_longer(ph_h2o:thickness, names_to = "soil_property",
               values_to = "value")
## 4.6 Join the table soil_phys_data030.csv with dat_3 ----------
# Load soil_phys_data030.csv (in the 01-Data folder)
# Join its columns with dat_3, keeping all the rows of dat_3
# save the result as dat_6
phys <- read_csv("01-Data/soil_phys_data030.csv")
phys
phys <- phys %>% rename(id_prof = "ProfID")
dat_6 <- dat_3 %>%
  left_join(phys)
# or
dat_6 <- phys %>%
  right_join(dat_3)
# 5. Data visualization with ggplot2 ============================
## 5.1 1D plot: histograms --------------------------------------
# histograms of cec and ph_h2o
ggplot(dat_3, aes(x = cec)) + geom_histogram()
ggplot(dat_3, aes(x = ph_h2o)) + geom_histogram()
## 5.2 2D plot: scatterplot -------------------------------------
# Scatterplot bottom vs. ph_h2o
ggplot(dat_3, aes(x = bottom, y = ph_h2o)) +
  geom_point()
# add a fitting line
ggplot(dat_3, aes(x = bottom, y = ph_h2o)) +
  geom_point() +
  geom_smooth(method = "lm")
## 5.3 3D plot: scatterplot -------------------------------------
# Scatterplot bottom vs. ph_h2o, add cec as color and size
# inside the function aes()
ggplot(dat_3,
       aes(x = bottom, y = ph_h2o, color = cec, size = cec)) +
  geom_point()
# 6. Geospatial data with terra =================================
## Load packages (install them if needed)
library(terra)
## 6.1 Load a raster and a vector layer -------------------------
# Load 01-Data/Macedonia/grass.tif using the rast() function,
# then plot it
# Load 01-Data/Macedonia/SoilTypes.shp using the vect() function
# and plot it
# explore the attributes of these layers
r <- rast("01-Data/Macedonia/grass.tif")
plot(r)
v <- vect("01-Data/Macedonia/SoilTypes.shp")
plot(v)
## 6.2 Reproject a raster and a vector layer --------------------
# Check the current CRS (EPSG) of the raster and the vector.
# Find a *projected* CRS at http://epsg.io for Macedonia and
# copy the number
# Check the arguments of the function project (?project) that
# need to be defined
# Save the new objects as r_proj and v_proj
# plot both objects
r_proj <- project(x = r, y = "epsg:6204", method = "bilinear",
                  res = 250)
plot(r_proj)
v_proj <- project(x = v, y = "epsg:6204")
plot(v_proj, add = TRUE)
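# Optional check (a small sketch, not part of the original
# exercise): both layers should now report the same projected CRS
crs(r_proj, describe = TRUE)$code # expected "6204"
crs(v_proj, describe = TRUE)$code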
## 6.3 Cropping and masking a raster ----------------------------
# Compute the area of the polygons in v_proj (search for a
# function) and assign the values to a new column named area
# select the largest polygon using [], $, == and the max() func.
# and save it as pol
# crop the raster with pol using the crop() function and save
# it as r_pol
# mask the raster r_pol with the polygon pol and save it
# with the same name
# plot each result
v_proj$area <- expanse(v_proj, unit = "ha")
pol <- v_proj[v_proj$area == max(v_proj$area)]
plot(pol)
r_pol <- crop(r_proj, pol)
plot(r_pol)
plot(pol, add = TRUE)
r_pol <- mask(r_pol, pol)
plot(r_pol)
## 6.4 Replace values in a raster by filtering their cells ------
# Explore the following link to understand how terra manages
# cell values:
# https://rspatial.org/terra/pkg/4-algebra.html
# Replace values lower than 5 in r_pol by 0
r_pol[r_pol$grass < 5] <- 0
plot(r_pol)
## 6.5 Rasterize a vector layer ---------------------------------
# Use the rasterize() function to convert v_proj to a raster
# Use r_proj as the reference raster
# Use the field Symbol to assign cell values, and plot the new map
v_class <- rasterize(x = v_proj, y = r_proj, field = "Symbol")
plot(v_class)
v_class
activeCat(v_class) <- 1
## 6.6 Extracting raster values using points --------------------
# Convert dat_6 to spatial points using the vect() function
# (check the help of vect())
# Note that the EPSG code is 6204
# Save the points as s
# Plot s and r_proj together in the same map (argument add = TRUE)
# Extract the values of the raster using the extract() function
# (check the help)
# Remove the ID column of the extracted values
# merge the extracted data with s using the cbind() function
# Convert s to a dataframe
s <- vect(dat_6, geom = c("x", "y"), crs = "epsg:6204")
plot(r_proj)
plot(s, add = TRUE)
x <- extract(r_proj, s, ID = FALSE)
s <- cbind(s, x)
d <- as.data.frame(s)
d
# GGally::ggscatmat(d)
## 6.7 Zonal statistics using polygons and rasters --------------
# Use the extract() func. to estimate the mean value of r_proj
# at each polygon
# Use the fun= argument (check the help)
# Use the cbind() func. to merge v_proj and the extracted values
# convert v_proj to a dataframe
# Create a ggplot boxplot (geom_boxplot) with x = Symbol
# and y = grass
x <- extract(r_proj, v_proj, fun = mean, ID = FALSE)
v_proj <- cbind(v_proj, x)
d <- as_tibble(v_proj)
d %>%
  ggplot(aes(x = Symbol, y = grass, fill = Symbol)) +
  geom_boxplot() +
  ylab("Grass probability")
## 6.8 Bonus track: use zonal() function ------------------------
zonal(r_proj, v_class, na.rm = TRUE)
Script 2: Data preparation
#________________________________________________________________
#
# Digital Soil Mapping
# Soil Profile Data
# Cleaning and Processing
#
# GSP-Secretariat
# Contact: Isabel.Luotto@fao.org
#          Marcos.Angelini@fao.org
#________________________________________________________________
# Empty environment and cache
rm(list = ls())
gc()
# Content of this script ========================================
# The goal of this script is to organise the soil data for
# mapping, including:
#
# 0 - User-defined variables
# 1 - Set working directory and load necessary packages
# 2 - Import national data
# 3 - Select useful columns
# 4 - Quality check
# 5 - Estimate BD using pedotransfer function
# 6 - Harmonize soil layers
# 7 - Add chemical properties from additional dataset
# 8 - Plot and save results
#________________________________________________________________
# 0 - User-defined variables ====================================
wd <- "C:/GIT/GSNmap-TM/Digital-Soil-Mapping"
# wd <- 'C:/Users/hp/Documents/GitHub/GSNmap-TM/Digital-Soil-Mapping'
# 1 - Set working directory and load necessary packages =========
setwd(wd) # change the path accordingly
# (setwd(dirname(rstudioapi::getActiveDocumentContext()$path)))
library(tidyverse) # for data management and reshaping
library(readxl) # for importing excel files
library(mapview) # for seeing the profiles in a map
library(sf) # to manage spatial data (shp vectors)
library(aqp) # for soil profile data
library(mpspline2) # for horizon harmonization
library(plotly) # interactive plots
# 2 - Import national data ======================================
# Save your national soil dataset in the data folder /01-Data
# as a .csv or .xlsx file
## 2.1 - for .xlsx files ----------------------------------------
# Import horizon data
# hor <- read_excel("01-Data/soil_data.xlsx", sheet = 2)
# # Import site-level data
# site <- read_excel("01-Data/soil_data.xlsx", sheet = 1)
# chem <- read_excel("01-Data/soil_data.xlsx", sheet = 2)
# phys <- read_excel("01-Data/soil_data.xlsx", sheet = 3)
## 2.2 - for .csv files -----------------------------------------
# Import horizon data
hor <- read_csv(file = "01-Data/soil_profile_data.csv",
                show_col_types = FALSE)
chem <- read_csv(file = "01-Data/soil_chem_data030.csv",
                 show_col_types = FALSE)
phys <- read_csv(file = "01-Data/soil_phys_data030.csv",
                 show_col_types = FALSE)
site <- select(hor, id_prof, x, y) %>% unique()
hor <- select(hor, id_prof, id_hor, top:cec)
# change names of key columns
names(site)
names(site)[1] <- "ProfID"
names(hor)
names(hor)[1] <- "ProfID"
names(hor)[2] <- "HorID"
# scan the data
summary(site)
summary(hor)
# 3 - Select useful columns =====================================
## 3.1 - select AND RENAME columns ------------------------------
hor <- select(hor, ProfID, HorID, top, bottom, ph = ph_h2o,
              k, soc, bd, cec)
# 4 - Quality check =============================================
## 4.1 - Check locations ----------------------------------------
# Macedonian State Coordinate System EPSG:6204
# https://epsg.io/6204
site %>%
  # convert to spatial object
  st_as_sf(coords = c("x", "y"), crs = 4326) %>%
  # visualise in an interactive map
  mapview(zcol = "ProfID", cex = 3, lwd = 0.1)

site_sub <- site[site$ProfID == '2823',]
site_sub %>%
  # convert to spatial object
  st_as_sf(coords = c("x", "y"), crs = 4326) %>%
  # visualise in an interactive map
  mapview(zcol = "ProfID", cex = 3, lwd = 0.1)
# profile 2823 is wrongly located, so let's remove it
site <- filter(site, ProfID != 2823)
## 4.2 - Convert data into a Soil Profile Collection ------------
aqp::depths(hor) <- ProfID ~ top + bottom
hor@site$ProfID <- as.numeric(hor@site$ProfID)
site(hor) <- left_join(site(hor), site)
profiles <- hor
profiles
## 4.3 - plot first 20 profiles using pH as color ---------------
plotSPC(x = profiles[1:20], name = "pH", color = "ph",
name.style = "center-center")
## 4.4 - check data integrity -----------------------------------
# A valid profile is TRUE if all of the following criteria are
# false:
# + depthLogic : boolean, errors related to depth logic
# + sameDepth : boolean, errors related to same top/bottom depths
# + missingDepth : boolean, NA in top / bottom depths
# + overlapOrGap : boolean, gaps or overlap in adjacent horizons
aqp::checkHzDepthLogic(profiles)

# Identify non-valid profiles
dl <- checkHzDepthLogic(profiles)
dl
dl[dl$depthLogic == TRUE | dl$sameDepth == TRUE |
     dl$missingDepth == TRUE | dl$overlapOrGap == TRUE, "ProfID"]
# visualize some of these profiles by their ProfID
subset(profiles, grepl(6566, ProfID, ignore.case = TRUE))
subset(profiles, grepl(7410, ProfID, ignore.case = TRUE))
subset(profiles, grepl(8002, ProfID, ignore.case = TRUE))
## 4.5 - keep only valid profiles -------------------------------
clean_prof <- HzDepthLogicSubset(profiles)
metadata(clean_prof)$removed.profiles
# write_rds(clean_prof, "01-Data/soilProfileCollection.rds")
## 4.6 convert soilProfileCollection to a table -----------------
dat <- left_join(clean_prof@site, clean_prof@horizons)
dat <- select(dat, ProfID, HorID, x, y, top, bottom, ph:cec)
# 5 - Estimate BD using pedotransfer functions ==================
# create the function with all PTFs
method_names <- c("Saini1996", "Drew1973", "Jeffrey1979",
                  "Grigal1989", "Adams1973",
                  "Honeyset_Ratkowsky1989")

estimateBD <- function(SOC = NULL, method = NULL) {
  OM <- SOC * 1.724
  BD <- switch(method,
               "Saini1996" = 1.62 - 0.06 * OM,
               "Drew1973" = 1 / (0.6268 + 0.0361 * OM),
               "Jeffrey1979" = 1.482 - 0.6786 * (log(OM)),
               "Grigal1989" = 0.669 + 0.941 * exp(1)^(-0.06 * OM),
               "Adams1973" = 100 / (OM / 0.244 + (100 - OM) / 2.65),
               "Honeyset_Ratkowsky1989" = 1 / (0.564 + 0.0556 * OM),
               stop("Invalid method specified."))
  return(BD)
}
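# Quick usage example (a sketch, not part of the original script):
# estimate BD for a horizon with 2 % SOC using two of the methods
estimateBD(SOC = 2, method = "Honeyset_Ratkowsky1989")
estimateBD(SOC = 2, method = "Drew1973")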
# 5.1 - Select a pedotransfer function --------------------------
# Create a test dataset with BD and SOC data
BD_test <- data.frame(SOC = dat$soc, BD_observed = dat$bd)

# Remove missing values
BD_test <- BD_test[complete.cases(BD_test),]
BD_test <- na.omit(BD_test)

# 5.2 - Estimate BD for a subset using the pedotransfer
# functions -----------------------------------------------------
for (i in method_names) {
  BD_test[[i]] <- estimateBD(BD_test$SOC, method = i)
}
# Print the resulting data frame
BD_test
## 5.3 Compare results ------------------------------------------
# Observed values:
summary(BD_test)
# Compare data distributions for observed and predicted BD
plot.bd <- BD_test %>%
  select(-SOC) %>%
  pivot_longer(cols = c("BD_observed", "Saini1996", "Drew1973",
                        "Jeffrey1979", "Grigal1989", "Adams1973",
                        "Honeyset_Ratkowsky1989"),
               names_to = "Method", values_to = "BD") %>%
  ggplot(aes(x = BD, color = Method)) +
  geom_density()
plot.bd
# Dynamic plot with plotly
ggplotly(plot.bd)
ggplotly(plot.bd) %>%
  layout(hovermode = "x")
# Plot the selected function again
BD_test %>%
  select(-SOC) %>%
  pivot_longer(cols = c("BD_observed", "Honeyset_Ratkowsky1989"),
               names_to = "Method", values_to = "BD") %>%
  ggplot(aes(x = BD, color = Method)) +
  geom_density() + xlim(c(0, 2.5))
# Same dynamic plot
ggplotly(BD_test %>%
           select(-SOC) %>%
           pivot_longer(cols = c("BD_observed",
                                 "Honeyset_Ratkowsky1989"),
                        names_to = "Method", values_to = "BD") %>%
           ggplot(aes(x = BD, color = Method)) +
           geom_density() + xlim(c(0, 2.5))) %>%
  layout(hovermode = "x")
## 5.4 Estimate BD for the missing horizons ---------------------
dat$bd[is.na(dat$bd)] <-
  estimateBD(dat[is.na(dat$bd),]$soc,
             method = "Honeyset_Ratkowsky1989")
# Explore the results
summary(dat$bd)

g <- BD_test %>%
  select(-SOC) %>%
  pivot_longer(cols = c("BD_observed"),
               names_to = "Method", values_to = "BD") %>%
  ggplot(aes(x = BD, color = Method)) +
  geom_density() +
  xlim(c(0, 2.5))
g + geom_density(data = dat,
                 aes(x = bd, color = "Predicted +\n observed"))
## 5.5 - Explore outliers ---------------------------------------
# Outliers should be carefully explored and compared with
# literature values. Only if it is clear that outliers represent
# impossible or highly unlikely values should they be removed as
# errors.
#
# Carbon content higher than 15% is only typical of organic soils
# (histosols), so we will remove all atypically high SOC values
# as outliers
summary(dat$soc)
na.omit(dat$ProfID[dat$soc > 10])
dat$ProfID[dat$soc > 10][!is.na(dat$ProfID[dat$soc > 10])]

dat <- dat[dat$ProfID != 6915,]
dat <- dat[dat$ProfID != 7726,]

dat <- dat[!(dat$ProfID %in%
               dat$ProfID[dat$soc > 10][!is.na(dat$ProfID[dat$soc > 10])]),]

# Explore bulk density data, identify outliers
# remove layers with bulk density < 1 g/cm^3
low_bd_profiles <- na.omit(dat$ProfID[dat$bd < 1])
dat <- dat[!(dat$ProfID %in% low_bd_profiles),]

# Explore data, identify outliers
x <- pivot_longer(dat, cols = ph:cec, values_to = "value",
                  names_to = "soil_property")
x <- na.omit(x)
ggplot(x, aes(x = soil_property, y = value,
              fill = soil_property)) +
  geom_boxplot() +
  facet_wrap(~soil_property, scales = "free")
# 6 - Harmonize soil layers =====================================
source("03-Scripts/.spline_functions.R")
## 6.1 - Set target soil properties and depths ------------------
names(dat)
dat <- select(dat, ProfID, HorID, x, y, top, bottom,
              ph, k, soc, bd, cec)
target <- c("ph", "k", "soc", "bd", "cec")
depths <- c(0, 30)
## 6.2 - Create standard layers ---------------------------------
splines <- apply_mpspline_all(df = dat, properties = target,
                              depth_range = depths)
summary(splines)
# merge splines with x and y
d <- unique(select(dat, ProfID, x, y))
d <- left_join(d, splines)
# 7 - Harmonize units ===========================================
# Harmonize units if different from target units
# Mandatory soil properties and corresponding units:
# Total N - ppm
# Available P - ppm
# Available K - ppm
# Cation exchange capacity - cmolc/kg
# pH
# SOC - %
# Bulk density - g/cm3
# Soil fractions (clay, silt and sand) - %
# Units of the soil profile data (dataframe d)
head(d) # pH; K cmolc/kg; SOC %; BD g/cm3; CEC cmolc/kg
# K => convert cmolc/kg to ppm: 1 cmolc/kg of K corresponds to
# 0.01 mol/kg * 39.096 g/mol = 0.39096 g/kg = 390.96 ppm,
# hence K * 10 * 39.096
d$k_0_30 <- d$k_0_30 * 10 * 39.096

head(chem) # P ppm; N %; K ppm
# N => convert % to ppm (N * 10000)
chem$tn <- chem$tn * 10000

head(phys) # clay, sand, silt g/kg
# convert g/kg to % (/10)
phys$clay_0_30 <- phys$clay_0_30 / 10
phys$sand_0_30 <- phys$sand_0_30 / 10
phys$silt_0_30 <- phys$silt_0_30 / 10

# Correct negative clay content
phys$clay_0_30[phys$clay_0_30 < 0] <- 0
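# Optional sanity check (a sketch, not in the original workflow):
# after conversion the three texture fractions should sum to
# roughly 100 %
summary(phys$clay_0_30 + phys$sand_0_30 + phys$silt_0_30)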
# Add chemical and physical properties from additional datasets =
# Rename columns to match the main dataset
names(d)
names(chem)[1] <- 'ProfID'
names(chem)[4] <- 'p_0_30'
names(chem)[5] <- 'k_0_30'
names(chem)[6] <- 'n_0_30'
# The chem dataframe comes from an independent dataset, so we
# need to create new unique ProfIDs
chem$ProfID <- seq(max(d$ProfID) + 1,
                   max(d$ProfID) + 1 + nrow(chem) - 1)
# Add the new data as new rows using dplyr; empty values are
# added automatically for the properties not measured in the
# chem dataset
d <- bind_rows(d, chem)
# The phys dataframe with the texture data shares the same
# ProfIDs, so we can directly merge
d <- left_join(d, phys, by = c('ProfID', 'x', 'y'))
# 8 - Plot and save results =====================================
names(d)
x <- pivot_longer(d, cols = ph_0_30:silt_0_30,
                  values_to = "value",
                  names_sep = "_",
                  names_to = c("soil_property", "top", "bottom"))
x <- mutate(x, depth = paste(top, "-", bottom))
# x <- na.omit(x)
ggplot(x, aes(x = depth, y = value, fill = soil_property)) +
  geom_boxplot() +
  facet_wrap(~soil_property, scales = "free")
ggplotly(ggplot(x, aes(x = depth, y = value,
                       fill = soil_property)) +
           geom_boxplot() +
           facet_wrap(~soil_property, scales = "free"))
# save the data
write_csv(d, "02-Outputs/harmonized_soil_data.csv")
Script 3: Download environmental covariates
var assets = [
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/bio1",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/bio12",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/bio13",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/bio14",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/bio16",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/bio17",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/bio5",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/bio6",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/ngd10",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/pet_penman_max",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/pet_penman_mean",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/pet_penman_min",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/pet_penman_range",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/sfcWind_max",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/sfcWind_mean",
"projects/digital-soil-mapping-gsp-fao/assets/CHELSA/sfcWind_range",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/fpar_030405_500m_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/fpar_030405_500m_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/fpar_060708_500m_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/fpar_060708_500m_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/fpar_091011_500m_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/fpar_091011_500m_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/fpar_120102_500m_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/fpar_120102_500m_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/lstd_030405_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/lstd_030405_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/lstd_060708_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/lstd_060708_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/lstd_091011_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/lstd_091011_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/lstd_120102_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/lstd_120102_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndlst_030405_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndlst_030405_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndlst_060708_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndlst_060708_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndlst_091011_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndlst_091011_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndlst_120102_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndlst_120102_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndvi_030405_250m_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndvi_030405_250m_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndvi_060708_250m_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndvi_060708_250m_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndvi_091011_250m_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndvi_091011_250m_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndvi_120102_250m_mean",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/ndvi_120102_250m_sd",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/snow_cover",
"projects/digital-soil-mapping-gsp-fao/assets/MODIS/swir_060708_500m_mean",
"projects/digital-soil-mapping-gsp-fao/assets/LANDCOVER/crops",
"projects/digital-soil-mapping-gsp-fao/assets/LANDCOVER/flooded_vegetation",
"projects/digital-soil-mapping-gsp-fao/assets/LANDCOVER/grass",
"projects/digital-soil-mapping-gsp-fao/assets/LANDCOVER/shrub_and_scrub",
"projects/digital-soil-mapping-gsp-fao/assets/LANDCOVER/trees",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_curvature_250m",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_downslopecurvature_250m",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_dvm2_250m",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_dvm_250m",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_elevation_250m",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_mrn_250m",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_neg_openness_250m",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_pos_openness_250m",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_slope_250m",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_tpi_250m",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_twi_500m",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_upslopecurvature_250m",
"projects/digital-soil-mapping-gsp-fao/assets/OPENLANDMAP/dtm_vbf_250m",
"projects/digital-soil-mapping-gsp-fao/assets/POP/hfp2013_merisINT",
"projects/digital-soil-mapping-gsp-fao/assets/POP/night_lights_stable_2013",
"projects/digital-soil-mapping-gsp-fao/assets/POP/population_density_2020"];
// Load borders
/// Using UN 2020 (replace the countries to download)
var ISO = ['ITA'];
var aoi = ee.FeatureCollection(
  'projects/digital-soil-mapping-gsp-fao/assets/UN_BORDERS/BNDA_CTY'
  ).filter(ee.Filter.inList('ISO3CD', ISO));
var region = aoi.geometry();
/// Using a shapefile
/// 1. Upload the borders of your countries as an asset
/// 2. Replace 'your_shapefile' with the path to your shapefile
// var shapefile =
//ee.FeatureCollection('users/your_username/your_shapefile');
// var region = shapefile.geometry().bounds();
// Load assets as ImageCollection
var assetsCollection = ee.ImageCollection(assets);
// Clip each image in the collection to the region of interest
var clippedCollection = assetsCollection.map(function(img){
  return img.clip(region).toFloat();
});
// Function to replace masked values with zeroes for fpar bands
function replaceMaskedFpar(img) {
  var allBands = img.bandNames();
  var fparBands = allBands
    .filter(ee.Filter.stringStartsWith('item', 'fpar'));
  var nonFparBands = allBands.removeAll(fparBands);
  var fparImg = img.select(fparBands).unmask(0);
  var nonFparImg = img.select(nonFparBands);
  // If there are no fpar bands, return the original image
  var result = ee.Algorithms.If(fparBands.length().eq(0),
                                img,
                                nonFparImg.addBands(fparImg));
  return ee.Image(result);
}
// Clip each image in the collection to the region of
// interest and replace masked values for fpar bands
var clippedCollection = assetsCollection.map(function(img){
  var clippedImg = img.clip(region).toFloat();
  return replaceMaskedFpar(clippedImg);
});
// Stack the layers and maintain the
//layer names in the final file
var stacked = clippedCollection.toBands();
// Get the list of asset names
var assetNames = ee.List(assets).map(function(asset) {
  return ee.String(asset).split('/').get(-1);
});
// Rename the bands with asset names
var renamed = stacked.rename(assetNames);
print(renamed, 'Covariates to be exported');
// Visualize the result
// Set a visualization parameter
// (you can adjust the colors as desired)
var visParams = {
  bands: 'bio1',
  min: 0,
  max: 1,
  palette: ['blue', 'green', 'yellow', 'red']
};
// Add the layer to the map
Map.centerObject(renamed, 6);
Map.addLayer(renamed, visParams, 'Covariates');
// Export the stacked image to Google Drive
Export.image.toDrive({
  image: renamed,
  description: 'covariates',
  folder: 'GEE',
  scale: 250,
  maxPixels: 1e13,
  region: region
});
/* Create mask for croplands ----------------------------*/
// Load the Copernicus Global Land Service image collection
var imageCollection =
  ee.Image("COPERNICUS/Landcover/100m/Proba-V-C3/Global/2019")
    .select("discrete_classification")
    .clip(region);
var crs = 'EPSG:4326'; // WGS84
var res = 250; // Resolution in metres
// Default resampling is nearest neighbor
var image1 = imageCollection.resample()
  .reproject({
    crs: crs, // Add your desired CRS here
    scale: res // Add your desired scale here
  });
// Reclassify the land cover classes: class 40 (cultivated and
// managed vegetation / agriculture) becomes 1, all others 0
var inList = [0, 20, 30, 40, 50, 60, 70, 80, 90, 100,
              111, 112, 113, 114, 115, 116,
              121, 122, 123, 124, 125, 126, 200];
var outList = [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
var FAO_lu = image1.remap(inList, outList)
  .toDouble()
  .clip(region);
// print(FAO_lu)
// Convert 0 to NA
var mask = FAO_lu.neq(0);
print(mask);
FAO_lu = FAO_lu.updateMask(mask);
print(FAO_lu, "Mask");
var visParams = {
  bands: 'remapped',
  min: 0,
  max: 1,
  palette: ['green', 'yellow']
};
// Add the layer to the map
Map.addLayer(FAO_lu, visParams, 'Mask');
// Export the land cover image as a raster to Google Drive
Export.image.toDrive({
  image: FAO_lu,
  folder: 'GEE',
  description: 'mask',
  scale: res, // Add your desired scale here
  region: region,
  crs: crs, // Add your desired CRS here
  maxPixels: 1e13 // Add a maximum number of pixels for export
                  // if needed
});
Script 4: Modelling, validation and prediction using soil data with coordinates
#________________________________________________________________
#
# Quantile Regression Forest
# Soil Property Mapping
#
# GSP-Secretariat
# Contact: Isabel.Luotto@fao.org
# Marcos.Angelini@fao.org
#________________________________________________________________
# Empty environment and cache
rm(list = ls())
gc()
# Content of this script ========================================
# 0 - Set working directory, soil attribute, and packages
# 1 - Merge soil data with environmental covariates
# 2 - Covariate selection
# 3 - Model calibration
# 4 - Uncertainty assessment
# 5 - Prediction
# 6 - Export final maps
#________________________________________________________________
# 0 - Set working directory, soil attribute, and packages =======
# Working directory
wd <- 'C:/User/Documents/GitHub/GSNmap-TM/Digital-Soil-Mapping'
setwd(wd)
# Define the country of interest through its 3-digit ISO code
ISO <- 'ISO'
# Load area of interest (shp)
AOI <- '01-Data/AOI.shp'
# Target soil attributes (the 10 mandatory ones)
target_properties <- c("ph_0_30", "k_0_30", "soc_0_30",
                       "bd_0_30", "cec_0_30", "p_0_30",
                       "n_0_30", "clay_0_30", "sand_0_30",
                       "silt_0_30")
# Function for Uncertainty Assessment
load(file = "03-Scripts/eval.RData")
# load packages
library(tidyverse)
library(data.table)
library(caret)
library(quantregForest)
library(terra)
library(sf)
library(doParallel)
# 1 - Merge soil data with environmental covariates =============
## 1.1 - Load covariates ----------------------------------------
files <- list.files(path = '01-Data/covs/', pattern = '.tif$',
                    full.names = TRUE)
ncovs <- list.files(path = '01-Data/covs/', pattern = '.tif$',
                    full.names = FALSE)
# In case of extent errors, or if covariates other
# than the default ones are added:
# ref <- rast(files[1])
# covs <- list()
# for (i in seq_along(files)) {
# r <- rast(files[i])
# r <- project(r, ref)
# covs[[i]] <- r
# }
# covs <- rast(covs)
covs <- rast(files)
ncovs <- sub('.tif', '', ncovs)

ncovs[ncovs == "dtm_neg-openness_250m"] <- 'dtm_neg'
ncovs[ncovs == "dtm_pos-openness_250m"] <- 'dtm_pos'
names(covs) <- ncovs
## 1.2 - Load the soil data (Script 2) -------------------------
dat <- read_csv("02-Outputs/harmonized_soil_data.csv")

# Convert soil data into a spatial object
# (check https://epsg.io/6204)
dat <- vect(dat, geom = c("x", "y"), crs = crs(covs))

# Reproject point coordinates to match the coordinate
# system of the covariates
dat <- terra::project(dat, covs)
names(dat)
## 1.3 - Extract values from covariates to the soil points ------
pv <- terra::extract(x = covs, y = dat, xy = FALSE)
dat <- cbind(dat, pv)
dat <- as.data.frame(dat)
summary(dat)
# Loop over each target soil attribute
for (soilatt in unique(target_properties)) {
## 1.4 - Target soil attribute + covariates ---------------------
d <- dplyr::select(dat, soilatt, names(covs))
d <- na.omit(d)
# 2 - Covariate selection with RFE ==============================
## 2.1 - Setting parameters -------------------------------------
# Repeatedcv = 3-times repeated 10-fold cross-validation
fitControl <- rfeControl(functions = rfFuncs,
                         method = "repeatedcv",
                         number = 10, ## 10-fold CV
                         repeats = 3, ## repeated 3 times
                         verbose = TRUE,
                         saveDetails = TRUE,
                         returnResamp = "all")
# Set the regression formula
fm <- as.formula(paste(soilatt, "~",
                       paste0(ncovs, collapse = "+")))
# Calibrate the model using multiple cores
cl <- makeCluster(detectCores() - 1)
registerDoParallel(cl)
## 2.2 - Calibrate a RFE model to select covariates -------------
covsel <- rfe(fm,
              data = d,
              sizes = seq(from = 10, to = length(ncovs) - 1,
                          by = 5),
              rfeControl = fitControl,
              verbose = TRUE,
              keep.inbag = TRUE)
stopCluster(cl)
saveRDS(covsel, "02-Outputs/models/covsel.rda")
## 2.3 - Plot selection of covariates ---------------------------
trellis.par.set(caretTheme())
plot(covsel, type = c("g", "o"))
# Extract selection of covariates and subset covs
opt_covs <- predictors(covsel)
# 3 - QRF Model calibration =====================================
## 3.1 - Update formula with the selected covariates ------------
fm <- as.formula(paste(soilatt, "~",
                       paste0(opt_covs, collapse = "+")))
# parallel processing
cl <- makeCluster(detectCores() - 1)
registerDoParallel(cl)
## 3.2 - Set training parameters --------------------------------
fitControl <- trainControl(method = "repeatedcv",
                           number = 10, ## 10-fold CV
                           repeats = 3, ## repeated 3 times
                           savePredictions = TRUE)
# Tune the mtry hyperparameter around the regression default
# (number of covariates / 3)
mtry <- round(length(opt_covs) / 3)
tuneGrid <- expand.grid(mtry = c(mtry - 5, mtry, mtry + 5))
## 3.3 - Calibrate the QRF model --------------------------------
model <- caret::train(fm,
                      data = d,
                      method = "qrf",
                      trControl = fitControl,
                      verbose = TRUE,
                      tuneGrid = tuneGrid,
                      keep.inbag = TRUE,
                      importance = TRUE)
stopCluster(cl)
gc()
## 3.4 - Extract predictor importance as relative values (%) ----
x <- randomForest::importance(model$finalModel)
model$importance <- x
## 3.5 - Print and save model -----------------------------------
print(model)
saveRDS(model, file = paste0("02-Outputs/models/model_",
                             soilatt, ".rds"))
# 4 - Uncertainty assessment ====================================
# extract observed and predicted values
o <- model$pred$obs
p <- model$pred$pred
df <- data.frame(o, p)
## 4.1 - Plot and save scatterplot ------------------------------
(g1 <- ggplot(df, aes(x = o, y = p)) +
   geom_point(alpha = 0.1) +
   geom_abline(slope = 1, intercept = 0, color = "red") +
   ylim(c(min(o), max(o))) + theme(aspect.ratio = 1) +
   labs(title = soilatt) +
   xlab("Observed") + ylab("Predicted"))
# ggsave(g1, filename = paste0("02-Outputs/residuals_", soilatt,
#        ".png"), scale = 1,
#        units = "cm", width = 12, height = 12)
## 4.2 - Print accuracy coefficients ----------------------------
# https://github.com/AlexandreWadoux/MapQualityEvaluation
eval(p,o)
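# If the eval() function from eval.RData is unavailable, the core
# accuracy statistics can be computed directly. A minimal sketch
# using standard formulas (not the GSP implementation):
me <- mean(p - o) # mean error (bias)
rmse <- sqrt(mean((p - o)^2)) # root mean squared error
r2 <- cor(p, o)^2 # squared Pearson correlation
data.frame(ME = me, RMSE = rmse, R2 = r2)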
## 4.3 - Plot Covariate importance ------------------------------
(g2 <- varImpPlot(model$finalModel, main = soilatt, type = 1))
# png(filename = paste0("02-Outputs/importance_",soilatt,".png"),
# width = 15, height = 15, units = "cm", res = 600)
# g2
# dev.off()
# 5 - Prediction ================================================
# Generation of maps (prediction of soil attributes)
## 5.1 - Produce tiles ------------------------------------------
# split the area into a 5 x 5 grid of tiles
r <- covs[[1]]
t <- rast(nrows = 5, ncols = 5, extent = ext(r), crs = crs(r))
tile <- makeTiles(r, t, overwrite = TRUE,
                  filename = "02-Outputs/tiles/tiles.tif")
## 5.2 - Predict soil attributes per tile -----------------------
# loop to predict on each tile
for (j in seq_along(tile)) {
  gc()
  t <- rast(tile[j])
  covst <- crop(covs, t)
  # plot(r)
  pred_mean <- terra::predict(covst, model = model$finalModel,
                              na.rm = TRUE,
                              cpkgs = "quantregForest",
                              what = mean)
  pred_sd <- terra::predict(covst, model = model$finalModel,
                            na.rm = TRUE,
                            cpkgs = "quantregForest",
                            what = sd)
# ###### Raster package solution (in case terra results
# in many NA pixels)
# library(raster)
# covst <- stack(covst)
# class(final_mod$finalModel) <-"quantregForest"
# # Estimate model uncertainty
# pred_sd <- predict(covst,model=final_mod$finalModel,type=sd)
# # OCSKGMlog prediction based in all available data
# pred_mean <- predict(covst,model=final_mod)
#
#
# ##################################
  writeRaster(pred_mean,
              filename = paste0("02-Outputs/tiles/soilatt_tiles/",
                                soilatt, "_tile_", j, ".tif"),
              overwrite = TRUE)
  writeRaster(pred_sd,
              filename = paste0("02-Outputs/tiles/soilatt_tiles/",
                                soilatt, "_tileSD_", j, ".tif"),
              overwrite = TRUE)
  rm(pred_mean)
  rm(pred_sd)
  print(paste("tile", tile[j]))
}
## 5.3 - Merge tiles for both prediction and st.dev. ------------
f_mean <- list.files(path = "02-Outputs/tiles/soilatt_tiles/",
                     pattern = paste0(soilatt, "_tile_"),
                     full.names = TRUE)
f_sd <- list.files(path = "02-Outputs/tiles/soilatt_tiles/",
                   pattern = paste0(soilatt, "_tileSD_"),
                   full.names = TRUE)
r_mean_l <- list()
r_sd_l <- list()

for (g in 1:length(f_mean)) {
  r <- rast(f_mean[g])
  r_mean_l[g] <- r
  rm(r)
}
for (g in 1:length(f_sd)) {
  r <- rast(f_sd[g])
  r_sd_l[g] <- r
  rm(r)
}
r_mean <- sprc(r_mean_l)
r_sd <- sprc(r_sd_l)
pred_mean <- mosaic(r_mean)
pred_sd <- mosaic(r_sd)

aoi <- vect(AOI)
pred_mean <- mask(pred_mean, aoi)
pred_sd <- mask(pred_sd, aoi)

plot(pred_mean)
plot(pred_sd)
# 6 - Export final maps =========================================
## 6.1 - Mask croplands -----------------------------------------
<- rast("01-Data/mask.tif")
msk plot(msk)
<- mask(pred_mean, msk)
pred_mean plot(pred_mean)
<- mask(pred_sd, msk)
pred_sd plot(pred_sd)
plot(pred_sd/pred_mean*100, main = paste("CV",soilatt))
## 6.2 - Save results -------------------------------------------
# Harmonized naming
if (soilatt == 'ph_0_30') {
  name <- '_GSNmap_pH_Map030.tiff'
} else if (soilatt == 'k_0_30') {
  name <- '_GSNmap_Ktot_Map030.tiff'
} else if (soilatt == 'soc_0_30') {
  name <- '_GSNmap_SOC_Map030.tiff'
} else if (soilatt == 'clay_0_30') {
  name <- '_GSNmap_Clay_Map030.tiff'
} else if (soilatt == 'bd_0_30') {
  name <- '_GSNmap_BD_Map030.tiff'
} else if (soilatt == 'cec_0_30') {
  name <- '_GSNmap_CEC_Map030.tiff'
} else if (soilatt == 'p_0_30') {
  name <- '_GSNmap_Pav_Map030.tiff'
} else if (soilatt == 'n_0_30') {
  name <- '_GSNmap_Ntot_Map030.tiff'
} else if (soilatt == 'sand_0_30') {
  name <- '_GSNmap_Sand_Map030.tiff'
} else if (soilatt == 'silt_0_30') {
  name <- '_GSNmap_Silt_Map030.tiff'
}
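# A more compact alternative (a sketch, equivalent to the if/else
# chain above): look the file-name suffix up in a named vector
# name_map <- c(ph_0_30 = '_GSNmap_pH_Map030.tiff',
#               k_0_30 = '_GSNmap_Ktot_Map030.tiff',
#               soc_0_30 = '_GSNmap_SOC_Map030.tiff',
#               clay_0_30 = '_GSNmap_Clay_Map030.tiff',
#               bd_0_30 = '_GSNmap_BD_Map030.tiff',
#               cec_0_30 = '_GSNmap_CEC_Map030.tiff',
#               p_0_30 = '_GSNmap_Pav_Map030.tiff',
#               n_0_30 = '_GSNmap_Ntot_Map030.tiff',
#               sand_0_30 = '_GSNmap_Sand_Map030.tiff',
#               silt_0_30 = '_GSNmap_Silt_Map030.tiff')
# name <- name_map[[soilatt]]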
writeRaster(pred_mean,
            paste0("02-Outputs/maps/", ISO, name),
            overwrite = TRUE)
writeRaster(pred_sd,
            paste0("02-Outputs/maps/", ISO, '_SD', name),
            overwrite = TRUE)
}