This RNotebook script reads a netCDF file with the Long Term Global Mean Average Monthly Rate of Precipitation and plots the twelve months.
First we need to load some libraries
# load libraries
library(ncdf4)
library(CFtime)
library(lattice)
library(RColorBrewer)
library(terra)
## terra 1.7.65
Let’s bring in our netCDF file and open it
# set path and file name
ncpath <- "/Users/elizalawrence/Documents/GEOG495/data/nc_files/"
ncname <- "CMAP_precip.mon.ltm" # (long-term means)
ncfname <- paste(ncpath, ncname, ".nc", sep="")
dname <- "precip"
# open a netCDF file
ncin <- nc_open(ncfname, write = TRUE)
print(ncin)
## File /Users/elizalawrence/Documents/GEOG495/data/nc_files/CMAP_precip.mon.ltm.nc (NC_FORMAT_CLASSIC):
##
## 3 variables (excluding dimension variables):
## double climatology_bounds[nbnds,time]
## long_name: Climate Time Boundaries
## units: hours since 1800-01-01 00:00:0.0
## float precip[lon,lat,time]
## long_name: Long Term Mean Average Monthly Rate of Precipitation
## valid_range: 0
## valid_range: 70
## units: mm/day
## add_offset: 0
## scale_factor: 1
## missing_value: -9.96920996838687e+36
## precision: 2
## least_significant_digit: 2
## var_desc: Precipitation
## dataset: CPC Merged Analysis of Precipitation Enhanced
## level_desc: Surface
## statistic: Long Term Mean
## parent_stat: Mean
## actual_range: 0
## actual_range: 22.3866691589355
## short valid_yr_count[lon,lat,time]
## long_name: count of non-missing values used in mean
## missing_value: 32767
## add_offset: 0
## scale_factor: 1
##
## 4 dimensions:
## lon Size:144
## units: degrees_east
## long_name: Longitude
## actual_range: 1.25
## actual_range: 358.75
## standard_name: longitude
## axis: X
## lat Size:72
## units: degrees_north
## actual_range: 88.75
## actual_range: -88.75
## long_name: Latitude
## standard_name: latitude
## axis: Y
## time Size:12
## units: hours since 1800-01-01 00:00:0.0
## long_name: Time
## delta_t: 0000-01-00 00:00:00
## avg_period: 0030-00-00 00:00:00
## prev_avg_period: 0000-01-00 00:00:00
## standard_name: time
## axis: T
## actual_range: -15769752
## actual_range: -15761736
## climatology: climatology_bounds
## climo_period: 1981/01/01 - 2010/12/31
## ltm_range: 1586616
## ltm_range: 1848840
## interpreted_actual_range: 0001/01/01 00:00:00 - 0001/12/01 00:00:00
## nbnds Size:2 (no dimvar)
##
## 12 global attributes:
## Conventions: COARDS
## title: CPC Merged Analysis of Precipitation (includes NCEP Reanalysis)
## platform: Analyses
## source: ftp ftp.cpc.ncep.noaa.gov precip/cmap/monthly
## dataset_title: CPC Merged Analysis of Precipitation
## documentation: https://www.esrl.noaa.gov/psd/data/gridded/data.cmap.html
## References: https://www.esrl.noaa.gov/psd/data/gridded/data.cmap.html
## date_modified: 26 Feb 2019
## version: V1905
## history: Created 2019/05/09 by doMonthLTM
## data_modified: 2019-05-09
## not_missing_threshold_percent: minimum 3% values input to have non-missing output value
Let’s figure out our longitude, latitude, and time of the netCDF file
# get longitude and latitude
lon <- ncvar_get(ncin,"lon")
nlon <- dim(lon)
list(lon)
## [[1]]
## [1] 1.25 3.75 6.25 8.75 11.25 13.75 16.25 18.75 21.25
## [10] 23.75 26.25 28.75 31.25 33.75 36.25 38.75 41.25 43.75
## [19] 46.25 48.75 51.25 53.75 56.25 58.75 61.25 63.75 66.25
## [28] 68.75 71.25 73.75 76.25 78.75 81.25 83.75 86.25 88.75
## [37] 91.25 93.75 96.25 98.75 101.25 103.75 106.25 108.75 111.25
## [46] 113.75 116.25 118.75 121.25 123.75 126.25 128.75 131.25 133.75
## [55] 136.25 138.75 141.25 143.75 146.25 148.75 151.25 153.75 156.25
## [64] 158.75 161.25 163.75 166.25 168.75 171.25 173.75 176.25 178.75
## [73] -178.75 -176.25 -173.75 -171.25 -168.75 -166.25 -163.75 -161.25 -158.75
## [82] -156.25 -153.75 -151.25 -148.75 -146.25 -143.75 -141.25 -138.75 -136.25
## [91] -133.75 -131.25 -128.75 -126.25 -123.75 -121.25 -118.75 -116.25 -113.75
## [100] -111.25 -108.75 -106.25 -103.75 -101.25 -98.75 -96.25 -93.75 -91.25
## [109] -88.75 -86.25 -83.75 -81.25 -78.75 -76.25 -73.75 -71.25 -68.75
## [118] -66.25 -63.75 -61.25 -58.75 -56.25 -53.75 -51.25 -48.75 -46.25
## [127] -43.75 -41.25 -38.75 -36.25 -33.75 -31.25 -28.75 -26.25 -23.75
## [136] -21.25 -18.75 -16.25 -13.75 -11.25 -8.75 -6.25 -3.75 -1.25
lat <- ncvar_get(ncin,"lat")
nlat <- dim(lat)
list(lat)
## [[1]]
## [1] 88.75 86.25 83.75 81.25 78.75 76.25 73.75 71.25 68.75 66.25
## [11] 63.75 61.25 58.75 56.25 53.75 51.25 48.75 46.25 43.75 41.25
## [21] 38.75 36.25 33.75 31.25 28.75 26.25 23.75 21.25 18.75 16.25
## [31] 13.75 11.25 8.75 6.25 3.75 1.25 -1.25 -3.75 -6.25 -8.75
## [41] -11.25 -13.75 -16.25 -18.75 -21.25 -23.75 -26.25 -28.75 -31.25 -33.75
## [51] -36.25 -38.75 -41.25 -43.75 -46.25 -48.75 -51.25 -53.75 -56.25 -58.75
## [61] -61.25 -63.75 -66.25 -68.75 -71.25 -73.75 -76.25 -78.75 -81.25 -83.75
## [71] -86.25 -88.75
print(c(nlon,nlat))
## [1] 144 72
# get time
time <- ncvar_get(ncin,"time")
time
## [1] -15769752 -15769008 -15768336 -15767592 -15766872 -15766128 -15765408
## [8] -15764664 -15763920 -15763200 -15762456 -15761736
tunits <- ncatt_get(ncin,"time","units")
tunits
## $hasatt
## [1] TRUE
##
## $value
## [1] "hours since 1800-01-01 00:00:0.0"
dim(time)
## [1] 12
nt <- dim(time)
nt
## [1] 12
The longitude values range from 0-360 instead of -180-180. Let’s fix that!
# Convert 0-360 to -180-180
lon[lon > 180] <- lon[lon > 180] - 360
# Update the longitude variable with the new values
ncvar_put(ncin, "lon", lon)
#confirm that the longitude values are updated
lon <- ncvar_get(ncin,"lon")
nlon <- dim(lon)
list(lon)
## [[1]]
## [1] 1.25 3.75 6.25 8.75 11.25 13.75 16.25 18.75 21.25
## [10] 23.75 26.25 28.75 31.25 33.75 36.25 38.75 41.25 43.75
## [19] 46.25 48.75 51.25 53.75 56.25 58.75 61.25 63.75 66.25
## [28] 68.75 71.25 73.75 76.25 78.75 81.25 83.75 86.25 88.75
## [37] 91.25 93.75 96.25 98.75 101.25 103.75 106.25 108.75 111.25
## [46] 113.75 116.25 118.75 121.25 123.75 126.25 128.75 131.25 133.75
## [55] 136.25 138.75 141.25 143.75 146.25 148.75 151.25 153.75 156.25
## [64] 158.75 161.25 163.75 166.25 168.75 171.25 173.75 176.25 178.75
## [73] -178.75 -176.25 -173.75 -171.25 -168.75 -166.25 -163.75 -161.25 -158.75
## [82] -156.25 -153.75 -151.25 -148.75 -146.25 -143.75 -141.25 -138.75 -136.25
## [91] -133.75 -131.25 -128.75 -126.25 -123.75 -121.25 -118.75 -116.25 -113.75
## [100] -111.25 -108.75 -106.25 -103.75 -101.25 -98.75 -96.25 -93.75 -91.25
## [109] -88.75 -86.25 -83.75 -81.25 -78.75 -76.25 -73.75 -71.25 -68.75
## [118] -66.25 -63.75 -61.25 -58.75 -56.25 -53.75 -51.25 -48.75 -46.25
## [127] -43.75 -41.25 -38.75 -36.25 -33.75 -31.25 -28.75 -26.25 -23.75
## [136] -21.25 -18.75 -16.25 -13.75 -11.25 -8.75 -6.25 -3.75 -1.25
The netCDF file only has one variable: precipitation, so let’s make an array of that
# get precipitation variable
precip_array <- ncvar_get(ncin,dname)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")
fillvalue <- ncatt_get(ncin,dname,"_FillValue")
dim(precip_array)
## [1] 144 72 12
Let’s get the global attributes of our data
# get global attributes
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")
Then let’s close the netCDF file because we have already read it in
#close the netCDF file
nc_close(ncin)
Now we are ready to reshape the netCDF file from a raster to a rectangular 2-D array.
# decode time
cf <- CFtime(tunits$value, calendar = "proleptic_gregorian", time + 15769752) # convert time to CFtime class, add offset
cf
## CF datum of origin:
## Origin : 1800-01-01 00:00:00
## Units : hours
## Calendar: proleptic_gregorian
## CF time series:
## Elements: [1800-01-01 .. 1800-12-01] (average of 728.727273 hours between 12 elements)
timestamps <- CFtimestamp(cf) # get character-string times
timestamps
## [1] "1800-01-01T00:00:00" "1800-02-01T00:00:00" "1800-03-01T00:00:00"
## [4] "1800-04-01T00:00:00" "1800-05-01T00:00:00" "1800-06-01T00:00:00"
## [7] "1800-07-01T00:00:00" "1800-08-01T00:00:00" "1800-09-01T00:00:00"
## [10] "1800-10-01T00:00:00" "1800-11-01T00:00:00" "1800-12-01T00:00:00"
class(timestamps)
## [1] "character"
time_cf <- CFparse(cf, timestamps) # parse the string into date components
time_cf
class(time_cf)
## [1] "data.frame"
Some of the values of the precipitation variable are missing or not available, so let’s remove those and replace them with NA’s
# replace netCDF fill values with NA's
precip_array[precip_array==fillvalue$value] <- NA
length(na.omit(as.vector(precip_array[,,1])))
## [1] 10363
Now we are ready to create a data frame. First we will create a matrix with the rows of longitude and latitude pairs.
# create matrix -- reshape data
lonlat <- as.matrix(expand.grid(lon,lat))
dim(lonlat)
## [1] 10368 2
Then we will create a vector from the large array
# reshape the whole array into vector
precip_vec_long <- as.vector(precip_array)
length(precip_vec_long)
## [1] 124416
And now we will combine the precipitation vector and the longitude and latitude vector into a matrix
# reshape the vector into a matrix
precip_mat <- matrix(precip_vec_long, nrow=nlon*nlat, ncol=nt)
dim(precip_mat)
## [1] 10368 12
head(na.omit(precip_mat))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 0.2866667 0.2650000 0.2156667 0.2270000 0.4836667 0.8870000 1.128000
## [2,] 0.3140000 0.2730000 0.2240000 0.2460000 0.5023333 0.9350001 1.176000
## [3,] 0.3083333 0.2706667 0.2203334 0.2443334 0.4953333 0.9240000 1.158667
## [4,] 0.3049999 0.2683333 0.2136667 0.2426667 0.4910000 0.8863333 1.146000
## [5,] 0.3013333 0.2673333 0.2096667 0.2410000 0.4863333 0.8749999 1.133667
## [6,] 0.2989999 0.2686666 0.2083334 0.2403333 0.4826666 0.8920000 1.126000
## [,8] [,9] [,10] [,11] [,12]
## [1,] 0.7020000 0.5623333 0.3956667 0.2380000 0.2669999
## [2,] 0.7373334 0.6086666 0.4189999 0.2600000 0.2860000
## [3,] 0.7290000 0.6063334 0.4123333 0.2543333 0.2806667
## [4,] 0.7206668 0.6026667 0.4066667 0.2506667 0.2770000
## [5,] 0.7163334 0.6010000 0.4036666 0.2473333 0.2723333
## [6,] 0.7140000 0.6020000 0.4029999 0.2470000 0.2686667
Finally, we can create a data frame from the matrix we previously created
# create a data frame
lonlat <- as.matrix(expand.grid(lon,lat))
precip_df02 <- data.frame(cbind(lonlat,precip_mat))
names(precip_df02) <- c("lon","lat","precipJan","precipFeb","precipMar","precipApr", "precipMay","precipJun", "precipJul","precipAug","precipSep", "precipOct","precipNov","precipDec")
head(na.omit(precip_df02, 20))
dim(na.omit(precip_df02, 20))
## [1] 10332 14
# write out the data frame
csvpath <- "/Users/elizalawrence/Documents/GEOG495/data/csv_files"
csvname <- "CMAP_precip.mon.ltm_2.csv"
csvfile <- paste(csvpath, csvname, sep="")
write.table(na.omit(precip_df02),csvfile, row.names=FALSE, sep=",")
Now that we have our data frame, we can finally visualize our data! First we need to load some more packages
# load more packages
library(ggplot2)
library(maps)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:terra':
##
## extract
library(ggthemes)
library(stars)
## Loading required package: abind
We need to reshape the data frame because of the type of map we are trying to make - a facet wrap
# Reshape the data frame into long format, excluding 'lon' and 'lat'
df_long <- gather(precip_df02, key = "month", value = "value", -lon, -lat)
To make sure our months are in sequential order instead of alphabetical we need to define the order and the full names
# Define the order of months
month_order <- c("precipJan", "precipFeb", "precipMar", "precipApr","precipMay",
"precipJun","precipJul","precipAug","precipSep","precipOct",
"precipNov","precipDec") # Add more months as needed
month_names <- c("January", "February", "March", "April", "May", "June", "July",
"August", "September", "October", "November", "December") # Corresponding month names
# Convert 'month' variable to factor with custom order
df_long$month <- factor(df_long$month, levels = month_order, labels = month_names)
We also want to put outlines of the world map on our map of precipitation
#get world map data
# Specify the path to your shape file
shapefile_path <- "/Users/elizalawrence/Documents/GEOG495/data/ne_110m_coastline/ne_110m_coastline.shp"
# Read the shape file
coastline <- st_read(shapefile_path)
## Reading layer `ne_110m_coastline' from data source
## `/Users/elizalawrence/Documents/GEOG495/data/ne_110m_coastline/ne_110m_coastline.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 134 features and 3 fields
## Geometry type: LINESTRING
## Dimension: XY
## Bounding box: xmin: -180 ymin: -85.60904 xmax: 180 ymax: 83.64513
## Geodetic CRS: WGS 84
Let’s use ggplot to visualize the long term monthly means of global precipitation
# Create ggplot object with coastline and precipitation data
p <- ggplot() +
geom_tile(data = df_long, aes(x = lon, y = lat, fill = value)) + # Add precipitation data
scale_fill_gradient(low = "white", high = "blue", name = "mm/day") + # Color scale for precipitation
labs(x = "Longitude", y = "Latitude", title = "Long Term (1981-2010) Mean Average Monthly Rate of Precipitation") + # Labels and title
facet_wrap(~ month, nrow = 4) + # Facet wrap by month with 4 plots per row
scale_x_continuous(breaks = seq(-120, 120, by = 60), labels = seq(-120, 120, by = 60), expand = c(0, 0)) + # Adjust x-axis breaks
scale_y_continuous(breaks = seq(-60, 60, by = 60), labels = seq(-60, 60, by = 60), expand = c(0, 0)) + # Adjust y-axis breaks
geom_sf(data = coastline, color = "grey80", inherit.aes = FALSE) + # Add coastline
theme_minimal() + # Minimal theme
theme(axis.ticks = element_line(color = "black", linewidth = 0.5)) # Customize axis ticks
# Print the plot
print(p)