This RNotebook script reads a netCDF file with the Long Term Global Mean Average Monthly Rate of Precipitation and plots the twelve months.

First we need to load some libraries

# load libraries
library(ncdf4)
library(CFtime)
library(lattice)
library(RColorBrewer)
library(terra)
## terra 1.7.65

Let’s bring in our netCDF file and open it

# set path and file name
ncpath <- "/Users/elizalawrence/Documents/GEOG495/data/nc_files/"
ncname <- "CMAP_precip.mon.ltm"  # (long-term means)
ncfname <- paste(ncpath, ncname, ".nc", sep="")
dname <- "precip"

# open a netCDF file
ncin <- nc_open(ncfname, write = TRUE)
print(ncin)
## File /Users/elizalawrence/Documents/GEOG495/data/nc_files/CMAP_precip.mon.ltm.nc (NC_FORMAT_CLASSIC):
## 
##      3 variables (excluding dimension variables):
##         double climatology_bounds[nbnds,time]   
##             long_name: Climate Time Boundaries
##             units: hours since 1800-01-01 00:00:0.0
##         float precip[lon,lat,time]   
##             long_name: Long Term Mean Average Monthly Rate of Precipitation
##             valid_range: 0
##              valid_range: 70
##             units: mm/day
##             add_offset: 0
##             scale_factor: 1
##             missing_value: -9.96920996838687e+36
##             precision: 2
##             least_significant_digit: 2
##             var_desc: Precipitation
##             dataset: CPC Merged Analysis of Precipitation Enhanced
##             level_desc: Surface
##             statistic: Long Term Mean
##             parent_stat: Mean
##             actual_range: 0
##              actual_range: 22.3866691589355
##         short valid_yr_count[lon,lat,time]   
##             long_name: count of non-missing values used in mean
##             missing_value: 32767
##             add_offset: 0
##             scale_factor: 1
## 
##      4 dimensions:
##         lon  Size:144 
##             units: degrees_east
##             long_name: Longitude
##             actual_range: 1.25
##              actual_range: 358.75
##             standard_name: longitude
##             axis: X
##         lat  Size:72 
##             units: degrees_north
##             actual_range: 88.75
##              actual_range: -88.75
##             long_name: Latitude
##             standard_name: latitude
##             axis: Y
##         time  Size:12 
##             units: hours since 1800-01-01 00:00:0.0
##             long_name: Time
##             delta_t: 0000-01-00 00:00:00
##             avg_period: 0030-00-00 00:00:00
##             prev_avg_period: 0000-01-00 00:00:00
##             standard_name: time
##             axis: T
##             actual_range: -15769752
##              actual_range: -15761736
##             climatology: climatology_bounds
##             climo_period: 1981/01/01 - 2010/12/31
##             ltm_range: 1586616
##              ltm_range: 1848840
##             interpreted_actual_range: 0001/01/01 00:00:00 - 0001/12/01 00:00:00
##         nbnds  Size:2 (no dimvar)
## 
##     12 global attributes:
##         Conventions: COARDS
##         title: CPC Merged Analysis of Precipitation (includes NCEP Reanalysis)
##         platform: Analyses
##         source: ftp ftp.cpc.ncep.noaa.gov precip/cmap/monthly
##         dataset_title: CPC Merged Analysis of Precipitation
##         documentation: https://www.esrl.noaa.gov/psd/data/gridded/data.cmap.html
##         References: https://www.esrl.noaa.gov/psd/data/gridded/data.cmap.html
##         date_modified: 26 Feb 2019
##         version: V1905
##         history: Created 2019/05/09 by doMonthLTM
##         data_modified: 2019-05-09
##         not_missing_threshold_percent: minimum 3% values input to have non-missing output value

Let’s figure out our longitude, latitude, and time of the netCDF file

# get longitude and latitude
lon <- ncvar_get(ncin,"lon")
nlon <- dim(lon)
list(lon)
## [[1]]
##   [1]    1.25    3.75    6.25    8.75   11.25   13.75   16.25   18.75   21.25
##  [10]   23.75   26.25   28.75   31.25   33.75   36.25   38.75   41.25   43.75
##  [19]   46.25   48.75   51.25   53.75   56.25   58.75   61.25   63.75   66.25
##  [28]   68.75   71.25   73.75   76.25   78.75   81.25   83.75   86.25   88.75
##  [37]   91.25   93.75   96.25   98.75  101.25  103.75  106.25  108.75  111.25
##  [46]  113.75  116.25  118.75  121.25  123.75  126.25  128.75  131.25  133.75
##  [55]  136.25  138.75  141.25  143.75  146.25  148.75  151.25  153.75  156.25
##  [64]  158.75  161.25  163.75  166.25  168.75  171.25  173.75  176.25  178.75
##  [73] -178.75 -176.25 -173.75 -171.25 -168.75 -166.25 -163.75 -161.25 -158.75
##  [82] -156.25 -153.75 -151.25 -148.75 -146.25 -143.75 -141.25 -138.75 -136.25
##  [91] -133.75 -131.25 -128.75 -126.25 -123.75 -121.25 -118.75 -116.25 -113.75
## [100] -111.25 -108.75 -106.25 -103.75 -101.25  -98.75  -96.25  -93.75  -91.25
## [109]  -88.75  -86.25  -83.75  -81.25  -78.75  -76.25  -73.75  -71.25  -68.75
## [118]  -66.25  -63.75  -61.25  -58.75  -56.25  -53.75  -51.25  -48.75  -46.25
## [127]  -43.75  -41.25  -38.75  -36.25  -33.75  -31.25  -28.75  -26.25  -23.75
## [136]  -21.25  -18.75  -16.25  -13.75  -11.25   -8.75   -6.25   -3.75   -1.25
lat <- ncvar_get(ncin,"lat")
nlat <- dim(lat)
list(lat)
## [[1]]
##  [1]  88.75  86.25  83.75  81.25  78.75  76.25  73.75  71.25  68.75  66.25
## [11]  63.75  61.25  58.75  56.25  53.75  51.25  48.75  46.25  43.75  41.25
## [21]  38.75  36.25  33.75  31.25  28.75  26.25  23.75  21.25  18.75  16.25
## [31]  13.75  11.25   8.75   6.25   3.75   1.25  -1.25  -3.75  -6.25  -8.75
## [41] -11.25 -13.75 -16.25 -18.75 -21.25 -23.75 -26.25 -28.75 -31.25 -33.75
## [51] -36.25 -38.75 -41.25 -43.75 -46.25 -48.75 -51.25 -53.75 -56.25 -58.75
## [61] -61.25 -63.75 -66.25 -68.75 -71.25 -73.75 -76.25 -78.75 -81.25 -83.75
## [71] -86.25 -88.75
print(c(nlon,nlat))
## [1] 144  72
# get time
time <- ncvar_get(ncin,"time")
time
##  [1] -15769752 -15769008 -15768336 -15767592 -15766872 -15766128 -15765408
##  [8] -15764664 -15763920 -15763200 -15762456 -15761736
tunits <- ncatt_get(ncin,"time","units")
tunits
## $hasatt
## [1] TRUE
## 
## $value
## [1] "hours since 1800-01-01 00:00:0.0"
dim(time)
## [1] 12
nt <- dim(time)
nt
## [1] 12

The longitude values range from 0-360 instead of -180-180. Let’s fix that!

# Convert 0-360 to -180-180
lon[lon > 180] <- lon[lon > 180] - 360

# Update the longitude variable with the new values
ncvar_put(ncin, "lon", lon)

#confirm that the longitude values are updated
lon <- ncvar_get(ncin,"lon")
nlon <- dim(lon)
list(lon)
## [[1]]
##   [1]    1.25    3.75    6.25    8.75   11.25   13.75   16.25   18.75   21.25
##  [10]   23.75   26.25   28.75   31.25   33.75   36.25   38.75   41.25   43.75
##  [19]   46.25   48.75   51.25   53.75   56.25   58.75   61.25   63.75   66.25
##  [28]   68.75   71.25   73.75   76.25   78.75   81.25   83.75   86.25   88.75
##  [37]   91.25   93.75   96.25   98.75  101.25  103.75  106.25  108.75  111.25
##  [46]  113.75  116.25  118.75  121.25  123.75  126.25  128.75  131.25  133.75
##  [55]  136.25  138.75  141.25  143.75  146.25  148.75  151.25  153.75  156.25
##  [64]  158.75  161.25  163.75  166.25  168.75  171.25  173.75  176.25  178.75
##  [73] -178.75 -176.25 -173.75 -171.25 -168.75 -166.25 -163.75 -161.25 -158.75
##  [82] -156.25 -153.75 -151.25 -148.75 -146.25 -143.75 -141.25 -138.75 -136.25
##  [91] -133.75 -131.25 -128.75 -126.25 -123.75 -121.25 -118.75 -116.25 -113.75
## [100] -111.25 -108.75 -106.25 -103.75 -101.25  -98.75  -96.25  -93.75  -91.25
## [109]  -88.75  -86.25  -83.75  -81.25  -78.75  -76.25  -73.75  -71.25  -68.75
## [118]  -66.25  -63.75  -61.25  -58.75  -56.25  -53.75  -51.25  -48.75  -46.25
## [127]  -43.75  -41.25  -38.75  -36.25  -33.75  -31.25  -28.75  -26.25  -23.75
## [136]  -21.25  -18.75  -16.25  -13.75  -11.25   -8.75   -6.25   -3.75   -1.25

The netCDF file only has one variable: precipitation, so let’s make an array of that

# get precipitation variable
precip_array <- ncvar_get(ncin,dname)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")
fillvalue <- ncatt_get(ncin,dname,"_FillValue")
dim(precip_array)
## [1] 144  72  12

Let’s get the global attributes of our data

# get global attributes
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")

Then let’s close the netCDF file because we have already read it in

#close the netCDF file
nc_close(ncin)

Now we are ready to reshape the netCDF file from a raster to a rectangular 2-D array.

# decode time
cf <- CFtime(tunits$value, calendar = "proleptic_gregorian", time + 15769752) # convert time to CFtime class, add offset
cf
## CF datum of origin:
##   Origin  : 1800-01-01 00:00:00
##   Units   : hours
##   Calendar: proleptic_gregorian
## CF time series:
##   Elements: [1800-01-01 .. 1800-12-01] (average of 728.727273 hours between 12 elements)
timestamps <- CFtimestamp(cf) # get character-string times
timestamps
##  [1] "1800-01-01T00:00:00" "1800-02-01T00:00:00" "1800-03-01T00:00:00"
##  [4] "1800-04-01T00:00:00" "1800-05-01T00:00:00" "1800-06-01T00:00:00"
##  [7] "1800-07-01T00:00:00" "1800-08-01T00:00:00" "1800-09-01T00:00:00"
## [10] "1800-10-01T00:00:00" "1800-11-01T00:00:00" "1800-12-01T00:00:00"
class(timestamps)
## [1] "character"
time_cf <- CFparse(cf, timestamps) # parse the string into date components
time_cf
class(time_cf)
## [1] "data.frame"

Some of the values of the precipitation variable are missing or not available, so let’s remove those and replace them with NA’s

# replace netCDF fill values with NA's
precip_array[precip_array==fillvalue$value] <- NA
length(na.omit(as.vector(precip_array[,,1])))
## [1] 10363

Now we are ready to create a data frame. First we will create a matrix with the rows of longitude and latitude pairs.

# create matrix -- reshape data
lonlat <- as.matrix(expand.grid(lon,lat))
dim(lonlat)
## [1] 10368     2

Then we will create a vector from the large array

# reshape the whole array into vector
precip_vec_long <- as.vector(precip_array)
length(precip_vec_long)
## [1] 124416

And now we will combine the precipitation vector and the longitude and latitude vector into a matrix

# reshape the vector into a matrix
precip_mat <- matrix(precip_vec_long, nrow=nlon*nlat, ncol=nt)
dim(precip_mat)
## [1] 10368    12
head(na.omit(precip_mat))
##           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]     [,7]
## [1,] 0.2866667 0.2650000 0.2156667 0.2270000 0.4836667 0.8870000 1.128000
## [2,] 0.3140000 0.2730000 0.2240000 0.2460000 0.5023333 0.9350001 1.176000
## [3,] 0.3083333 0.2706667 0.2203334 0.2443334 0.4953333 0.9240000 1.158667
## [4,] 0.3049999 0.2683333 0.2136667 0.2426667 0.4910000 0.8863333 1.146000
## [5,] 0.3013333 0.2673333 0.2096667 0.2410000 0.4863333 0.8749999 1.133667
## [6,] 0.2989999 0.2686666 0.2083334 0.2403333 0.4826666 0.8920000 1.126000
##           [,8]      [,9]     [,10]     [,11]     [,12]
## [1,] 0.7020000 0.5623333 0.3956667 0.2380000 0.2669999
## [2,] 0.7373334 0.6086666 0.4189999 0.2600000 0.2860000
## [3,] 0.7290000 0.6063334 0.4123333 0.2543333 0.2806667
## [4,] 0.7206668 0.6026667 0.4066667 0.2506667 0.2770000
## [5,] 0.7163334 0.6010000 0.4036666 0.2473333 0.2723333
## [6,] 0.7140000 0.6020000 0.4029999 0.2470000 0.2686667

Finally, we can create a data frame from the matrix we previously created

# create a data frame
lonlat <- as.matrix(expand.grid(lon,lat))
precip_df02 <- data.frame(cbind(lonlat,precip_mat))
names(precip_df02) <- c("lon","lat","precipJan","precipFeb","precipMar","precipApr", "precipMay","precipJun", "precipJul","precipAug","precipSep",  "precipOct","precipNov","precipDec")
head(na.omit(precip_df02, 20))
dim(na.omit(precip_df02, 20))
## [1] 10332    14
# write out the data frame
csvpath <- "/Users/elizalawrence/Documents/GEOG495/data/csv_files"
csvname <- "CMAP_precip.mon.ltm_2.csv"
csvfile <- paste(csvpath, csvname, sep="")
write.table(na.omit(precip_df02),csvfile, row.names=FALSE, sep=",")

Now that we have our data frame, we can finally visualize our data! First we need to load some more packages

# load more packages
library(ggplot2)
library(maps)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:terra':
## 
##     extract
library(ggthemes)
library(stars)
## Loading required package: abind

We need to reshape the data frame because of the type of map we are trying to make - a facet wrap

# Reshape the data frame into long format, excluding 'lon' and 'lat'
df_long <- gather(precip_df02, key = "month", value = "value", -lon, -lat)

To make sure our months are in sequential order instead of alphabetical we need to define the order and the full names

# Define the order of months
month_order <- c("precipJan", "precipFeb", "precipMar", "precipApr","precipMay",
                 "precipJun","precipJul","precipAug","precipSep","precipOct",
                 "precipNov","precipDec")  # Add more months as needed

month_names <- c("January", "February", "March", "April", "May", "June", "July",
                 "August", "September", "October", "November", "December")  # Corresponding month names
# Convert 'month' variable to factor with custom order
df_long$month <- factor(df_long$month, levels = month_order, labels = month_names)

We also want to put outlines of the world map on our map of precipitation

#get world map data 
# Specify the path to your shape file
shapefile_path <- "/Users/elizalawrence/Documents/GEOG495/data/ne_110m_coastline/ne_110m_coastline.shp"

# Read the shape file
coastline <- st_read(shapefile_path)
## Reading layer `ne_110m_coastline' from data source 
##   `/Users/elizalawrence/Documents/GEOG495/data/ne_110m_coastline/ne_110m_coastline.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 134 features and 3 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -85.60904 xmax: 180 ymax: 83.64513
## Geodetic CRS:  WGS 84

Let’s use ggplot to visualize the long term monthly means of global precipitation

# Create ggplot object with coastline and precipitation data
p <- ggplot() +
  geom_tile(data = df_long, aes(x = lon, y = lat, fill = value)) +  # Add precipitation data
  scale_fill_gradient(low = "white", high = "blue", name = "mm/day") +  # Color scale for precipitation
  labs(x = "Longitude", y = "Latitude", title = "Long Term (1981-2010) Mean Average Monthly Rate of Precipitation") +  # Labels and title
  facet_wrap(~ month, nrow = 4) +  # Facet wrap by month with 4 plots per row
  scale_x_continuous(breaks = seq(-120, 120, by = 60), labels = seq(-120, 120, by = 60), expand = c(0, 0)) +  # Adjust x-axis breaks
  scale_y_continuous(breaks = seq(-60, 60, by = 60), labels = seq(-60, 60, by = 60), expand = c(0, 0)) +  # Adjust y-axis breaks
  geom_sf(data = coastline, color = "grey80", inherit.aes = FALSE) +  # Add coastline
  theme_minimal() + # Minimal theme
  theme(axis.ticks = element_line(color = "black", linewidth = 0.5))  # Customize axis ticks

# Print the plot
print(p)