This RNotebook script reads a netCDF file with the Long Term Global Mean Average Monthly Rate of Precipitation and plots the twelve months.

First we need to load some libraries

# load libraries
library(ncdf4)
library(CFtime)
library(lattice)
library(RColorBrewer)
library(terra)
## terra 1.7.65

Let’s bring in our netCDF file and open it

# set path and file name
ncpath <- "/Users/elizalawrence/Documents/GEOG495/data/nc_files/"
ncname <- "CMAP_precip.mon.ltm"  # (long-term means)
ncfname <- paste(ncpath, ncname, ".nc", sep="")
dname <- "precip"

# open a netCDF file
ncin <- nc_open(ncfname, write = TRUE)
print(ncin)
## File /Users/elizalawrence/Documents/GEOG495/data/nc_files/CMAP_precip.mon.ltm.nc (NC_FORMAT_CLASSIC):
## 
##      3 variables (excluding dimension variables):
##         double climatology_bounds[nbnds,time]   
##             long_name: Climate Time Boundaries
##             units: hours since 1800-01-01 00:00:0.0
##         float precip[lon,lat,time]   
##             long_name: Long Term Mean Average Monthly Rate of Precipitation
##             valid_range: 0
##              valid_range: 70
##             units: mm/day
##             add_offset: 0
##             scale_factor: 1
##             missing_value: -9.96920996838687e+36
##             precision: 2
##             least_significant_digit: 2
##             var_desc: Precipitation
##             dataset: CPC Merged Analysis of Precipitation Enhanced
##             level_desc: Surface
##             statistic: Long Term Mean
##             parent_stat: Mean
##             actual_range: 0
##              actual_range: 22.3866691589355
##         short valid_yr_count[lon,lat,time]   
##             long_name: count of non-missing values used in mean
##             missing_value: 32767
##             add_offset: 0
##             scale_factor: 1
## 
##      4 dimensions:
##         lon  Size:144 
##             units: degrees_east
##             long_name: Longitude
##             actual_range: 1.25
##              actual_range: 358.75
##             standard_name: longitude
##             axis: X
##         lat  Size:72 
##             units: degrees_north
##             actual_range: 88.75
##              actual_range: -88.75
##             long_name: Latitude
##             standard_name: latitude
##             axis: Y
##         time  Size:12 
##             units: hours since 1800-01-01 00:00:0.0
##             long_name: Time
##             delta_t: 0000-01-00 00:00:00
##             avg_period: 0030-00-00 00:00:00
##             prev_avg_period: 0000-01-00 00:00:00
##             standard_name: time
##             axis: T
##             actual_range: -15769752
##              actual_range: -15761736
##             climatology: climatology_bounds
##             climo_period: 1981/01/01 - 2010/12/31
##             ltm_range: 1586616
##              ltm_range: 1848840
##             interpreted_actual_range: 0001/01/01 00:00:00 - 0001/12/01 00:00:00
##         nbnds  Size:2 (no dimvar)
## 
##     12 global attributes:
##         Conventions: COARDS
##         title: CPC Merged Analysis of Precipitation (includes NCEP Reanalysis)
##         platform: Analyses
##         source: ftp ftp.cpc.ncep.noaa.gov precip/cmap/monthly
##         dataset_title: CPC Merged Analysis of Precipitation
##         documentation: https://www.esrl.noaa.gov/psd/data/gridded/data.cmap.html
##         References: https://www.esrl.noaa.gov/psd/data/gridded/data.cmap.html
##         date_modified: 26 Feb 2019
##         version: V1905
##         history: Created 2019/05/09 by doMonthLTM
##         data_modified: 2019-05-09
##         not_missing_threshold_percent: minimum 3% values input to have non-missing output value

Let’s figure out our longitude, latitude, and time of the netCDF file

# get longitude and latitude
lon <- ncvar_get(ncin,"lon")
nlon <- dim(lon)
list(lon)
## [[1]]
##   [1]    1.25    3.75    6.25    8.75   11.25   13.75   16.25   18.75   21.25
##  [10]   23.75   26.25   28.75   31.25   33.75   36.25   38.75   41.25   43.75
##  [19]   46.25   48.75   51.25   53.75   56.25   58.75   61.25   63.75   66.25
##  [28]   68.75   71.25   73.75   76.25   78.75   81.25   83.75   86.25   88.75
##  [37]   91.25   93.75   96.25   98.75  101.25  103.75  106.25  108.75  111.25
##  [46]  113.75  116.25  118.75  121.25  123.75  126.25  128.75  131.25  133.75
##  [55]  136.25  138.75  141.25  143.75  146.25  148.75  151.25  153.75  156.25
##  [64]  158.75  161.25  163.75  166.25  168.75  171.25  173.75  176.25  178.75
##  [73] -178.75 -176.25 -173.75 -171.25 -168.75 -166.25 -163.75 -161.25 -158.75
##  [82] -156.25 -153.75 -151.25 -148.75 -146.25 -143.75 -141.25 -138.75 -136.25
##  [91] -133.75 -131.25 -128.75 -126.25 -123.75 -121.25 -118.75 -116.25 -113.75
## [100] -111.25 -108.75 -106.25 -103.75 -101.25  -98.75  -96.25  -93.75  -91.25
## [109]  -88.75  -86.25  -83.75  -81.25  -78.75  -76.25  -73.75  -71.25  -68.75
## [118]  -66.25  -63.75  -61.25  -58.75  -56.25  -53.75  -51.25  -48.75  -46.25
## [127]  -43.75  -41.25  -38.75  -36.25  -33.75  -31.25  -28.75  -26.25  -23.75
## [136]  -21.25  -18.75  -16.25  -13.75  -11.25   -8.75   -6.25   -3.75   -1.25
lat <- ncvar_get(ncin,"lat")
nlat <- dim(lat)
list(lat)
## [[1]]
##  [1]  88.75  86.25  83.75  81.25  78.75  76.25  73.75  71.25  68.75  66.25
## [11]  63.75  61.25  58.75  56.25  53.75  51.25  48.75  46.25  43.75  41.25
## [21]  38.75  36.25  33.75  31.25  28.75  26.25  23.75  21.25  18.75  16.25
## [31]  13.75  11.25   8.75   6.25   3.75   1.25  -1.25  -3.75  -6.25  -8.75
## [41] -11.25 -13.75 -16.25 -18.75 -21.25 -23.75 -26.25 -28.75 -31.25 -33.75
## [51] -36.25 -38.75 -41.25 -43.75 -46.25 -48.75 -51.25 -53.75 -56.25 -58.75
## [61] -61.25 -63.75 -66.25 -68.75 -71.25 -73.75 -76.25 -78.75 -81.25 -83.75
## [71] -86.25 -88.75
print(c(nlon,nlat))
## [1] 144  72
# get time
time <- ncvar_get(ncin,"time")
time
##  [1] -15769752 -15769008 -15768336 -15767592 -15766872 -15766128 -15765408
##  [8] -15764664 -15763920 -15763200 -15762456 -15761736
tunits <- ncatt_get(ncin,"time","units")
tunits
## $hasatt
## [1] TRUE
## 
## $value
## [1] "hours since 1800-01-01 00:00:0.0"
dim(time)
## [1] 12
nt <- dim(time)
nt
## [1] 12

The longitude values range from 0-360 instead of -180-180. Let’s fix that!

# Convert 0-360 to -180-180
lon[lon > 180] <- lon[lon > 180] - 360

# Update the longitude variable with the new values
ncvar_put(ncin, "lon", lon)

#confirm that the longitude values are updated
lon <- ncvar_get(ncin,"lon")
nlon <- dim(lon)
list(lon)
## [[1]]
##   [1]    1.25    3.75    6.25    8.75   11.25   13.75   16.25   18.75   21.25
##  [10]   23.75   26.25   28.75   31.25   33.75   36.25   38.75   41.25   43.75
##  [19]   46.25   48.75   51.25   53.75   56.25   58.75   61.25   63.75   66.25
##  [28]   68.75   71.25   73.75   76.25   78.75   81.25   83.75   86.25   88.75
##  [37]   91.25   93.75   96.25   98.75  101.25  103.75  106.25  108.75  111.25
##  [46]  113.75  116.25  118.75  121.25  123.75  126.25  128.75  131.25  133.75
##  [55]  136.25  138.75  141.25  143.75  146.25  148.75  151.25  153.75  156.25
##  [64]  158.75  161.25  163.75  166.25  168.75  171.25  173.75  176.25  178.75
##  [73] -178.75 -176.25 -173.75 -171.25 -168.75 -166.25 -163.75 -161.25 -158.75
##  [82] -156.25 -153.75 -151.25 -148.75 -146.25 -143.75 -141.25 -138.75 -136.25
##  [91] -133.75 -131.25 -128.75 -126.25 -123.75 -121.25 -118.75 -116.25 -113.75
## [100] -111.25 -108.75 -106.25 -103.75 -101.25  -98.75  -96.25  -93.75  -91.25
## [109]  -88.75  -86.25  -83.75  -81.25  -78.75  -76.25  -73.75  -71.25  -68.75
## [118]  -66.25  -63.75  -61.25  -58.75  -56.25  -53.75  -51.25  -48.75  -46.25
## [127]  -43.75  -41.25  -38.75  -36.25  -33.75  -31.25  -28.75  -26.25  -23.75
## [136]  -21.25  -18.75  -16.25  -13.75  -11.25   -8.75   -6.25   -3.75   -1.25

The netCDF file only has one variable: precipitation, so let’s make an array of that

# get precipitation variable
precip_array <- ncvar_get(ncin,dname)
dlname <- ncatt_get(ncin,dname,"long_name")
dunits <- ncatt_get(ncin,dname,"units")
fillvalue <- ncatt_get(ncin,dname,"_FillValue")
dim(precip_array)
## [1] 144  72  12

Let’s get the global attributes of our data

# get global attributes
title <- ncatt_get(ncin,0,"title")
institution <- ncatt_get(ncin,0,"institution")
datasource <- ncatt_get(ncin,0,"source")
references <- ncatt_get(ncin,0,"references")
history <- ncatt_get(ncin,0,"history")
Conventions <- ncatt_get(ncin,0,"Conventions")

Then let’s close the netCDF file because we have already read it in

#close the netCDF file
nc_close(ncin)

Now we are ready to reshape the netCDF file from a raster to a rectangular 2-D array.

# decode time
cf <- CFtime(tunits$value, calendar = "proleptic_gregorian", time + 15769752) # convert time to CFtime class, add offset
cf
## CF datum of origin:
##   Origin  : 1800-01-01 00:00:00
##   Units   : hours
##   Calendar: proleptic_gregorian
## CF time series:
##   Elements: [1800-01-01 .. 1800-12-01] (average of 728.727273 hours between 12 elements)
timestamps <- CFtimestamp(cf) # get character-string times
timestamps
##  [1] "1800-01-01T00:00:00" "1800-02-01T00:00:00" "1800-03-01T00:00:00"
##  [4] "1800-04-01T00:00:00" "1800-05-01T00:00:00" "1800-06-01T00:00:00"
##  [7] "1800-07-01T00:00:00" "1800-08-01T00:00:00" "1800-09-01T00:00:00"
## [10] "1800-10-01T00:00:00" "1800-11-01T00:00:00" "1800-12-01T00:00:00"
class(timestamps)
## [1] "character"
time_cf <- CFparse(cf, timestamps) # parse the string into date components
time_cf
class(time_cf)
## [1] "data.frame"

Some of the values of the precipitation variable are missing or not available, so let’s remove those and replace them with NA’s

# replace netCDF fill values with NA's
precip_array[precip_array==fillvalue$value] <- NA
length(na.omit(as.vector(precip_array[,,1])))
## [1] 10363

Now we are ready to create a data frame. First we will create a matrix with the rows of longitude and latitude pairs.

# create matrix -- reshape data
lonlat <- as.matrix(expand.grid(lon,lat))
dim(lonlat)
## [1] 10368     2

Then we will create a vector from the large array

# reshape the whole array into vector
precip_vec_long <- as.vector(precip_array)
length(precip_vec_long)
## [1] 124416

And now we will combine the precipitation vector and the longitude and latitude vector into a matrix

# reshape the vector into a matrix
precip_mat <- matrix(precip_vec_long, nrow=nlon*nlat, ncol=nt)
dim(precip_mat)
## [1] 10368    12
head(na.omit(precip_mat))
##           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]     [,7]
## [1,] 0.2866667 0.2650000 0.2156667 0.2270000 0.4836667 0.8870000 1.128000
## [2,] 0.3140000 0.2730000 0.2240000 0.2460000 0.5023333 0.9350001 1.176000
## [3,] 0.3083333 0.2706667 0.2203334 0.2443334 0.4953333 0.9240000 1.158667
## [4,] 0.3049999 0.2683333 0.2136667 0.2426667 0.4910000 0.8863333 1.146000
## [5,] 0.3013333 0.2673333 0.2096667 0.2410000 0.4863333 0.8749999 1.133667
## [6,] 0.2989999 0.2686666 0.2083334 0.2403333 0.4826666 0.8920000 1.126000
##           [,8]      [,9]     [,10]     [,11]     [,12]
## [1,] 0.7020000 0.5623333 0.3956667 0.2380000 0.2669999
## [2,] 0.7373334 0.6086666 0.4189999 0.2600000 0.2860000
## [3,] 0.7290000 0.6063334 0.4123333 0.2543333 0.2806667
## [4,] 0.7206668 0.6026667 0.4066667 0.2506667 0.2770000
## [5,] 0.7163334 0.6010000 0.4036666 0.2473333 0.2723333
## [6,] 0.7140000 0.6020000 0.4029999 0.2470000 0.2686667

Finally, we can create a data frame from the matrix we previously created

# create a data frame
lonlat <- as.matrix(expand.grid(lon,lat))
precip_df02 <- data.frame(cbind(lonlat,precip_mat))
names(precip_df02) <- c("lon","lat","precipJan","precipFeb","precipMar","precipApr", "precipMay","precipJun", "precipJul","precipAug","precipSep",  "precipOct","precipNov","precipDec")
head(na.omit(precip_df02, 20))
dim(na.omit(precip_df02, 20))
## [1] 10332    14
# write out the data frame
csvpath <- "/Users/elizalawrence/Documents/GEOG495/data/csv_files"
csvname <- "CMAP_precip.mon.ltm_2.csv"
csvfile <- paste(csvpath, csvname, sep="")
write.table(na.omit(precip_df02),csvfile, row.names=FALSE, sep=",")

Now that we have our data frame, we can finally visualize our data! First we need to load some more packages

# load more packages
library(ggplot2)
library(maps)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:terra':
## 
##     extract
library(ggthemes)
library(stars)
## Loading required package: abind

We need to reshape the data frame because of the type of map we are trying to make - a facet wrap

# Reshape the data frame into long format, excluding 'lon' and 'lat'
df_long <- gather(precip_df02, key = "month", value = "value", -lon, -lat)

To make sure our months are in sequential order instead of alphabetical we need to define the order and the full names

# Define the order of months
month_order <- c("precipJan", "precipFeb", "precipMar", "precipApr","precipMay",
                 "precipJun","precipJul","precipAug","precipSep","precipOct",
                 "precipNov","precipDec")  # Add more months as needed

month_names <- c("January", "February", "March", "April", "May", "June", "July",
                 "August", "September", "October", "November", "December")  # Corresponding month names
# Convert 'month' variable to factor with custom order
df_long$month <- factor(df_long$month, levels = month_order, labels = month_names)

We also want to put outlines of the world map on our map of precipitation

#get world map data 
# Specify the path to your shape file
shapefile_path <- "/Users/elizalawrence/Documents/GEOG495/data/ne_110m_coastline/ne_110m_coastline.shp"

# Read the shape file
coastline <- st_read(shapefile_path)
## Reading layer `ne_110m_coastline' from data source 
##   `/Users/elizalawrence/Documents/GEOG495/data/ne_110m_coastline/ne_110m_coastline.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 134 features and 3 fields
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -85.60904 xmax: 180 ymax: 83.64513
## Geodetic CRS:  WGS 84

Let’s use ggplot to visualize the long term monthly means of global precipitation

# Create ggplot object with coastline and precipitation data
p <- ggplot() +
  geom_tile(data = df_long, aes(x = lon, y = lat, fill = value)) +  # Add precipitation data
  scale_fill_gradient(low = "white", high = "blue", name = "mm/day") +  # Color scale for precipitation
  labs(x = "Longitude", y = "Latitude", title = "Long Term (1981-2010) Mean Average Monthly Rate of Precipitation") +  # Labels and title
  facet_wrap(~ month, nrow = 4) +  # Facet wrap by month with 4 plots per row
  scale_x_continuous(breaks = seq(-120, 120, by = 60), labels = seq(-120, 120, by = 60), expand = c(0, 0)) +  # Adjust x-axis breaks
  scale_y_continuous(breaks = seq(-60, 60, by = 60), labels = seq(-60, 60, by = 60), expand = c(0, 0)) +  # Adjust y-axis breaks
  geom_sf(data = coastline, color = "grey80", inherit.aes = FALSE) +  # Add coastline
  theme_minimal() + # Minimal theme
  theme(axis.ticks = element_line(color = "black", linewidth = 0.5))  # Customize axis ticks

# Print the plot
print(p)

Discussion: From this map, we are able to see the long term means of global precipitation compared by month using the facet feature! The maps show that the areas near the equator at 0 degrees latitude, receive on a long term mean average, the most amount rain per month. Additionally, areas over the ocean receive more rain per month, than land masses do. Additionally, longitudes of 60 degrees or higher and -60 degrees or lower receive more rain per month. The area with a longitude of 60 to 120 degrees and a latitude of 15 degrees or so, is southern Asia. It also seems that the more rainy months are July, August, and September, especially near southern Asia which makes sense because that is when it is most humid in that region, and when monsoons occur. The northern part of South America has a pretty steady level of precipitation across all months. Northern Asia (Russia), hardly receives any rain in the months of November-March, but then begins to see more precipitation in the summer months.

We can also look at the long-term mean monthly rate of precipitation at a single point, such as Eugene.

First, we find the grid cell closest to Eugene

# get indices of the grid cell closest to Eugene
tlon <- -123.1167; tlat <- 44.0833
j <- which.min(abs(lon-tlon))
k <- which.min(abs(lat-tlat))
print(c(j, lon[j], k, lat[k]))
## [1]   95.00 -123.75   19.00   43.75

Then, we create a precipitation time series for the single grid cell from the large precipitation array

# get time series for the closest grid point
precip_ts_eugene <- precip_array[j, k, ]

Then, we turn our month variable into a factor and organize it so they are in time order versus alphabetical

# month
month_names <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
month <- rep(month_names)
head(month); tail(month)
## [1] "Jan" "Feb" "Mar" "Apr" "May" "Jun"
## [1] "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"
month <- factor(month, levels=month_names)
class(month)
## [1] "factor"

Then, we can create a data frame of just the precipitation data closest to Eugene

#create a data frame of the precipitation data closest to Eugene
precip_ts_eugene_df <- data.frame(precip_ts_eugene, month)
names(precip_ts_eugene_df) <- c("Precip", "Month")
head(precip_ts_eugene_df)

Then, we can create a simple plot!

#plot of long-term mean precipitation by month closest to Eugene 
ggplot(precip_ts_eugene_df, aes(x = Month, y = Precip)) +
  geom_point(size = 0.75) +
  labs(x = "Month", y = "Precipitation (mm/day)", title = "Long Term (1981-2010) Mean Average Rate of Precipitation of Grid Cell Closest to Eugene, OR")  # Labels and title

The plot shows that the average rate of precipitation is highest in November and December (over 5.5 mm/day). The rate steadily declines from the months of January until July and then begins increasing again in August. The lowest rate of precipitation occurs in July with approximately 0.25 mm/day. That means that we will soon be done with heavy precipitation rates as summer approaches!