Subsetting with extract_model#

# Setup custom styling for this jupyter notebook
from IPython.core.display import HTML
from pathlib import Path

def css_styling():
    buf = Path('./css/custom.css').read_text()
    style_header = f'<style>\n{buf}\n</style>\n'
    return HTML(style_header)
css_styling()
from matplotlib import pyplot as plt

def figure(*args, figsize=(18, 8), facecolor='white', **kwargs):
    """Return a new figure."""
    return plt.subplots(*args, figsize=figsize, facecolor=facecolor, **kwargs)
from matplotlib import tri
from matplotlib import patches
import xarray as xr
import netCDF4 as nc4
import numpy as np
import extract_model as em
from extract_model.grids.triangular_mesh import UnstructuredGridSubset
# A helper function to make plotting variables from unstructured data easier.
def plot_unstructured_variable(ds, varname, triangulation_varname='nv', xvarname='x', yvarname='y', vmin=None, vmax=None, facecolor='white', figsize=(24, 8), buf=None, bbox=None, fig=None, ax=None):
    """Plot variable from an unstructured grid."""
    x = ds[xvarname][:].to_numpy()
    y = ds[yvarname][:].to_numpy()
    nv = ds[triangulation_varname][:].to_numpy().T - 1
    triang = tri.Triangulation(x, y, nv)
    x_vertices = x[nv]
    xmin = np.min(x_vertices.flatten())
    xmax = np.max(x_vertices.flatten())
    y_vertices = y[nv]
    ymin = np.min(y_vertices.flatten())
    ymax = np.max(y_vertices.flatten())
    C = ds[varname][:]
    if fig is None and ax is None:
        fig, ax = plt.subplots(facecolor=facecolor, figsize=figsize)
    cbar = ax.tripcolor(triang, C)
    fig.colorbar(cbar)
    if buf is None:
        buf = 0.05
    ax.set_xlim([xmin-buf, xmax+buf])
    ax.set_ylim([ymin-buf, ymax+buf])
    ax.triplot(triang, color='gray', linewidth=0.2)
    ax.grid(linewidth=0.5)
    ax.set_title(ds[varname].long_name)
    ax.set_xlabel(ds[xvarname].long_name)
    ax.set_ylabel(ds[yvarname].long_name)
    if bbox is not None:
        rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], linewidth=2, edgecolor='r', facecolor='none')
        # Add the patch to the Axes
        ax.add_patch(rect)
    return fig, ax

Lake Erie Operational Forecast System (LEOFS) FVCOM#

Initialize the dataset object, but use our engine so that we can load FVCOM data into an xarray Dataset.

url = 'https://www.ncei.noaa.gov/thredds/dodsC/model-leofs/2022/07/nos.leofs.fields.n006.20220720.t00z.nc'
# We use a custom engine to support loading unstructured grids into xarray. The
# current version of xarray doesn't support coordinate variables having multiple
# dimensions, and technically it's not allowed according to the NUG and I think
# it's forbidden by CF as well. The custom version engine titled
# triangularmesh_netcdf allows clients to rename variables after the data
# arrays are constructed but before the xarray Dataset is assembled. This
# allows us to load the unstructured model output and rename the z coordinate
# variables siglay and siglev to sigma_layers and sigma_levels respectively.

# We also drop the Itime and Itime2 variables which are redundant to the time
# coordinate variable. These variables also interfere with cf-xarray and
# decoding times.

# MOST importantly, we specify the chunk size to be 1,N along the time
# coordinate. Without specifying a good chunk size, operations will take
# prohibitively long, and by default xarray makes extremely poor choices for
# chunksizes on unstructured grids, or at the very least with FVCOM.

ds = xr.open_dataset(url,
                     engine='triangularmesh_netcdf',
                     decode_times=True,
                     preload_varmap={'siglay': 'sigma_layers', 'siglev': 'sigma_levels'},
                     drop_variables=['Itime', 'Itime2'],
                     chunks={'time':1})
plot_unstructured_variable(ds.isel(time=0, siglay=-1), 'temp', xvarname='lon', yvarname='lat', figsize=(24,8))
(<Figure size 1728x576 with 2 Axes>,
 <AxesSubplot: title={'center': 'temperature'}, xlabel='nodal longitude', ylabel='nodal latitude'>)
_images/eee9c5d9f1102f96e74317e7ca344645866aa2a178aa3ca2bfec516f1df9fa31.png

Now consider that we desire to view only the southwestern portion of Lake Erie.

bbox = (276.4, 41.5, 277.4, 42.1)
plot_unstructured_variable(ds.isel(time=0, siglay=-1), 'temp', xvarname='lon', yvarname='lat', bbox=bbox, figsize=(24,8))
(<Figure size 1728x576 with 2 Axes>,
 <AxesSubplot: title={'center': 'temperature'}, xlabel='nodal longitude', ylabel='nodal latitude'>)
_images/3d2d0517476a0dd7aa489c6cd9952431b4557425abd3e23706e5c5d9932fdcde.png

Subsetting the dataset is simple with the ds.em.sub_bbox method.

%%time
ds_ss = ds.em.sub_bbox(bbox=bbox)
CPU times: user 2.82 s, sys: 153 ms, total: 2.97 s
Wall time: 3.62 s

Plotting the new dataset we can see that only the relevant cells from the model output are available, any cells wholly external to the region of interest are discarded from the subsetted dataset.

plot_unstructured_variable(ds_ss.isel(time=0, siglay=-1), 'temp', xvarname='lon', yvarname='lat', figsize=(24,8))
(<Figure size 1728x576 with 2 Axes>,
 <AxesSubplot: title={'center': 'temperature'}, xlabel='nodal longitude', ylabel='nodal latitude'>)
_images/0bb87cd26442f8dbafd35fd69819e68b78b04761d1d9949d8f5a223369200ce1.png

Now that we have subsetted the data to a region of interest we can write that data to disk.

The data saved to disk will not match the model output exactly. The siglay and siglev variables have been renamed to sigma_layers and sigma_levels respectively. The variables Itime and Itime2 have been discarded.
%%time
ds_ss.to_netcdf('/tmp/fvcom-subset.nc')
CPU times: user 2.28 s, sys: 255 ms, total: 2.53 s
Wall time: 18.7 s

Northern Golf of Mexico Operational Forecast System (NGOFS2) FVCOM#

url = 'https://www.ncei.noaa.gov/thredds/dodsC/model-ngofs2-files/2022/07/nos.ngofs2.fields.n006.20220725.t03z.nc'
ds = xr.open_dataset(url, engine='triangularmesh_netcdf', decode_times=True, preload_varmap={'siglay': 'sigma_layers', 'siglev': 'sigma_levels'}, drop_variables=['Itime', 'Itime2'], chunks={'time':1})
plot_unstructured_variable(ds.isel(time=0, siglay=-1), 'temp', xvarname='lon', yvarname='lat', figsize=(24,8))
(<Figure size 1728x576 with 2 Axes>,
 <AxesSubplot: title={'center': 'temperature'}, xlabel='nodal longitude', ylabel='nodal latitude'>)
_images/3819146c81246a808bf84507099042327c87478c0d09952098bd1b82442c507d.png

Consider that we want to subset onto a region around southern Louisiana.

bbox = (268, 28, 270, 30)

plot_unstructured_variable(ds.isel(time=0, siglay=-1), 'temp', xvarname='lon', yvarname='lat', bbox=bbox, figsize=(24,8))
(<Figure size 1728x576 with 2 Axes>,
 <AxesSubplot: title={'center': 'temperature'}, xlabel='nodal longitude', ylabel='nodal latitude'>)
_images/da82097183b955fcb62668b33a97056041be9c261b821e07e5da1308d97d6b8e.png
%%time
ds_ss = ds.em.sub_bbox(bbox=bbox, model_type='FVCOM')
CPU times: user 1.77 s, sys: 551 ms, total: 2.32 s
Wall time: 8.9 s
bbox = (268.5, 29.25, 269, 29.75)
plot_unstructured_variable(ds_ss.isel(time=0, siglay=-1), 'temp', xvarname='lon', yvarname='lat', bbox=bbox, figsize=(24,8))
(<Figure size 1728x576 with 2 Axes>,
 <AxesSubplot: title={'center': 'temperature'}, xlabel='nodal longitude', ylabel='nodal latitude'>)
_images/537d7e1ceea425837ffc4979ec3e4698adb16ba10be8651745fc4bf6de19c19a.png
ds_ss = ds_ss.em.sub_bbox(bbox=bbox)
plot_unstructured_variable(ds_ss.isel(time=0, siglay=-1), 'temp', xvarname='lon', yvarname='lat', bbox=bbox, figsize=(24,8))
(<Figure size 1728x576 with 2 Axes>,
 <AxesSubplot: title={'center': 'temperature'}, xlabel='nodal longitude', ylabel='nodal latitude'>)
_images/8031576e219af9937b3af238b69f2e1c87f59f03c3c1a42e10dec5080e2eff37.png

Subsetting Columbia River Estuary Operational Forecast System (CREOFS) SELFE#

url = 'https://www.ncei.noaa.gov/thredds/dodsC/model-creofs-files/2022/07/nos.creofs.fields.n006.20220728.t09z.nc'
ds = xr.open_dataset(url, chunks={'time': 1})

Looking at the western portion of the Columbia River, we find a section of interest.

bbox = (-123.9, 46.1, -123.6, 46.3)
plot_unstructured_variable(ds.isel(time=0, nv=-1), 'temp', triangulation_varname='ele', xvarname='lon', yvarname='lat', bbox=bbox, figsize=(16,18))
(<Figure size 1152x1296 with 2 Axes>,
 <AxesSubplot: title={'center': 'temperature'}, xlabel='nodal longitude', ylabel='nodal latitude'>)
_images/b0d80185c1f759efa03a678df106f23a9b41bb0b759cc33c529fe578f2af769b.png

Subsetting is the same as before.

%%time
ds_ss = ds.em.sub_grid(bbox)
CPU times: user 128 ms, sys: 7.54 ms, total: 135 ms
Wall time: 137 ms

Plotting the subsetted data we see only the region of interest.

plot_unstructured_variable(ds_ss.isel(time=0, nv=-1), 'temp', triangulation_varname='ele', xvarname='lon', yvarname='lat', buf=0, bbox=bbox, figsize=(18,16))
(<Figure size 1296x1152 with 2 Axes>,
 <AxesSubplot: title={'center': 'temperature'}, xlabel='nodal longitude', ylabel='nodal latitude'>)
_images/fc5b790a9f3b43374d9639660c83423920dadbc1c10d785c0b2fbb70189c4dc8.png