Generically access model output#
import cf_xarray
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import xcmocean
import cmocean.cm as cmo
import extract_model as em
<frozen importlib._bootstrap>:219: RuntimeWarning: scipy._lib.messagestream.MessageStream size changed, may indicate binary incompatibility. Expected 56 from C header, got 64 from PyObject
ROMS#
# open an example dataset from xarray's tutorials
ds = xr.tutorial.open_dataset('ROMS_example.nc', chunks={'ocean_time': 1})
# normally could run the `preprocess` code as part of reading in the dataset
# but with the tutorial model output, run it separately:
ds = em.preprocess(ds)
ds
<xarray.Dataset> Dimensions: (ocean_time: 2, s_rho: 30, eta_rho: 191, xi_rho: 371) Coordinates: Cs_r (s_rho) float64 dask.array<chunksize=(30,), meta=np.ndarray> lon_rho (eta_rho, xi_rho) float64 dask.array<chunksize=(191, 371), meta=np.ndarray> hc float64 20.0 h (eta_rho, xi_rho) float64 dask.array<chunksize=(191, 371), meta=np.ndarray> lat_rho (eta_rho, xi_rho) float64 dask.array<chunksize=(191, 371), meta=np.ndarray> Vtransform int32 2 * ocean_time (ocean_time) datetime64[ns] 2001-08-01 2001-08-08 * s_rho (s_rho) float64 -0.9833 -0.95 -0.9167 ... -0.05 -0.01667 * xi_rho (xi_rho) int64 0 1 2 3 4 5 6 7 ... 364 365 366 367 368 369 370 * eta_rho (eta_rho) int64 0 1 2 3 4 5 6 7 ... 184 185 186 187 188 189 190 z_rho (ocean_time, s_rho, eta_rho, xi_rho) float64 dask.array<chunksize=(1, 30, 191, 371), meta=np.ndarray> Data variables: salt (ocean_time, s_rho, eta_rho, xi_rho) float32 dask.array<chunksize=(1, 30, 191, 371), meta=np.ndarray> zeta (ocean_time, eta_rho, xi_rho) float32 dask.array<chunksize=(1, 191, 371), meta=np.ndarray> Attributes: (12/34) file: ../output_20yr_obc/2001/ocean_his_0015.nc format: netCDF-4/HDF5 file Conventions: CF-1.4 type: ROMS/TOMS history file title: TXLA ROMS hindcast run with dyes and oxygen rst_file: ../output_20yr_obc/2001/ocean_rst.nc ... ... compiler_flags: -heap-arrays -fp-model fast -mt_mpi -ip -O3 -msse2 -free tiling: 010x012 history: Tue Jul 24 11:04:43 2018: /opt/nco/ncks -D 4 -t 8 /cop... ana_file: /home/d.kobashi/TXLA_ROMS_reana/Functionals/ana_btflux... CPP_options: TXLA2, ANA_BPFLUX, ANA_BSFLUX, ANA_BTFLUX, ANA_NUDGCOE... NCO: netCDF Operators version 4.7.6-alpha04 (Homepage = htt...
Note that the preprocessing code sets up a ROMS dataset so that it can be used with cf-xarray
. For example, axis and coordinate variables have been identified:
ds.cf
Coordinates:
- CF Axes: * X: ['xi_rho']
* Y: ['eta_rho']
* Z: ['s_rho']
* T: ['ocean_time']
- CF Coordinates: longitude: ['lon_rho']
latitude: ['lat_rho']
vertical: ['z_rho']
* time: ['ocean_time']
- Cell Measures: area, volume: n/a
- Standard Names: latitude: ['lat_rho']
longitude: ['lon_rho']
* ocean_s_coordinate_g2: ['s_rho']
* time: ['ocean_time']
- Bounds: n/a
Data Variables:
- Cell Measures: area, volume: n/a
- Standard Names: sea_surface_elevation: ['zeta']
sea_water_practical_salinity: ['salt']
- Bounds: n/a
Variable to use, by standard_name:
zeta = 'sea_surface_elevation'
salt = 'sea_water_practical_salinity'
Subset numerical domain#
Use .em.sub_grid()
to narrow the model area down using a bounding box on a Dataset which respects the horizontal structure of multiple grids. Currently only is relevant for ROMS models but will run on any ROMS model or models with a single longitude/latitude set of coordinates.
Resulting area of model will not be exactly the bounding box if the domain is curvilinear.
ds_sub = ds.em.sub_grid([-92, 27, -90, 29])
ds_sub.cf[zeta].cf.isel(T=0).cf.plot(x='longitude', y='latitude')
<matplotlib.collections.QuadMesh at 0x7fbfa182a7c0>
Note that this is an unusual ROMS Dataset because it has only one horizontal grid.
ds_sub
<xarray.Dataset> Dimensions: (ocean_time: 2, s_rho: 30, eta_rho: 100, xi_rho: 144) Coordinates: Cs_r (s_rho) float64 dask.array<chunksize=(30,), meta=np.ndarray> lon_rho (eta_rho, xi_rho) float64 dask.array<chunksize=(100, 144), meta=np.ndarray> hc float64 20.0 h (eta_rho, xi_rho) float64 dask.array<chunksize=(100, 144), meta=np.ndarray> lat_rho (eta_rho, xi_rho) float64 dask.array<chunksize=(100, 144), meta=np.ndarray> Vtransform int32 2 * ocean_time (ocean_time) datetime64[ns] 2001-08-01 2001-08-08 * s_rho (s_rho) float64 -0.9833 -0.95 -0.9167 ... -0.05 -0.01667 * xi_rho (xi_rho) int64 98 99 100 101 102 103 ... 236 237 238 239 240 241 * eta_rho (eta_rho) int64 0 1 2 3 4 5 6 7 8 ... 91 92 93 94 95 96 97 98 99 z_rho (ocean_time, s_rho, eta_rho, xi_rho) float64 dask.array<chunksize=(1, 30, 100, 144), meta=np.ndarray> Data variables: salt (ocean_time, s_rho, eta_rho, xi_rho) float32 dask.array<chunksize=(1, 30, 100, 144), meta=np.ndarray> zeta (ocean_time, eta_rho, xi_rho) float32 dask.array<chunksize=(1, 100, 144), meta=np.ndarray> Attributes: (12/34) file: ../output_20yr_obc/2001/ocean_his_0015.nc format: netCDF-4/HDF5 file Conventions: CF-1.4 type: ROMS/TOMS history file title: TXLA ROMS hindcast run with dyes and oxygen rst_file: ../output_20yr_obc/2001/ocean_rst.nc ... ... compiler_flags: -heap-arrays -fp-model fast -mt_mpi -ip -O3 -msse2 -free tiling: 010x012 history: Tue Jul 24 11:04:43 2018: /opt/nco/ncks -D 4 -t 8 /cop... ana_file: /home/d.kobashi/TXLA_ROMS_reana/Functionals/ana_btflux... CPP_options: TXLA2, ANA_BPFLUX, ANA_BSFLUX, ANA_BTFLUX, ANA_NUDGCOE... NCO: netCDF Operators version 4.7.6-alpha04 (Homepage = htt...
Subset to a horizontal box#
Use .em.sub_bbox()
to narrow the model area down using a bounding box on either a Dataset or DataArray. There is no expectation of multiple horizontal grids having the “correct” relationship to each other.
Dataset#
In the case of a Dataset, all map-based variables are filtered using the same bounding box.
ds.em.sub_bbox([-92, 27, -90, 29], drop=True).cf[salt].cf.isel(T=0).cf.sel(Z=0, method='nearest')
<xarray.DataArray 'salt' (eta_rho: 100, xi_rho: 144)> dask.array<getitem, shape=(100, 144), dtype=float32, chunksize=(100, 144), chunktype=numpy.ndarray> Coordinates: ocean_time datetime64[ns] 2001-08-01 s_rho float64 -0.01667 * xi_rho (xi_rho) int64 98 99 100 101 102 103 ... 236 237 238 239 240 241 * eta_rho (eta_rho) int64 0 1 2 3 4 5 6 7 8 ... 91 92 93 94 95 96 97 98 99 Attributes: long_name: salinity time: ocean_time field: salinity, scalar, series standard_name: sea_water_practical_salinity
DataArray#
ds.cf[salt].em.sub_bbox([-92, 27, -90, 29], drop=True).cf.isel(T=0, Z=-1).cf.plot(x='longitude', y='latitude')
<matplotlib.collections.QuadMesh at 0x7fbf81ce3fd0>
grid point (interpolation and selecting nearest)#
Interpolate to a single existing horizontal grid point (and any additional depth and time values for that location) and compare it with method selecting the nearest point to demonstrate we get the same value.
%%time
varname = salt
# Set up a single lon/lat location
j, i = 50, 10
longitude = float(ds.cf[varname].cf['longitude'][j,i])
latitude = float(ds.cf[varname].cf['latitude'][j,i])
# Interpolation
da_out = ds.cf[varname].em.interp2d(longitude, latitude)
# Selection of nearest location in 2D
da_check = ds.cf[varname].em.sel2dcf(longitude=longitude, latitude=latitude).squeeze()
assert np.allclose(da_out, da_check)
CPU times: user 4.18 s, sys: 164 ms, total: 4.34 s
Wall time: 4.36 s
You could also select a time and/or depth index or interpolate in time and/or depth at the same time:
# Select time index and depth index
ds.cf[varname].em.interp2d(longitude, latitude, iT=0, iZ=0)
<xarray.DataArray 'salt' ()> dask.array<getitem, shape=(), dtype=float32, chunksize=(), chunktype=numpy.ndarray> Coordinates: ocean_time datetime64[ns] 2001-08-01 s_rho float64 -0.9833 Cs_r float64 dask.array<chunksize=(), meta=np.ndarray> hc float64 20.0 Vtransform int32 2 lat float64 28.23 lon float64 -93.59 z_rho float64 dask.array<chunksize=(), meta=np.ndarray> Attributes: long_name: salinity time: ocean_time field: salinity, scalar, series standard_name: sea_water_practical_salinity
ds.cf[varname].cf
Coordinates:
- CF Axes: * X: ['xi_rho']
* Y: ['eta_rho']
* Z: ['s_rho']
* T: ['ocean_time']
- CF Coordinates: longitude: ['lon_rho']
latitude: ['lat_rho']
vertical: ['z_rho']
* time: ['ocean_time']
- Cell Measures: area, volume: n/a
- Standard Names: latitude: ['lat_rho']
longitude: ['lon_rho']
* ocean_s_coordinate_g2: ['s_rho']
* time: ['ocean_time']
- Bounds: n/a
# Interpolate to time value and depth value
ds.cf[varname].em.interp2d(longitude, latitude, T=ds.cf['T'][0], Z=-10)
<xarray.DataArray 'salt' ()> dask.array<dask_aware_interpnd, shape=(), dtype=float32, chunksize=(), chunktype=numpy.ndarray> Coordinates: s_rho float64 -0.4475 Cs_r float64 dask.array<chunksize=(), meta=np.ndarray> hc float64 20.0 Vtransform int32 2 lat float64 28.23 lon float64 -93.59 ocean_time datetime64[ns] 2001-08-01 z_rho int64 -10 Attributes: long_name: salinity time: ocean_time field: salinity, scalar, series standard_name: sea_water_practical_salinity
The interpolation is faster the second time the regridder is used — it is saved by the extract_model
accessor and reused if the lon/lat locations to be interpolated to are the same. Here we interpolate to salinity and it is faster than it was the first time it was used for interpolation the sea surface elevation.
%%time
varname = zeta
# Set up a single lon/lat location
j, i = 50, 10
longitude = float(ds.cf[varname].cf['longitude'][j,i])
latitude = float(ds.cf[varname].cf['latitude'][j,i])
# Interpolation
da_out = ds.cf[varname].em.interp2d(longitude, latitude)
# Selection of nearest location in 2D
da_check = ds.cf[varname].em.sel2dcf(longitude=longitude, latitude=latitude).squeeze()
assert np.allclose(da_out, da_check)
CPU times: user 956 ms, sys: 34.3 ms, total: 990 ms
Wall time: 1 s
not grid point#
inside domain (interpolation and selecting nearest)#
For a selected location that is not a grid point (so we can’t check it exactly), we show here both interpolating to that location horizontally and selecting the nearest point to that location.
The square in the right hand side plot shows the nearest point selected using .em.sel2d()
and the circle shows the interpolated value at the exact selected location using .em.interp2d()
.
varname = zeta
# sel
longitude = -91.49
latitude = 28.510
# isel
iZ = None
iT = 0
isel = dict(T=iT)
# Interpolation
da_out = ds.cf[varname].em.interp2d(longitude, latitude, iT=iT, iZ=iZ)
# Selection of nearest location in 2D
da_sel = ds.cf[varname].em.sel2dcf(longitude=longitude, latitude=latitude, distances_name="distance").cf.isel(T=iT).squeeze()
# Plot
cmap = ds.cf[varname].cmo.seq
dacheck = ds.cf[varname].cf.isel(isel)
fig, axes = plt.subplots(1, 2, figsize=(15,5))
dacheck.cmo.cfplot(ax=axes[0], x='longitude', y='latitude')
axes[0].scatter(da_out.cf['longitude'], da_out.cf['latitude'], s=50, c=da_out,
vmin=dacheck.min().values, vmax=dacheck.max().values, cmap=cmap, edgecolors='k')
# make smaller area of model to show
# want model output only within the box defined by these lat/lon values
dacheck_min = dacheck.em.sub_bbox([-91.52, 28.49, -91.49, 28.525], drop=True)
dacheck_min.cmo.cfplot(ax=axes[1], x='longitude', y='latitude')
# interpolation
axes[1].scatter(da_out.cf['longitude'], da_out.cf['latitude'], s=50, c=da_out,
vmin=dacheck_min.min().values, vmax=dacheck_min.max().values,
cmap=cmap, edgecolors='k')
# selection
axes[1].scatter(da_sel.cf['longitude'], da_sel.cf['latitude'], s=50, c=da_sel.cf[varname],
vmin=dacheck_min.min().values, vmax=dacheck_min.max().values,
cmap=cmap, edgecolors='k', marker='s')
<matplotlib.collections.PathCollection at 0x7fbfc254cb50>
We input the extra keyword argument distances_name
into the call ds.cf[varname].em.sel2dcf
in order to also return the distance between the requested location and the returned model location. This value is shown here in km:
da_sel["distance"]
<xarray.DataArray 'distance' ()> array(0.51134) Coordinates: ocean_time datetime64[ns] 2001-08-01 xi_rho int64 136 eta_rho int64 54 lon_rho float64 dask.array<chunksize=(), meta=np.ndarray> hc float64 20.0 h float64 dask.array<chunksize=(), meta=np.ndarray> Vtransform int32 2 lat_rho float64 dask.array<chunksize=(), meta=np.ndarray> Attributes: units: km
outside domain#
Don’t extrapolate
This is commented out since it purposefully raises an error:
ValueError: Longitude outside of available domain. Use extrap=True to extrapolate.
# varname = zeta
# # sel
# longitude = -166
# latitude = 48
# sel = dict(longitude=longitude, latitude=latitude)
# # isel
# iZ = 0
# iT = 0
# isel = dict(Z=iZ, T=iT)
# da_out = ds.cf[varname].em.interp2d(longitude, latitude, iT=iT, iZ=iZ, extrap=False)
# da_out
Extrapolate
varname = zeta
# sel
longitude = -89
latitude = 28.3
sel = dict(longitude=longitude, latitude=latitude)
# isel
iZ = None
iT = 0
isel = dict(T=iT)
da_out = ds.cf[varname].em.interp2d(longitude, latitude, iT=iT, iZ=iZ, extrap=True)
# plot
cmap = ds.cf[varname].cmo.seq
dacheck = ds.cf[varname].cf.isel(isel)
fig, ax = plt.subplots(1,1)
dacheck.cmo.cfplot(ax=ax, x='longitude', y='latitude')
ax.scatter(da_out.cf['longitude'], da_out.cf['latitude'], s=50, c=da_out,
vmin=dacheck.min().values, vmax=dacheck.max().values, cmap=cmap, edgecolors='k')
<matplotlib.collections.PathCollection at 0x7fbfa1e57700>
points (locstream, interpolation)#
Interpolate to unstructured pairs of lon/lat locations instead of grids of lon/lat locations, using locstream
. Choose grid points so that we can check the accuracy of the results.
varname = zeta
# sel
# this creates 12 pairs of lon/lat points that
# align with grid points so we can check the
# interpolation
longitude = ds.cf[varname].cf['longitude'].isel(eta_rho=60, xi_rho=slice(None,None,10))
latitude = ds.cf[varname].cf['latitude'].isel(eta_rho=60, xi_rho=slice(None,None,10))
sel = dict(X=longitude.xi_rho, Y=longitude.eta_rho)
# isel
iZ = None
iT = 0
isel = dict(T=iT)
da_out = ds.cf[varname].em.interp2d(longitude, latitude, iT=iT, iZ=iZ, locstream=True)
# check
da_check = ds.cf[varname].cf.isel(isel).cf.sel(sel)
assert np.allclose(da_out, da_check, equal_nan=True)
It is not currently possible to interpolate in depth with both more than one time and location.
This cell is commented out because it purposefully returns an error:
NotImplementedError: Currently it is not possible to interpolate in depth with more than 1 other (time) dimension.
# ds.cf[salt].em.interp2d(longitude, latitude, Z=-10, locstream=True)
grid of known locations (interpolation)#
varname = zeta
# sel
longitude = ds.cf[varname].cf['longitude'][:-50:20,:-200:100]
latitude = ds.cf[varname].cf['latitude'][:-50:20,:-200:100]
sel = dict(X=longitude.xi_rho, Y=longitude.eta_rho)
# isel
iZ = None
iT = 0
isel = dict(T=iT)
da_out = ds.cf[varname].em.interp2d(longitude, latitude, iT=iT, iZ=iZ, locstream=False)
# check
da_check = ds.cf[varname].cf.sel(sel).cf.isel(isel)
assert np.allclose(da_out, da_check)
grid of new locations (interpolation, regridding)#
varname = zeta
# sel
longitude = np.linspace(ds.cf[varname].cf['longitude'].min(), ds.cf[varname].cf['longitude'].max(), 30)
latitude = np.linspace(ds.cf[varname].cf['latitude'].min(), ds.cf[varname].cf['latitude'].max(), 30)
# isel
iZ = None
iT = 0
isel = dict(T=iT)
da_out = ds.cf[varname].em.interp2d(longitude, latitude, iT=iT, iZ=iZ, locstream=False, extrap=False, extrap_val=np.nan)
# plot
cmap = cmo.delta
dacheck = ds.cf[varname].cf.isel(isel)
fig, axes = plt.subplots(1,2, figsize=(10,4))
dacheck.cmo.cfplot(ax=axes[0], x='longitude', y='latitude')
da_out.cmo.cfplot(ax=axes[1], x='longitude', y='latitude')
<matplotlib.collections.QuadMesh at 0x7fbf8254f970>
HYCOM#
# url = ['http://tds.hycom.org/thredds/dodsC/GLBy0.08/latest']
# ds = xr.open_mfdataset(url, preprocess=em.preprocess, drop_variables='tau')
# ds.isel(time=slice(0,2)).sel(lat=slice(-20, 30), lon=slice(140,190)).to_netcdf('hycom.nc')
# ds = xr.open_mfdataset('hycom.nc', preprocess=em.preprocess)
url = 'http://tds.hycom.org/thredds/dodsC/GLBy0.08/latest'
ds = xr.open_dataset(url, drop_variables='tau')["water_u"].isel(time=slice(0,2), depth=0).sel(lat=slice(-20, 30), lon=slice(140,190))
ds = em.preprocess(ds)
ds = ds.load()
ds
<xarray.DataArray 'water_u' (time: 2, lat: 1251, lon: 626)> array([[[ nan, nan, nan, ..., 0.002 , 0.008 , 0.011 ], [ nan, nan, nan, ..., -0.022 , -0.013 , -0.008 ], [ nan, nan, nan, ..., -0.056 , -0.044 , -0.028 ], ..., [-0.24200001, -0.23400001, -0.231 , ..., 0.19800001, 0.163 , 0.142 ], [-0.2 , -0.194 , -0.18800001, ..., 0.142 , 0.11800001, 0.109 ], [-0.163 , -0.155 , -0.148 , ..., 0.097 , 0.08800001, 0.09200001]], [[ nan, nan, nan, ..., 0.024 , 0.028 , 0.028 ], [ nan, nan, nan, ..., -0.006 , 0.001 , 0.006 ], [ nan, nan, nan, ..., -0.042 , -0.031 , -0.015 ], ..., [-0.22900002, -0.21900001, -0.20500001, ..., 0.132 , 0.094 , 0.07600001], [-0.177 , -0.17300001, -0.16600001, ..., 0.081 , 0.057 , 0.053 ], [-0.127 , -0.128 , -0.126 , ..., 0.043 , 0.035 , 0.043 ]]], dtype=float32) Coordinates: * lat (lat) float64 -20.0 -19.96 -19.92 -19.88 ... 29.92 29.96 30.0 * lon (lon) float64 140.0 140.1 140.2 140.2 ... 189.8 189.8 189.9 190.0 depth float64 0.0 * time (time) datetime64[ns] 2023-01-21T12:00:00 2023-01-21T15:00:00 time_run (time) datetime64[ns] 2023-01-21T12:00:00 2023-01-21T12:00:00 Attributes: _CoordinateAxes: time_run time depth lat lon units: m/s long_name: Eastward Water Velocity standard_name: eastward_sea_water_velocity NAVO_code: 17
ds.cf
Coordinates:
- CF Axes: * X: ['lon']
* Y: ['lat']
Z: ['depth']
* T: ['time']
- CF Coordinates: * longitude: ['lon']
* latitude: ['lat']
vertical: ['depth']
* time: ['time']
- Cell Measures: area, volume: n/a
- Standard Names: depth: ['depth']
forecast_reference_time: ['time_run']
* latitude: ['lat']
* longitude: ['lon']
* time: ['time']
- Bounds: n/a
grid point#
# sel
longitude = float(ds.cf['X'][100])
latitude = float(ds.cf['Y'][150])
sel = dict(longitude=longitude, latitude=latitude)
# isel
iZ = None
iT = None
# isel = dict(Z=iZ)
da_out = ds.em.interp2d(longitude, latitude, iT=iT, iZ=iZ)
# check
da_check = ds.cf.sel(sel)#.cf.isel(isel)
assert np.allclose(da_out, da_check)
not grid point#
inside domain#
# sel
longitude = 155
latitude = 5
sel = dict(longitude=longitude, latitude=latitude)
# isel
iZ = None
iT = 0
isel = dict(T=iT)
da_out = ds.em.interp2d(longitude, latitude, iT=iT, iZ=iZ)
# plot
cmap = cmo.delta
dacheck = ds.cf.isel(isel)
fig, ax = plt.subplots(1,1)
dacheck.cmo.plot(ax=ax)
ax.scatter(da_out.cf['longitude'], da_out.cf['latitude'], s=50, c=da_out,
vmin=dacheck.min().values, vmax=dacheck.max().values, cmap=cmap, edgecolors='k')
<matplotlib.collections.PathCollection at 0x7fbf824e8550>
outside domain#
Don’t extrapolate
This purposefully raises an error so is commented out:
ValueError: Longitude outside of available domain. Use extrap=True to extrapolate.
# # sel
# longitude = -166
# latitude = 48
# sel = dict(longitude=longitude, latitude=latitude)
# # isel
# iZ = None
# iT = 0
# isel = dict(T=iT)
# da_out = ds.em.interp2d(longitude, latitude, iT=iT, iZ=iZ, extrap=False)
# da_out = em.select(**kwargs)
# da_out
Extrapolate
# sel
longitude = 139
latitude = 0
sel = dict(longitude=longitude, latitude=latitude)
# isel
iZ = None
iT = 0
isel = dict(T=iT)
da_out = ds.em.interp2d(longitude, latitude, iT=iT, iZ=iZ, extrap=True)
# plot
cmap = cmo.delta
dacheck = ds.cf.isel(isel)
fig, ax = plt.subplots(1,1)
dacheck.cmo.plot(ax=ax)
ax.scatter(da_out.cf['longitude'], da_out.cf['latitude'], s=50, c=da_out,
vmin=dacheck.min().values, vmax=dacheck.max().values, cmap=cmap, edgecolors='k')
ax.set_xlim(138,190)
(138.0, 190.0)
points (locstream)#
Unstructured pairs of lon/lat locations instead of grids of lon/lat locations, using locstream
.
# sel
# this creates 12 pairs of lon/lat points that
# align with grid points so we can check the
# interpolation
longitude = ds.cf['X'][::40].values
latitude = ds.cf['Y'][::80].values
# selecting individual lon/lat locations with advanced xarray indexing
sel = dict(longitude=xr.DataArray(longitude, dims="pts"), latitude=xr.DataArray(latitude, dims="pts"))
# isel
iZ = None
iT = 0
isel = dict(T=iT)
da_out = ds.em.interp2d(longitude, latitude, iT=iT, iZ=iZ, locstream=True)
# check
da_check = ds.cf.isel(isel).cf.sel(sel)
assert np.allclose(da_out, da_check, equal_nan=True)
grid of known locations#
# sel
longitude = ds.cf['X'][100::500]
latitude = ds.cf['Y'][100::500]
sel = dict(longitude=longitude, latitude=latitude)
# isel
iZ = None
iT = None
# isel = dict(Z=iZ)
da_out = ds.em.interp2d(longitude, latitude, iT=iT, iZ=iZ, locstream=False)
# check
da_check = ds.cf.sel(sel)#.cf.isel(isel)
assert np.allclose(da_out, da_check)
grid of new locations#
# sel
longitude = np.linspace(ds.cf['X'].min(), ds.cf['X'].max(), 30)
latitude = np.linspace(ds.cf['Y'].min(), ds.cf['Y'].max(), 30)
sel = dict(longitude=longitude, latitude=latitude)
# isel
iZ = None
iT = 0
isel = dict(T=iT)
da_out = ds.em.interp2d(longitude, latitude, iT=iT, iZ=iZ, locstream=False)
# kwargs = dict(da, longitude=longitude, latitude=latitude, iT=T, iZ=Z)
# da_out = em.select(**kwargs)
# plot
cmap = cmo.delta
dacheck = ds.cf.isel(isel)
fig, axes = plt.subplots(1,2, figsize=(10,4))
dacheck.cmo.plot(ax=axes[0])
da_out.cmo.plot(ax=axes[1])
<matplotlib.collections.QuadMesh at 0x7fbfc349bee0>
POM#
try:
url = "https://www.ncei.noaa.gov/thredds/dodsC/model-loofs-agg/Aggregated_LOOFS_Fields_Forecast_best.ncd"
# url = ['https://opendap.co-ops.nos.noaa.gov/thredds/dodsC/LOOFS/fmrc/Aggregated_7_day_LOOFS_Fields_Forecast_best.ncd']
# ds = xr.open_mfdataset(url, preprocess=em.preprocess, chunks=None)
ds= xr.open_dataset(url)
ds = em.utils.preprocess_pom(ds, interp_vertical=False)
except OSError:
import pandas as pd
today = pd.Timestamp.today()
url = [today.strftime('https://opendap.co-ops.nos.noaa.gov/thredds/dodsC/NOAA/LOOFS/MODELS/%Y/%m/%d/glofs.loofs.fields.nowcast.%Y%m%d.t00z.nc'),
today.strftime('https://opendap.co-ops.nos.noaa.gov/thredds/dodsC/NOAA/LOOFS/MODELS/%Y/%m/%d/glofs.loofs.fields.nowcast.%Y%m%d.t06z.nc')]
ds = xr.open_mfdataset(url, preprocess=em.preprocess)
ds = ds["zeta"].isel(time=slice(0,2)).load()
ds
<xarray.DataArray 'zeta' (time: 2, ny: 25, nx: 61)> array([[[ nan, nan, nan, ..., nan, nan, nan], [ nan, nan, nan, ..., nan, nan, nan], [ nan, nan, 0.5127469, ..., nan, nan, nan], ..., [ nan, nan, nan, ..., nan, nan, nan], [ nan, nan, nan, ..., nan, nan, nan], [ nan, nan, nan, ..., nan, nan, nan]], [[ nan, nan, nan, ..., nan, nan, nan], [ nan, nan, nan, ..., nan, nan, nan], [ nan, nan, 0.5261942, ..., nan, nan, nan], ..., [ nan, nan, nan, ..., nan, nan, nan], [ nan, nan, nan, ..., nan, nan, nan], [ nan, nan, nan, ..., nan, nan, nan]]], dtype=float32) Coordinates: lon (ny, nx) float32 -79.79 -79.73 -79.67 ... -76.19 -76.13 -76.07 lat (ny, nx) float32 43.14 43.14 43.15 43.15 ... 44.22 44.22 44.22 * time (time) datetime64[ns] 2018-01-01T00:59:45 2018-01-01T02:00:13.1... time_run (time) datetime64[ns] 2018-01-01T00:59:45 2018-01-01T00:59:45 * nx (nx) int64 0 1 2 3 4 5 6 7 8 9 ... 51 52 53 54 55 56 57 58 59 60 * ny (ny) int64 0 1 2 3 4 5 6 7 8 9 ... 15 16 17 18 19 20 21 22 23 24 Attributes: units: meters long_name: Height Above Model Sea Level reference: model_sea_level standard_name: sea_surface_elevation
ds.cf
Coordinates:
- CF Axes: * X: ['nx']
* Y: ['ny']
* T: ['time']
Z: n/a
- CF Coordinates: longitude: ['lon']
latitude: ['lat']
* time: ['time']
vertical: n/a
- Cell Measures: area, volume: n/a
- Standard Names: forecast_reference_time: ['time_run']
latitude: ['lat']
longitude: ['lon']
* time: ['time']
- Bounds: n/a
grid point#
%%time
# Set up a single lon/lat location
j, i = 10, 10
longitude = float(ds.cf['longitude'][j,i])
latitude = float(ds.cf['latitude'][j,i])
# Select-by-index a time index and no vertical index (zeta has none)
# also lon/lat by index
Z = None
iT = 0
isel = dict(T=iT, X=i, Y=j)
da_out = ds.em.interp2d(longitude, latitude, iT=iT, iZ=Z)
# check work
da_check = ds.cf.isel(isel)
assert np.allclose(da_out, da_check)
CPU times: user 143 ms, sys: 2.23 ms, total: 146 ms
Wall time: 145 ms
This is faster the second time the regridder is used — it is saved by the extract_model
accessor and reused if the lon/lat locations to be interpolated to are the same.
not grid point#
inside domain#
# sel
longitude = -78.0
latitude = 43.6
# isel
iZ = None
iT = 1
isel = dict(T=iT)
da_out = ds.em.interp2d(longitude, latitude, iT=iT, iZ=iZ)
# plot
cmap = ds.cmo.seq
dacheck = ds.cf.isel(isel)
fig, ax = plt.subplots(1,1)
dacheck.cmo.cfplot(ax=ax, x='longitude', y='latitude')
ax.scatter(da_out.cf['longitude'], da_out.cf['latitude'], s=50, c=da_out,
vmin=dacheck.min().values, vmax=dacheck.max().values, cmap=cmap, edgecolors='k')
<matplotlib.collections.PathCollection at 0x7fbfc2c23a90>
points (locstream)#
Unstructured pairs of lon/lat locations instead of grids of lon/lat locations, using locstream
.
# sel
# this creates 12 pairs of lon/lat points that
# align with grid points so we can check the
# interpolation
longitude = ds.cf['longitude'].cf.isel(Y=20, X=slice(None, None, 10))
latitude = ds.cf['latitude'].cf.isel(Y=20, X=slice(None, None, 10))
sel = dict(X=longitude.cf['X'], Y=longitude.cf['Y'])
# isel
iZ = None
iT = 0
isel = dict(T=iT)
da_out = ds.em.interp2d(longitude, latitude, iT=iT, iZ=iZ, locstream=True)
# check
da_check = ds.cf.isel(isel).cf.sel(sel)
assert np.allclose(da_out, da_check, equal_nan=True)
grid of new locations#
# sel
longitude = np.linspace(ds.cf['longitude'].min(), ds.cf['longitude'].max(), 15)
latitude = np.linspace(ds.cf['latitude'].min(), ds.cf['latitude'].max(), 15)
# isel
iZ = None
iT = 1
isel = dict(T=iT)
da_out = ds.em.interp2d(longitude, latitude, iT=iT, iZ=iZ, locstream=False, extrap=False, extrap_val=np.nan)
# plot
cmap = cmo.delta
dacheck = ds.cf.isel(isel)
fig, axes = plt.subplots(1,2, figsize=(10,4))
dacheck.cmo.cfplot(ax=axes[0], x='longitude', y='latitude')
da_out.cmo.cfplot(ax=axes[1], x='longitude', y='latitude')
<matplotlib.collections.QuadMesh at 0x7fbf82a61cd0>