Interactive Data Exploration¶
This notebook demonstrates how the functions and techniques covered in the first notebook can be combined to build interactive data exploration tools. The panel library enables exploration of LIS output using an interactive map.
Note: some cells below take several minutes to run and some aspects of the interactive widgets may not work in the rendered version on the Hackweek site.
Import Libraries¶
import numpy as np
import pandas as pd
import geopandas
import xarray as xr
import fsspec
import s3fs
from datetime import datetime as dt
from scipy.spatial import distance
import holoviews as hv, geoviews as gv
from geoviews import opts
from geoviews import tile_sources as gvts
from datashader.colors import viridis
import datashader
from holoviews.operation.datashader import datashade, shade, dynspread, spread, rasterize
from holoviews.streams import Selection1D, Params
import panel as pn
import param as pm
import hvplot.pandas
import hvplot.xarray
import warnings
import holoviews.operation.datashader as hd
warnings.filterwarnings("ignore")
pn.extension()
pn.param.ParamMethod.loading_indicator = True
# create S3 filesystem object
s3 = s3fs.S3FileSystem()
# define S3 bucket name
bucket = "s3://eis-dh-hydro/SNOWEX-HACKWEEK"
# set holoviews backend to Bokeh
gv.extension('bokeh')
Load Data¶
SNOTEL Sites info¶
# create dictionary linking state names and abbreviations
snotel = {"AZ" : "arizona",
"CO" : "colorado",
"ID" : "idaho",
"MT" : "montana",
"NM" : "newmexico",
"UT" : "utah",
"WY" : "wyoming"}
# load SNOTEL site metadata for sites in the given state
def load_site(state):
    # define path to file
    key = f"SNOTEL/snotel_{state}.csv"
    # load csv into pandas DataFrame
    df = pd.read_csv(s3.open(f'{bucket}/{key}', mode='r'))
    return df
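As a quick sketch of how load_site() might be used (assuming the CSV files are keyed by the lowercase state name, which is what the snotel dictionary stores as values; co_sites is a name introduced here just for illustration):
# example: load SNOTEL site metadata for Colorado and preview the first few rows
co_sites = load_site(snotel["CO"])
co_sites.head()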
SNOTEL Depth & SWE¶
def load_snotel_txt(state, var):
    # define path to file
    key = f"SNOTEL/snotel_{state}{var}_20162020.txt"
    # open text file
    fh = s3.open(f"{bucket}/{key}")
    # read each line and count those that begin with '#'
    lines = fh.readlines()
    skips = sum(1 for ln in lines if ln.decode('ascii').startswith('#'))
    # load txt file into pandas DataFrame (skipping lines beginning with '#')
    df = pd.read_csv(s3.open(f"{bucket}/{key}"), skiprows=skips)
    # convert Date column from str to pandas datetime objects
    df['Date'] = pd.to_datetime(df['Date'])
    return df
# load SNOTEL depth & swe into dictionaries
# define empty dicts
snotel_depth = {}
snotel_swe = {}
# loop over states and load SNOTEL data
for state in snotel.keys():
    print(f"Loading state {state}")
    snotel_depth[state] = load_snotel_txt(state, 'depth')
    snotel_swe[state] = load_snotel_txt(state, 'swe')
SNODAS Depth & SWE¶
Like the LIS output we have been working with, a sample of SNODAS data is available on our S3 bucket in Zarr format. We can therefore load the SNODAS data just as we loaded the LIS data.
# load snodas depth data
key = "SNODAS/snodas_snowdepth_20161001_20200930.zarr"
snodas_depth = xr.open_zarr(s3.get_mapper(f"{bucket}/{key}"), consolidated=True)
# load snodas swe data
key = "SNODAS/snodas_swe_20161001_20200930.zarr"
snodas_swe = xr.open_zarr(s3.get_mapper(f"{bucket}/{key}"), consolidated=True)
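If you'd like to confirm the Zarr stores opened correctly, printing the dataset summaries shows the variables, dimensions, and coordinates that were read:
# print dataset summaries to confirm the Zarr stores opened as expected
print(snodas_depth)
print(snodas_swe)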
LIS Outputs¶
Next we’ll load the LIS outputs. First, we’ll define the helper function we saw in the previous notebook that adds lat and lon as coordinate variables. We’ll use this immediately upon loading the data.
def add_latlon_coords(dataset: xr.Dataset)->xr.Dataset:
"""Adds lat/lon as dimensions and coordinates to an xarray.Dataset object."""
# get attributes from dataset
attrs = dataset.attrs
# get x, y resolutions
dx = .001 #round(float(attrs['DX']), 3)
dy = .001 #round(float(attrs['DY']), 3)
# get grid cells in x, y dimensions
ew_len = len(dataset['east_west'])
ns_len = len(dataset['north_south'])
#ew_len = len(dataset['lon'])
#ns_len = len(dataset['lat'])
# get lower-left lat and lon
ll_lat = round(float(attrs['SOUTH_WEST_CORNER_LAT']), 3)
ll_lon = round(float(attrs['SOUTH_WEST_CORNER_LON']), 3)
# calculate upper-right lat and lon
ur_lat = 41.5122 #ll_lat + (dy * ns_len)
ur_lon = -103.9831 #ll_lon + (dx * ew_len)
# define the new coordinates
coords = {
# create an arrays containing the lat/lon at each gridcell
'lat': np.linspace(ll_lat, ur_lat, ns_len, dtype=np.float32, endpoint=False).round(4),
'lon': np.linspace(ll_lon, ur_lon, ew_len, dtype=np.float32, endpoint=False).round(4)
}
lon_attrs = dataset.lon.attrs
lat_attrs = dataset.lat.attrs
# rename the original lat and lon variables
dataset = dataset.rename({'lon':'orig_lon', 'lat':'orig_lat'})
# rename the grid dimensions to lat and lon
dataset = dataset.rename({'north_south': 'lat', 'east_west': 'lon'})
# assign the coords above as coordinates
dataset = dataset.assign_coords(coords)
dataset.lon.attrs = lon_attrs
dataset.lat.attrs = lat_attrs
return dataset
Load the LIS data and apply add_latlon_coords():
# LIS surfacemodel DA_10km
#key = "2022/ZARR/SURFACEMODEL/LIS_HIST_default_chunks.d01.zarr"
key = "2022/ZARR/SURFACEMODEL/LIS_HIST_rechunkedV4.d01.zarr"
s3_mapper = s3.get_mapper(f"{bucket}/{key}")
lis_sf = xr.open_zarr(s3_mapper, consolidated=True)
lis_sf = add_latlon_coords(lis_sf)
drop_vars = ['orig_lat', 'orig_lon']
lis_sf = lis_sf.drop(drop_vars)
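Before moving on, it can help to confirm that the helper attached lat and lon as dimension coordinates; a quick check:
# verify that lat/lon are now dimension coordinates on the dataset
print(lis_sf.dims)
print(lis_sf.coords)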
Working with the full LIS output dataset can be slow and consume lots of memory. Here we temporally subset the data to a shorter window of time. The full dataset contains daily values from 09/02/2016 to 4/22/2021. Feel free to explore the full dataset by modifying the time_range variable below and re-running all cells that follow.
# subset LIS data for winter 2020
time_range = slice('2019-10-01', '2020-04-30')
lis_sf = lis_sf.sel(time=time_range)
In the next cell, we extract the data variable names and timesteps from the LIS outputs. These will be used to define the widget options.
# gather metadata from LIS
# get variable names (strings)
vnames = list(lis_sf.data_vars)
print(vnames)
# get timestamps (strings, YYYY-MM-DD)
tstamps = list(np.datetime_as_string(lis_sf.time.values, 'D'))
print(len(tstamps), tstamps[0], tstamps[-1])
By default, the holoviews plotting library automatically adjusts the range of plot colorbars based on the range of values in the data being plotted. This may not be ideal when comparing data on different timesteps. In the next cell we assign the upper and lower bounds for each data variable, which we’ll later use to set a static colorbar range.
cmap_lims = {'SM_SWE_inst': (-1.3111445262836696e-10, 3.0499227046966553),
'SM_SnowDensity_inst': (99.99999237060547, 550.0),
'SM_SnowDepth_inst': (-1.0889861234986142e-09, 6.424593448638916)}
cmap_lims
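These limits were computed ahead of time. If you want to derive them yourself, a minimal sketch is shown below; it uses the temporally subset lis_sf defined above, so the exact numbers will differ from cmap_lims, and computing over the full record would take longer.
# sketch: derive colorbar limits directly from the (subset) data
computed_lims = {v: (float(lis_sf[v].min().compute()),
                     float(lis_sf[v].max().compute()))
                 for v in vnames}
computed_lims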
Interactive widget to explore model output¶
The cell below creates a panel
layout for exploring LIS output rasters. Select a variable using the drop down and then use the date slider to scrub back and forth in time!
%%time
# start and end dates
start_date = '2020-01-01'
end_date = '2020-01-07'
date_fmt = '%Y-%m-%d'
b = dt.strptime(start_date, date_fmt)
e = dt.strptime(end_date, date_fmt)
lis_sub = lis_sf.sel(time=slice(start_date, end_date)).load()
# date widget (slider & key in)
# define date widgets
date_slider = pn.widgets.DateSlider(start=b, end=e, value=b, name="LIS Model Date")
dt_input = pn.widgets.DatetimeInput(name='LIS Model Date Input', value=b, format=date_fmt)
date_stream = Params(date_slider, ['value'], rename={'value':'date'})
# variable widget
var_select = pn.widgets.Select(options=vnames, name="LIS Variable List")
var_stream = Params(var_select, ['value'], rename={'value':'vname'})
#colorbar widget
color_bar_range = pn.widgets.RangeSlider(name="Color Bar Range", start=0, end=1, value=(0., 1.), step=0.01)
cbar_stream = Params(color_bar_range, ['value'], rename={'value':'clim'})
# base map widget
map_layer = pn.widgets.RadioButtonGroup(
    name='Base map layer',
    options=['Open Street Map', 'Satellite Imagery'],
    value='Satellite Imagery',
    button_type='primary',
    background='#f307eb')
# lis output display callback function
# returns plot of LIS output when date/variable is changed
def var_layer(vname, date, clim):
    t_stamp = dt.strftime(date, '%Y-%m-%d')
    dssm = lis_sub[vname].sel(time=t_stamp)
    image = dssm.hvplot(geo=True)
    #clim = cmap_lims[vname]
    return image.opts(clim=clim)
# watches date widget for updates
@pn.depends(dt_input.param.value, watch=True)
def _update_date(dt_input):
    date_slider.value = dt_input

# watches variable widget and updates the colorbar range slider
@pn.depends(var_select.param.value, watch=True)
def _update_range(var_select):
    vname = var_select
    t = cmap_lims[vname]
    color_bar_range.start = t[0]
    color_bar_range.end = t[1]
    color_bar_range.value = t
# updates basemap on widget change
def update_map(maps):
    tile = gvts.OSM if maps == 'Open Street Map' else gvts.EsriImagery
    return tile.opts(alpha=0.7)
# link widgets to callback functions
streams = dict(vname=var_select.param.value, date=date_slider.param.value, clim=color_bar_range.param.value)
dmap = hv.DynamicMap(var_layer, streams=streams)
dtile = hv.DynamicMap(update_map, streams=dict(maps=map_layer.param.value))
# create panel layout of widgets and plot
pn.Column(var_select, date_slider, dt_input, map_layer, color_bar_range,
          dtile * rasterize(dmap, aggregator=datashader.mean()).opts(
              cmap=viridis, colorbar=True, width=800, height=600))
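The layout renders inline because it is the final expression in the cell. If you would rather launch the dashboard in its own browser tab (for example when running outside Jupyter), a minimal sketch: assign the same layout to a variable (dashboard here is just an illustrative name) and use Panel’s serving methods.
# sketch: assign the layout to a variable so it can be served outside the notebook
dashboard = pn.Column(var_select, date_slider, dt_input, map_layer, color_bar_range,
                      dtile * rasterize(dmap, aggregator=datashader.mean()).opts(
                          cmap=viridis, colorbar=True, width=800, height=600))
dashboard.show()        # opens the app in a new browser tab from a live session
# dashboard.servable() # alternatively, mark it for `panel serve <notebook>.ipynb`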
Thank you for joining us for this tutorial. We hope that you are now more familiar with NASA’s Land Information System and how to use Python to explore and use the model simulation output LIS generates. For more information, please see the links under More Information below.
More Information¶
Links
References
Kumar, S.V., C.D. Peters-Lidard, Y. Tian, P.R. Houser, J. Geiger, S. Olden, L. Lighty, J.L. Eastman, B. Doty, P. Dirmeyer, J. Adams, K. Mitchell, E. F. Wood, and J. Sheffield, 2006: Land Information System - An interoperable framework for high resolution land surface modeling. Environ. Modeling & Software, 21, 1402-1415, doi:10.1016/j.envsoft.2005.07.004
Peters-Lidard, C.D., P.R. Houser, Y. Tian, S.V. Kumar, J. Geiger, S. Olden, L. Lighty, B. Doty, P. Dirmeyer, J. Adams, K. Mitchell, E.F. Wood, and J. Sheffield, 2007: High-performance Earth system modeling with NASA/GSFC’s Land Information System. Innovations in Systems and Software Engineering, 3(3), 157-165, doi:10.1007/s11334-007-0028-x