{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Reconstruction over a smaller domain using GraphEM" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Expected time to run through: 10 mins**\n", "\n", "This tutorial demonstrates how to get a reconstruction over a smaller domain (30N-70N, 130W-70W) using GraphEM, leveraging HadCRUT4 and the PAGES2k proxy database." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Test data preparation\n", "\n", "To go through this tutorial, please prepare test data following the steps:\n", "\n", "Download the test case named “PAGES2k_HadCRUT” with this [link](https://drive.google.com/drive/folders/12V0Ny4eO2HKU40-Iy5Jiw2SAfVNzXXcd?usp=sharing).\n", "Create a directory named “testcases” in the same directory where this notebook sits.\n", "Put the unzipped direcotry “PAGES2k_HadCRUT_cropped_domain” into “testcases”.\n", "\n", "Below, we first load some useful packages, including our `GraphEM`." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "\n", "import LMRt\n", "import GraphEM\n", "import os\n", "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Low-level workflow" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "job = GraphEM.ReconJob()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m\u001b[36mGraphEM: job.load_configs() >>> loading reconstruction configurations from: ./testcases/PAGES2k_HadCRUT_cropped_domain/configs.yml\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.load_configs() >>> job.configs created\u001b[0m\n", "\u001b[1m\u001b[36mGraphEM: job.load_configs() >>> job.configs[\"job_dirpath\"] = /Users/fzhu/Github/GraphEM/docsrc/tutorial/testcases/PAGES2k_HadCRUT_cropped_domain/recon\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.load_configs() >>> /Users/fzhu/Github/GraphEM/docsrc/tutorial/testcases/PAGES2k_HadCRUT_cropped_domain/recon created\u001b[0m\n", "{'anom_period': [1951, 1980],\n", " 'calib_period': [1930, 2000],\n", " 'job_dirpath': '/Users/fzhu/Github/GraphEM/docsrc/tutorial/testcases/PAGES2k_HadCRUT_cropped_domain/recon',\n", " 'job_id': 'GraphEM_tutorial',\n", " 'obs_path': {'tas': './data/obs/HadCRUT.5.0.1.0.analysis.anomalies.ensemble_mean.nc'},\n", " 'obs_varname': {'lat': 'latitude', 'lon': 'longitude', 'tas': 'tas_mean'},\n", " 'proxydb_path': './data/proxy/pages2k_dataset.pkl',\n", " 'ptype_list': ['coral.d18O',\n", " 'coral.SrCa',\n", " 'coral.calc',\n", " 'tree.TRW',\n", " 'tree.MXD'],\n", " 'recon_period': [1500, 2000]}\n" ] } ], "source": [ "job.load_configs('./testcases/PAGES2k_HadCRUT_cropped_domain/configs.yml', verbose=True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m\u001b[36mGraphEM: job.load_proxydb() >>> job.configs[\"proxydb_path\"] = /Users/fzhu/Github/GraphEM/docsrc/tutorial/testcases/PAGES2k_HadCRUT_cropped_domain/data/proxy/pages2k_dataset.pkl\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.load_proxydb() >>> 692 records loaded\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.load_proxydb() >>> job.proxydb created\u001b[0m\n" ] } ], "source": [ "job.load_proxydb(verbose=True)" ] }, { "cell_type": "code", "execution_count": 
11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Proxy Database Overview\n", "-----------------------\n", " Source: /Users/fzhu/Github/GraphEM/docsrc/tutorial/testcases/PAGES2k_HadCRUT_cropped_domain/data/proxy/pages2k_dataset.pkl\n", " Size: 111\n", "Proxy types: {'tree.TRW': 70, 'tree.MXD': 41}\n" ] } ], "source": [ "# filter the proxy database by spatial region larger than the target (30N-70N, 130W-70W); using (10N-90N, 150W-60W)\n", "assim_pids = []\n", "for pid, pobj in job.proxydb.records.items():\n", " if pobj.lat >= 10 and pobj.lat <= 90 and pobj.lon >= np.mod(-150, 360) and pobj.lon <= np.mod(-60, 360):\n", " assim_pids.append(pid)\n", " \n", "job.proxydb.filter_pids(assim_pids, inplace=True)\n", "print(job.proxydb)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m\u001b[36mGraphEM: job.seasonalize_proxydb() >>> job.configs[\"ptype_season\"] = {'tree.TRW': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 'tree.MXD': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]}\u001b[0m\n", "\u001b[1m\u001b[36mGraphEM: job.seasonalize_proxydb() >>> seasonalizing proxy records according to: {'tree.TRW': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 'tree.MXD': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]}\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.seasonalize_proxydb() >>> 111 records remaining\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.seasonalize_proxydb() >>> job.proxydb updated\u001b[0m\n", "\u001b[1m\u001b[36mGraphEM: job.filter_proxydb() >>> filtering proxy records according to: ['coral.d18O', 'coral.SrCa', 'coral.calc', 'tree.TRW', 'tree.MXD']\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.filter_proxydb() >>> 111 records remaining\u001b[0m\n" ] } ], "source": [ "ptype_season = {}\n", "for k, v in job.proxydb.type_dict.items():\n", " ptype_season[k] = list(range(1, 13)) # annual\n", " \n", "job.seasonalize_proxydb(ptype_season, verbose=True)\n", "job.filter_proxydb(verbose=True)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m\u001b[36mGraphEM: job.load_obs() >>> loading instrumental observation fields from: {'tas': '/Users/fzhu/Github/GraphEM/docsrc/tutorial/testcases/PAGES2k_HadCRUT_cropped_domain/data/obs/HadCRUT.5.0.1.0.analysis.anomalies.ensemble_mean.nc'}\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.load_obs() >>> job.obs created\u001b[0m\n" ] } ], "source": [ "job.load_obs(varname_dict={'lat': 'latitude', 'lon': 'longitude', 'tas': 'tas_mean'}, verbose=True)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m\u001b[30mGraphEM: job.seasonalize_obs() >>> seasonalized obs w/ season [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]\u001b[0m\n", "Dataset Overview\n", "-----------------------\n", "\n", " Name: tas\n", " Source: /Users/fzhu/Github/GraphEM/docsrc/tutorial/testcases/PAGES2k_HadCRUT_cropped_domain/data/obs/HadCRUT.5.0.1.0.analysis.anomalies.ensemble_mean.nc\n", " Shape: time:171, lat:36, lon:72\n", "\u001b[1m\u001b[32mGraphEM: job.seasonalize_obs() >>> job.obs updated\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/fzhu/Github/LMRt/LMRt/utils.py:261: RuntimeWarning: Mean of empty slice\n", " tmp = np.nanmean(var[inds, ...], axis=0)\n" ] } ], "source": [ "job.seasonalize_obs(verbose=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ 
"## Cropping the domain" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dataset Overview\n", "-----------------------\n", "\n", " Name: tas\n", " Source: /Users/fzhu/Github/GraphEM/docsrc/tutorial/testcases/PAGES2k_HadCRUT_cropped_domain/data/obs/HadCRUT.5.0.1.0.analysis.anomalies.ensemble_mean.nc\n", " Shape: time:171, lat:36, lon:72\n" ] } ], "source": [ "print(job.obs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The current domain is global.\n", "We may call the `job.obs.crop()` method with the `domain_range` parameter to crop the domain.\n", "\n", "There are two ways of cropping the domain:\n", "+ crop only the latitudes with the input argument `domain_range = [lat_min, lat_max]`\n", "+ crop both the latitudes and longitudes the input argument `domain_range = [lat_min, lat_max, lon_min, lon_max]`" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dataset Overview\n", "-----------------------\n", "\n", " Name: tas\n", " Source: /Users/fzhu/Github/GraphEM/docsrc/tutorial/testcases/PAGES2k_HadCRUT_cropped_domain/data/obs/HadCRUT.5.0.1.0.analysis.anomalies.ensemble_mean.nc\n", " Shape: time:171, lat:8, lon:12\n" ] } ], "source": [ "# crop the 30N-70N, 70W-130W region\n", "\n", "job.obs.crop([30, 70, np.mod(-130, 360), np.mod(-70, 360)], inplace=True)\n", "print(job.obs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data preparation" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m\u001b[32mGraphEM: job.prep_data() >>> job.recon_time created\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.prep_data() >>> job.calib_time created\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.prep_data() >>> job.calib_idx created\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.prep_data() >>> job.temp created\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.prep_data() >>> job.df_proxy created\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.prep_data() >>> job.proxy created\u001b[0m\n", "\u001b[1m\u001b[32mGraphEM: job.prep_data() >>> job.lonlat created\u001b[0m\n" ] } ], "source": [ "job.prep_data(verbose=True)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | NAm_153 | \n", "NAm_165 | \n", "NAm_046 | \n", "NAm_193 | \n", "NAm_194 | \n", "NAm_188 | \n", "NAm_156 | \n", "NAm_154 | \n", "NAm_122 | \n", "NAm_186 | \n", "... | \n", "NAm_172 | \n", "NAm_201 | \n", "NAm_202 | \n", "NAm_136 | \n", "NAm_137 | \n", "NAm_203 | \n", "NAm_204 | \n", "NAm_108 | \n", "NAm_018 | \n", "NAm_143 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1500 | \n", "NaN | \n", "NaN | \n", "1.026 | \n", "NaN | \n", "NaN | \n", "0.613 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.764 | \n", "NaN | \n", "0.814 | \n", "0.901 | \n", "NaN | \n", "0.769 | \n", "NaN | \n", "
1501 | \n", "NaN | \n", "NaN | \n", "1.058 | \n", "NaN | \n", "NaN | \n", "0.592 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.928 | \n", "NaN | \n", "0.946 | \n", "0.863 | \n", "NaN | \n", "1.060 | \n", "NaN | \n", "
1502 | \n", "NaN | \n", "NaN | \n", "1.088 | \n", "NaN | \n", "NaN | \n", "0.633 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.937 | \n", "NaN | \n", "0.969 | \n", "0.914 | \n", "NaN | \n", "0.953 | \n", "NaN | \n", "
1503 | \n", "NaN | \n", "NaN | \n", "0.875 | \n", "NaN | \n", "NaN | \n", "0.675 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.775 | \n", "NaN | \n", "1.109 | \n", "0.927 | \n", "NaN | \n", "1.230 | \n", "NaN | \n", "
1504 | \n", "NaN | \n", "NaN | \n", "1.139 | \n", "NaN | \n", "NaN | \n", "0.882 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.834 | \n", "NaN | \n", "1.090 | \n", "0.838 | \n", "NaN | \n", "1.203 | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
1996 | \n", "1.346 | \n", "NaN | \n", "1.373 | \n", "NaN | \n", "NaN | \n", "0.823 | \n", "NaN | \n", "1.159 | \n", "NaN | \n", "1.354 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.773 | \n", "0.978 | \n", "0.648 | \n", "1.018 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1997 | \n", "NaN | \n", "NaN | \n", "1.153 | \n", "NaN | \n", "NaN | \n", "0.564 | \n", "NaN | \n", "1.358 | \n", "NaN | \n", "1.134 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.602 | \n", "1.017 | \n", "0.752 | \n", "0.976 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1998 | \n", "NaN | \n", "NaN | \n", "1.369 | \n", "NaN | \n", "NaN | \n", "0.886 | \n", "NaN | \n", "1.575 | \n", "NaN | \n", "1.368 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.974 | \n", "1.029 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1999 | \n", "NaN | \n", "NaN | \n", "1.502 | \n", "NaN | \n", "NaN | \n", "0.798 | \n", "NaN | \n", "1.050 | \n", "NaN | \n", "0.830 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.702 | \n", "1.031 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2000 | \n", "NaN | \n", "NaN | \n", "1.674 | \n", "NaN | \n", "NaN | \n", "0.720 | \n", "NaN | \n", "1.237 | \n", "NaN | \n", "1.254 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.998 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
501 rows × 111 columns
\n", "