[1]:
%matplotlib inline

Open In Colab

View Source on GitHub

Quadica dataset

This notebook shows how to fetch the QUADICA dataset of Ebeling et al. (2022) using the `Quadica` class from `ai4water.datasets`.

[2]:

# One import statement per line: several statements fused onto a single line
# are a SyntaxError in Python. The original import order is preserved.
import pandas as pd
import matplotlib.pyplot as plt
from easy_mpl import hist, ridge
from ai4water.datasets import Quadica
from easy_mpl.utils import create_subplots
from ai4water.utils.utils import get_version_info

**********Tensorflow models could not be imported **********

D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\sklearn\experimental\enable_hist_gradient_boosting.py:16: UserWarning: Since version 1.0, it is not needed to import enable_hist_gradient_boosting anymore. HistGradientBoostingClassifier and HistGradientBoostingRegressor are now stable and can be normally imported from sklearn.ensemble.
  warnings.warn(
[3]:
# Print the interpreter, OS and package versions used to produce the outputs
# in this notebook, one "name version" pair per line.
versions = get_version_info()
for name in versions:
    print(name, versions[name])
python 3.8.12 | packaged by conda-forge | (default, Oct 12 2021, 21:19:05) [MSC v.1916 64 bit (AMD64)]
os nt
ai4water 1.06
easy_mpl 0.21.3
SeqMetrics 1.3.3
numpy 1.22.2
pandas 1.4.0
matplotlib 3.5.1
h5py 3.6.0
joblib 1.2.0
sklearn 1.2.1
xarray 0.21.1
netCDF4 1.5.8
seaborn 0.12.0
[4]:
# Create the dataset handle. Per the message printed below, the data are not
# downloaded again when the local Quadica directory already exists.
dataset = Quadica()

# Average temperature table; the printed shape is (828, 1386).
# Presumably rows are months and columns are catchments (1386 equals the
# number of rows returned by catchment_attributes()) — TODO confirm.
avg_temp = dataset.avg_temp()
print(avg_temp.shape)

    Not downloading the data since the directory
    D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\data\Quadica already exists.
    Use overwrite=True to remove previously saved files and download again
(828, 1386)
[5]:
# Preview the first five rows (index name: Year_Month, one column per catchment id).
# NOTE(review): the index shown below is not chronological (Jan, Oct, Nov, Dec,
# Feb), which looks like a lexicographic sort of month labels — confirm whether
# the rows need avg_temp.sort_index() before being used as a time series.
avg_temp.head()
[5]:
1 2 3 4 8 9 10 11 12 16 ... 651 655 660 1002 1007 1012 1013 1277 1279 1281
Year_Month
1950-01-01 -2.297333 -2.459127 -2.342236 -2.014634 -1.537954 -1.470925 -2.055828 -2.097191 -2.068334 -1.822387 ... -1.627405 -2.270952 -2.310768 0.328745 0.124732 -0.292483 0.567522 -4.408611 -4.568310 -4.675208
1950-10-01 7.961588 8.154172 8.073226 8.199285 7.843346 7.887170 8.030769 8.013243 8.092491 7.503624 ... 8.462639 8.232144 8.088379 8.616307 8.907722 8.254993 8.664533 4.981876 4.807424 4.693177
1950-11-01 4.387634 4.434822 4.156425 4.526889 4.255621 4.334019 4.359056 4.304337 4.374317 3.918750 ... 4.312323 4.267094 4.137416 4.930104 5.048923 4.665080 4.964220 1.274203 1.065288 0.925376
1950-12-01 -1.291138 -1.312995 -0.986580 -1.682717 -1.870502 -1.807383 -1.876309 -1.883287 -1.772402 -1.421752 ... -0.261073 -0.820888 -0.987980 -0.022088 0.577974 -0.173298 0.067284 -4.234557 -4.388704 -4.488470
1950-02-01 2.526310 2.694384 2.198610 2.699495 2.684751 2.752606 2.562898 2.520741 2.582700 2.225678 ... 2.099005 2.235437 2.112160 2.358829 2.370221 2.440856 2.475693 0.020008 -0.113062 -0.189216

5 rows × 1386 columns

pet

[6]:
# Potential evapotranspiration table; same shape as avg_temp, (828, 1386).
# Presumably the same months x catchments layout — TODO confirm.
pet = dataset.pet()
print(pet.shape)
(828, 1386)

precipitation

[7]:
# Precipitation table; same shape as avg_temp and pet, (828, 1386).
pcp = dataset.precipitation()
print(pcp.shape)
(828, 1386)

monthly median values

[8]:
# Monthly median table: 16629 rows x 18 columns (see printed shape).
mon_medians = dataset.monthly_medians()
print(mon_medians.shape)
(16629, 18)
[9]:
# Preview the first five rows: OBJECTID, Month, then n_* / median_* column
# pairs for Q, NO3, NMin, TN, PO4, TP, DOC and TOC.
mon_medians.head()
[9]:
OBJECTID Month n_Q median_Q n_NO3 median_NO3N n_NMin median_NMin n_TN median_TN n_PO4 median_PO4P n_TP median_TP n_DOC median_DOC n_TOC median_TOC
0 1 1 0 NaN 11 1.700 11 1.960 11 3.60 11 0.0250 11 0.1180 0 NaN 11 6.60
1 1 2 0 NaN 12 1.740 12 1.975 12 4.30 12 0.0285 12 0.1375 0 NaN 12 6.85
2 1 3 0 NaN 11 1.900 11 2.100 11 4.70 11 0.0220 11 0.0880 0 NaN 11 7.50
3 1 4 0 NaN 10 1.405 10 1.580 10 2.95 10 0.0150 10 0.1115 0 NaN 10 7.00
4 1 5 0 NaN 11 1.000 11 1.260 11 2.60 11 0.0280 11 0.1550 0 NaN 11 9.00
[10]:
# Monthly WRTDS table: 50186 rows x 47 columns (see printed shape).
# NOTE(review): "WRTDS" presumably refers to Weighted Regressions on Time,
# Discharge and Season — confirm against the ai4water documentation.
wrtds_mon = dataset.wrtds_monthly()
print(wrtds_mon.shape)
(50186, 47)

catchment attributes

[11]:
# Static catchment attributes: 1386 rows x 113 columns (see printed shape).
cat_attrs = dataset.catchment_attributes()
print(cat_attrs.shape)
(1386, 113)
[12]:
# List the attribute names; pandas abbreviates the 113-entry Index with "...".
print(cat_attrs.columns)
Index(['OBJECTID', 'Station', 'Area_km2', 'f_AreaGer', 'dem.mean',
       'dem.median', 'slo.mean', 'slo.median', 'twi.mean', 'twi.med',
       ...
       'flashi', 'BFI', 'P_mm', 'P_SIsw', 'P_SI', 'P_lambda', 'P_alpha',
       'PET_mm', 'AI', 'T_mean'],
      dtype='object', length=113)
[13]:
# Restrict the attribute table to selected stations; the output below has
# three rows, with OBJECTID 1, 2 and 3.
dataset.catchment_attributes(stations=[1,2,3])
[13]:
OBJECTID Station Area_km2 f_AreaGer dem.mean dem.median slo.mean slo.median twi.mean twi.med ... flashi BFI P_mm P_SIsw P_SI P_lambda P_alpha PET_mm AI T_mean
0 1 BB_AMFL_0010 21.65 1.0 74.683632 72.135452 0.750141 0.678210 15.002993 14.357248 ... NaN NaN 589.535167 1.338849 2.044286 0.322334 5.007660 760.654789 1.290294 9.425497
1 2 BB_AZMFL_0010 50.47 1.0 61.898052 56.878677 1.157724 0.823584 14.753934 14.246800 ... 0.0 0.878186 544.733603 1.661279 2.726427 0.306447 4.866778 774.804494 1.422743 9.381932
2 3 BB_BAFL_0010 56.19 1.0 48.056680 50.443848 0.973699 0.846759 14.805566 14.195425 ... NaN NaN 535.680048 1.813461 3.034012 0.308551 4.753558 719.133840 1.342496 8.983454

3 rows × 113 columns

monthly data

[14]:
# Fetch the monthly data for all features. The call returns two objects:
# `dyn` (time-varying columns) and `cat` (catchment attributes).
# With max_nan_tol=None missing values are kept — see the isna() counts
# printed further below.
dyn, cat = dataset.fetch_monthly(max_nan_tol=None)
print(dyn.shape)
(29484, 33)
[15]:
# Station ids (OBJECTID) present in the monthly data.
dyn['OBJECTID'].unique()
[15]:
array([ 333,  334,  335,  336,  337,  340,  341,  342,  345,  346,  347,
        348,  349,  350,  352,  355,  358,  359,  360,  362,  363,  364,
        365,  368,  370,  373,  374,  376,  380,  381,  391,  393,  637,
        663,  667,  673,  678,  686,  687,  688,  690,  692,  696,  701,
        705,  711,  716,  718,  722,  723,  728,  730,  734,  735,  736,
        737,  739,  740,  742,  744,  745,  746,  750,  752,  754,  769,
        773,  774,  775,  776,  778,  782,  783,  785,  786,  787,  789,
        796,  797,  874,  885,  899,  985,  986,  991, 1011, 1016, 1017,
       1019, 1082, 1113, 1186, 1237, 1238, 1255, 1270, 1271, 1275, 1287,
       1303, 1332, 1467, 1473, 1482, 1495, 1570, 1571, 1573, 1672, 1677,
       1678, 1679, 1680, 1683, 1688, 1690, 1691], dtype=int64)
[16]:
# Column names of the dynamic table: water-quality statistics, median_Q,
# OBJECTID and the three meteorological columns (avg_temp, precip, pet).
print(dyn.columns)
Index(['median_C_NO3', 'median_C_NMin', 'median_FNC_TP', 'median_FNC_PO4',
       'mean_FNFlux_TN', 'median_FNC_NO3', 'mean_FNFlux_DOC', 'mean_FNFlux_TP',
       'median_C_TP', 'mean_Flux_TOC', 'median_FNC_NMin', 'mean_FNFlux_PO4',
       'mean_Flux_DOC', 'mean_Flux_NMin', 'mean_FNFlux_NO3',
       'mean_FNFlux_NMin', 'median_FNC_TOC', 'mean_Flux_TN', 'median_C_TOC',
       'mean_Flux_NO3', 'mean_Flux_PO4', 'mean_Flux_TP', 'median_FNC_DOC',
       'median_C_TN', 'mean_FNFlux_TOC', 'median_Q', 'median_C_DOC',
       'median_C_PO4', 'median_FNC_TN', 'OBJECTID', 'avg_temp', 'precip',
       'pet'],
      dtype='object')
[17]:
# Number of missing values per column; only OBJECTID and the meteorological
# columns are complete in the output below.
print(dyn.isna().sum())
median_C_NO3         2691
median_C_NMin        9161
median_FNC_TP        1819
median_FNC_PO4       1988
mean_FNFlux_TN      18880
median_FNC_NO3       2709
mean_FNFlux_DOC     16361
mean_FNFlux_TP       1819
median_C_TP          1819
mean_Flux_TOC       15456
median_FNC_NMin      9161
mean_FNFlux_PO4      1988
mean_Flux_DOC       16361
mean_Flux_NMin       9161
mean_FNFlux_NO3      2709
mean_FNFlux_NMin     9161
median_FNC_TOC      15469
mean_Flux_TN        18880
median_C_TOC        15456
mean_Flux_NO3        2691
mean_Flux_PO4        1988
mean_Flux_TP         1819
median_FNC_DOC      16361
median_C_TN         18880
mean_FNFlux_TOC     15469
median_Q               13
median_C_DOC        16361
median_C_PO4         1988
median_FNC_TN       18880
OBJECTID                0
avg_temp                0
precip                  0
pet                     0
dtype: int64
[18]:
# `cat` has the same number of rows as `dyn` (29484) and 113 attribute columns.
print(cat.shape)
(29484, 113)

monthly TN

[19]:
# Fetch only the TN-related columns. With max_nan_tol=0 the result contains
# no missing values (see the isna() output below); presumably stations with
# any missing TN value are dropped — TODO confirm against the ai4water docs.
# Note: this rebinds `dyn` and `cat` from the previous section.
dyn, cat = dataset.fetch_monthly(features="TN", max_nan_tol=0)
print(dyn.shape)
(6300, 9)
[20]:
# Preview the first five rows of the TN data (station 663, starting 1993-01).
# NOTE(review): pet/avg_temp do not follow a plausible seasonal progression
# against the date labels (e.g. pet 28.9 in Feb but 8.1 in Apr) — confirm the
# meteorological columns are aligned with the index.
dyn.head()
[20]:
median_C_TN mean_Flux_TN mean_FNFlux_TN median_Q median_FNC_TN OBJECTID avg_temp precip pet
1993-01-01 7.973254 4854.350816 3785.002788 6.70 8.143008 663 3.807984 121.793169 11.415899
1993-02-01 7.955991 3698.383160 3531.640525 5.29 8.062551 663 8.473467 116.131558 28.869268
1993-03-01 8.138089 2249.559645 3076.825302 3.17 7.878655 663 1.430167 35.333157 9.847851
1993-04-01 7.665461 2272.942794 2298.055504 3.28 7.780824 663 4.333394 180.090165 8.050768
1993-05-01 7.843202 1551.660935 1607.637873 2.28 7.807650 663 0.830066 30.062856 13.271998
[21]:
# Preview the last five rows of the TN data (station 1019, ending 2013-12).
# NOTE(review): avg_temp of 20.7 labelled 2013-10 and 14.3 labelled 2013-12
# looks shifted relative to the dates — possibly the same non-chronological
# ordering visible in avg_temp.head(); confirm before modelling.
dyn.tail()
[21]:
median_C_TN mean_Flux_TN mean_FNFlux_TN median_Q median_FNC_TN OBJECTID avg_temp precip pet
2013-08-01 3.035308 6315.751380 7280.583175 21.62 3.065682 1019 11.212706 148.730218 90.947478
2013-09-01 3.288561 12811.035546 8107.321967 38.94 3.194456 1019 16.430328 74.409189 130.848008
2013-10-01 3.444130 30742.345242 12261.944444 84.51 3.452440 1019 20.729773 43.103508 154.377919
2013-11-01 3.732225 45608.256567 22491.901904 136.87 3.742268 1019 18.462523 54.366963 122.983270
2013-12-01 4.176698 30066.266276 37932.025054 61.38 3.963254 1019 14.342434 96.153852 70.408549
[22]:
# Verify that no column of the TN subset contains missing values.
print(dyn.isna().sum())
median_C_TN       0
mean_Flux_TN      0
mean_FNFlux_TN    0
median_Q          0
median_FNC_TN     0
OBJECTID          0
avg_temp          0
precip            0
pet               0
dtype: int64
[23]:
# Station ids that remain in the TN subset.
dyn['OBJECTID'].unique()
[23]:
array([ 663,  673,  678,  686,  687,  688,  690,  728,  730,  734,  744,
        745,  746,  750,  754,  782,  783,  785,  786,  985,  986,  991,
       1016, 1017, 1019], dtype=int64)
[24]:
# Count the distinct stations in the TN subset. OBJECTID is an int64 column
# (no missing values), so nunique() equals len(unique()).
print(dyn['OBJECTID'].nunique())
25
[25]:
# `cat` again has one row per row of `dyn` (6300) and 113 attribute columns.
print(cat.shape)
(6300, 113)
[26]:
# Build one column of median TN concentration per station and draw a ridge plot.
# The concat is keyed by the groupby label so every column is named after the
# station it actually holds. Assigning dyn['OBJECTID'].unique() afterwards is
# only correct when `dyn` happens to be sorted by OBJECTID, because unique()
# keeps order of appearance while groupby() iterates in sorted order.
df = pd.concat(
    {station_id: grp['median_C_TN'] for station_id, grp in dyn.groupby('OBJECTID')},
    axis=1,
)
_ = ridge(df, figsize=(10, 10), color="GnBu", title="median_C_TN")
../../_images/_notebooks_datasets_quadica_32_0.png

monthly TP

[27]:
# Fetch only the TP-related columns with max_nan_tol=0; the result has no
# missing values (see the isna() output below). Rebinds `dyn` and `cat`.
dyn, cat = dataset.fetch_monthly(features="TP", max_nan_tol=0)
print(dyn.shape)
(21420, 9)
[28]:
# Station ids that remain in the TP subset.
dyn['OBJECTID'].unique()
[28]:
array([ 334,  335,  336,  337,  340,  341,  342,  345,  347,  350,  352,
        355,  358,  359,  360,  362,  363,  364,  365,  368,  370,  374,
        376,  380,  381,  391,  663,  673,  678,  686,  687,  688,  690,
        692,  696,  701,  705,  711,  716,  718,  722,  723,  728,  730,
        734,  735,  736,  737,  739,  740,  742,  744,  745,  746,  750,
        754,  769,  773,  776,  778,  782,  783,  785,  786,  874,  885,
        899,  985,  986,  991, 1016, 1017, 1019, 1082, 1113, 1186, 1271,
       1275, 1570, 1571, 1573, 1677, 1678, 1680, 1683], dtype=int64)
[29]:
# Count the distinct stations in the TP subset. OBJECTID is an int64 column
# (no missing values), so nunique() equals len(unique()).
print(dyn['OBJECTID'].nunique())
85
[30]:
# Preview the first five rows of the TP data (station 334, starting 1993-01).
dyn.head()
[30]:
mean_Flux_TP mean_FNFlux_TP median_C_TP median_FNC_TP median_Q OBJECTID avg_temp precip pet
1993-01-01 396.554076 320.694272 0.074464 0.062897 53.20 334 1.593407 80.646032 14.305556
1993-02-01 119.576067 273.244117 0.046901 0.054044 25.15 334 5.956323 105.239667 32.282010
1993-03-01 247.820224 436.670327 0.045366 0.060441 33.50 334 -1.057203 38.820471 10.558618
1993-04-01 245.905231 447.722065 0.054396 0.064261 52.10 334 1.455312 126.850398 12.198744
1993-05-01 213.419345 686.400592 0.051911 0.069937 47.70 334 -2.350673 26.185855 13.697404
[31]:
# Preview the last five rows of the TP data (station 1683, ending 2013-12).
dyn.tail()
[31]:
mean_Flux_TP mean_FNFlux_TP median_C_TP median_FNC_TP median_Q OBJECTID avg_temp precip pet
2013-08-01 68.195512 83.103094 0.171760 0.174829 4.33 1683 10.229236 147.140054 87.170391
2013-09-01 83.325140 90.632953 0.137811 0.150251 6.11 1683 14.173561 99.925348 117.320530
2013-10-01 56.504054 92.819774 0.116680 0.124092 5.25 1683 18.329772 42.926368 146.748663
2013-11-01 111.452591 133.620403 0.095941 0.105120 11.85 1683 16.463807 49.014143 116.420306
2013-12-01 77.797496 149.404412 0.081465 0.090173 9.61 1683 11.541938 78.161588 58.856660
[32]:
# Verify that no column of the TP subset contains missing values.
print(dyn.isna().sum())
mean_Flux_TP      0
mean_FNFlux_TP    0
median_C_TP       0
median_FNC_TP     0
median_Q          0
OBJECTID          0
avg_temp          0
precip            0
pet               0
dtype: int64
[33]:
# `cat` has one row per row of `dyn` (21420) and 113 attribute columns.
print(cat.shape)
(21420, 113)

monthly TOC

[34]:
# Fetch only the TOC-related columns with max_nan_tol=0; the result has no
# missing values (see the isna() output below). Rebinds `dyn` and `cat`.
dyn, cat = dataset.fetch_monthly(features="TOC", max_nan_tol=0)
print(dyn.shape)
(5796, 9)
[35]:
# Station ids that remain in the TOC subset.
dyn['OBJECTID'].unique()
[35]:
array([ 352,  355,  358,  359,  370,  374,  796,  797,  985,  991, 1016,
       1019, 1473, 1482, 1570, 1571, 1573, 1677, 1678, 1680, 1683, 1688,
       1690], dtype=int64)
[36]:
# Count the distinct stations in the TOC subset. OBJECTID is an int64 column
# (no missing values), so nunique() equals len(unique()).
print(dyn['OBJECTID'].nunique())

# Draw one histogram of median TOC concentration per station, each on its own
# subplot and titled with the station id. show=False defers rendering until
# the single plt.show() call at the end.
grouper = dyn.groupby("OBJECTID")
fig, axes = create_subplots(grouper.ngroups, figsize=(12, 10))
for (station_id, station_df), axis in zip(grouper, axes.flat):
    hist(station_df['median_C_TOC'], ax=axis, show=False, ax_kws=dict(title=station_id))
plt.show()
23
../../_images/_notebooks_datasets_quadica_44_1.png
[37]:
# Build one column of median TOC concentration per station and draw a ridge plot.
# The concat is keyed by the groupby label so every column is named after the
# station it actually holds. Assigning dyn['OBJECTID'].unique() afterwards is
# only correct when `dyn` happens to be sorted by OBJECTID, because unique()
# keeps order of appearance while groupby() iterates in sorted order.
df = pd.concat(
    {station_id: grp['median_C_TOC'] for station_id, grp in dyn.groupby('OBJECTID')},
    axis=1,
)

_ = ridge(df, figsize=(10, 10), color="GnBu", title="median_C_TOC")
../../_images/_notebooks_datasets_quadica_45_0.png
[38]:
# Preview the first five rows of the TOC data (station 352, starting 1993-01).
dyn.head()
[38]:
median_FNC_TOC mean_Flux_TOC mean_FNFlux_TOC median_C_TOC median_Q OBJECTID avg_temp precip pet
1993-01-01 4.205242 849.784539 1768.257729 3.582176 2.49 352 1.874273 45.230158 13.236276
1993-02-01 4.473708 553.020333 2043.161550 3.076343 1.94 352 8.082227 70.054926 34.415847
1993-03-01 4.774344 802.060684 2314.616099 3.596575 2.30 352 0.408168 25.903097 10.312989
1993-04-01 4.386409 545.261698 1247.634582 3.678589 1.71 352 2.823309 119.545130 11.483053
1993-05-01 4.580450 411.800177 959.253376 3.825010 1.24 352 -2.816553 20.795173 9.571560
[39]:
# Preview the last five rows of the TOC data (station 1690, ending 2013-12).
dyn.tail()
[39]:
median_FNC_TOC mean_Flux_TOC mean_FNFlux_TOC median_C_TOC median_Q OBJECTID avg_temp precip pet
2013-08-01 3.352540 439.230872 577.937111 3.252773 1.560 1690 10.204866 236.756149 85.705574
2013-09-01 3.508932 571.347176 840.649765 3.384114 1.845 1690 14.447392 54.103719 119.770443
2013-10-01 3.564205 640.434776 859.957624 3.476477 2.160 1690 18.357580 52.262876 144.086108
2013-11-01 3.800113 1367.471649 1534.585561 3.774498 3.695 1690 16.688504 39.944479 114.876081
2013-12-01 3.686678 1278.301345 2066.823176 3.517463 3.950 1690 11.801535 101.380651 59.491577
[40]:
# Verify that no column of the TOC subset contains missing values.
print(dyn.isna().sum())
median_FNC_TOC     0
mean_Flux_TOC      0
mean_FNFlux_TOC    0
median_C_TOC       0
median_Q           0
OBJECTID           0
avg_temp           0
precip             0
pet                0
dtype: int64
[41]:
# `cat` has one row per row of `dyn` (5796) and 113 attribute columns.
print(cat.shape)
(5796, 113)

monthly DOC

[42]:
# Fetch only the DOC-related columns with max_nan_tol=0; the result has no
# missing values (see the isna() output below). Rebinds `dyn` and `cat`.
dyn, cat = dataset.fetch_monthly(features="DOC", max_nan_tol=0)
print(dyn.shape)
(6804, 9)
[43]:
# Station ids that remain in the DOC subset.
dyn['OBJECTID'].unique()
[43]:
array([ 663,  678,  690,  696,  701,  705,  711,  718,  722,  723,  728,
        734,  744,  745,  746,  750,  754,  776,  782,  783,  785,  786,
       1016, 1017, 1019, 1082, 1271], dtype=int64)
[44]:
# Count the distinct stations in the DOC subset. OBJECTID is an int64 column
# (no missing values), so nunique() equals len(unique()).
print(dyn['OBJECTID'].nunique())
27
[45]:
# Preview the first five rows of the DOC data (station 663, starting 1993-01).
dyn.head()
[45]:
median_FNC_DOC mean_Flux_DOC median_C_DOC median_Q mean_FNFlux_DOC OBJECTID avg_temp precip pet
1993-01-01 7.570729 5290.522451 8.168849 6.70 3880.725444 663 3.807984 121.793169 11.415899
1993-02-01 7.409652 3562.398652 7.576350 5.29 3470.252080 663 8.473467 116.131558 28.869268
1993-03-01 7.138509 1840.949964 6.624830 3.17 3071.222351 663 1.430167 35.333157 9.847851
1993-04-01 6.763954 2064.170897 6.769762 3.28 2187.148516 663 4.333394 180.090165 8.050768
1993-05-01 6.355921 1291.672996 6.305964 2.28 1380.674341 663 0.830066 30.062856 13.271998
[46]:
# Preview the last five rows of the DOC data (station 1271, ending 2013-12).
dyn.tail()
[46]:
median_FNC_DOC mean_Flux_DOC median_C_DOC median_Q mean_FNFlux_DOC OBJECTID avg_temp precip pet
2013-08-01 4.061765 3158.773568 4.031046 7.952220 4779.806779 1271 10.167641 163.066607 87.326095
2013-09-01 4.048447 2794.135418 3.949723 7.275374 5032.150952 1271 13.999010 186.180472 116.162897
2013-10-01 3.936584 2445.844458 3.826177 5.771638 4011.788115 1271 17.790892 33.659651 144.276601
2013-11-01 4.007867 2187.576948 3.717356 6.491699 6221.995187 1271 16.185475 72.816926 115.142294
2013-12-01 3.923625 3324.340569 3.687420 9.265053 6167.013957 1271 11.191066 75.990604 55.997746
[47]:
# Verify that no column of the DOC subset contains missing values.
print(dyn.isna().sum())
median_FNC_DOC     0
mean_Flux_DOC      0
median_C_DOC       0
median_Q           0
mean_FNFlux_DOC    0
OBJECTID           0
avg_temp           0
precip             0
pet                0
dtype: int64
[48]:
# `cat` has one row per row of `dyn` (6804) and 113 attribute columns.
print(cat.shape)
(6804, 113)