[1]:
%matplotlib inline
Quadica dataset
This notebook shows how to fetch the Quadica dataset of Ebeling et al. (2022).
[2]:
import pandas as pd
import matplotlib.pyplot as plt
from easy_mpl import hist, ridge
from ai4water.datasets import Quadica
from easy_mpl.utils import create_subplots
from ai4water.utils.utils import get_version_info
**********Tensorflow models could not be imported **********
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\sklearn\experimental\enable_hist_gradient_boosting.py:16: UserWarning: Since version 1.0, it is not needed to import enable_hist_gradient_boosting anymore. HistGradientBoostingClassifier and HistGradientBoostingRegressor are now stable and can be normally imported from sklearn.ensemble.
warnings.warn(
[3]:
# Print the name and version of every library reported by get_version_info(),
# so the environment used to produce the outputs below is recorded.
for name, version in get_version_info().items():
    print(name, version)
python 3.8.12 | packaged by conda-forge | (default, Oct 12 2021, 21:19:05) [MSC v.1916 64 bit (AMD64)]
os nt
ai4water 1.06
easy_mpl 0.21.3
SeqMetrics 1.3.3
numpy 1.22.2
pandas 1.4.0
matplotlib 3.5.1
h5py 3.6.0
joblib 1.2.0
sklearn 1.2.1
xarray 0.21.1
netCDF4 1.5.8
seaborn 0.12.0
Not downloading the data since the directory
D:\C\Anaconda3\envs\ai4w_dataset\lib\site-packages\ai4water\datasets\data\Quadica already exists.
Use overwrite=True to remove previously saved files and download again
(828, 1386)
[5]:
avg_temp.head()
[5]:
1 | 2 | 3 | 4 | 8 | 9 | 10 | 11 | 12 | 16 | ... | 651 | 655 | 660 | 1002 | 1007 | 1012 | 1013 | 1277 | 1279 | 1281 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Year_Month | |||||||||||||||||||||
1950-01-01 | -2.297333 | -2.459127 | -2.342236 | -2.014634 | -1.537954 | -1.470925 | -2.055828 | -2.097191 | -2.068334 | -1.822387 | ... | -1.627405 | -2.270952 | -2.310768 | 0.328745 | 0.124732 | -0.292483 | 0.567522 | -4.408611 | -4.568310 | -4.675208 |
1950-10-01 | 7.961588 | 8.154172 | 8.073226 | 8.199285 | 7.843346 | 7.887170 | 8.030769 | 8.013243 | 8.092491 | 7.503624 | ... | 8.462639 | 8.232144 | 8.088379 | 8.616307 | 8.907722 | 8.254993 | 8.664533 | 4.981876 | 4.807424 | 4.693177 |
1950-11-01 | 4.387634 | 4.434822 | 4.156425 | 4.526889 | 4.255621 | 4.334019 | 4.359056 | 4.304337 | 4.374317 | 3.918750 | ... | 4.312323 | 4.267094 | 4.137416 | 4.930104 | 5.048923 | 4.665080 | 4.964220 | 1.274203 | 1.065288 | 0.925376 |
1950-12-01 | -1.291138 | -1.312995 | -0.986580 | -1.682717 | -1.870502 | -1.807383 | -1.876309 | -1.883287 | -1.772402 | -1.421752 | ... | -0.261073 | -0.820888 | -0.987980 | -0.022088 | 0.577974 | -0.173298 | 0.067284 | -4.234557 | -4.388704 | -4.488470 |
1950-02-01 | 2.526310 | 2.694384 | 2.198610 | 2.699495 | 2.684751 | 2.752606 | 2.562898 | 2.520741 | 2.582700 | 2.225678 | ... | 2.099005 | 2.235437 | 2.112160 | 2.358829 | 2.370221 | 2.440856 | 2.475693 | 0.020008 | -0.113062 | -0.189216 |
5 rows × 1386 columns
potential evapotranspiration (pet)
(828, 1386)
precipitation
(828, 1386)
monthly median values
[8]:
# Load the table of monthly median values. Judging by the preview printed
# below, rows are keyed by OBJECTID and Month, and each constituent has a
# sample count (n_*) and a median (median_*) column.
mon_medians = dataset.monthly_medians()
print(mon_medians.shape)
(16629, 18)
[9]:
mon_medians.head()
[9]:
OBJECTID | Month | n_Q | median_Q | n_NO3 | median_NO3N | n_NMin | median_NMin | n_TN | median_TN | n_PO4 | median_PO4P | n_TP | median_TP | n_DOC | median_DOC | n_TOC | median_TOC | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 1 | 0 | NaN | 11 | 1.700 | 11 | 1.960 | 11 | 3.60 | 11 | 0.0250 | 11 | 0.1180 | 0 | NaN | 11 | 6.60 |
1 | 1 | 2 | 0 | NaN | 12 | 1.740 | 12 | 1.975 | 12 | 4.30 | 12 | 0.0285 | 12 | 0.1375 | 0 | NaN | 12 | 6.85 |
2 | 1 | 3 | 0 | NaN | 11 | 1.900 | 11 | 2.100 | 11 | 4.70 | 11 | 0.0220 | 11 | 0.0880 | 0 | NaN | 11 | 7.50 |
3 | 1 | 4 | 0 | NaN | 10 | 1.405 | 10 | 1.580 | 10 | 2.95 | 10 | 0.0150 | 10 | 0.1115 | 0 | NaN | 10 | 7.00 |
4 | 1 | 5 | 0 | NaN | 11 | 1.000 | 11 | 1.260 | 11 | 2.60 | 11 | 0.0280 | 11 | 0.1550 | 0 | NaN | 11 | 9.00 |
(50186, 47)
catchment attributes
(1386, 113)
[12]:
print(cat_attrs.columns)
Index(['OBJECTID', 'Station', 'Area_km2', 'f_AreaGer', 'dem.mean',
'dem.median', 'slo.mean', 'slo.median', 'twi.mean', 'twi.med',
...
'flashi', 'BFI', 'P_mm', 'P_SIsw', 'P_SI', 'P_lambda', 'P_alpha',
'PET_mm', 'AI', 'T_mean'],
dtype='object', length=113)
[13]:
# Restrict the catchment attributes to a subset of stations. The values passed
# in `stations` correspond to the OBJECTID column of the returned frame
# (one row per requested station, all 113 attribute columns kept).
dataset.catchment_attributes(stations=[1,2,3])
[13]:
OBJECTID | Station | Area_km2 | f_AreaGer | dem.mean | dem.median | slo.mean | slo.median | twi.mean | twi.med | ... | flashi | BFI | P_mm | P_SIsw | P_SI | P_lambda | P_alpha | PET_mm | AI | T_mean | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | BB_AMFL_0010 | 21.65 | 1.0 | 74.683632 | 72.135452 | 0.750141 | 0.678210 | 15.002993 | 14.357248 | ... | NaN | NaN | 589.535167 | 1.338849 | 2.044286 | 0.322334 | 5.007660 | 760.654789 | 1.290294 | 9.425497 |
1 | 2 | BB_AZMFL_0010 | 50.47 | 1.0 | 61.898052 | 56.878677 | 1.157724 | 0.823584 | 14.753934 | 14.246800 | ... | 0.0 | 0.878186 | 544.733603 | 1.661279 | 2.726427 | 0.306447 | 4.866778 | 774.804494 | 1.422743 | 9.381932 |
2 | 3 | BB_BAFL_0010 | 56.19 | 1.0 | 48.056680 | 50.443848 | 0.973699 | 0.846759 | 14.805566 | 14.195425 | ... | NaN | NaN | 535.680048 | 1.813461 | 3.034012 | 0.308551 | 4.753558 | 719.133840 | 1.342496 | 8.983454 |
3 rows × 113 columns
monthly data
[14]:
# Fetch the monthly data for all features. Two frames are returned with the
# same number of rows: `dyn` holds the time-varying columns (water quality,
# discharge, OBJECTID and the meteorological columns) and `cat` holds the 113
# catchment-attribute columns.
# NOTE(review): max_nan_tol=None appears to disable filtering on missing
# values -- the isna() counts printed further down are non-zero. Confirm
# against the ai4water documentation.
dyn, cat = dataset.fetch_monthly(max_nan_tol=None)
print(dyn.shape)
(29484, 33)
[15]:
dyn['OBJECTID'].unique()
[15]:
array([ 333, 334, 335, 336, 337, 340, 341, 342, 345, 346, 347,
348, 349, 350, 352, 355, 358, 359, 360, 362, 363, 364,
365, 368, 370, 373, 374, 376, 380, 381, 391, 393, 637,
663, 667, 673, 678, 686, 687, 688, 690, 692, 696, 701,
705, 711, 716, 718, 722, 723, 728, 730, 734, 735, 736,
737, 739, 740, 742, 744, 745, 746, 750, 752, 754, 769,
773, 774, 775, 776, 778, 782, 783, 785, 786, 787, 789,
796, 797, 874, 885, 899, 985, 986, 991, 1011, 1016, 1017,
1019, 1082, 1113, 1186, 1237, 1238, 1255, 1270, 1271, 1275, 1287,
1303, 1332, 1467, 1473, 1482, 1495, 1570, 1571, 1573, 1672, 1677,
1678, 1679, 1680, 1683, 1688, 1690, 1691], dtype=int64)
[16]:
print(dyn.columns)
Index(['median_C_NO3', 'median_C_NMin', 'median_FNC_TP', 'median_FNC_PO4',
'mean_FNFlux_TN', 'median_FNC_NO3', 'mean_FNFlux_DOC', 'mean_FNFlux_TP',
'median_C_TP', 'mean_Flux_TOC', 'median_FNC_NMin', 'mean_FNFlux_PO4',
'mean_Flux_DOC', 'mean_Flux_NMin', 'mean_FNFlux_NO3',
'mean_FNFlux_NMin', 'median_FNC_TOC', 'mean_Flux_TN', 'median_C_TOC',
'mean_Flux_NO3', 'mean_Flux_PO4', 'mean_Flux_TP', 'median_FNC_DOC',
'median_C_TN', 'mean_FNFlux_TOC', 'median_Q', 'median_C_DOC',
'median_C_PO4', 'median_FNC_TN', 'OBJECTID', 'avg_temp', 'precip',
'pet'],
dtype='object')
[17]:
print(dyn.isna().sum())
median_C_NO3 2691
median_C_NMin 9161
median_FNC_TP 1819
median_FNC_PO4 1988
mean_FNFlux_TN 18880
median_FNC_NO3 2709
mean_FNFlux_DOC 16361
mean_FNFlux_TP 1819
median_C_TP 1819
mean_Flux_TOC 15456
median_FNC_NMin 9161
mean_FNFlux_PO4 1988
mean_Flux_DOC 16361
mean_Flux_NMin 9161
mean_FNFlux_NO3 2709
mean_FNFlux_NMin 9161
median_FNC_TOC 15469
mean_Flux_TN 18880
median_C_TOC 15456
mean_Flux_NO3 2691
mean_Flux_PO4 1988
mean_Flux_TP 1819
median_FNC_DOC 16361
median_C_TN 18880
mean_FNFlux_TOC 15469
median_Q 13
median_C_DOC 16361
median_C_PO4 1988
median_FNC_TN 18880
OBJECTID 0
avg_temp 0
precip 0
pet 0
dtype: int64
[18]:
print(cat.shape)
(29484, 113)
monthly TN
[19]:
# Fetch only the TN-related monthly columns (median_Q, OBJECTID and the
# meteorological columns are still included, see the preview below).
# With max_nan_tol=0 the returned frame contains no missing values, as the
# isna() check further down shows; presumably stations exceeding the tolerance
# are dropped, which would explain the smaller set of OBJECTIDs -- TODO confirm.
# Note that this rebinds `dyn` and `cat` from the previous section.
dyn, cat = dataset.fetch_monthly(features="TN", max_nan_tol=0)
print(dyn.shape)
(6300, 9)
[20]:
dyn.head()
[20]:
median_C_TN | mean_Flux_TN | mean_FNFlux_TN | median_Q | median_FNC_TN | OBJECTID | avg_temp | precip | pet | |
---|---|---|---|---|---|---|---|---|---|
1993-01-01 | 7.973254 | 4854.350816 | 3785.002788 | 6.70 | 8.143008 | 663 | 3.807984 | 121.793169 | 11.415899 |
1993-02-01 | 7.955991 | 3698.383160 | 3531.640525 | 5.29 | 8.062551 | 663 | 8.473467 | 116.131558 | 28.869268 |
1993-03-01 | 8.138089 | 2249.559645 | 3076.825302 | 3.17 | 7.878655 | 663 | 1.430167 | 35.333157 | 9.847851 |
1993-04-01 | 7.665461 | 2272.942794 | 2298.055504 | 3.28 | 7.780824 | 663 | 4.333394 | 180.090165 | 8.050768 |
1993-05-01 | 7.843202 | 1551.660935 | 1607.637873 | 2.28 | 7.807650 | 663 | 0.830066 | 30.062856 | 13.271998 |
[21]:
dyn.tail()
[21]:
median_C_TN | mean_Flux_TN | mean_FNFlux_TN | median_Q | median_FNC_TN | OBJECTID | avg_temp | precip | pet | |
---|---|---|---|---|---|---|---|---|---|
2013-08-01 | 3.035308 | 6315.751380 | 7280.583175 | 21.62 | 3.065682 | 1019 | 11.212706 | 148.730218 | 90.947478 |
2013-09-01 | 3.288561 | 12811.035546 | 8107.321967 | 38.94 | 3.194456 | 1019 | 16.430328 | 74.409189 | 130.848008 |
2013-10-01 | 3.444130 | 30742.345242 | 12261.944444 | 84.51 | 3.452440 | 1019 | 20.729773 | 43.103508 | 154.377919 |
2013-11-01 | 3.732225 | 45608.256567 | 22491.901904 | 136.87 | 3.742268 | 1019 | 18.462523 | 54.366963 | 122.983270 |
2013-12-01 | 4.176698 | 30066.266276 | 37932.025054 | 61.38 | 3.963254 | 1019 | 14.342434 | 96.153852 | 70.408549 |
[22]:
print(dyn.isna().sum())
median_C_TN 0
mean_Flux_TN 0
mean_FNFlux_TN 0
median_Q 0
median_FNC_TN 0
OBJECTID 0
avg_temp 0
precip 0
pet 0
dtype: int64
[23]:
dyn['OBJECTID'].unique()
[23]:
array([ 663, 673, 678, 686, 687, 688, 690, 728, 730, 734, 744,
745, 746, 750, 754, 782, 783, 785, 786, 985, 986, 991,
1016, 1017, 1019], dtype=int64)
25
[25]:
print(cat.shape)
(6300, 113)
[26]:
# Build a wide frame with one column of median_C_TN per station.
# Passing a dict to pd.concat labels every column with its own groupby key, so
# labels and data cannot get out of step. (groupby iterates keys in sorted
# order whereas .unique() returns order of first appearance; the two only
# coincide when `dyn` happens to be sorted by OBJECTID.)
df = pd.concat(
    {station: grp['median_C_TN'] for station, grp in dyn.groupby('OBJECTID')},
    axis=1,
)
# Draw the ridge plot of the per-station columns; the return value is
# discarded to keep the cell output clean.
_ = ridge(df, figsize=(10, 10), color="GnBu", title="median_C_TN")
monthly TP
[27]:
# Fetch only the TP-related monthly columns (median_Q, OBJECTID and the
# meteorological columns are still included, see the preview below).
# With max_nan_tol=0 the returned frame contains no missing values, as the
# isna() check further down shows; presumably stations exceeding the tolerance
# are dropped -- TODO confirm.
# Note that this rebinds `dyn` and `cat` from the previous section.
dyn, cat = dataset.fetch_monthly(features="TP", max_nan_tol=0)
print(dyn.shape)
(21420, 9)
[28]:
dyn['OBJECTID'].unique()
[28]:
array([ 334, 335, 336, 337, 340, 341, 342, 345, 347, 350, 352,
355, 358, 359, 360, 362, 363, 364, 365, 368, 370, 374,
376, 380, 381, 391, 663, 673, 678, 686, 687, 688, 690,
692, 696, 701, 705, 711, 716, 718, 722, 723, 728, 730,
734, 735, 736, 737, 739, 740, 742, 744, 745, 746, 750,
754, 769, 773, 776, 778, 782, 783, 785, 786, 874, 885,
899, 985, 986, 991, 1016, 1017, 1019, 1082, 1113, 1186, 1271,
1275, 1570, 1571, 1573, 1677, 1678, 1680, 1683], dtype=int64)
85
[30]:
dyn.head()
[30]:
mean_Flux_TP | mean_FNFlux_TP | median_C_TP | median_FNC_TP | median_Q | OBJECTID | avg_temp | precip | pet | |
---|---|---|---|---|---|---|---|---|---|
1993-01-01 | 396.554076 | 320.694272 | 0.074464 | 0.062897 | 53.20 | 334 | 1.593407 | 80.646032 | 14.305556 |
1993-02-01 | 119.576067 | 273.244117 | 0.046901 | 0.054044 | 25.15 | 334 | 5.956323 | 105.239667 | 32.282010 |
1993-03-01 | 247.820224 | 436.670327 | 0.045366 | 0.060441 | 33.50 | 334 | -1.057203 | 38.820471 | 10.558618 |
1993-04-01 | 245.905231 | 447.722065 | 0.054396 | 0.064261 | 52.10 | 334 | 1.455312 | 126.850398 | 12.198744 |
1993-05-01 | 213.419345 | 686.400592 | 0.051911 | 0.069937 | 47.70 | 334 | -2.350673 | 26.185855 | 13.697404 |
[31]:
dyn.tail()
[31]:
mean_Flux_TP | mean_FNFlux_TP | median_C_TP | median_FNC_TP | median_Q | OBJECTID | avg_temp | precip | pet | |
---|---|---|---|---|---|---|---|---|---|
2013-08-01 | 68.195512 | 83.103094 | 0.171760 | 0.174829 | 4.33 | 1683 | 10.229236 | 147.140054 | 87.170391 |
2013-09-01 | 83.325140 | 90.632953 | 0.137811 | 0.150251 | 6.11 | 1683 | 14.173561 | 99.925348 | 117.320530 |
2013-10-01 | 56.504054 | 92.819774 | 0.116680 | 0.124092 | 5.25 | 1683 | 18.329772 | 42.926368 | 146.748663 |
2013-11-01 | 111.452591 | 133.620403 | 0.095941 | 0.105120 | 11.85 | 1683 | 16.463807 | 49.014143 | 116.420306 |
2013-12-01 | 77.797496 | 149.404412 | 0.081465 | 0.090173 | 9.61 | 1683 | 11.541938 | 78.161588 | 58.856660 |
[32]:
print(dyn.isna().sum())
mean_Flux_TP 0
mean_FNFlux_TP 0
median_C_TP 0
median_FNC_TP 0
median_Q 0
OBJECTID 0
avg_temp 0
precip 0
pet 0
dtype: int64
[33]:
print(cat.shape)
(21420, 113)
monthly TOC
[34]:
# Fetch only the TOC-related monthly columns (median_Q, OBJECTID and the
# meteorological columns are still included, see the preview below).
# With max_nan_tol=0 the returned frame contains no missing values, as the
# isna() check further down shows; presumably stations exceeding the tolerance
# are dropped -- TODO confirm.
# Note that this rebinds `dyn` and `cat` from the previous section.
dyn, cat = dataset.fetch_monthly(features="TOC", max_nan_tol=0)
print(dyn.shape)
(5796, 9)
[35]:
dyn['OBJECTID'].unique()
[35]:
array([ 352, 355, 358, 359, 370, 374, 796, 797, 985, 991, 1016,
1019, 1473, 1482, 1570, 1571, 1573, 1677, 1678, 1680, 1683, 1688,
1690], dtype=int64)
[36]:
23
[37]:
# Build a wide frame with one column of median_C_TOC per station.
# Passing a dict to pd.concat labels every column with its own groupby key, so
# labels and data cannot get out of step. (groupby iterates keys in sorted
# order whereas .unique() returns order of first appearance; the two only
# coincide when `dyn` happens to be sorted by OBJECTID.)
df = pd.concat(
    {station: grp['median_C_TOC'] for station, grp in dyn.groupby('OBJECTID')},
    axis=1,
)
# Draw the ridge plot of the per-station columns; the return value is
# discarded to keep the cell output clean.
_ = ridge(df, figsize=(10, 10), color="GnBu", title="median_C_TOC")
[38]:
dyn.head()
[38]:
median_FNC_TOC | mean_Flux_TOC | mean_FNFlux_TOC | median_C_TOC | median_Q | OBJECTID | avg_temp | precip | pet | |
---|---|---|---|---|---|---|---|---|---|
1993-01-01 | 4.205242 | 849.784539 | 1768.257729 | 3.582176 | 2.49 | 352 | 1.874273 | 45.230158 | 13.236276 |
1993-02-01 | 4.473708 | 553.020333 | 2043.161550 | 3.076343 | 1.94 | 352 | 8.082227 | 70.054926 | 34.415847 |
1993-03-01 | 4.774344 | 802.060684 | 2314.616099 | 3.596575 | 2.30 | 352 | 0.408168 | 25.903097 | 10.312989 |
1993-04-01 | 4.386409 | 545.261698 | 1247.634582 | 3.678589 | 1.71 | 352 | 2.823309 | 119.545130 | 11.483053 |
1993-05-01 | 4.580450 | 411.800177 | 959.253376 | 3.825010 | 1.24 | 352 | -2.816553 | 20.795173 | 9.571560 |
[39]:
dyn.tail()
[39]:
median_FNC_TOC | mean_Flux_TOC | mean_FNFlux_TOC | median_C_TOC | median_Q | OBJECTID | avg_temp | precip | pet | |
---|---|---|---|---|---|---|---|---|---|
2013-08-01 | 3.352540 | 439.230872 | 577.937111 | 3.252773 | 1.560 | 1690 | 10.204866 | 236.756149 | 85.705574 |
2013-09-01 | 3.508932 | 571.347176 | 840.649765 | 3.384114 | 1.845 | 1690 | 14.447392 | 54.103719 | 119.770443 |
2013-10-01 | 3.564205 | 640.434776 | 859.957624 | 3.476477 | 2.160 | 1690 | 18.357580 | 52.262876 | 144.086108 |
2013-11-01 | 3.800113 | 1367.471649 | 1534.585561 | 3.774498 | 3.695 | 1690 | 16.688504 | 39.944479 | 114.876081 |
2013-12-01 | 3.686678 | 1278.301345 | 2066.823176 | 3.517463 | 3.950 | 1690 | 11.801535 | 101.380651 | 59.491577 |
[40]:
print(dyn.isna().sum())
median_FNC_TOC 0
mean_Flux_TOC 0
mean_FNFlux_TOC 0
median_C_TOC 0
median_Q 0
OBJECTID 0
avg_temp 0
precip 0
pet 0
dtype: int64
[41]:
print(cat.shape)
(5796, 113)
monthly DOC
[42]:
# Fetch only the DOC-related monthly columns (median_Q, OBJECTID and the
# meteorological columns are still included, see the preview below).
# With max_nan_tol=0 the returned frame contains no missing values, as the
# isna() check further down shows; presumably stations exceeding the tolerance
# are dropped -- TODO confirm.
# Note that this rebinds `dyn` and `cat` from the previous section.
dyn, cat = dataset.fetch_monthly(features="DOC", max_nan_tol=0)
print(dyn.shape)
(6804, 9)
[43]:
dyn['OBJECTID'].unique()
[43]:
array([ 663, 678, 690, 696, 701, 705, 711, 718, 722, 723, 728,
734, 744, 745, 746, 750, 754, 776, 782, 783, 785, 786,
1016, 1017, 1019, 1082, 1271], dtype=int64)
27
[45]:
dyn.head()
[45]:
median_FNC_DOC | mean_Flux_DOC | median_C_DOC | median_Q | mean_FNFlux_DOC | OBJECTID | avg_temp | precip | pet | |
---|---|---|---|---|---|---|---|---|---|
1993-01-01 | 7.570729 | 5290.522451 | 8.168849 | 6.70 | 3880.725444 | 663 | 3.807984 | 121.793169 | 11.415899 |
1993-02-01 | 7.409652 | 3562.398652 | 7.576350 | 5.29 | 3470.252080 | 663 | 8.473467 | 116.131558 | 28.869268 |
1993-03-01 | 7.138509 | 1840.949964 | 6.624830 | 3.17 | 3071.222351 | 663 | 1.430167 | 35.333157 | 9.847851 |
1993-04-01 | 6.763954 | 2064.170897 | 6.769762 | 3.28 | 2187.148516 | 663 | 4.333394 | 180.090165 | 8.050768 |
1993-05-01 | 6.355921 | 1291.672996 | 6.305964 | 2.28 | 1380.674341 | 663 | 0.830066 | 30.062856 | 13.271998 |
[46]:
dyn.tail()
[46]:
median_FNC_DOC | mean_Flux_DOC | median_C_DOC | median_Q | mean_FNFlux_DOC | OBJECTID | avg_temp | precip | pet | |
---|---|---|---|---|---|---|---|---|---|
2013-08-01 | 4.061765 | 3158.773568 | 4.031046 | 7.952220 | 4779.806779 | 1271 | 10.167641 | 163.066607 | 87.326095 |
2013-09-01 | 4.048447 | 2794.135418 | 3.949723 | 7.275374 | 5032.150952 | 1271 | 13.999010 | 186.180472 | 116.162897 |
2013-10-01 | 3.936584 | 2445.844458 | 3.826177 | 5.771638 | 4011.788115 | 1271 | 17.790892 | 33.659651 | 144.276601 |
2013-11-01 | 4.007867 | 2187.576948 | 3.717356 | 6.491699 | 6221.995187 | 1271 | 16.185475 | 72.816926 | 115.142294 |
2013-12-01 | 3.923625 | 3324.340569 | 3.687420 | 9.265053 | 6167.013957 | 1271 | 11.191066 | 75.990604 | 55.997746 |
[47]:
print(dyn.isna().sum())
median_FNC_DOC 0
mean_Flux_DOC 0
median_C_DOC 0
median_Q 0
mean_FNFlux_DOC 0
OBJECTID 0
avg_temp 0
precip 0
pet 0
dtype: int64
[48]:
print(cat.shape)
(6804, 113)