.. _sphx_glr_gallery_misc_rec_groupby_demo.py:

================
Rec Groupby Demo
================

.. rst-class:: sphx-glr-script-out

   Out::

      loading /tmp/build_docs/lib/python3.6/site-packages/matplotlib/mpl-data/sample_data/msft.csv
      summary by years
      years   rcnt   rmean   rmedian   rsigma
       2003     65   0.002     0.001    0.016
      summary by months
      months   rcnt    rmean   rmedian   rsigma
           6      8   -0.002    -0.002    0.012
           7     22    0.002     0.003    0.018
           8     21    0.000     0.000    0.010
           9     14    0.009     0.013    0.019
      summary by year and month
      years   months   rcnt    rmean   rmedian   rsigma
       2003        6      8   -0.002    -0.002    0.012
       2003        7     22    0.002     0.003    0.018
       2003        8     21    0.000     0.000    0.010
       2003        9     14    0.009     0.013    0.019
      summary by volume
      volcode   rcnt   rmean   rmedian   rsigma
            5     65   0.002     0.001    0.016

|

.. code-block:: python

   from __future__ import print_function
   import numpy as np
   import matplotlib.mlab as mlab
   import matplotlib.cbook as cbook

   # Locate the bundled sample CSV, load it as a record array and sort it
   # in place.
   datafile = cbook.get_sample_data('msft.csv', asfileobj=False)
   print('loading', datafile)
   r = mlab.csv2rec(datafile)
   r.sort()


   def daily_return(prices):
       """Return an array of daily returns computed from a price array.

       Element 0 is left at zero because it has no previous price; element
       ``i`` is ``(prices[i] - prices[i-1]) / prices[i-1]``.
       """
       g = np.zeros_like(prices)
       g[1:] = (prices[1:] - prices[:-1])/prices[:-1]
       return g


   def volume_code(volume):
       """Code the continuous volume data categorically.

       Returns, for each volume, its ``np.searchsorted`` insertion index
       into the fixed list of bin edges below.
       """
       # NOTE(review): 10e6 and 1e7 are the same value, so index 4 is never
       # produced.  Left unchanged so the recorded output above (volcode 5)
       # stays valid.
       ind = np.searchsorted([1e5, 1e6, 5e6, 10e6, 1e7], volume)
       return ind

   # a list of (dtype_name, summary_function, output_dtype_name).
   # rec_summarize will call each function on the indicated recarray
   # attribute, and assign the result to the output name in the returned
   # record array.
   summaryfuncs = (
       ('date', lambda x: [thisdate.year for thisdate in x], 'years'),
       ('date', lambda x: [thisdate.month for thisdate in x], 'months'),
       ('date', lambda x: [thisdate.weekday() for thisdate in x], 'weekday'),
       ('adj_close', daily_return, 'dreturn'),
       ('volume', volume_code, 'volcode'),
   )

   rsum = mlab.rec_summarize(r, summaryfuncs)

   # stats is a list of (dtype_name, function, output_dtype_name).
   # rec_groupby will summarize the attribute identified by the
   # dtype_name over the groups in the groupby list, and assign the
   # result to the output_dtype_name
   stats = (
       ('dreturn', len, 'rcnt'),
       ('dreturn', np.mean, 'rmean'),
       ('dreturn', np.median, 'rmedian'),
       ('dreturn', np.std, 'rsigma'),
   )

   # you can summarize over a single variable, like years or months
   print('summary by years')
   ry = mlab.rec_groupby(rsum, ('years',), stats)
   print(mlab.rec2txt(ry))

   print('summary by months')
   rm = mlab.rec_groupby(rsum, ('months',), stats)
   print(mlab.rec2txt(rm))

   # or over multiple variables like years and months
   print('summary by year and month')
   rym = mlab.rec_groupby(rsum, ('years', 'months'), stats)
   print(mlab.rec2txt(rym))

   print('summary by volume')
   rv = mlab.rec_groupby(rsum, ('volcode',), stats)
   print(mlab.rec2txt(rv))

**Total running time of the script:** ( 0 minutes 0.022 seconds)

.. only:: html

   .. container:: sphx-glr-footer

      .. container:: sphx-glr-download

         :download:`Download Python source code: rec_groupby_demo.py <rec_groupby_demo.py>`

      .. container:: sphx-glr-download

         :download:`Download Jupyter notebook: rec_groupby_demo.ipynb <rec_groupby_demo.ipynb>`

.. only:: html

   .. rst-class:: sphx-glr-signature

      `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_