Source code for calplot.calplot

"""
Calendar heatmaps from Pandas time series data.

Plot Pandas time series data sampled by day in a heatmap per calendar year.
"""

import calendar
import datetime
from dateutil.relativedelta import relativedelta

import numpy as np
import pandas as pd

from matplotlib.colors import ColorConverter, ListedColormap
from matplotlib.patches import Polygon
import matplotlib.pyplot as plt

[docs]def yearplot(data, year=None, how='sum', vmin=None, vmax=None, cmap='viridis', fillcolor='whitesmoke', linewidth=1, linecolor=None, edgecolor='gray', daylabels=calendar.day_abbr[:], dayticks=True, dropzero=None, textformat=None, textfiller='', textcolor='black', monthlabels=calendar.month_abbr[1:], monthlabeloffset=15, monthticks=True, ax=None, **kwargs): """ Plot one year from a timeseries as a calendar heatmap. Parameters ---------- data : Series Data for the plot. Must be indexed by a DatetimeIndex. year : integer Only data indexed by this year will be plotted. If `None`, the first year for which there is data will be plotted. how : string Method for resampling data by day. If `None`, assume data is already sampled by day and don't resample. Otherwise, this is passed to Pandas `Series.resample`. vmin, vmax : floats Values to anchor the colormap. If `None`, min and max are used after resampling data by day. cmap : matplotlib colormap name or object The mapping from data values to color space. fillcolor : matplotlib color Color to use for days without data. linewidth : float Width of the lines that will divide each day. linecolor : color Color of the lines that will divide each day. If `None`, the axes background color is used, or 'white' if it is transparent. daylabels : list Strings to use as labels for days, must be of length 7. dayticks : list or int or bool If `True`, label all days. If `False`, don't label days. If a list, only label days with these indices. If an integer, label every n day. dropzero : bool If `True`, don't fill a color for days with a zero value. monthlabels : list Strings to use as labels for months, must be of length 12. monthlabeloffset : integer Day offset for labels for months to adjust horizontal alignment. monthticks : list or int or bool If `True`, label all months. If `False`, don't label months. If a list, only label months with these indices. If an integer, label every n month. edgecolor : color Color of the lines that will divide months. textformat : string Text format string for grid cell text textfiller : string Fallback text for grid cell text for cells with no data textcolor : color Color of the grid cell text ax : matplotlib Axes Axes in which to draw the plot, otherwise use the currently-active Axes. kwargs : other keyword arguments All other keyword arguments are passed to matplotlib `ax.pcolormesh`. Returns ------- ax : matplotlib Axes Axes object with the calendar heatmap. """ if year is None: year = data.index.sort_values()[0].year if how is None: # Assume already sampled by day. by_day = data else: # Sample by day. by_day = data.resample('D').agg(how) # Default to dropping zero values for a series with over 50% of rows being zero. if not (dropzero is False) and (by_day[by_day == 0].count() > 0.5 * by_day.count()): dropzero = True if dropzero: by_day = by_day.replace({0: np.nan}).dropna() # Min and max per day. if vmin is None: vmin = by_day.min() if vmax is None: vmax = by_day.max() if ax is None: ax = plt.gca() if linecolor is None: # Unfortunately, linecolor cannot be transparent, as it is drawn on # top of the heatmap cells. Therefore it is only possible to mimic # transparent lines by setting them to the axes background color. This # of course won't work when the axes itself has a transparent # background so in that case we default to white which will usually be # the figure or canvas background color. linecolor = ax.get_facecolor() if ColorConverter().to_rgba(linecolor)[-1] == 0: linecolor = 'white' # Filter on year. try: # could be empty due to `dropzero` by_day = by_day[str(year)] except KeyError: pass # Add missing days. by_day = by_day.reindex( pd.date_range(start=str(year), end=str(year + 1), freq='D', tz=by_day.index.tzinfo)[:-1]) # Create data frame we can pivot later. by_day = pd.DataFrame({'data': by_day, 'fill': 1, 'day': by_day.index.dayofweek, 'week': by_day.index.isocalendar().week}) # There may be some days assigned to previous year's last week or # next year's first week. We create new week numbers for them so # the ordering stays intact and week/day pairs unique. by_day.loc[(by_day.index.month == 1) & (by_day.week > 50), 'week'] = 0 by_day.loc[(by_day.index.month == 12) & (by_day.week < 10), 'week'] \ = by_day.week.max() + 1 # Pivot data on day and week and mask NaN days. plot_data = by_day.pivot(index='day', columns='week', values='data').values[::-1] plot_data = np.ma.masked_where(np.isnan(plot_data), plot_data) # Do the same for all days of the year, not just those we have data for. fill_data = by_day.pivot(index='day', columns='week', values='fill').values[::-1] fill_data = np.ma.masked_where(np.isnan(fill_data), fill_data) # Draw heatmap for all days of the year with fill color. ax.pcolormesh(fill_data, vmin=0, vmax=1, cmap=ListedColormap([fillcolor])) # Draw heatmap. kwargs['linewidth'] = linewidth kwargs['edgecolors'] = linecolor ax.pcolormesh(plot_data, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs) # Limit heatmap to our data. ax.set(xlim=(0, plot_data.shape[1]), ylim=(0, plot_data.shape[0])) # Square cells. ax.set_aspect('equal') # Remove spines and ticks. for side in ('top', 'right', 'left', 'bottom'): ax.spines[side].set_visible(False) for axis in (ax.xaxis, ax.yaxis): axis.set_tick_params(which='both', length=0) # Get indices for monthlabels. if monthticks is True: monthticks = range(len(monthlabels)) elif monthticks is False: monthticks = [] # Get indices for daylabels. if dayticks is True: dayticks = range(len(daylabels)) elif dayticks is False: dayticks = [] ax.set_xlabel('') ax.set_xticks([by_day.loc[pd.Timestamp( datetime.date(year, i + 1, monthlabeloffset))].week for i in monthticks]) ax.set_xticklabels([monthlabels[i] for i in monthticks]) ax.set_ylabel('') ax.yaxis.set_ticks_position('right') ax.set_yticks([6 - i + 0.5 for i in dayticks]) ax.set_yticklabels([daylabels[i] for i in dayticks], rotation='horizontal', va='center') # Text in mesh grid if format is specified. if textformat is not None: for y in range(plot_data.shape[0]): for x in range(plot_data.shape[1]): content = '' masked = plot_data[y, x] if masked is np.ma.masked: if fill_data[y, x] == 1: content = textfiller else: content = textformat.format(masked) ax.text(x + 0.5, y + 0.5, content, color=textcolor, ha='center', va='center') # Month borders code credited to https://github.com/rougier/calendar-heatmap xticks = [] start = datetime.datetime(year, 1, 1).weekday() for month in range(1, 13): first = datetime.datetime(year, month, 1) last = first + relativedelta(months=1, days=-1) y0 = 7 - first.weekday() y1 = 7 - last.weekday() x0 = (int(first.strftime('%j'))+start-1)//7 x1 = (int(last.strftime('%j'))+start-1)//7 P = [(x0, y0), (x0+1, y0), (x0+1, 7), (x1+1, 7), (x1+1, y1-1), (x1, y1-1), (x1, 0), (x0, 0) ] xticks.append(x0 + (x1-x0+1)/2) poly = Polygon(P, edgecolor=edgecolor, facecolor='None', linewidth=linewidth, zorder=20, clip_on=False) ax.add_artist(poly) return ax
[docs]def calplot(data, how='sum', yearlabels=True, yearascending=True, yearlabel_kws=None, subplot_kws=None, gridspec_kws=None, figsize=None, fig_kws=None, colorbar=None, suptitle=None, suptitle_kws=None, tight_layout=True, **kwargs): """ Plot a timeseries as a calendar heatmap. Parameters ---------- data : Series Data for the plot. Must be indexed by a DatetimeIndex. how : string Method for resampling data by day. If `None`, assume data is already sampled by day and don't resample. Otherwise, this is passed to Pandas `Series.resample`. figsize : (float, float) Size of figure for the plot. suptitle : string Title for the plot. yearlabels : bool Whether or not to draw the year label for each subplot. yearascending : bool Sort the calendar in ascending or descending order. yearlabel_kws : dict Keyword arguments passed to the matplotlib `set_ylabel` call which is used to draw the year for each subplot. subplot_kws : dict Keyword arguments passed to the matplotlib `subplots` call. gridspec_kws : dict Keyword arguments passed to the matplotlib `GridSpec` constructor used to create the grid the subplots are placed on. fig_kws : dict Keyword arguments passed to the matplotlib `subplots` call. suptitle_kws : dict Keyword arguments passed to the matplotlib `suptitle` call. kwargs : other keyword arguments All other keyword arguments are passed to `yearplot`. Returns ------- fig, axes : matplotlib Figure and Axes Tuple where `fig` is the matplotlib Figure object `axes` is an array of matplotlib Axes objects with the calendar heatmaps, one per year. """ if yearlabel_kws is None: yearlabel_kws = dict() if subplot_kws is None: subplot_kws = dict() if gridspec_kws is None: gridspec_kws = dict() if fig_kws is None: fig_kws = dict() if suptitle_kws is None: suptitle_kws = dict() years = np.unique(data.index.year) if not yearascending: years = years[::-1] if colorbar is None: colorbar = data.nunique() > 1 if figsize is None: figsize = (10+(colorbar*2.5), 1.7*len(years)) fig, axes = plt.subplots(nrows=len(years), ncols=1, squeeze=False, figsize=figsize, subplot_kw=subplot_kws, gridspec_kw=gridspec_kws, **fig_kws) axes = axes.T[0] # We explicitely resample by day only once. This is an optimization. by_day = data if how is not None: by_day = by_day.resample('D').agg(how) ylabel_kws = dict( fontsize=30, color='gray', fontname='Helvetica', fontweight='bold', ha='center') ylabel_kws.update(yearlabel_kws) max_weeks = 0 for year, ax in zip(years, axes): yearplot(by_day, year=year, how=None, ax=ax, **kwargs) max_weeks = max(max_weeks, ax.get_xlim()[1]) if yearlabels: ax.set_ylabel(str(year), **ylabel_kws) # In a leap year it might happen that we have 54 weeks (e.g., 2012). # Here we make sure the width is consistent over all years. for ax in axes: ax.set_xlim(0, max_weeks) stitle_kws = dict() if tight_layout: plt.tight_layout() stitle_kws.update({'y': 1}) if colorbar: if tight_layout: stitle_kws.update({'x': 0.425, 'y': 1.03}) if len(years) == 1: fig.colorbar(axes[0].get_children()[1], ax=axes.ravel().tolist(), orientation='vertical') else: fig.subplots_adjust(right=0.8) cax = fig.add_axes([0.85, 0.025, 0.02, 0.95]) fig.colorbar(axes[0].get_children()[1], cax=cax, orientation='vertical') stitle_kws.update(suptitle_kws) plt.suptitle(suptitle, **stitle_kws) return fig, axes