"""
Calendar heatmaps from Pandas time series data.
Plot Pandas time series data sampled by day in a heatmap per calendar year.
"""
import calendar
import datetime
from dateutil.relativedelta import relativedelta
import numpy as np
import pandas as pd
from matplotlib.colors import ColorConverter, ListedColormap
from matplotlib.patches import Polygon
import matplotlib.pyplot as plt
def _resolve_ticks(ticks, labels):
    """
    Turn a `dayticks`/`monthticks` argument into a list of label indices.

    Parameters
    ----------
    ticks : list or int or bool
        `True` selects every label and `False` selects none. An integer
        `n` selects every n-th label, starting at index ``n // 2``. Any
        other value is treated as an iterable of indices into `labels`.
    labels : list
        The labels the returned indices refer to.

    Returns
    -------
    indices : list of int
        Indices into `labels`. Always a concrete list so that it can be
        iterated more than once.
    """
    # Booleans are matched by identity first: `bool` is a subclass of
    # `int` and must not fall into the "every n-th label" branch.
    if ticks is True:
        return list(range(len(labels)))
    if ticks is False:
        return []
    if isinstance(ticks, int):
        return list(range(len(labels))[ticks // 2::ticks])
    return list(ticks)


def yearplot(data, year=None, how='sum',
             vmin=None, vmax=None,
             cmap='viridis', fillcolor='whitesmoke',
             linewidth=1, linecolor=None, edgecolor='gray',
             daylabels=calendar.day_abbr[:], dayticks=True,
             dropzero=None,
             textformat=None, textfiller='', textcolor='black',
             monthlabels=calendar.month_abbr[1:], monthlabeloffset=15,
             monthticks=True,
             ax=None, **kwargs):
    """
    Plot one year from a timeseries as a calendar heatmap.

    Parameters
    ----------
    data : Series
        Data for the plot. Must be indexed by a DatetimeIndex.
    year : integer
        Only data indexed by this year will be plotted. If `None`, the first
        year for which there is data will be plotted.
    how : string
        Method for resampling data by day. If `None`, assume data is already
        sampled by day and don't resample. Otherwise, this is passed to
        ``data.resample('D').agg``.
    vmin, vmax : floats
        Values to anchor the colormap. If `None`, the min and max of the
        daily data are used (computed before filtering on `year`).
    cmap : matplotlib colormap name or object
        The mapping from data values to color space.
    fillcolor : matplotlib color
        Color to use for days without data.
    linewidth : float
        Width of the lines that will divide each day.
    linecolor : color
        Color of the lines that will divide each day. If `None`, the axes
        background color is used, or 'white' if it is transparent.
    edgecolor : color
        Color of the lines that will divide months.
    daylabels : list
        Strings to use as labels for days, must be of length 7.
    dayticks : list or int or bool
        If `True`, label all days. If `False`, don't label days. If a list,
        only label days with these indices. If an integer, label every n day.
    dropzero : bool
        If `True`, don't fill a color for days with a zero value. If `None`,
        zeros are dropped when more than half of the daily values are zero.
        If `False`, zeros are never dropped.
    textformat : string
        Format string applied with `str.format` to the value of each grid
        cell that has data. If `None`, no text is drawn.
    textfiller : string
        Fallback text for grid cells of days with no data.
    textcolor : color
        Color of the grid cell text.
    monthlabels : list
        Strings to use as labels for months, must be of length 12.
    monthlabeloffset : integer
        Day of the month whose week is used to position the month label.
    monthticks : list or int or bool
        If `True`, label all months. If `False`, don't label months. If a
        list, only label months with these indices. If an integer, label every
        n month.
    ax : matplotlib Axes
        Axes in which to draw the plot, otherwise use the currently-active
        Axes.
    kwargs : other keyword arguments
        All other keyword arguments are passed to matplotlib `ax.pcolormesh`.
        The `linewidth` and `edgecolors` entries are always overwritten with
        `linewidth` and `linecolor`.

    Returns
    -------
    ax : matplotlib Axes
        Axes object with the calendar heatmap.
    """
    if year is None:
        year = data.index.sort_values()[0].year
    # Callers may hand in a NumPy integer (e.g. from `np.unique`); the
    # date constructors below are happiest with a plain `int`.
    year = int(year)

    if how is None:
        # Assume already sampled by day.
        by_day = data
    else:
        # Sample by day.
        by_day = data.resample('D').agg(how)

    # Default to dropping zero values for a series with over 50% of rows
    # being zero, unless the caller explicitly disabled it.
    if not (dropzero is False) and (
            by_day[by_day == 0].count() > 0.5 * by_day.count()):
        dropzero = True

    if dropzero:
        by_day = by_day.replace({0: np.nan}).dropna()

    # Min and max per day.
    if vmin is None:
        vmin = by_day.min()
    if vmax is None:
        vmax = by_day.max()

    if ax is None:
        ax = plt.gca()

    if linecolor is None:
        # Unfortunately, linecolor cannot be transparent, as it is drawn on
        # top of the heatmap cells. Therefore it is only possible to mimic
        # transparent lines by setting them to the axes background color. This
        # of course won't work when the axes itself has a transparent
        # background so in that case we default to white which will usually be
        # the figure or canvas background color.
        linecolor = ax.get_facecolor()
        if ColorConverter().to_rgba(linecolor)[-1] == 0:
            linecolor = 'white'

    # Filter on year.
    try:
        # The year could be missing entirely, e.g. due to `dropzero`.
        by_day = by_day.loc[str(year)]
    except KeyError:
        # Nothing to select; the reindex below yields a year of NaN days.
        pass

    # Add missing days.
    by_day = by_day.reindex(
        pd.date_range(start=str(year), end=str(year + 1),
                      freq='D', tz=by_day.index.tzinfo)[:-1])

    # Create data frame we can pivot later.
    by_day = pd.DataFrame({'data': by_day,
                           'fill': 1,
                           'day': by_day.index.dayofweek,
                           'week': by_day.index.isocalendar().week})

    # There may be some days assigned to previous year's last week or
    # next year's first week. We create new week numbers for them so
    # the ordering stays intact and week/day pairs unique.
    by_day.loc[(by_day.index.month == 1) & (by_day.week > 50), 'week'] = 0
    by_day.loc[(by_day.index.month == 12) & (by_day.week < 10), 'week'] \
        = by_day.week.max() + 1

    # Pivot data on day and week and mask NaN days. Rows are reversed so
    # that the first day of the week ends up at the top of the plot.
    plot_data = by_day.pivot(
        index='day', columns='week', values='data').values[::-1]
    plot_data = np.ma.masked_where(np.isnan(plot_data), plot_data)

    # Do the same for all days of the year, not just those we have data for.
    fill_data = by_day.pivot(
        index='day', columns='week', values='fill').values[::-1]
    fill_data = np.ma.masked_where(np.isnan(fill_data), fill_data)

    # Draw heatmap for all days of the year with fill color.
    ax.pcolormesh(fill_data, vmin=0, vmax=1, cmap=ListedColormap([fillcolor]))

    # Draw heatmap.
    kwargs['linewidth'] = linewidth
    kwargs['edgecolors'] = linecolor
    ax.pcolormesh(plot_data, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs)

    # Limit heatmap to our data.
    ax.set(xlim=(0, plot_data.shape[1]), ylim=(0, plot_data.shape[0]))

    # Square cells.
    ax.set_aspect('equal')

    # Remove spines and ticks.
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_tick_params(which='both', length=0)

    # Get indices for monthlabels and daylabels.
    monthticks = _resolve_ticks(monthticks, monthlabels)
    dayticks = _resolve_ticks(dayticks, daylabels)

    # The lookup timestamps must carry the same timezone as the index,
    # otherwise the label lookup fails for tz-aware data.
    tz = by_day.index.tz
    ax.set_xlabel('')
    ax.set_xticks([
        int(by_day.loc[pd.Timestamp(year=year, month=i + 1,
                                    day=monthlabeloffset, tz=tz), 'week'])
        for i in monthticks])
    ax.set_xticklabels([monthlabels[i] for i in monthticks])

    ax.set_ylabel('')
    ax.yaxis.set_ticks_position('right')
    ax.set_yticks([6 - i + 0.5 for i in dayticks])
    ax.set_yticklabels([daylabels[i] for i in dayticks],
                       rotation='horizontal', va='center')

    # Text in mesh grid if format is specified.
    if textformat is not None:
        for y in range(plot_data.shape[0]):
            for x in range(plot_data.shape[1]):
                content = ''
                masked = plot_data[y, x]
                if masked is np.ma.masked:
                    # Only days that belong to this year get the filler;
                    # padding cells outside the year stay empty.
                    if fill_data[y, x] == 1:
                        content = textfiller
                else:
                    content = textformat.format(masked)
                ax.text(x + 0.5, y + 0.5, content, color=textcolor,
                        ha='center', va='center')

    # Month borders code credited to https://github.com/rougier/calendar-heatmap
    start = datetime.datetime(year, 1, 1).weekday()
    for month in range(1, 13):
        first = datetime.datetime(year, month, 1)
        last = first + relativedelta(months=1, days=-1)

        y0 = 7 - first.weekday()
        y1 = 7 - last.weekday()
        x0 = (int(first.strftime('%j')) + start - 1) // 7
        x1 = (int(last.strftime('%j')) + start - 1) // 7

        # Outline of the month: stepped on the left where the first week
        # starts mid-column and on the right where the last week ends.
        outline = [(x0, y0),
                   (x0 + 1, y0),
                   (x0 + 1, 7),
                   (x1 + 1, 7),
                   (x1 + 1, y1 - 1),
                   (x1, y1 - 1),
                   (x1, 0),
                   (x0, 0)]
        poly = Polygon(outline, edgecolor=edgecolor, facecolor='None',
                       linewidth=linewidth, zorder=20, clip_on=False)
        ax.add_artist(poly)

    return ax
def calplot(data, how='sum',
            yearlabels=True, yearascending=True,
            yearlabel_kws=None, subplot_kws=None, gridspec_kws=None,
            figsize=None, fig_kws=None, colorbar=None,
            suptitle=None, suptitle_kws=None,
            tight_layout=True, **kwargs):
    """
    Plot a timeseries as a calendar heatmap, one row per calendar year.

    Parameters
    ----------
    data : Series
        Data for the plot. Must be indexed by a DatetimeIndex.
    how : string
        Method for resampling data by day. If `None`, assume data is already
        sampled by day and don't resample. Otherwise, this is passed to
        ``data.resample('D').agg``.
    yearlabels : bool
        Whether or not to draw the year label for each subplot.
    yearascending : bool
        Sort the calendar in ascending or descending order.
    yearlabel_kws : dict
        Keyword arguments passed to the matplotlib `set_ylabel` call which is
        used to draw the year for each subplot.
    subplot_kws : dict
        Passed as `subplot_kw` to the matplotlib `subplots` call.
    gridspec_kws : dict
        Passed as `gridspec_kw` to the matplotlib `subplots` call.
    figsize : (float, float)
        Size of figure for the plot. If `None`, it is derived from the
        number of years and whether a colorbar is drawn.
    fig_kws : dict
        Extra keyword arguments passed directly to the matplotlib
        `subplots` call.
    colorbar : bool
        Whether to draw a colorbar. If `None`, one is drawn when the data
        has more than one unique value.
    suptitle : string
        Title for the plot.
    suptitle_kws : dict
        Keyword arguments passed to the matplotlib `suptitle` call.
    tight_layout : bool
        Whether to call `plt.tight_layout()` once all years are drawn.
    kwargs : other keyword arguments
        All other keyword arguments are passed to `yearplot`.

    Returns
    -------
    fig, axes : matplotlib Figure and Axes
        Tuple where `fig` is the matplotlib Figure object and `axes` is an
        array of matplotlib Axes objects with the calendar heatmaps, one per
        year.
    """
    yearlabel_kws = dict() if yearlabel_kws is None else yearlabel_kws
    subplot_kws = dict() if subplot_kws is None else subplot_kws
    gridspec_kws = dict() if gridspec_kws is None else gridspec_kws
    fig_kws = dict() if fig_kws is None else fig_kws
    suptitle_kws = dict() if suptitle_kws is None else suptitle_kws

    years = np.unique(data.index.year)
    if not yearascending:
        years = years[::-1]
    n_years = len(years)

    if colorbar is None:
        colorbar = data.nunique() > 1

    if figsize is None:
        # Leave extra horizontal room when a colorbar is drawn.
        figsize = (10 + (colorbar * 2.5), 1.7 * n_years)

    fig, axes = plt.subplots(nrows=n_years, ncols=1, squeeze=False,
                             figsize=figsize,
                             subplot_kw=subplot_kws,
                             gridspec_kw=gridspec_kws, **fig_kws)
    # `squeeze=False` always yields a 2-D array; keep its single column.
    axes = axes[:, 0]

    # We explicitly resample by day only once. This is an optimization.
    by_day = data if how is None else data.resample('D').agg(how)

    # Defaults for the year label; caller-supplied entries win.
    ylabel_kws = {
        'fontsize': 30,
        'color': 'gray',
        'fontname': 'Helvetica',
        'fontweight': 'bold',
        'ha': 'center',
        **yearlabel_kws,
    }

    widest = 0
    for year, ax in zip(years, axes):
        yearplot(by_day, year=year, how=None, ax=ax, **kwargs)
        widest = max(widest, ax.get_xlim()[1])
        if yearlabels:
            ax.set_ylabel(str(year), **ylabel_kws)

    # In a leap year it might happen that we have 54 weeks (e.g., 2012).
    # Here we make sure the width is consistent over all years.
    for ax in axes:
        ax.set_xlim(0, widest)

    title_kws = {}
    if tight_layout:
        plt.tight_layout()
        title_kws['y'] = 1

    if colorbar:
        if tight_layout:
            # Re-centre the title over the heatmaps, not the whole figure.
            title_kws['x'] = 0.425
            title_kws['y'] = 1.03

        # The second child of the first axes is the data heatmap drawn by
        # `yearplot` (the first one is the fill-color mesh).
        mesh = axes[0].get_children()[1]
        if n_years == 1:
            fig.colorbar(mesh, ax=axes.ravel().tolist(),
                         orientation='vertical')
        else:
            fig.subplots_adjust(right=0.8)
            cax = fig.add_axes([0.85, 0.025, 0.02, 0.95])
            fig.colorbar(mesh, cax=cax, orientation='vertical')

    title_kws.update(suptitle_kws)
    plt.suptitle(suptitle, **title_kws)

    return fig, axes