Source code for upsetplot.plotting

import typing
import warnings

import matplotlib
import numpy as np
import pandas as pd
from matplotlib import colors, patches
from matplotlib import pyplot as plt

from . import util
from .reformat import _get_subset_mask, query

# prevents ImportError on matplotlib versions >3.5.2
try:
    from matplotlib.tight_layout import get_renderer

    RENDERER_IMPORTED = True
except ImportError:
    RENDERER_IMPORTED = False


def _process_data(
    df,
    *,
    sort_by,
    sort_categories_by,
    subset_size,
    sum_over,
    min_subset_size=None,
    max_subset_size=None,
    max_subset_rank=None,
    min_degree=None,
    max_degree=None,
    reverse=False,
    include_empty_subsets=False,
):
    results = query(
        df,
        sort_by=sort_by,
        sort_categories_by=sort_categories_by,
        subset_size=subset_size,
        sum_over=sum_over,
        min_subset_size=min_subset_size,
        max_subset_size=max_subset_size,
        max_subset_rank=max_subset_rank,
        min_degree=min_degree,
        max_degree=max_degree,
        include_empty_subsets=include_empty_subsets,
    )

    df = results.data
    agg = results.subset_sizes

    # add '_bin' to df indicating index in agg
    # XXX: ugly!
    def _pack_binary(X):
        X = pd.DataFrame(X)
        # use objects if arbitrary precision integers are needed
        dtype = np.object_ if X.shape[1] > 62 else np.uint64
        out = pd.Series(0, index=X.index, dtype=dtype)
        for _, col in X.items():
            out *= 2
            out += col
        return out

    df_packed = _pack_binary(df.index.to_frame())
    data_packed = _pack_binary(agg.index.to_frame())
    df["_bin"] = pd.Series(df_packed).map(
        pd.Series(
            np.arange(len(data_packed))[:: -1 if reverse else 1], index=data_packed
        )
    )
    if reverse:
        agg = agg[::-1]

    return results.total, df, agg, results.category_totals


def _multiply_alpha(c, mult):
    r, g, b, a = colors.to_rgba(c)
    a *= mult
    return colors.to_hex((r, g, b, a), keep_alpha=True)


class _Transposed:
    """Wrap an object in order to transpose some plotting operations

    Attributes of obj will be mapped.
    Keyword arguments when calling obj will be mapped.

    The mapping is not recursive: callable attributes need to be _Transposed
    again.
    """

    def __init__(self, obj):
        self.__obj = obj

    def __getattr__(self, key):
        return getattr(self.__obj, self._NAME_TRANSPOSE.get(key, key))

    def __call__(self, *args, **kwargs):
        return self.__obj(
            *args, **{self._NAME_TRANSPOSE.get(k, k): v for k, v in kwargs.items()}
        )

    _NAME_TRANSPOSE = {
        "align_xlabels": "align_ylabels",
        "align_ylabels": "align_xlabels",
        "bar": "barh",
        "barh": "bar",
        "bottom": "left",
        "get_figheight": "get_figwidth",
        "get_figwidth": "get_figheight",
        "get_xlim": "get_ylim",
        "get_ylim": "get_xlim",
        "height": "width",
        "hlines": "vlines",
        "hspace": "wspace",
        "left": "bottom",
        "right": "top",
        "set_autoscalex_on": "set_autoscaley_on",
        "set_autoscaley_on": "set_autoscalex_on",
        "set_figheight": "set_figwidth",
        "set_figwidth": "set_figheight",
        "set_xlabel": "set_ylabel",
        "set_xlim": "set_ylim",
        "set_ylabel": "set_xlabel",
        "set_ylim": "set_xlim",
        "sharex": "sharey",
        "sharey": "sharex",
        "top": "right",
        "vlines": "hlines",
        "width": "height",
        "wspace": "hspace",
        "xaxis": "yaxis",
        "yaxis": "xaxis",
    }


def _transpose(obj):
    if isinstance(obj, str):
        return _Transposed._NAME_TRANSPOSE.get(obj, obj)
    return _Transposed(obj)


def _identity(obj):
    return obj


[docs]class UpSet:
    """Manage the data and drawing for a basic UpSet plot

    Primary public method is :meth:`plot`.

    Parameters
    ----------
    data : pandas.Series or pandas.DataFrame
        Elements associated with categories (a DataFrame), or the size of each
        subset of categories (a Series).
        Should have MultiIndex where each level is binary,
        corresponding to category membership.
        If a DataFrame, `sum_over` must be a string or False.
    orientation : {'horizontal' (default), 'vertical'}
        If horizontal, intersections are listed from left to right.
    sort_by : {'cardinality', 'degree', '-cardinality', '-degree',
               'input', '-input'}
        If 'cardinality', subset are listed from largest to smallest.
        If 'degree', they are listed in order of the number of categories
        intersected. If 'input', the order they appear in the data input is
        used.
        Prefix with '-' to reverse the ordering.

        Note this affects ``subset_sizes`` but not ``data``.
    sort_categories_by : {'cardinality', '-cardinality', 'input', '-input'}
        Whether to sort the categories by total cardinality, or leave them
        in the input data's provided order (order of index levels).
        Prefix with '-' to reverse the ordering.
    subset_size : {'auto', 'count', 'sum'}
        Configures how to calculate the size of a subset. Choices are:

        'auto' (default)
            If `data` is a DataFrame, count the number of rows in each group,
            unless `sum_over` is specified.
            If `data` is a Series with at most one row for each group, use
            the value of the Series. If `data` is a Series with more than one
            row per group, raise a ValueError.
        'count'
            Count the number of rows in each group.
        'sum'
            Sum the value of the `data` Series, or the DataFrame field
            specified by `sum_over`.
    sum_over : str or None
        If `subset_size='sum'` or `'auto'`, then the intersection size is the
        sum of the specified field in the `data` DataFrame. If a Series, only
        None is supported and its value is summed.
    min_subset_size : int or "number%", optional
        Minimum size of a subset to be shown in the plot. All subsets with
        a size smaller than this threshold will be omitted from plotting.
        This may be specified as a percentage
        using a string, like "50%".
        Size may be a sum of values, see `subset_size`.

        .. versionadded:: 0.5

        .. versionchanged:: 0.9
            Support percentages
    max_subset_size : int or "number%", optional
        Maximum size of a subset to be shown in the plot. All subsets with
        a size greater than this threshold will be omitted from plotting.
        This may be specified as a percentage
        using a string, like "50%".

        .. versionadded:: 0.5

        .. versionchanged:: 0.9
            Support percentages
    max_subset_rank : int, optional
        Limit to the top N ranked subsets in descending order of size.
        All tied subsets are included.

        .. versionadded:: 0.9
    min_degree : int, optional
        Minimum degree of a subset to be shown in the plot.

        .. versionadded:: 0.5
    max_degree : int, optional
        Maximum degree of a subset to be shown in the plot.

        .. versionadded:: 0.5
    facecolor : 'auto' or matplotlib color or float
        Color for bar charts and active dots. Defaults to black if
        axes.facecolor is a light color, otherwise white.

        .. versionchanged:: 0.6
            Before 0.6, the default was 'black'
    other_dots_color : matplotlib color or float
        Color for shading of inactive dots, or opacity (between 0 and 1)
        applied to facecolor.

        .. versionadded:: 0.6
    shading_color : matplotlib color or float
        Color for shading of odd rows in matrix and totals, or opacity (between
        0 and 1) applied to facecolor.

        .. versionadded:: 0.6
    with_lines : bool
        Whether to show lines joining dots in the matrix, to mark multiple
        categories being intersected.
    element_size : float or None
        Side length in pt. If None, size is estimated to fit figure
    intersection_plot_elements : int
        The intersections plot should be large enough to fit this many matrix
        elements. Set to 0 to disable intersection size bars.

        .. versionchanged:: 0.4
            Setting to 0 is handled.
    totals_plot_elements : int
        The totals plot should be large enough to fit this many matrix
        elements. Set to 0 to disable the totals plot.

        .. versionchanged:: 0.9
            Setting to 0 is handled.
    show_counts : bool or str, default=False
        Whether to label the intersection size bars with the cardinality
        of the intersection. When a string, this formats the number.
        For example, '{:d}' is equivalent to True.
        Note that, for legacy reasons, if the string does not contain '{',
        it will be interpreted as a C-style format string, such as '%d'.
    show_percentages : bool or str, default=False
        Whether to label the intersection size bars with the percentage
        of the intersection relative to the total dataset.
        When a string, this formats the number representing a fraction of
        samples.
        For example, '{:.1%}' is the default, formatting .123 as 12.3%.
        This may be applied with or without show_counts.

        .. versionadded:: 0.4
    include_empty_subsets : bool (default=False)
        If True, all possible category combinations will be shown as subsets,
        even when some are not present in data.
    """

    _default_figsize = (10, 6)
    DPI = 100  # standard matplotlib value

    def __init__(
        self,
        data,
        orientation="horizontal",
        sort_by="degree",
        sort_categories_by="cardinality",
        subset_size="auto",
        sum_over=None,
        min_subset_size=None,
        max_subset_size=None,
        max_subset_rank=None,
        min_degree=None,
        max_degree=None,
        facecolor="auto",
        other_dots_color=0.18,
        shading_color=0.05,
        with_lines=True,
        element_size=32,
        intersection_plot_elements=6,
        totals_plot_elements=2,
        show_counts="",
        show_percentages=False,
        include_empty_subsets=False,
    ):
        self._horizontal = orientation == "horizontal"
        self._reorient = _identity if self._horizontal else _transpose
        if facecolor == "auto":
            bgcolor = matplotlib.rcParams.get("axes.facecolor", "white")
            r, g, b, a = colors.to_rgba(bgcolor)
            lightness = colors.rgb_to_hsv((r, g, b))[-1] * a
            facecolor = "black" if lightness >= 0.5 else "white"
        self._facecolor = facecolor
        self._shading_color = (
            _multiply_alpha(facecolor, shading_color)
            if isinstance(shading_color, float)
            else shading_color
        )
        self._other_dots_color = (
            _multiply_alpha(facecolor, other_dots_color)
            if isinstance(other_dots_color, float)
            else other_dots_color
        )
        self._with_lines = with_lines
        self._element_size = element_size
        self._totals_plot_elements = totals_plot_elements
        self._subset_plots = [
            {
                "type": "default",
                "id": "intersections",
                "elements": intersection_plot_elements,
            }
        ]
        if not intersection_plot_elements:
            self._subset_plots.pop()
        self._show_counts = show_counts
        self._show_percentages = show_percentages

        (self.total, self._df, self.intersections, self.totals) = _process_data(
            data,
            sort_by=sort_by,
            sort_categories_by=sort_categories_by,
            subset_size=subset_size,
            sum_over=sum_over,
            min_subset_size=min_subset_size,
            max_subset_size=max_subset_size,
            max_subset_rank=max_subset_rank,
            min_degree=min_degree,
            max_degree=max_degree,
            reverse=not self._horizontal,
            include_empty_subsets=include_empty_subsets,
        )
        self.category_styles = {}
        self.subset_styles = [
            {"facecolor": facecolor} for i in range(len(self.intersections))
        ]
        self.subset_legend = []  # pairs of (style, label)

    def _swapaxes(self, x, y):
        if self._horizontal:
            return x, y
        return y, x

[docs]    def style_subsets(
        self,
        present=None,
        absent=None,
        min_subset_size=None,
        max_subset_size=None,
        max_subset_rank=None,
        min_degree=None,
        max_degree=None,
        facecolor=None,
        edgecolor=None,
        hatch=None,
        linewidth=None,
        linestyle=None,
        label=None,
    ):
        """Updates the style of selected subsets' bars and matrix dots

        Parameters are either used to select subsets, or to style them with
        attributes of :class:`matplotlib.patches.Patch`, apart from label,
        which adds a legend entry.

        Parameters
        ----------
        present : str or list of str, optional
            Category or categories that must be present in subsets for styling.
        absent : str or list of str, optional
            Category or categories that must not be present in subsets for
            styling.
        min_subset_size : int or "number%", optional
            Minimum size of a subset to be styled.
            This may be specified as a percentage using a string, like "50%".

            .. versionchanged:: 0.9
                Support percentages
        max_subset_size : int or "number%", optional
            Maximum size of a subset to be styled.
            This may be specified as a percentage using a string, like "50%".

            .. versionchanged:: 0.9
                Support percentages
        max_subset_rank : int, optional
            Limit to the top N ranked subsets in descending order of size.
            All tied subsets are included.

            .. versionadded:: 0.9
        min_degree : int, optional
            Minimum degree of a subset to be styled.
        max_degree : int, optional
            Maximum degree of a subset to be styled.

        facecolor : str or matplotlib color, optional
            Override the default UpSet facecolor for selected subsets.
        edgecolor : str or matplotlib color, optional
            Set the edgecolor for bars, dots, and the line between dots.
        hatch : str, optional
            Set the hatch. This will apply to intersection size bars, but not
            to matrix dots.
        linewidth : int, optional
            Line width in points for edges.
        linestyle : str, optional
            Line style for edges.

        label : str, optional
            If provided, a legend will be added
        """
        style = {
            "facecolor": facecolor,
            "edgecolor": edgecolor,
            "hatch": hatch,
            "linewidth": linewidth,
            "linestyle": linestyle,
        }
        style = {k: v for k, v in style.items() if v is not None}
        mask = _get_subset_mask(
            self.intersections,
            present=present,
            absent=absent,
            min_subset_size=min_subset_size,
            max_subset_size=max_subset_size,
            max_subset_rank=max_subset_rank,
            min_degree=min_degree,
            max_degree=max_degree,
        )
        for idx in np.flatnonzero(mask):
            self.subset_styles[idx].update(style)

        if label is not None:
            if "facecolor" not in style:
                style["facecolor"] = self._facecolor
            for i, (other_style, other_label) in enumerate(self.subset_legend):
                if other_style == style:
                    if other_label != label:
                        self.subset_legend[i] = (style, other_label + "; " + label)
                    break
            else:
                self.subset_legend.append((style, label))

    def _plot_bars(self, ax, data, title, colors=None, use_labels=False):
        ax = self._reorient(ax)
        ax.set_autoscalex_on(False)
        data_df = pd.DataFrame(data)
        if self._horizontal:
            data_df = data_df.loc[:, ::-1]  # reverse: top row is top of stack

        # TODO: colors should be broadcastable to data_df shape
        if callable(colors):
            colors = colors(range(data_df.shape[1]))
        elif isinstance(colors, (str, type(None))):
            colors = [colors] * len(data_df)

        if self._horizontal:
            colors = reversed(colors)

        x = np.arange(len(data_df))
        cum_y = None
        all_rects = []
        for (name, y), color in zip(data_df.items(), colors):
            rects = ax.bar(
                x,
                y,
                0.5,
                cum_y,
                color=color,
                zorder=10,
                label=name if use_labels else None,
                align="center",
            )
            cum_y = y if cum_y is None else cum_y + y
            all_rects.extend(rects)

        self._label_sizes(ax, rects, "top" if self._horizontal else "right")

        ax.xaxis.set_visible(False)
        for x in ["top", "bottom", "right"]:
            ax.spines[self._reorient(x)].set_visible(False)

        tick_axis = ax.yaxis
        tick_axis.grid(True)
        ax.set_ylabel(title)
        return all_rects

    def _plot_stacked_bars(self, ax, by, sum_over, colors, title):
        df = self._df.set_index("_bin").set_index(by, append=True, drop=False)
        gb = df.groupby(level=list(range(df.index.nlevels)), sort=True)
        if sum_over is None and "_value" in df.columns:
            data = gb["_value"].sum()
        elif sum_over is None:
            data = gb.size()
        else:
            data = gb[sum_over].sum()
        data = data.unstack(by).fillna(0)
        if isinstance(colors, str):
            colors = matplotlib.cm.get_cmap(colors)
        elif isinstance(colors, typing.Mapping):
            colors = data.columns.map(colors).values
            if pd.isna(colors).any():
                raise KeyError(
                    "Some labels mapped by colors: %r"
                    % data.columns[pd.isna(colors)].tolist()
                )

        self._plot_bars(ax, data=data, colors=colors, title=title, use_labels=True)

        handles, labels = ax.get_legend_handles_labels()
        if self._horizontal:
            # Make legend order match visual stack order
            ax.legend(reversed(handles), reversed(labels))
        else:
            ax.legend()

[docs]    def add_stacked_bars(self, by, sum_over=None, colors=None, elements=3, title=None):
        """Add a stacked bar chart over subsets when :func:`plot` is called.

        Used to plot categorical variable distributions within each subset.

        .. versionadded:: 0.6

        Parameters
        ----------
        by : str
            Column name within the dataframe for color coding the stacked bars,
            containing discrete or categorical values.
        sum_over : str, optional
            Ordinarily the bars will chart the size of each group. sum_over
            may specify a column which will be summed to determine the size
            of each bar.
        colors : Mapping, list-like, str or callable, optional
            The facecolors to use for bars corresponding to each discrete
            label, specified as one of:

            Mapping
                Maps from label to matplotlib-compatible color specification.
            list-like
                A list of matplotlib colors to apply to labels in order.
            str
                The name of a matplotlib colormap name.
            callable
                When called with the number of labels, this should return a
                list-like of that many colors.  Matplotlib colormaps satisfy
                this callable API.
            None
                Uses the matplotlib default colormap.
        elements : int, default=3
            Size of the axes counted in number of matrix elements.
        title : str, optional
            The axis title labelling bar length.

        Returns
        -------
        None
        """
        # TODO: allow sort_by = {"lexical", "sum_squares", "rev_sum_squares",
        #                        list of labels}
        self._subset_plots.append(
            {
                "type": "stacked_bars",
                "by": by,
                "sum_over": sum_over,
                "colors": colors,
                "title": title,
                "id": "extra%d" % len(self._subset_plots),
                "elements": elements,
            }
        )

[docs]    def add_catplot(self, kind, value=None, elements=3, **kw):
        """Add a seaborn catplot over subsets when :func:`plot` is called.

        Parameters
        ----------
        kind : str
            One of {"point", "bar", "strip", "swarm", "box", "violin", "boxen"}
        value : str, optional
            Column name for the value to plot (i.e. y if
            orientation='horizontal'), required if `data` is a DataFrame.
        elements : int, default=3
            Size of the axes counted in number of matrix elements.
        **kw : dict
            Additional keywords to pass to :func:`seaborn.catplot`.

            Our implementation automatically determines 'ax', 'data', 'x', 'y'
            and 'orient', so these are prohibited keys in `kw`.

        Returns
        -------
        None
        """
        assert not set(kw.keys()) & {"ax", "data", "x", "y", "orient"}
        if value is None:
            if "_value" not in self._df.columns:
                raise ValueError(
                    "value cannot be set if data is a Series. " "Got %r" % value
                )
        else:
            if value not in self._df.columns:
                raise ValueError("value %r is not a column in data" % value)
        self._subset_plots.append(
            {
                "type": "catplot",
                "value": value,
                "kind": kind,
                "id": "extra%d" % len(self._subset_plots),
                "elements": elements,
                "kw": kw,
            }
        )

    def _check_value(self, value):
        if value is None and "_value" in self._df.columns:
            value = "_value"
        elif value is None:
            raise ValueError("value can only be None when data is a Series")
        return value

    def _plot_catplot(self, ax, value, kind, kw):
        df = self._df
        value = self._check_value(value)
        kw = kw.copy()
        if self._horizontal:
            kw["orient"] = "v"
            kw["x"] = "_bin"
            kw["y"] = value
        else:
            kw["orient"] = "h"
            kw["x"] = value
            kw["y"] = "_bin"
        import seaborn

        kw["ax"] = ax
        getattr(seaborn, kind + "plot")(data=df, **kw)

        ax = self._reorient(ax)
        if value == "_value":
            ax.set_ylabel("")

        ax.xaxis.set_visible(False)
        for x in ["top", "bottom", "right"]:
            ax.spines[self._reorient(x)].set_visible(False)

        tick_axis = ax.yaxis
        tick_axis.grid(True)

[docs]    def make_grid(self, fig=None):
        """Get a SubplotSpec for each Axes, accounting for label text width"""
        n_cats = len(self.totals)
        n_inters = len(self.intersections)

        if fig is None:
            fig = plt.gcf()

        # Determine text size to determine figure size / spacing
        text_kw = {"size": matplotlib.rcParams["xtick.labelsize"]}
        # adding "x" ensures a margin
        t = fig.text(
            0,
            0,
            "\n".join(str(label) + "x" for label in self.totals.index.values),
            **text_kw,
        )
        window_extent_args = {}
        if RENDERER_IMPORTED:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", DeprecationWarning)
                window_extent_args["renderer"] = get_renderer(fig)
        textw = t.get_window_extent(**window_extent_args).width
        t.remove()

        window_extent_args = {}
        if RENDERER_IMPORTED:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", DeprecationWarning)
                window_extent_args["renderer"] = get_renderer(fig)
        figw = self._reorient(fig.get_window_extent(**window_extent_args)).width

        sizes = np.asarray([p["elements"] for p in self._subset_plots])
        fig = self._reorient(fig)

        non_text_nelems = len(self.intersections) + self._totals_plot_elements
        if self._element_size is None:
            colw = (figw - textw) / non_text_nelems
        else:
            render_ratio = figw / fig.get_figwidth()
            colw = self._element_size / 72 * render_ratio
            figw = colw * (non_text_nelems + np.ceil(textw / colw) + 1)
            fig.set_figwidth(figw / render_ratio)
            fig.set_figheight((colw * (n_cats + sizes.sum())) / render_ratio)

        text_nelems = int(np.ceil(figw / colw - non_text_nelems))

        GS = self._reorient(matplotlib.gridspec.GridSpec)
        gridspec = GS(
            *self._swapaxes(
                n_cats + (sizes.sum() or 0),
                n_inters + text_nelems + self._totals_plot_elements,
            ),
            hspace=1,
        )
        if self._horizontal:
            out = {
                "matrix": gridspec[-n_cats:, -n_inters:],
                "shading": gridspec[-n_cats:, :],
                "totals": None
                if self._totals_plot_elements == 0
                else gridspec[-n_cats:, : self._totals_plot_elements],
                "gs": gridspec,
            }
            cumsizes = np.cumsum(sizes[::-1])
            for start, stop, plot in zip(
                np.hstack([[0], cumsizes]), cumsizes, self._subset_plots[::-1]
            ):
                out[plot["id"]] = gridspec[start:stop, -n_inters:]
        else:
            out = {
                "matrix": gridspec[-n_inters:, :n_cats],
                "shading": gridspec[:, :n_cats],
                "totals": None
                if self._totals_plot_elements == 0
                else gridspec[: self._totals_plot_elements, :n_cats],
                "gs": gridspec,
            }
            cumsizes = np.cumsum(sizes)
            for start, stop, plot in zip(
                np.hstack([[0], cumsizes]), cumsizes, self._subset_plots
            ):
                out[plot["id"]] = gridspec[-n_inters:, start + n_cats : stop + n_cats]
        return out

[docs]    def plot_matrix(self, ax):
        """Plot the matrix of intersection indicators onto ax"""
        ax = self._reorient(ax)
        data = self.intersections
        n_cats = data.index.nlevels

        inclusion = data.index.to_frame().values

        # Prepare styling
        styles = [
            [
                self.subset_styles[i]
                if inclusion[i, j]
                else {"facecolor": self._other_dots_color, "linewidth": 0}
                for j in range(n_cats)
            ]
            for i in range(len(data))
        ]
        styles = sum(styles, [])  # flatten nested list
        style_columns = {
            "facecolor": "facecolors",
            "edgecolor": "edgecolors",
            "linewidth": "linewidths",
            "linestyle": "linestyles",
            "hatch": "hatch",
        }
        styles = (
            pd.DataFrame(styles)
            .reindex(columns=style_columns.keys())
            .astype(
                {
                    "facecolor": "O",
                    "edgecolor": "O",
                    "linewidth": float,
                    "linestyle": "O",
                    "hatch": "O",
                }
            )
        )
        styles["linewidth"].fillna(1, inplace=True)
        styles["facecolor"].fillna(self._facecolor, inplace=True)
        styles["edgecolor"].fillna(styles["facecolor"], inplace=True)
        styles["linestyle"].fillna("solid", inplace=True)
        del styles["hatch"]  # not supported in matrix (currently)

        x = np.repeat(np.arange(len(data)), n_cats)
        y = np.tile(np.arange(n_cats), len(data))

        # Plot dots
        if self._element_size is not None:  # noqa
            s = (self._element_size * 0.35) ** 2
        else:
            # TODO: make s relative to colw
            s = 200
        ax.scatter(
            *self._swapaxes(x, y),
            s=s,
            zorder=10,
            **styles.rename(columns=style_columns),
        )

        # Plot lines
        if self._with_lines:
            idx = np.flatnonzero(inclusion)
            line_data = (
                pd.Series(y[idx], index=x[idx])
                .groupby(level=0)
                .aggregate(["min", "max"])
            )
            colors = pd.Series(
                [
                    style.get("edgecolor", style.get("facecolor", self._facecolor))
                    for style in self.subset_styles
                ],
                name="color",
            )
            line_data = line_data.join(colors)
            ax.vlines(
                line_data.index.values,
                line_data["min"],
                line_data["max"],
                lw=2,
                colors=line_data["color"],
                zorder=5,
            )

        # Ticks and axes
        tick_axis = ax.yaxis
        tick_axis.set_ticks(np.arange(n_cats))
        tick_axis.set_ticklabels(
            data.index.names, rotation=0 if self._horizontal else -90
        )
        ax.xaxis.set_visible(False)
        ax.tick_params(axis="both", which="both", length=0)
        if not self._horizontal:
            ax.yaxis.set_ticks_position("top")
        ax.set_frame_on(False)
        ax.set_xlim(-0.5, x[-1] + 0.5, auto=False)
        ax.grid(False)

[docs]    def plot_intersections(self, ax):
        """Plot bars indicating intersection size"""
        rects = self._plot_bars(
            ax, self.intersections, title="Intersection size", colors=self._facecolor
        )
        for style, rect in zip(self.subset_styles, rects):
            style = style.copy()
            style.setdefault("edgecolor", style.get("facecolor", self._facecolor))
            for attr, val in style.items():
                getattr(rect, "set_" + attr)(val)

        if self.subset_legend:
            styles, labels = zip(*self.subset_legend)
            styles = [patches.Patch(**patch_style) for patch_style in styles]
            ax.legend(styles, labels)

    def _label_sizes(self, ax, rects, where):
        if not self._show_counts and not self._show_percentages:
            return
        if self._show_counts is True:
            count_fmt = "{:.0f}"
        else:
            count_fmt = self._show_counts
            if "{" not in count_fmt:
                count_fmt = util.to_new_pos_format(count_fmt)

        pct_fmt = "{:.1%}" if self._show_percentages is True else self._show_percentages

        if count_fmt and pct_fmt:
            if where == "top":
                fmt = f"{count_fmt}\n({pct_fmt})"
            else:
                fmt = f"{count_fmt} ({pct_fmt})"

            def make_args(val):
                return val, val / self.total
        elif count_fmt:
            fmt = count_fmt

            def make_args(val):
                return (val,)
        else:
            fmt = pct_fmt

            def make_args(val):
                return (val / self.total,)

        if where == "right":
            margin = 0.01 * abs(np.diff(ax.get_xlim()))
            for rect in rects:
                width = rect.get_width() + rect.get_x()
                ax.text(
                    width + margin,
                    rect.get_y() + rect.get_height() * 0.5,
                    fmt.format(*make_args(width)),
                    ha="left",
                    va="center",
                )
        elif where == "left":
            margin = 0.01 * abs(np.diff(ax.get_xlim()))
            for rect in rects:
                width = rect.get_width() + rect.get_x()
                ax.text(
                    width + margin,
                    rect.get_y() + rect.get_height() * 0.5,
                    fmt.format(*make_args(width)),
                    ha="right",
                    va="center",
                )
        elif where == "top":
            margin = 0.01 * abs(np.diff(ax.get_ylim()))
            for rect in rects:
                height = rect.get_height() + rect.get_y()
                ax.text(
                    rect.get_x() + rect.get_width() * 0.5,
                    height + margin,
                    fmt.format(*make_args(height)),
                    ha="center",
                    va="bottom",
                )
        else:
            raise NotImplementedError("unhandled where: %r" % where)

[docs]    def plot_totals(self, ax):
        """Plot bars indicating total set size"""
        orig_ax = ax
        ax = self._reorient(ax)
        rects = ax.barh(
            np.arange(len(self.totals.index.values)),
            self.totals,
            0.5,
            color=self._facecolor,
            align="center",
        )
        self._label_sizes(ax, rects, "left" if self._horizontal else "top")

        for category, rect in zip(self.totals.index.values, rects):
            style = {
                k[len("bar_") :]: v
                for k, v in self.category_styles.get(category, {}).items()
                if k.startswith("bar_")
            }
            style.setdefault("edgecolor", style.get("facecolor", self._facecolor))
            for attr, val in style.items():
                getattr(rect, "set_" + attr)(val)

        max_total = self.totals.max()
        if self._horizontal:
            orig_ax.set_xlim(max_total, 0)
        for x in ["top", "left", "right"]:
            ax.spines[self._reorient(x)].set_visible(False)
        ax.yaxis.set_visible(False)
        ax.xaxis.grid(True)
        ax.yaxis.grid(False)
        ax.patch.set_visible(False)

    def plot_shading(self, ax):
        # shade all rows, set every second row to zero visibility
        for i, category in enumerate(self.totals.index):
            default_shading = (
                self._shading_color if i % 2 == 0 else (0.0, 0.0, 0.0, 0.0)
            )
            shading_style = {
                k[len("shading_") :]: v
                for k, v in self.category_styles.get(category, {}).items()
                if k.startswith("shading_")
            }

            lw = shading_style.get(
                "linewidth", 1 if shading_style.get("edgecolor") else 0
            )
            lw_padding = lw / (self._default_figsize[0] * self.DPI)
            start_x = lw_padding
            end_x = 1 - lw_padding * 3

            rect = plt.Rectangle(
                self._swapaxes(start_x, i - 0.4),
                *self._swapaxes(end_x, 0.8),
                facecolor=shading_style.get("facecolor", default_shading),
                edgecolor=shading_style.get("edgecolor", None),
                ls=shading_style.get("linestyle", "-"),
                lw=lw,
                zorder=0,
            )

            ax.add_patch(rect)
        ax.set_frame_on(False)
        ax.tick_params(
            axis="both",
            which="both",
            left=False,
            right=False,
            bottom=False,
            top=False,
            labelbottom=False,
            labelleft=False,
        )
        ax.grid(False)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xticklabels([])
        ax.set_yticklabels([])

[docs]    def style_categories(
        self,
        categories,
        *,
        bar_facecolor=None,
        bar_hatch=None,
        bar_edgecolor=None,
        bar_linewidth=None,
        bar_linestyle=None,
        shading_facecolor=None,
        shading_edgecolor=None,
        shading_linewidth=None,
        shading_linestyle=None,
    ):
        """Updates the style of the categories.

        Select a category by name, and style either its total bar or its shading.

        .. versionadded:: 0.9

        Parameters
        ----------
        categories : str or list[str]
            Category names where the changed style applies.
        bar_facecolor : str or RGBA matplotlib color tuple, optional.
            Override the default facecolor in the totals plot.
        bar_hatch : str, optional
            Set a hatch for the totals plot.
        bar_edgecolor : str or matplotlib color, optional
            Set the edgecolor for total bars.
        bar_linewidth : int, optional
            Line width in points for total bar edges.
        bar_linestyle : str, optional
            Line style for edges.
        shading_facecolor : str or RGBA matplotlib color tuple, optional.
            Override the default alternating shading for specified categories.
        shading_edgecolor : str or matplotlib color, optional
            Set the edgecolor for bars, dots, and the line between dots.
        shading_linewidth : int, optional
            Line width in points for edges.
        shading_linestyle : str, optional
            Line style for edges.
        """
        if isinstance(categories, str):
            categories = [categories]
        style = {
            "bar_facecolor": bar_facecolor,
            "bar_hatch": bar_hatch,
            "bar_edgecolor": bar_edgecolor,
            "bar_linewidth": bar_linewidth,
            "bar_linestyle": bar_linestyle,
            "shading_facecolor": shading_facecolor,
            "shading_edgecolor": shading_edgecolor,
            "shading_linewidth": shading_linewidth,
            "shading_linestyle": shading_linestyle,
        }
        style = {k: v for k, v in style.items() if v is not None}
        for category_name in categories:
            self.category_styles.setdefault(category_name, {}).update(style)

[docs]    def plot(self, fig=None):
        """Draw all parts of the plot onto fig or a new figure

        Parameters
        ----------
        fig : matplotlib.figure.Figure, optional
            Defaults to a new figure.

        Returns
        -------
        subplots : dict of matplotlib.axes.Axes
            Keys are 'matrix', 'intersections', 'totals', 'shading'
        """
        if fig is None:
            fig = plt.figure(figsize=self._default_figsize)
        specs = self.make_grid(fig)
        shading_ax = fig.add_subplot(specs["shading"])
        self.plot_shading(shading_ax)
        matrix_ax = self._reorient(fig.add_subplot)(specs["matrix"], sharey=shading_ax)
        self.plot_matrix(matrix_ax)
        if specs["totals"] is None:
            totals_ax = None
        else:
            totals_ax = self._reorient(fig.add_subplot)(
                specs["totals"], sharey=matrix_ax
            )
            self.plot_totals(totals_ax)
        out = {"matrix": matrix_ax, "shading": shading_ax, "totals": totals_ax}

        for plot in self._subset_plots:
            ax = self._reorient(fig.add_subplot)(specs[plot["id"]], sharex=matrix_ax)
            if plot["type"] == "default":
                self.plot_intersections(ax)
            elif plot["type"] in self.PLOT_TYPES:
                kw = plot.copy()
                del kw["type"]
                del kw["elements"]
                del kw["id"]
                self.PLOT_TYPES[plot["type"]](self, ax, **kw)
            else:
                raise ValueError("Unknown subset plot type: %r" % plot["type"])
            out[plot["id"]] = ax

        self._reorient(fig).align_ylabels(
            [out[plot["id"]] for plot in self._subset_plots]
        )
        return out

    PLOT_TYPES = {
        "catplot": _plot_catplot,
        "stacked_bars": _plot_stacked_bars,
    }

    def _repr_html_(self):
        fig = plt.figure(figsize=self._default_figsize)
        self.plot(fig=fig)
        return fig._repr_html_()


[docs]def plot(data, fig=None, **kwargs):
    """Make an UpSet plot of data on fig

    Parameters
    ----------
    data : pandas.Series or pandas.DataFrame
        Values for each set to plot.
        Should have multi-index where each level is binary,
        corresponding to set membership.
        If a DataFrame, `sum_over` must be a string or False.
    fig : matplotlib.figure.Figure, optional
        Defaults to a new figure.
    kwargs
        Other arguments for :class:`UpSet`

    Returns
    -------
    subplots : dict of matplotlib.axes.Axes
        Keys are 'matrix', 'intersections', 'totals', 'shading'
    """
    return UpSet(data, **kwargs).plot(fig)