# Source code for matplotlib.category
"""
Plotting of string "category" data: ``plot(['d', 'f', 'a'], [1, 2, 3])`` will
plot three points with x-axis values of 'd', 'f', 'a'.
See :doc:`/gallery/lines_bars_and_markers/categorical_variables` for an
example.
The module uses Matplotlib's `matplotlib.units` mechanism to convert from
strings to integers and provides a tick locator, a tick formatter, and the
`.UnitData` class that creates and stores the string-to-integer mapping.
"""
from collections import OrderedDict
import dateutil.parser
import itertools
import logging
import numpy as np
from matplotlib import cbook, ticker, units
# Module-level logger, named after this module; UnitData.update uses it to
# emit an informational message about number-like category strings.
_log = logging.getLogger(__name__)
class StrCategoryConverter(units.ConversionInterface):
    """Unit converter that turns category strings into float positions."""

    @staticmethod
    def convert(value, unit, axis):
        """
        Translate the strings in *value* into floats through the lookup
        table held by *unit*.

        Parameters
        ----------
        value : str or iterable
            A single value or a collection of values to translate.
        unit : `.UnitData`
            Holder of the string-to-integer mapping.
        axis : `~matplotlib.axis.Axis`
            Axis the converted data is drawn on.

            .. note:: *axis* is unused.

        Returns
        -------
        ndarray[float]
            Always at least one-dimensional; a scalar *value* yields a
            length-1 array.

        Raises
        ------
        ValueError
            If *unit* is None.
        """
        if unit is None:
            raise ValueError(
                'Missing category information for StrCategoryConverter; '
                'this might be caused by unintendedly mixing categorical and '
                'numeric data')

        # An object array keeps numeric entries as numbers so they can be
        # passed through untouched.
        arr = np.atleast_1d(np.array(value, dtype=object))

        def _plain_number(item):
            return (units.ConversionInterface.is_numlike(item)
                    and not isinstance(item, (str, bytes)))

        # Input made up solely of numbers needs no category lookup.
        if all(map(_plain_number, arr)):
            return np.asarray(arr, dtype=float)

        # Updating registers unseen categories and type-checks every entry.
        unit.update(arr)
        to_index = np.vectorize(unit._mapping.__getitem__, otypes=[float])
        return to_index(arr)

    @staticmethod
    def axisinfo(unit, axis):
        """
        Build the default tick locator and formatter for a category axis.

        Parameters
        ----------
        unit : `.UnitData`
            Object holding the string-to-integer mapping.
        axis : `~matplotlib.axis.Axis`
            Axis for which the information is requested.

            .. note:: *axis* is unused.

        Returns
        -------
        `~matplotlib.units.AxisInfo`
            Information to support default tick labeling.
        """
        # Both helpers receive the mapping dict itself (not a copy) so that
        # categories added later are visible to them.
        return units.AxisInfo(
            majloc=StrCategoryLocator(unit._mapping),
            majfmt=StrCategoryFormatter(unit._mapping),
        )

    @staticmethod
    def default_units(data, axis):
        """
        Install or extend the `.UnitData` attached to *axis*.

        Parameters
        ----------
        data : str or iterable of str
        axis : `~matplotlib.axis.Axis`
            Axis on which the data is plotted.

        Returns
        -------
        `.UnitData`
            Object storing the string-to-integer mapping.
        """
        # The conversion call stack is default_units -> axis_info -> convert.
        existing = axis.units
        if existing is not None:
            existing.update(data)
        else:
            axis.set_units(UnitData(data))
        return axis.units
class StrCategoryLocator(ticker.Locator):
    """Locator placing one tick at each integer assigned to a category."""

    def __init__(self, units_mapping):
        """
        Parameters
        ----------
        units_mapping : dict
            Lookup from category name (str) to index (int).
        """
        self._units = units_mapping

    def __call__(self):
        # docstring inherited
        return [*self._units.values()]

    def tick_values(self, vmin, vmax):
        # docstring inherited
        # The requested bounds are ignored; every mapped index is returned.
        return self()
class UnitData:
    """Insertion-ordered mapping from unique category values to integer ids."""

    def __init__(self, data=None):
        """
        Create mapping between unique categorical values and integer ids.

        Parameters
        ----------
        data : iterable, optional
            Sequence of string values. When given, it is passed to `update`.
        """
        self._mapping = OrderedDict()
        self._counter = itertools.count()
        if data is not None:
            self.update(data)

    @staticmethod
    def _str_is_convertible(val):
        """
        Helper method to check whether a string can be parsed as float or date.

        Parameters
        ----------
        val : str or bytes

        Returns
        -------
        bool
            True if ``float(val)`` succeeds or, failing that, if dateutil
            can parse *val*; False otherwise.
        """
        try:
            float(val)
        except ValueError:
            try:
                dateutil.parser.parse(val)
            except (ValueError, TypeError, OverflowError):
                # TypeError if dateutil >= 2.8.1 else ValueError.
                # OverflowError is raised by dateutil when the parsed date
                # exceeds the platform's integer range; such a value is not
                # a usable date, and this advisory check must not abort
                # the caller.
                return False
        return True

    def update(self, data):
        """
        Map new values to integer identifiers.

        Values already present keep their identifier; unseen values receive
        the next integer from the internal counter, in order of first
        appearance.

        Parameters
        ----------
        data : iterable of str or bytes

        Raises
        ------
        TypeError
            If elements in *data* are neither str nor bytes.
        """
        data = np.atleast_1d(np.array(data, dtype=object))
        # check if convertible to number:
        convertible = True
        for val in OrderedDict.fromkeys(data):
            # OrderedDict just iterates over unique values in data.
            cbook._check_isinstance((str, bytes), value=val)
            if convertible:
                # this will only be called so long as convertible is True.
                convertible = self._str_is_convertible(val)
            if val not in self._mapping:
                self._mapping[val] = next(self._counter)
        # Empty input leaves ``convertible`` True without having inspected
        # anything, so require at least one element before logging.
        if data.size and convertible:
            _log.info('Using categorical units to plot a list of strings '
                      'that are all parsable as floats or dates. If these '
                      'strings should be plotted as numbers, cast to the '
                      'appropriate data type before plotting.')
# Hook the converter into Matplotlib's unit framework; each string-like
# type is given its own converter instance.
for _string_type in (str, np.str_, bytes, np.bytes_):
    units.registry[_string_type] = StrCategoryConverter()
del _string_type