Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/io/formats/printing.py : 8%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Printing tools.
3"""
5import sys
6from typing import (
7 Any,
8 Callable,
9 Iterable,
10 List,
11 Mapping,
12 Optional,
13 Sequence,
14 Tuple,
15 Union,
16)
18from pandas._config import get_option
20from pandas.core.dtypes.inference import is_sequence
22EscapeChars = Union[Mapping[str, str], Iterable[str]]
def adjoin(space: int, *lists: List[str], **kwargs) -> str:
    """
    Glue several columns of strings together side by side.

    Every column except the last is padded to its widest entry plus
    ``space`` extra characters; shorter columns are filled with blank rows
    so that all columns have the same number of rows. The idea is to
    prettify.

    Parameters
    ----------
    space : int
        Number of spaces for padding between columns.
    *lists : list of str
        The columns being joined; each list holds the rows of one column.
    strlen : callable, optional
        Function used to calculate the length of each str. Needed for
        unicode handling. Defaults to ``len``.
    justfunc : callable, optional
        Function used to justify str, called as
        ``justfunc(texts, max_len, mode="left")``. Needed for unicode
        handling. Defaults to ``justify``.

    Returns
    -------
    str
        The rows of the joined columns separated by newlines. An empty
        string is returned when no columns are given.
    """
    strlen = kwargs.pop("strlen", len)
    justfunc = kwargs.pop("justfunc", None)
    if justfunc is None:
        justfunc = justify

    # Nothing to join: avoid indexing ``lists[-1]`` on an empty tuple.
    if not lists:
        return ""

    # ``default=0`` keeps an empty column from raising in ``max``.
    lengths = [max(map(strlen, x), default=0) + space for x in lists[:-1]]
    # not the last one: it gets no trailing padding
    lengths.append(max(map(len, lists[-1]), default=0))
    max_rows = max(map(len, lists))

    columns = []
    for width, lst in zip(lengths, lists):
        column = justfunc(lst, width, mode="left")
        # fill short columns with blank rows so ``zip`` keeps every row
        column.extend([" " * width] * (max_rows - len(lst)))
        columns.append(column)

    return "\n".join("".join(row) for row in zip(*columns))
def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> List[str]:
    """
    Pad every string in ``texts`` to ``max_len`` characters.

    ``mode`` selects the alignment: ``"left"`` uses ``ljust``, ``"center"``
    uses ``center`` and any other value falls back to ``rjust``.
    """
    if mode == "left":
        method_name = "ljust"
    elif mode == "center":
        method_name = "center"
    else:
        method_name = "rjust"
    return [getattr(text, method_name)(max_len) for text in texts]
# Unicode consolidation
# ---------------------
#
# pprinting utility functions for generating Unicode text or
# bytes(3.x)/str(2.x) representations of objects.
# Try to use these as much as possible rather than rolling your own.
#
# When to use
# -----------
#
# 1) If you're writing code internal to pandas (no I/O directly involved),
#    use pprint_thing().
#
#    It will always return unicode text which can be handled by other
#    parts of the package without breakage.
#
# 2) If you need to write something out to file, use
#    pprint_thing_encoded(encoding).
#
#    If no encoding is specified, it defaults to utf-8. Since encoding pure
#    ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
#    working with straight ascii.
96def _pprint_seq(
97 seq: Sequence, _nest_lvl: int = 0, max_seq_items: Optional[int] = None, **kwds
98) -> str:
99 """
100 internal. pprinter for iterables. you should probably use pprint_thing()
101 rather then calling this directly.
103 bounds length of printed sequence, depending on options
104 """
105 if isinstance(seq, set):
106 fmt = "{{{body}}}"
107 else:
108 fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})"
110 if max_seq_items is False:
111 nitems = len(seq)
112 else:
113 nitems = max_seq_items or get_option("max_seq_items") or len(seq)
115 s = iter(seq)
116 # handle sets, no slicing
117 r = [
118 pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
119 for i in range(min(nitems, len(seq)))
120 ]
121 body = ", ".join(r)
123 if nitems < len(seq):
124 body += ", ..."
125 elif isinstance(seq, tuple) and len(seq) == 1:
126 body += ","
128 return fmt.format(body=body)
131def _pprint_dict(
132 seq: Mapping, _nest_lvl: int = 0, max_seq_items: Optional[int] = None, **kwds
133) -> str:
134 """
135 internal. pprinter for iterables. you should probably use pprint_thing()
136 rather then calling this directly.
137 """
138 fmt = "{{{things}}}"
139 pairs = []
141 pfmt = "{key}: {val}"
143 if max_seq_items is False:
144 nitems = len(seq)
145 else:
146 nitems = max_seq_items or get_option("max_seq_items") or len(seq)
148 for k, v in list(seq.items())[:nitems]:
149 pairs.append(
150 pfmt.format(
151 key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
152 val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
153 )
154 )
156 if nitems < len(seq):
157 return fmt.format(things=", ".join(pairs) + ", ...")
158 else:
159 return fmt.format(things=", ".join(pairs))
def pprint_thing(
    thing: Any,
    _nest_lvl: int = 0,
    escape_chars: "Optional[EscapeChars]" = None,
    default_escapes: bool = False,
    quote_strings: bool = False,
    max_seq_items: Optional[int] = None,
) -> str:
    """
    This function is the sanctioned way of converting objects
    to a string representation and properly handles nested sequences.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with _pprint_seq and _pprint_dict, this argument is used to keep
        track of the current nesting level, and limit it.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements. Characters given as a list must be among the
        defaults (tab, newline, carriage return); any other character
        raises ``KeyError`` because no replacement is known for it.
    default_escapes : bool, default False
        Whether the input escape characters replaces or adds to the defaults.
        When True, tab, newline and carriage return are escaped in addition
        to ``escape_chars``.
    quote_strings : bool, default False
        Whether a ``str`` input is wrapped in single quotes.
    max_seq_items : int or None, default None
        Pass through to other pretty printers to limit sequence printing

    Returns
    -------
    str
    """

    def as_escaped_string(value: Any) -> str:
        translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
        if isinstance(escape_chars, dict):
            chars = list(escape_chars)
            if default_escapes:
                # Caller-supplied characters are replaced first, followed
                # by the defaults the caller did not override.
                chars += [c for c in translate if c not in escape_chars]
                translate.update(escape_chars)
            else:
                translate = escape_chars
        else:
            chars = list(escape_chars or ())
            if default_escapes:
                chars += [c for c in translate if c not in chars]

        result = str(value)
        for c in chars:
            result = result.replace(c, translate[c])
        return result

    if hasattr(thing, "__next__"):
        # iterators are shown as-is rather than being consumed
        return str(thing)
    elif isinstance(thing, dict) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        result = _pprint_dict(
            thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
        )
    elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
        result = _pprint_seq(
            thing,
            _nest_lvl,
            escape_chars=escape_chars,
            quote_strings=quote_strings,
            max_seq_items=max_seq_items,
        )
    elif isinstance(thing, str) and quote_strings:
        result = "'{thing}'".format(thing=as_escaped_string(thing))
    else:
        result = as_escaped_string(thing)

    return result
def pprint_thing_encoded(
    object, encoding: str = "utf-8", errors: str = "replace"
) -> bytes:
    """
    Return the pprint_thing() representation of ``object`` as bytes,
    encoded with ``encoding`` using the ``errors`` error handler.
    """
    # build the unicode representation first, then encode it in one step
    return pprint_thing(object).encode(encoding, errors)
242def _enable_data_resource_formatter(enable: bool) -> None:
243 if "IPython" not in sys.modules:
244 # definitely not in IPython
245 return
246 from IPython import get_ipython
248 ip = get_ipython()
249 if ip is None:
250 # still not in IPython
251 return
253 formatters = ip.display_formatter.formatters
254 mimetype = "application/vnd.dataresource+json"
256 if enable:
257 if mimetype not in formatters:
258 # define tableschema formatter
259 from IPython.core.formatters import BaseFormatter
261 class TableSchemaFormatter(BaseFormatter):
262 print_method = "_repr_data_resource_"
263 _return_type = (dict,)
265 # register it:
266 formatters[mimetype] = TableSchemaFormatter()
267 # enable it if it's been disabled:
268 formatters[mimetype].enabled = True
269 else:
270 # unregister tableschema mime-type
271 if mimetype in formatters:
272 formatters[mimetype].enabled = False
def default_pprint(x: Any, max_seq_items: Optional[int] = None) -> str:
    """
    Pretty-print ``x`` with the default settings used for reprs.

    Tabs, carriage returns and newlines are escaped and strings are
    quoted; ``max_seq_items`` is passed through to pprint_thing().
    """
    return pprint_thing(
        x, escape_chars=("\t", "\r", "\n"), quote_strings=True, max_seq_items=max_seq_items
    )
def format_object_summary(
    obj,
    formatter: Callable,
    is_justify: bool = True,
    name: Optional[str] = None,
    indent_for_name: bool = True,
    line_break_each_value: bool = False,
) -> str:
    """
    Return the formatted obj as a unicode string

    Parameters
    ----------
    obj : object
        must be iterable and support __getitem__
    formatter : callable
        string formatter for an element
    is_justify : boolean
        should justify the display
    name : name, optional
        defaults to the class name of the obj
    indent_for_name : bool, default True
        Whether subsequent lines should be indented to
        align with the name.
    line_break_each_value : bool, default False
        If True, inserts a line break for each value of ``obj``.
        If False, only break lines when a line of values gets wider
        than the display width.

        .. versionadded:: 0.25.0

    Returns
    -------
    summary string
    """
    from pandas.io.formats.console import get_console_size
    from pandas.io.formats.format import _get_adjustment

    display_width, _ = get_console_size()
    if display_width is None:
        display_width = get_option("display.width") or 80
    if name is None:
        name = type(obj).__name__

    if indent_for_name:
        name_len = len(name)
        space1 = f'\n{(" " * (name_len + 1))}'
        space2 = f'\n{(" " * (name_len + 2))}'
    else:
        space1 = "\n"
        space2 = "\n "  # space for the opening '['

    n = len(obj)
    if line_break_each_value:
        # If we want to vertically align on each value of obj, we need to
        # separate values by a line break and indent the values
        sep = ",\n " + " " * len(name)
    else:
        sep = ","
    max_seq_items = get_option("display.max_seq_items") or n

    # are we a truncated display
    is_truncated = n > max_seq_items

    # adj can optionally handle unicode eastern asian width
    adj = _get_adjustment()

    def _extend_line(
        s: str, line: str, value: str, display_width: int, next_line_prefix: str
    ) -> Tuple[str, str]:
        # Flush ``line`` into ``s`` and start a new one when appending
        # ``value`` would reach the display width.
        if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
            s += line.rstrip()
            line = next_line_prefix
        line += value
        return s, line

    def best_len(values: List[str]) -> int:
        # Widest display length among ``values``; 0 when there are none.
        if values:
            return max(adj.len(x) for x in values)
        else:
            return 0

    close = ", "

    if n == 0:
        summary = f"[]{close}"
    elif n == 1 and not line_break_each_value:
        first = formatter(obj[0])
        summary = f"[{first}]{close}"
    elif n == 2 and not line_break_each_value:
        first = formatter(obj[0])
        last = formatter(obj[-1])
        summary = f"[{first}, {last}]{close}"
    else:

        if is_truncated:
            half = min(max_seq_items // 2, 10)
            if half < 1:
                # max_seq_items == 1: ``obj[-0:]`` would select *every*
                # element, so show only the last one after the ellipsis.
                head = []
                tail = [formatter(x) for x in obj[-1:]]
            else:
                head = [formatter(x) for x in obj[:half]]
                tail = [formatter(x) for x in obj[-half:]]
        else:
            head = []
            tail = [formatter(x) for x in obj]

        # adjust all values to max length if needed
        if is_justify:
            if line_break_each_value:
                # Justify each string in the values of head and tail, so the
                # strings will right align when head and tail are stacked
                # vertically.
                head, tail = _justify(head, tail)
            elif is_truncated or not (
                len(", ".join(head)) < display_width
                and len(", ".join(tail)) < display_width
            ):
                # Each string in head and tail should align with each other
                max_length = max(best_len(head), best_len(tail))
                head = [x.rjust(max_length) for x in head]
                tail = [x.rjust(max_length) for x in tail]
            # If we are not truncated and we are only a single
            # line, then don't justify

        if line_break_each_value:
            # Now head and tail are of type List[Tuple[str]]. Below we
            # convert them into List[str], so there will be one string per
            # value. Also truncate items horizontally if wider than
            # max_space
            max_space = display_width - len(space2)
            value = tail[0]
            for max_items in reversed(range(1, len(value) + 1)):
                pprinted_seq = _pprint_seq(value, max_seq_items=max_items)
                if len(pprinted_seq) < max_space:
                    break
            head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
            tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]

        summary = ""
        line = space2

        for item in head:
            word = item + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        if is_truncated:
            # remove trailing space of last line
            summary += line.rstrip() + space2 + "..."
            line = space2

        for item in tail[:-1]:
            word = item + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        # last value: no sep added + 1 space of width used for trailing ','
        summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
        summary += line

        # right now close is either '' or ', '
        # Now we want to include the ']', but not the maybe space.
        close = "]" + close.rstrip(" ")
        summary += close

        if len(summary) > (display_width) or line_break_each_value:
            summary += space1
        else:  # one row
            summary += " "

        # remove initial space
        summary = "[" + summary[len(space2) :]

    return summary
452def _justify(
453 head: List[Sequence[str]], tail: List[Sequence[str]]
454) -> Tuple[List[Tuple[str, ...]], List[Tuple[str, ...]]]:
455 """
456 Justify items in head and tail, so they are right-aligned when stacked.
458 Parameters
459 ----------
460 head : list-like of list-likes of strings
461 tail : list-like of list-likes of strings
463 Returns
464 -------
465 tuple of list of tuples of strings
466 Same as head and tail, but items are right aligned when stacked
467 vertically.
469 Examples
470 --------
471 >>> _justify([['a', 'b']], [['abc', 'abcd']])
472 ([(' a', ' b')], [('abc', 'abcd')])
473 """
474 combined = head + tail
476 # For each position for the sequences in ``combined``,
477 # find the length of the largest string.
478 max_length = [0] * len(combined[0])
479 for inner_seq in combined:
480 length = [len(item) for item in inner_seq]
481 max_length = [max(x, y) for x, y in zip(max_length, length)]
483 # justify each item in each list-like in head and tail using max_length
484 head = [
485 tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head
486 ]
487 tail = [
488 tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail
489 ]
490 # https://github.com/python/mypy/issues/4975
491 # error: Incompatible return value type (got "Tuple[List[Sequence[str]],
492 # List[Sequence[str]]]", expected "Tuple[List[Tuple[str, ...]],
493 # List[Tuple[str, ...]]]")
494 return head, tail # type: ignore
def format_object_attrs(
    obj: Sequence, include_dtype: bool = True
) -> List[Tuple[str, Union[str, int]]]:
    """
    Collect the (attr, formatted_value) pairs shown for common attrs:
    dtype, name or names, and length.

    Parameters
    ----------
    obj : object
        must be iterable
    include_dtype : bool
        If False, dtype won't be in the returned list

    Returns
    -------
    list of 2-tuple
        ``length`` is only included when ``obj`` is longer than the
        ``display.max_seq_items`` option.
    """
    pairs: List[Tuple[str, Union[str, int]]] = []

    if hasattr(obj, "dtype") and include_dtype:
        # error: "Sequence[Any]" has no attribute "dtype"
        quoted_dtype = f"'{obj.dtype}'"  # type: ignore
        pairs.append(("dtype", quoted_dtype))

    # ``name`` takes precedence; ``names`` is only reported when ``name``
    # is unset and at least one of the names is truthy.
    if getattr(obj, "name", None) is not None:
        # error: "Sequence[Any]" has no attribute "name"
        pairs.append(("name", default_pprint(obj.name)))  # type: ignore
    elif getattr(obj, "names", None) is not None:
        # error: "Sequence[Any]" has no attribute "names"
        if any(obj.names):  # type: ignore
            pairs.append(("names", default_pprint(obj.names)))  # type: ignore

    limit = get_option("display.max_seq_items") or len(obj)
    size = len(obj)
    if size > limit:
        pairs.append(("length", size))
    return pairs