Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/numpy/core/defchararray.py : 41%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2This module contains a set of functions for vectorized string
3operations and methods.
5.. note::
6 The `chararray` class exists for backwards compatibility with
7 Numarray, it is not recommended for new development. Starting from numpy
8 1.4, if one needs arrays of strings, it is recommended to use arrays of
9 `dtype` `object_`, `string_` or `unicode_`, and use the free functions
10 in the `numpy.char` module for fast vectorized string operations.
12Some methods will only be available if the corresponding string method is
13available in your version of Python.
15The preferred alias for `defchararray` is `numpy.char`.
17"""
18import functools
19import sys
20from .numerictypes import (
21 string_, unicode_, integer, int_, object_, bool_, character)
22from .numeric import ndarray, compare_chararrays
23from .numeric import array as narray
24from numpy.core.multiarray import _vec_string
25from numpy.core.overrides import set_module
26from numpy.core import overrides
27from numpy.compat import asbytes
28import numpy
30__all__ = [
31 'equal', 'not_equal', 'greater_equal', 'less_equal',
32 'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
33 'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
34 'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
35 'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
36 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
37 'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
38 'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
39 'array', 'asarray'
40 ]
# Module-level placeholder; none of the functions visible in this part of
# the file read it.
_globalvar = 0

# Pre-bind the public module name so that every function decorated with
# this helper is registered for dispatch under ``numpy.char``.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch,
    module='numpy.char',
)
def _use_unicode(*args):
    """
    Pick the output scalar type for a string operation.

    Returns `unicode_` as soon as one argument is a Python ``str`` or
    converts to an array whose dtype is a unicode type; otherwise
    (including when no arguments are given) returns `string_`.
    """
    needs_unicode = any(
        isinstance(candidate, str)
        or issubclass(numpy.asarray(candidate).dtype.type, unicode_)
        for candidate in args
    )
    return unicode_ if needs_unicode else string_
def _to_string_or_unicode_array(result):
    """
    Convert an intermediate (object) array back into a regular array.

    The array is flattened to nested Python lists and handed to
    `numpy.asarray`, which infers the final dtype from the contents.
    """
    as_python_lists = result.tolist()
    return numpy.asarray(as_python_lists)
def _clean_args(*args):
    """
    Trim an argument list at the first ``None``.

    Many Python string methods with optional arguments do not accept
    ``None`` as "use the default", so the first ``None`` and everything
    after it are dropped before the call is delegated.

    Returns
    -------
    list
        The leading arguments that precede the first ``None``.
    """
    kept = list(args)
    for position, value in enumerate(kept):
        if value is None:
            return kept[:position]
    return kept
def _get_num_chars(a):
    """
    Return the number of characters per field of a string or unicode array.

    For unicode arrays the item size is divided by 4; for every other
    dtype the item size is returned unchanged.
    """
    item_bytes = a.itemsize
    is_unicode = issubclass(a.dtype.type, unicode_)
    return item_bytes // 4 if is_unicode else item_bytes
def _binary_op_dispatcher(x1, x2):
    """Dispatcher for two-operand functions: both operands are relevant."""
    relevant = (x1, x2)
    return relevant
@array_function_dispatch(_binary_op_dispatcher)
def equal(x1, x2):
    """
    Test ``x1 == x2`` element-wise.

    In contrast to `numpy.equal`, trailing whitespace is stripped from
    the strings before they are compared. This mirrors numarray and is
    kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Boolean array, or a single bool if `x1` and `x2` are scalars.

    See Also
    --------
    not_equal, greater_equal, less_equal, greater, less
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '==', strip_trailing)
@array_function_dispatch(_binary_op_dispatcher)
def not_equal(x1, x2):
    """
    Test ``x1 != x2`` element-wise.

    In contrast to `numpy.not_equal`, trailing whitespace is stripped
    from the strings before they are compared. This mirrors numarray and
    is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Boolean array, or a single bool if `x1` and `x2` are scalars.

    See Also
    --------
    equal, greater_equal, less_equal, greater, less
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '!=', strip_trailing)
@array_function_dispatch(_binary_op_dispatcher)
def greater_equal(x1, x2):
    """
    Test ``x1 >= x2`` element-wise.

    In contrast to `numpy.greater_equal`, trailing whitespace is
    stripped from the strings before they are compared. This mirrors
    numarray and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Boolean array, or a single bool if `x1` and `x2` are scalars.

    See Also
    --------
    equal, not_equal, less_equal, greater, less
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '>=', strip_trailing)
@array_function_dispatch(_binary_op_dispatcher)
def less_equal(x1, x2):
    """
    Test ``x1 <= x2`` element-wise.

    In contrast to `numpy.less_equal`, trailing whitespace is stripped
    from the strings before they are compared. This mirrors numarray and
    is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Boolean array, or a single bool if `x1` and `x2` are scalars.

    See Also
    --------
    equal, not_equal, greater_equal, greater, less
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '<=', strip_trailing)
@array_function_dispatch(_binary_op_dispatcher)
def greater(x1, x2):
    """
    Test ``x1 > x2`` element-wise.

    In contrast to `numpy.greater`, trailing whitespace is stripped from
    the strings before they are compared. This mirrors numarray and is
    kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Boolean array, or a single bool if `x1` and `x2` are scalars.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, less
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '>', strip_trailing)
@array_function_dispatch(_binary_op_dispatcher)
def less(x1, x2):
    """
    Return (x1 < x2) element-wise.

    Unlike `numpy.less`, this comparison is performed by first
    stripping whitespace characters from the end of the string. This
    behavior is provided for backward-compatibility with numarray.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray or bool
        Output array of bools, or a single bool if x1 and x2 are scalars.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, greater
    """
    # The final argument asks compare_chararrays to strip trailing
    # whitespace before comparing.
    return compare_chararrays(x1, x2, '<', True)
def _unary_op_dispatcher(a):
    """Dispatcher for single-operand functions: only `a` is relevant."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_unary_op_dispatcher)
def str_len(a):
    """
    Compute ``len(a)`` for every element.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of integers.

    See Also
    --------
    builtins.len
    """
    # ``__len__`` yields Python ints rather than C integers. ``intp`` would
    # be the usual choice for lengths, but ``int_`` is sufficient because
    # the dtype itemsize bounds the possible values.
    length_method = '__len__'
    return _vec_string(a, int_, length_method)
@array_function_dispatch(_binary_op_dispatcher)
def add(x1, x2):
    """
    Concatenate two arrays of str or unicode element-wise.

    Arrays `x1` and `x2` must have the same shape.

    Parameters
    ----------
    x1 : array_like of str or unicode
        Input array.
    x2 : array_like of str or unicode
        Input array.

    Returns
    -------
    add : ndarray
        Output array of `string_` or `unicode_`, depending on input
        types, of the same shape as `x1` and `x2`.
    """
    left = numpy.asarray(x1)
    right = numpy.asarray(x2)
    result_type = _use_unicode(left, right)
    # A concatenation can be as long as both field widths combined.
    result_width = _get_num_chars(left) + _get_num_chars(right)
    return _vec_string(left, (result_type, result_width), '__add__', (right,))
def _multiply_dispatcher(a, i):
    """Dispatcher for `multiply`: only the string operand `a` is relevant."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_multiply_dispatcher)
def multiply(a, i):
    """
    Repeat each string ``a * i`` times, element-wise.

    Values in `i` of less than 0 are treated as 0 (which yields an
    empty string).

    Parameters
    ----------
    a : array_like of str or unicode

    i : array_like of ints

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types.

    Raises
    ------
    ValueError
        If `i` does not have an integer dtype.
    """
    strings = numpy.asarray(a)
    repeats = numpy.asarray(i)
    if not issubclass(repeats.dtype.type, integer):
        raise ValueError("Can only multiply by integers")
    # The widest result comes from the largest repeat count; negative
    # counts contribute nothing.
    largest_repeat = max(int(repeats.max()), 0)
    result_width = _get_num_chars(strings) * largest_repeat
    return _vec_string(
        strings, (strings.dtype.type, result_width), '__mul__', (repeats,))
def _mod_dispatcher(a, values):
    """Dispatcher for `mod`: both `a` and `values` are relevant."""
    relevant = (a, values)
    return relevant
@array_function_dispatch(_mod_dispatcher)
def mod(a, values):
    """
    Apply ``a % values`` element-wise.

    This is the pre-Python 2.6 style of string formatting
    (interpolation), applied to a pair of array_likes of str or unicode.

    Parameters
    ----------
    a : array_like of str or unicode

    values : array_like of values
        These values will be element-wise interpolated into the string.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types.

    See Also
    --------
    str.__mod__
    """
    # Formatted lengths are not known up front, so go through an object
    # array and convert back afterwards.
    formatted = _vec_string(a, object_, '__mod__', (values,))
    return _to_string_or_unicode_array(formatted)
@array_function_dispatch(_unary_op_dispatcher)
def capitalize(a):
    """
    Capitalize only the first character of each element of `a`.

    Calls `str.capitalize` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array of strings to capitalize.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types.

    See Also
    --------
    str.capitalize

    Examples
    --------
    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
    array([b'a1b2', b'1b2a', b'b2a1', b'2a1b'], dtype='|S4')
    >>> np.char.capitalize(c)
    array([b'A1b2', b'1b2a', b'B2a1', b'2a1b'], dtype='|S4')
    """
    strings = numpy.asarray(a)
    # The result reuses the input dtype.
    same_dtype = strings.dtype
    return _vec_string(strings, same_dtype, 'capitalize')
def _center_dispatcher(a, width, fillchar=None):
    """Dispatcher for `center`: only `a` is relevant."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_center_dispatcher)
def center(a, width, fillchar=' '):
    """
    Return a copy of `a` with its elements centered in a string of
    length `width`.

    Calls `str.center` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings. Elements that are already
        longer than `width` are returned unchanged, not truncated.
    fillchar : str or unicode, optional
        The padding character to use (default is space).

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input
        types

    See Also
    --------
    str.center
    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    # ``str.center`` never shortens a string, so each output field must
    # hold max(len(element), width) characters. Sizing the field from
    # `width` alone would silently truncate elements longer than `width`.
    lengths = _vec_string(a_arr, int_, '__len__')
    size = int(numpy.max(numpy.maximum(lengths, width_arr)))
    if numpy.issubdtype(a_arr.dtype, numpy.string_):
        # Byte-string arrays need a bytes fill character.
        fillchar = asbytes(fillchar)
    return _vec_string(
        a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
def _count_dispatcher(a, sub, start=None, end=None):
    """Dispatcher for the substring-search functions: only `a` is relevant."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_count_dispatcher)
def count(a, sub, start=0, end=None):
    """
    Count the non-overlapping occurrences of substring `sub` in the
    range [`start`, `end`] of each element.

    Calls `str.count` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode
        The substring to search for.

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as slice
        notation to specify the range in which to count.

    Returns
    -------
    out : ndarray
        Output array of ints.

    See Also
    --------
    str.count

    Examples
    --------
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
    >>> np.char.count(c, 'A')
    array([3, 1, 1])
    >>> np.char.count(c, 'aA')
    array([3, 1, 0])
    >>> np.char.count(c, 'A', start=1, end=4)
    array([2, 1, 1])
    >>> np.char.count(c, 'A', start=1, end=3)
    array([1, 0, 0])
    """
    # `end` is forwarded only when the caller actually supplied it.
    method_args = [sub, start]
    method_args.extend(_clean_args(end))
    return _vec_string(a, int_, 'count', method_args)
def _code_dispatcher(a, encoding=None, errors=None):
    """Dispatcher for `decode` and `encode`: only `a` is relevant."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_code_dispatcher)
def decode(a, encoding=None, errors=None):
    """
    Call the ``decode`` method of each element.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime. For more information, see the
    :mod:`codecs` module.

    Parameters
    ----------
    a : array_like of str or unicode

    encoding : str, optional
        The name of an encoding

    errors : str, optional
        Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See Also
    --------
    encode

    Notes
    -----
    The type of the result will depend on the encoding specified.

    Examples
    --------
    >>> c = np.array([b'abc', b'de'])
    >>> np.char.decode(c, encoding='ascii')
    array(['abc', 'de'], dtype='<U3')
    """
    # Arguments left as None are dropped so the method's own defaults apply.
    codec_args = _clean_args(encoding, errors)
    decoded = _vec_string(a, object_, 'decode', codec_args)
    return _to_string_or_unicode_array(decoded)
@array_function_dispatch(_code_dispatcher)
def encode(a, encoding=None, errors=None):
    """
    Call `str.encode` on each element.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime. For more information, see the codecs
    module.

    Parameters
    ----------
    a : array_like of str or unicode

    encoding : str, optional
        The name of an encoding

    errors : str, optional
        Specifies how to handle encoding errors

    Returns
    -------
    out : ndarray

    See Also
    --------
    str.encode

    Notes
    -----
    The type of the result will depend on the encoding specified.
    """
    # Arguments left as None are dropped so the method's own defaults apply.
    codec_args = _clean_args(encoding, errors)
    encoded = _vec_string(a, object_, 'encode', codec_args)
    return _to_string_or_unicode_array(encoded)
def _endswith_dispatcher(a, suffix, start=None, end=None):
    """Dispatcher for `endswith`: only `a` is relevant."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_endswith_dispatcher)
def endswith(a, suffix, start=0, end=None):
    """
    Test whether each string element of `a` ends with `suffix`.

    Calls `str.endswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    suffix : str

    start, end : int, optional
        With optional `start`, test beginning at that position. With
        optional `end`, stop comparing at that position.

    Returns
    -------
    out : ndarray
        Array of bools: `True` where the element ends with `suffix`,
        otherwise `False`.

    See Also
    --------
    str.endswith

    Examples
    --------
    >>> s = np.array(['foo', 'bar'])
    >>> s
    array(['foo', 'bar'], dtype='<U3')
    >>> np.char.endswith(s, 'ar')
    array([False,  True])
    >>> np.char.endswith(s, 'a', start=1, end=2)
    array([False,  True])
    """
    # `end` is forwarded only when the caller actually supplied it.
    method_args = [suffix, start]
    method_args.extend(_clean_args(end))
    return _vec_string(a, bool_, 'endswith', method_args)
def _expandtabs_dispatcher(a, tabsize=None):
    """Dispatcher for `expandtabs`: only `a` is relevant."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_expandtabs_dispatcher)
def expandtabs(a, tabsize=8):
    """
    Replace every tab character in each element with one or more spaces.

    Calls `str.expandtabs` element-wise.

    The number of spaces depends on the current column and the given
    `tabsize`. The column number is reset to zero after each newline
    occurring in the string. This doesn't understand other non-printing
    characters or escape sequences.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    tabsize : int, optional
        Replace tabs with `tabsize` number of spaces. If not given
        defaults to 8 spaces.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type.

    See Also
    --------
    str.expandtabs
    """
    # Expanded lengths are not known up front, so go through an object
    # array and convert back afterwards.
    expanded = _vec_string(a, object_, 'expandtabs', (tabsize,))
    return _to_string_or_unicode_array(expanded)
@array_function_dispatch(_count_dispatcher)
def find(a, sub, start=0, end=None):
    """
    Locate the lowest index at which substring `sub` occurs in each
    element, such that `sub` is contained in the range [`start`, `end`].

    Calls `str.find` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray or int
        Output array of ints. Returns -1 if `sub` is not found.

    See Also
    --------
    str.find
    """
    # `end` is forwarded only when the caller actually supplied it.
    method_args = [sub, start]
    method_args.extend(_clean_args(end))
    return _vec_string(a, int_, 'find', method_args)
@array_function_dispatch(_count_dispatcher)
def index(a, sub, start=0, end=None):
    """
    Like `find`, but raises `ValueError` when the substring is not found.

    Calls `str.index` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray
        Output array of ints.

    Raises
    ------
    ValueError
        If `sub` is not found in an element (`str.index` raises rather
        than returning -1).

    See Also
    --------
    find, str.index
    """
    # `end` is forwarded only when the caller actually supplied it.
    return _vec_string(
        a, int_, 'index', [sub, start] + _clean_args(end))
@array_function_dispatch(_unary_op_dispatcher)
def isalnum(a):
    """
    Returns true for each element if all characters in the string are
    alphanumeric and there is at least one character, false otherwise.

    Calls `str.isalnum` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools

    See Also
    --------
    str.isalnum
    """
    # The result dtype is bool_, as for the other is* predicates.
    return _vec_string(a, bool_, 'isalnum')
@array_function_dispatch(_unary_op_dispatcher)
def isalpha(a):
    """
    Test, per element, whether every character is alphabetic and the
    string contains at least one character.

    Calls `str.isalpha` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.isalpha
    """
    predicate = 'isalpha'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def isdigit(a):
    """
    Test, per element, whether every character is a digit and the
    string contains at least one character.

    Calls `str.isdigit` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.isdigit
    """
    predicate = 'isdigit'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def islower(a):
    """
    Test, per element, whether all cased characters are lowercase and
    the string contains at least one cased character.

    Calls `str.islower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.islower
    """
    predicate = 'islower'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def isspace(a):
    """
    Test, per element, whether the string consists only of whitespace
    characters and contains at least one character.

    Calls `str.isspace` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.isspace
    """
    predicate = 'isspace'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def istitle(a):
    """
    Test, per element, whether the string is titlecased and contains at
    least one character.

    Calls `str.istitle` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.istitle
    """
    predicate = 'istitle'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def isupper(a):
    """
    Test, per element, whether all cased characters are uppercase and
    the string contains at least one character.

    Calls `str.isupper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.isupper
    """
    predicate = 'isupper'
    return _vec_string(a, bool_, predicate)
def _join_dispatcher(sep, seq):
    """Dispatcher for `join`: both `sep` and `seq` are relevant."""
    relevant = (sep, seq)
    return relevant
@array_function_dispatch(_join_dispatcher)
def join(sep, seq):
    """
    Concatenate the strings in the sequence `seq`, using `sep` as the
    separator.

    Calls `str.join` element-wise.

    Parameters
    ----------
    sep : array_like of str or unicode
    seq : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types.

    See Also
    --------
    str.join
    """
    # Joined lengths are not known up front, so go through an object
    # array and convert back afterwards.
    joined = _vec_string(sep, object_, 'join', (seq,))
    return _to_string_or_unicode_array(joined)
def _just_dispatcher(a, width, fillchar=None):
    """Dispatcher for `ljust` and `rjust`: only `a` is relevant."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_just_dispatcher)
def ljust(a, width, fillchar=' '):
    """
    Return an array with the elements of `a` left-justified in a
    string of length `width`.

    Calls `str.ljust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings. Elements that are already
        longer than `width` are returned unchanged, not truncated.
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.ljust
    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    # ``str.ljust`` never shortens a string, so each output field must
    # hold max(len(element), width) characters. Sizing the field from
    # `width` alone would silently truncate elements longer than `width`.
    lengths = _vec_string(a_arr, int_, '__len__')
    size = int(numpy.max(numpy.maximum(lengths, width_arr)))
    if numpy.issubdtype(a_arr.dtype, numpy.string_):
        # Byte-string arrays need a bytes fill character.
        fillchar = asbytes(fillchar)
    return _vec_string(
        a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
@array_function_dispatch(_unary_op_dispatcher)
def lower(a):
    """
    Convert every element to lowercase.

    Calls `str.lower` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type.

    See Also
    --------
    str.lower

    Examples
    --------
    >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
    >>> np.char.lower(c)
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')
    """
    strings = numpy.asarray(a)
    # The result reuses the input dtype.
    same_dtype = strings.dtype
    return _vec_string(strings, same_dtype, 'lower')
def _strip_dispatcher(a, chars=None):
    """Dispatcher for the strip functions: only `a` is relevant."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_strip_dispatcher)
def lstrip(a, chars=None):
    """
    Remove leading characters from each element of `a`.

    Calls `str.lstrip` element-wise.

    Parameters
    ----------
    a : array-like, {str, unicode}
        Input array.

    chars : {str, unicode}, optional
        The `chars` argument is a string specifying the set of
        characters to be removed. If omitted or None, the `chars`
        argument defaults to removing whitespace. The `chars` argument
        is not a prefix; rather, all combinations of its values are
        stripped.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type.

    See Also
    --------
    str.lstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
    >>> c
    array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')

    The 'a' in c[1] is not stripped because it is preceded by whitespace.

    >>> np.char.lstrip(c, 'a')
    array(['AaAaA', ' aA ', 'bBABba'], dtype='<U7')

    >>> np.char.lstrip(c, 'A') # leaves c unchanged
    array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
    ... # np.char.lstrip(c, '') does not modify c at all.
    False
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
    True
    """
    strings = numpy.asarray(a)
    # Stripping can only shorten an element, so the input dtype is reused.
    same_dtype = strings.dtype
    return _vec_string(strings, same_dtype, 'lstrip', (chars,))
def _partition_dispatcher(a, sep):
    """Dispatcher for `partition` and `rpartition`: only `a` is relevant."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_partition_dispatcher)
def partition(a, sep):
    """
    Split each element of `a` around the first occurrence of `sep`.

    Calls `str.partition` element-wise.

    Each element yields 3 strings: the part before the separator, the
    separator itself, and the part after the separator. If the separator
    is not found, the 3 strings are the element itself followed by two
    empty strings.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array
    sep : {str, unicode}
        Separator to split each string element in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type.
        The output array will have an extra dimension with 3
        elements per input element.

    See Also
    --------
    str.partition
    """
    # Each element becomes a 3-tuple; converting back to a regular array
    # produces the extra trailing dimension.
    pieces = _vec_string(a, object_, 'partition', (sep,))
    return _to_string_or_unicode_array(pieces)
def _replace_dispatcher(a, old, new, count=None):
    """Dispatcher for `replace`: only `a` is relevant."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_replace_dispatcher)
def replace(a, old, new, count=None):
    """
    Replace all occurrences of substring `old` by `new` in each element
    of `a`.

    Calls `str.replace` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    old, new : str or unicode

    count : int, optional
        If the optional argument `count` is given, only the first
        `count` occurrences are replaced.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type.

    See Also
    --------
    str.replace
    """
    # `count` is forwarded only when the caller actually supplied it.
    method_args = [old, new]
    method_args.extend(_clean_args(count))
    replaced = _vec_string(a, object_, 'replace', method_args)
    return _to_string_or_unicode_array(replaced)
@array_function_dispatch(_count_dispatcher)
def rfind(a, sub, start=0, end=None):
    """
    Locate the highest index at which substring `sub` occurs in each
    element of `a`, such that `sub` is contained within
    [`start`, `end`].

    Calls `str.rfind` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    sub : str or unicode

    start, end : int, optional
        Optional arguments `start` and `end` are interpreted as in
        slice notation.

    Returns
    -------
    out : ndarray
        Output array of ints. Return -1 on failure.

    See Also
    --------
    str.rfind
    """
    # `end` is forwarded only when the caller actually supplied it.
    method_args = [sub, start]
    method_args.extend(_clean_args(end))
    return _vec_string(a, int_, 'rfind', method_args)
@array_function_dispatch(_count_dispatcher)
def rindex(a, sub, start=0, end=None):
    """
    Behave like `rfind`, except that `ValueError` is raised when the
    substring `sub` is not found.

    Calls `str.rindex` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    sub : str or unicode

    start, end : int, optional

    Returns
    -------
    out : ndarray
        Output array of ints.

    See Also
    --------
    rfind, str.rindex
    """
    # `end` is forwarded only when the caller actually supplied it.
    method_args = [sub, start]
    method_args.extend(_clean_args(end))
    return _vec_string(a, int_, 'rindex', method_args)
@array_function_dispatch(_just_dispatcher)
def rjust(a, width, fillchar=' '):
    """
    Return an array with the elements of `a` right-justified in a
    string of length `width`.

    Calls `str.rjust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    width : int
        The length of the resulting strings. Elements that are already
        longer than `width` are returned unchanged, not truncated.
    fillchar : str or unicode, optional
        The character to use for padding

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.rjust
    """
    a_arr = numpy.asarray(a)
    width_arr = numpy.asarray(width)
    # ``str.rjust`` never shortens a string, so each output field must
    # hold max(len(element), width) characters. Sizing the field from
    # `width` alone would silently truncate elements longer than `width`.
    lengths = _vec_string(a_arr, int_, '__len__')
    size = int(numpy.max(numpy.maximum(lengths, width_arr)))
    if numpy.issubdtype(a_arr.dtype, numpy.string_):
        # Byte-string arrays need a bytes fill character.
        fillchar = asbytes(fillchar)
    return _vec_string(
        a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
@array_function_dispatch(_partition_dispatcher)
def rpartition(a, sep):
    """
    Partition (split) each element around the right-most separator.

    Calls `str.rpartition` element-wise.

    For each element in `a`, split the element as the last
    occurrence of `sep`, and return 3 strings containing the part
    before the separator, the separator itself, and the part after
    the separator. If the separator is not found, return 3 strings
    containing two empty strings, followed by the string itself.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array
    sep : str or unicode
        Right-most separator to split each element in array.

    Returns
    -------
    out : ndarray
        Output array of string or unicode, depending on input
        type. The output array will have an extra dimension with
        3 elements per input element.

    See Also
    --------
    str.rpartition
    """
    # Each element becomes a 3-tuple; converting back to a regular array
    # produces the extra trailing dimension.
    return _to_string_or_unicode_array(
        _vec_string(a, object_, 'rpartition', (sep,)))
def _split_dispatcher(a, sep=None, maxsplit=None):
    """Dispatcher for the split functions: only `a` is relevant."""
    relevant = (a,)
    return relevant
@array_function_dispatch(_split_dispatcher)
def rsplit(a, sep=None, maxsplit=None):
    """
    Split each element of `a` into a list of words, using `sep` as the
    delimiter string.

    Calls `str.rsplit` element-wise.

    Except for splitting from the right, `rsplit` behaves like `split`.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
        If `sep` is not specified or None, any whitespace string
        is a separator.
    maxsplit : int, optional
        If `maxsplit` is given, at most `maxsplit` splits are done,
        the rightmost ones.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.rsplit, split
    """
    # The per-element lists can differ in length, so the result stays an
    # object array.
    method_args = [sep]
    method_args.extend(_clean_args(maxsplit))
    return _vec_string(a, object_, 'rsplit', method_args)
def _strip_dispatcher(a, chars=None):
    """Dispatcher used with `array_function_dispatch` for the strip family.

    Returns a 1-tuple holding `a`; `chars` is accepted only so the signature
    mirrors the dispatched functions.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_strip_dispatcher)
def rstrip(a, chars=None):
    """
    Return a copy of each element of `a` with trailing characters
    removed.

    Calls `str.rstrip` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
        The set of characters to remove.  When omitted or None,
        whitespace is removed.  `chars` is not a suffix; rather,
        all combinations of its values are stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.rstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
    array([b'aAaAaA', b'abBABba'], dtype='|S7')
    >>> np.char.rstrip(c, b'a')
    array([b'aAaAaA', b'abBABb'], dtype='|S7')
    >>> np.char.rstrip(c, b'A')
    array([b'aAaAa', b'abBABba'], dtype='|S7')

    """
    arr = numpy.asarray(a)
    # The result keeps the dtype of the input array.
    out_dtype = arr.dtype
    return _vec_string(arr, out_dtype, 'rstrip', (chars,))
@array_function_dispatch(_split_dispatcher)
def split(a, sep=None, maxsplit=None):
    """
    Split every element of `a` into a list of words, using `sep` as
    the delimiter string.

    Calls `str.split` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
        Delimiter.  When omitted or None, any run of whitespace
        acts as a separator.

    maxsplit : int, optional
        When given, no more than `maxsplit` splits are performed.

    Returns
    -------
    out : ndarray
        Array of list objects

    See also
    --------
    str.split, rsplit

    """
    # Each element produces a list of a different length, so the result
    # has to stay an object array.
    call_args = [sep]
    call_args.extend(_clean_args(maxsplit))
    return _vec_string(a, object_, 'split', call_args)
def _splitlines_dispatcher(a, keepends=None):
    """Dispatcher used with `array_function_dispatch` for `splitlines`.

    Returns a 1-tuple holding `a`; `keepends` is accepted only so the
    signature mirrors the dispatched function.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_splitlines_dispatcher)
def splitlines(a, keepends=None):
    """
    Break every element of `a` at line boundaries and return the
    resulting lines as a list.

    Calls `str.splitlines` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    keepends : bool, optional
        Line breaks are left out of the resulting lists unless
        `keepends` is given and true.

    Returns
    -------
    out : ndarray
        Array of list objects

    See also
    --------
    str.splitlines

    """
    call_args = _clean_args(keepends)
    return _vec_string(a, object_, 'splitlines', call_args)
def _startswith_dispatcher(a, prefix, start=None, end=None):
    """Dispatcher used with `array_function_dispatch` for `startswith`.

    Returns a 1-tuple holding `a`; the remaining parameters are accepted
    only so the signature mirrors the dispatched function.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_startswith_dispatcher)
def startswith(a, prefix, start=0, end=None):
    """
    Return a boolean array that is `True` wherever the string element
    of `a` begins with `prefix`, and `False` elsewhere.

    Calls `str.startswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    prefix : str

    start, end : int, optional
        With optional `start`, the test begins at that position.
        With optional `end`, the comparison stops at that position.

    Returns
    -------
    out : ndarray
        Array of booleans

    See also
    --------
    str.startswith

    """
    call_args = [prefix, start]
    call_args.extend(_clean_args(end))
    return _vec_string(a, bool_, 'startswith', call_args)
@array_function_dispatch(_strip_dispatcher)
def strip(a, chars=None):
    """
    Return a copy of each element of `a` with leading and trailing
    characters removed.

    Calls `str.strip` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
        The set of characters to remove.  When omitted or None,
        whitespace is removed.  `chars` is not a prefix or suffix;
        rather, all combinations of its values are stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.strip

    Examples
    --------
    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
    >>> c
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c)
    array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
    array(['AaAaA', '  aA  ', 'bBABb'], dtype='<U7')
    >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
    array(['aAaAa', '  aA  ', 'abBABba'], dtype='<U7')

    """
    arr = numpy.asarray(a)
    # The result keeps the dtype of the input array.
    out_dtype = arr.dtype
    call_args = _clean_args(chars)
    return _vec_string(arr, out_dtype, 'strip', call_args)
@array_function_dispatch(_unary_op_dispatcher)
def swapcase(a):
    """
    Return, element-wise, a copy of each string with uppercase
    characters turned into lowercase and vice versa.

    Calls `str.swapcase` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See also
    --------
    str.swapcase

    Examples
    --------
    >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
    array([b'a1B c', b'1b Ca', b'b Ca1', b'cA1b'], dtype='|S5')
    >>> np.char.swapcase(c)
    array([b'A1b C', b'1B cA', b'B cA1', b'Ca1B'], dtype='|S5')

    """
    arr = numpy.asarray(a)
    # The result keeps the dtype of the input array.
    out_dtype = arr.dtype
    return _vec_string(arr, out_dtype, 'swapcase')
@array_function_dispatch(_unary_op_dispatcher)
def title(a):
    """
    Return, element-wise, the title-cased version of each string or
    unicode element.

    In title case every word starts with an uppercase character and all
    remaining cased characters are lowercase.

    Calls `str.title` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.title

    Examples
    --------
    >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
    array([b'a1b c', b'1b ca', b'b ca1', b'ca1b'], dtype='|S5')
    >>> np.char.title(c)
    array([b'A1B C', b'1B Ca', b'B Ca1', b'Ca1B'], dtype='|S5')

    """
    arr = numpy.asarray(a)
    # The result keeps the dtype of the input array.
    out_dtype = arr.dtype
    return _vec_string(arr, out_dtype, 'title')
def _translate_dispatcher(a, table, deletechars=None):
    """Dispatcher used with `array_function_dispatch` for `translate`.

    Returns a 1-tuple holding `a`; `table` and `deletechars` are accepted
    only so the signature mirrors the dispatched function.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_translate_dispatcher)
def translate(a, table, deletechars=None):
    """
    Return, for each element of `a`, a copy of the string in which
    every character found in the optional argument `deletechars` has
    been removed and the remaining characters have been mapped through
    the given translation table.

    Calls `str.translate` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    table : str of length 256

    deletechars : str

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See also
    --------
    str.translate

    """
    arr = numpy.asarray(a)
    if issubclass(arr.dtype.type, unicode_):
        # Unicode input: only the table is forwarded, `deletechars` is
        # not used on this path.
        call_args = (table,)
    else:
        call_args = [table] + _clean_args(deletechars)
    return _vec_string(arr, arr.dtype, 'translate', call_args)
@array_function_dispatch(_unary_op_dispatcher)
def upper(a):
    """
    Return an array whose elements are the uppercase versions of the
    elements of `a`.

    Calls `str.upper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See also
    --------
    str.upper

    Examples
    --------
    >>> c = np.array(['a1b c', '1bca', 'bca1']); c
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')
    >>> np.char.upper(c)
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')

    """
    arr = numpy.asarray(a)
    # The result keeps the dtype of the input array.
    out_dtype = arr.dtype
    return _vec_string(arr, out_dtype, 'upper')
def _zfill_dispatcher(a, width):
    """Dispatcher used with `array_function_dispatch` for `zfill`.

    Returns a 1-tuple holding `a`; `width` is accepted only so the
    signature mirrors the dispatched function.
    """
    relevant = (a,)
    return relevant
@array_function_dispatch(_zfill_dispatcher)
def zfill(a, width):
    """
    Return the numeric string left-filled with zeros

    Calls `str.zfill` element-wise.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.
    width : int
        Width of string to left-fill elements in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See also
    --------
    str.zfill

    """
    arr = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output item size is the largest requested width.
    out_len = int(numpy.max(widths.flat))
    out_dtype = (arr.dtype.type, out_len)
    return _vec_string(arr, out_dtype, 'zfill', (widths,))
@array_function_dispatch(_unary_op_dispatcher)
def isnumeric(a):
    """
    Return, for each element, True when the element consists solely
    of numeric characters.

    Calls `unicode.isnumeric` element-wise.

    Numeric characters include digit characters, and all characters
    that have the Unicode numeric value property, e.g. ``U+2155,
    VULGAR FRACTION ONE FIFTH``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans of same shape as `a`.

    Raises
    ------
    TypeError
        If `_use_unicode(a)` does not resolve to `unicode_`.

    See also
    --------
    unicode.isnumeric

    """
    resolved_type = _use_unicode(a)
    if resolved_type != unicode_:
        raise TypeError("isnumeric is only available for Unicode strings and arrays")
    return _vec_string(a, bool_, 'isnumeric')
@array_function_dispatch(_unary_op_dispatcher)
def isdecimal(a):
    """
    For each element, return True if there are only decimal
    characters in the element.

    Calls `unicode.isdecimal` element-wise.

    Decimal characters include digit characters, and all characters
    that can be used to form decimal-radix numbers,
    e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans identical in shape to `a`.

    Raises
    ------
    TypeError
        If `_use_unicode(a)` does not resolve to `unicode_`.

    See also
    --------
    unicode.isdecimal

    """
    if _use_unicode(a) != unicode_:
        # The message must name this function (it previously said
        # "isnumeric", copied from the sibling function).
        raise TypeError("isdecimal is only available for Unicode strings and arrays")
    return _vec_string(a, bool_, 'isdecimal')
@set_module('numpy')
class chararray(ndarray):
    """
    chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
              strides=None, order=None)

    Provides a convenient view on arrays of string and unicode values.

    .. note::
       The `chararray` class exists for backwards compatibility with
       Numarray, it is not recommended for new development. Starting from numpy
       1.4, if one needs arrays of strings, it is recommended to use arrays of
       `dtype` `object_`, `string_` or `unicode_`, and use the free functions
       in the `numpy.char` module for fast vectorized string operations.

    Versus a regular NumPy array of type `str` or `unicode`, this
    class adds the following functionality:

      1) values automatically have whitespace removed from the end
         when indexed

      2) comparison operators automatically remove whitespace from the
         end when comparing values

      3) vectorized string operations are provided as methods
         (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)

    chararrays should be created using `numpy.char.array` or
    `numpy.char.asarray`, rather than this constructor directly.

    This constructor creates the array, using `buffer` (with `offset`
    and `strides`) if it is not ``None``. If `buffer` is ``None``, then
    constructs a new array with `strides` in "C order", unless both
    ``len(shape) >= 2`` and ``order='F'``, in which case `strides`
    is in "Fortran order".

    Methods
    -------
    astype
    argsort
    copy
    count
    decode
    dump
    dumps
    encode
    endswith
    expandtabs
    fill
    find
    flatten
    getfield
    index
    isalnum
    isalpha
    isdecimal
    isdigit
    islower
    isnumeric
    isspace
    istitle
    isupper
    item
    join
    ljust
    lower
    lstrip
    nonzero
    put
    ravel
    repeat
    replace
    reshape
    resize
    rfind
    rindex
    rjust
    rsplit
    rstrip
    searchsorted
    setfield
    setflags
    sort
    split
    splitlines
    squeeze
    startswith
    strip
    swapaxes
    swapcase
    take
    title
    tofile
    tolist
    tostring
    translate
    transpose
    upper
    view
    zfill

    Parameters
    ----------
    shape : tuple
        Shape of the array.
    itemsize : int, optional
        Length of each array element, in number of characters. Default is 1.
    unicode : bool, optional
        Are the array elements of type unicode (True) or string (False).
        Default is False.
    buffer : object exposing the buffer interface or str, optional
        Memory address of the start of the array data.  Default is None,
        in which case a new array is created.
    offset : int, optional
        Fixed stride displacement from the beginning of an axis?
        Default is 0. Needs to be >=0.
    strides : array_like of ints, optional
        Strides for the array (see `ndarray.strides` for full description).
        Default is None.
    order : {'C', 'F'}, optional
        The order in which the array data is stored in memory: 'C' ->
        "row major" order (the default), 'F' -> "column major"
        (Fortran) order.

    Examples
    --------
    >>> charar = np.chararray((3, 3))
    >>> charar[:] = 'a'
    >>> charar
    chararray([[b'a', b'a', b'a'],
               [b'a', b'a', b'a'],
               [b'a', b'a', b'a']], dtype='|S1')

    >>> charar = np.chararray(charar.shape, itemsize=5)
    >>> charar[:] = 'abc'
    >>> charar
    chararray([[b'abc', b'abc', b'abc'],
               [b'abc', b'abc', b'abc'],
               [b'abc', b'abc', b'abc']], dtype='|S5')

    """
    def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
                offset=0, strides=None, order='C'):
        global _globalvar

        if unicode:
            dtype = unicode_
        else:
            dtype = string_

        # force itemsize to be a Python int, since using NumPy integer
        # types results in itemsize.itemsize being used as the size of
        # strings in the new array.
        itemsize = int(itemsize)

        if isinstance(buffer, str):
            # unicode objects do not have the buffer interface
            filler = buffer
            buffer = None
        else:
            filler = None

        # While _globalvar is set, __array_finalize__ skips its dtype check.
        # try/finally guarantees the flag is cleared again even if the
        # construction below raises; otherwise a single failed call would
        # leave the check disabled for every later chararray.
        _globalvar = 1
        try:
            if buffer is None:
                self = ndarray.__new__(subtype, shape, (dtype, itemsize),
                                       order=order)
            else:
                self = ndarray.__new__(subtype, shape, (dtype, itemsize),
                                       buffer=buffer,
                                       offset=offset, strides=strides,
                                       order=order)
            if filler is not None:
                self[...] = filler
        finally:
            _globalvar = 0
        return self

    def __array_finalize__(self, obj):
        # The b is a special case because it is used for reconstructing.
        if not _globalvar and self.dtype.char not in 'SUbc':
            raise ValueError("Can only create a chararray from string data.")

    def __getitem__(self, obj):
        val = ndarray.__getitem__(self, obj)

        # Scalar results have trailing whitespace removed; a value that is
        # empty after stripping is returned as the empty str ''.
        if isinstance(val, character):
            temp = val.rstrip()
            if len(temp) == 0:
                val = ''
            else:
                val = temp

        return val

    # IMPLEMENTATION NOTE: Most of the methods of this class are
    # direct delegations to the free functions in this module.
    # However, those that return an array of strings should instead
    # return a chararray, so some extra wrapping is required.

    def __eq__(self, other):
        """
        Return (self == other) element-wise.

        See also
        --------
        equal
        """
        return equal(self, other)

    def __ne__(self, other):
        """
        Return (self != other) element-wise.

        See also
        --------
        not_equal
        """
        return not_equal(self, other)

    def __ge__(self, other):
        """
        Return (self >= other) element-wise.

        See also
        --------
        greater_equal
        """
        return greater_equal(self, other)

    def __le__(self, other):
        """
        Return (self <= other) element-wise.

        See also
        --------
        less_equal
        """
        return less_equal(self, other)

    def __gt__(self, other):
        """
        Return (self > other) element-wise.

        See also
        --------
        greater
        """
        return greater(self, other)

    def __lt__(self, other):
        """
        Return (self < other) element-wise.

        See also
        --------
        less
        """
        return less(self, other)

    def __add__(self, other):
        """
        Return (self + other), that is string concatenation,
        element-wise for a pair of array_likes of str or unicode.

        See also
        --------
        add
        """
        return asarray(add(self, other))

    def __radd__(self, other):
        """
        Return (other + self), that is string concatenation,
        element-wise for a pair of array_likes of `string_` or `unicode_`.

        See also
        --------
        add
        """
        return asarray(add(numpy.asarray(other), self))

    def __mul__(self, i):
        """
        Return (self * i), that is string multiple concatenation,
        element-wise.

        See also
        --------
        multiply
        """
        return asarray(multiply(self, i))

    def __rmul__(self, i):
        """
        Return (i * self), that is string multiple concatenation,
        element-wise.

        See also
        --------
        multiply
        """
        return asarray(multiply(self, i))

    def __mod__(self, i):
        """
        Return (self % i), that is pre-Python 2.6 string formatting
        (interpolation), element-wise for a pair of array_likes of `string_`
        or `unicode_`.

        See also
        --------
        mod
        """
        return asarray(mod(self, i))

    def __rmod__(self, other):
        return NotImplemented

    def argsort(self, axis=-1, kind=None, order=None):
        """
        Return the indices that sort the array lexicographically.

        For full documentation see `numpy.argsort`, for which this method is
        in fact merely a "thin wrapper."

        Examples
        --------
        >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
        >>> c = c.view(np.chararray); c
        chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
              dtype='|S5')
        >>> c[c.argsort()]
        chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
              dtype='|S5')

        """
        return self.__array__().argsort(axis, kind, order)
    # The docstring written above is replaced by ndarray.argsort's at
    # class-creation time.
    argsort.__doc__ = ndarray.argsort.__doc__

    def capitalize(self):
        """
        Return a copy of `self` with only the first character of each element
        capitalized.

        See also
        --------
        char.capitalize

        """
        return asarray(capitalize(self))

    def center(self, width, fillchar=' '):
        """
        Return a copy of `self` with its elements centered in a
        string of length `width`.

        See also
        --------
        center
        """
        return asarray(center(self, width, fillchar))

    def count(self, sub, start=0, end=None):
        """
        Returns an array with the number of non-overlapping occurrences of
        substring `sub` in the range [`start`, `end`].

        See also
        --------
        char.count

        """
        return count(self, sub, start, end)

    def decode(self, encoding=None, errors=None):
        """
        Calls `bytes.decode` element-wise.

        See also
        --------
        char.decode

        """
        return decode(self, encoding, errors)

    def encode(self, encoding=None, errors=None):
        """
        Calls `str.encode` element-wise.

        See also
        --------
        char.encode

        """
        return encode(self, encoding, errors)

    def endswith(self, suffix, start=0, end=None):
        """
        Returns a boolean array which is `True` where the string element
        in `self` ends with `suffix`, otherwise `False`.

        See also
        --------
        char.endswith

        """
        return endswith(self, suffix, start, end)

    def expandtabs(self, tabsize=8):
        """
        Return a copy of each string element where all tab characters are
        replaced by one or more spaces.

        See also
        --------
        char.expandtabs

        """
        return asarray(expandtabs(self, tabsize))

    def find(self, sub, start=0, end=None):
        """
        For each element, return the lowest index in the string where
        substring `sub` is found.

        See also
        --------
        char.find

        """
        return find(self, sub, start, end)

    def index(self, sub, start=0, end=None):
        """
        Like `find`, but raises `ValueError` when the substring is not found.

        See also
        --------
        char.index

        """
        return index(self, sub, start, end)

    def isalnum(self):
        """
        Returns true for each element if all characters in the string
        are alphanumeric and there is at least one character, false
        otherwise.

        See also
        --------
        char.isalnum

        """
        return isalnum(self)

    def isalpha(self):
        """
        Returns true for each element if all characters in the string
        are alphabetic and there is at least one character, false
        otherwise.

        See also
        --------
        char.isalpha

        """
        return isalpha(self)

    def isdigit(self):
        """
        Returns true for each element if all characters in the string are
        digits and there is at least one character, false otherwise.

        See also
        --------
        char.isdigit

        """
        return isdigit(self)

    def islower(self):
        """
        Returns true for each element if all cased characters in the
        string are lowercase and there is at least one cased character,
        false otherwise.

        See also
        --------
        char.islower

        """
        return islower(self)

    def isspace(self):
        """
        Returns true for each element if there are only whitespace
        characters in the string and there is at least one character,
        false otherwise.

        See also
        --------
        char.isspace

        """
        return isspace(self)

    def istitle(self):
        """
        Returns true for each element if the element is a titlecased
        string and there is at least one character, false otherwise.

        See also
        --------
        char.istitle

        """
        return istitle(self)

    def isupper(self):
        """
        Returns true for each element if all cased characters in the
        string are uppercase and there is at least one character, false
        otherwise.

        See also
        --------
        char.isupper

        """
        return isupper(self)

    def join(self, seq):
        """
        Return a string which is the concatenation of the strings in the
        sequence `seq`.

        See also
        --------
        char.join

        """
        return join(self, seq)

    def ljust(self, width, fillchar=' '):
        """
        Return an array with the elements of `self` left-justified in a
        string of length `width`.

        See also
        --------
        char.ljust

        """
        return asarray(ljust(self, width, fillchar))

    def lower(self):
        """
        Return an array with the elements of `self` converted to
        lowercase.

        See also
        --------
        char.lower

        """
        return asarray(lower(self))

    def lstrip(self, chars=None):
        """
        For each element in `self`, return a copy with the leading characters
        removed.

        See also
        --------
        char.lstrip

        """
        return asarray(lstrip(self, chars))

    def partition(self, sep):
        """
        Partition each element in `self` around `sep`.

        See also
        --------
        partition
        """
        return asarray(partition(self, sep))

    def replace(self, old, new, count=None):
        """
        For each element in `self`, return a copy of the string with all
        occurrences of substring `old` replaced by `new`.

        See also
        --------
        char.replace

        """
        return asarray(replace(self, old, new, count))

    def rfind(self, sub, start=0, end=None):
        """
        For each element in `self`, return the highest index in the string
        where substring `sub` is found, such that `sub` is contained
        within [`start`, `end`].

        See also
        --------
        char.rfind

        """
        return rfind(self, sub, start, end)

    def rindex(self, sub, start=0, end=None):
        """
        Like `rfind`, but raises `ValueError` when the substring `sub` is
        not found.

        See also
        --------
        char.rindex

        """
        return rindex(self, sub, start, end)

    def rjust(self, width, fillchar=' '):
        """
        Return an array with the elements of `self`
        right-justified in a string of length `width`.

        See also
        --------
        char.rjust

        """
        return asarray(rjust(self, width, fillchar))

    def rpartition(self, sep):
        """
        Partition each element in `self` around `sep`.

        See also
        --------
        rpartition
        """
        return asarray(rpartition(self, sep))

    def rsplit(self, sep=None, maxsplit=None):
        """
        For each element in `self`, return a list of the words in
        the string, using `sep` as the delimiter string.

        See also
        --------
        char.rsplit

        """
        return rsplit(self, sep, maxsplit)

    def rstrip(self, chars=None):
        """
        For each element in `self`, return a copy with the trailing
        characters removed.

        See also
        --------
        char.rstrip

        """
        return asarray(rstrip(self, chars))

    def split(self, sep=None, maxsplit=None):
        """
        For each element in `self`, return a list of the words in the
        string, using `sep` as the delimiter string.

        See also
        --------
        char.split

        """
        return split(self, sep, maxsplit)

    def splitlines(self, keepends=None):
        """
        For each element in `self`, return a list of the lines in the
        element, breaking at line boundaries.

        See also
        --------
        char.splitlines

        """
        return splitlines(self, keepends)

    def startswith(self, prefix, start=0, end=None):
        """
        Returns a boolean array which is `True` where the string element
        in `self` starts with `prefix`, otherwise `False`.

        See also
        --------
        char.startswith

        """
        return startswith(self, prefix, start, end)

    def strip(self, chars=None):
        """
        For each element in `self`, return a copy with the leading and
        trailing characters removed.

        See also
        --------
        char.strip

        """
        return asarray(strip(self, chars))

    def swapcase(self):
        """
        For each element in `self`, return a copy of the string with
        uppercase characters converted to lowercase and vice versa.

        See also
        --------
        char.swapcase

        """
        return asarray(swapcase(self))

    def title(self):
        """
        For each element in `self`, return a titlecased version of the
        string: words start with uppercase characters, all remaining cased
        characters are lowercase.

        See also
        --------
        char.title

        """
        return asarray(title(self))

    def translate(self, table, deletechars=None):
        """
        For each element in `self`, return a copy of the string where
        all characters occurring in the optional argument
        `deletechars` are removed, and the remaining characters have
        been mapped through the given translation table.

        See also
        --------
        char.translate

        """
        return asarray(translate(self, table, deletechars))

    def upper(self):
        """
        Return an array with the elements of `self` converted to
        uppercase.

        See also
        --------
        char.upper

        """
        return asarray(upper(self))

    def zfill(self, width):
        """
        Return the numeric string left-filled with zeros in a string of
        length `width`.

        See also
        --------
        char.zfill

        """
        return asarray(zfill(self, width))

    def isnumeric(self):
        """
        For each element in `self`, return True if there are only
        numeric characters in the element.

        See also
        --------
        char.isnumeric

        """
        return isnumeric(self)

    def isdecimal(self):
        """
        For each element in `self`, return True if there are only
        decimal characters in the element.

        See also
        --------
        char.isdecimal

        """
        return isdecimal(self)
def array(obj, itemsize=None, copy=True, unicode=None, order=None):
    """
    Create a `chararray`.

    .. note::
       This class is provided for numarray backward-compatibility.
       New code (not concerned with numarray compatibility) should use
       arrays of type `string_` or `unicode_` and use the free functions
       in :mod:`numpy.char <numpy.core.defchararray>` for fast
       vectorized string operations instead.

    Versus a regular NumPy array of type `str` or `unicode`, this
    class adds the following functionality:

      1) values automatically have whitespace removed from the end
         when indexed

      2) comparison operators automatically remove whitespace from the
         end when comparing values

      3) vectorized string operations are provided as methods
         (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)

    Parameters
    ----------
    obj : array of str or unicode-like

    itemsize : int, optional
        `itemsize` is the number of characters per scalar in the
        resulting array.  If `itemsize` is None, and `obj` is an
        object array or a Python list, the `itemsize` will be
        automatically determined.  If `itemsize` is provided and `obj`
        is of type str or unicode, then the `obj` string will be
        chunked into `itemsize` pieces.

    copy : bool, optional
        If true (default), then the object is copied.  Otherwise, a copy
        will only be made if __array__ returns a copy, if obj is a
        nested sequence, or if a copy is needed to satisfy any of the other
        requirements (`itemsize`, unicode, `order`, etc.).

    unicode : bool, optional
        When true, the resulting `chararray` can contain Unicode
        characters, when false only 8-bit characters.  If unicode is
        None and `obj` is one of the following:

          - a `chararray`,
          - an ndarray of type `str` or `unicode`
          - a Python str or unicode object,

        then the unicode setting of the output array will be
        automatically determined.

    order : {'C', 'F', 'A'}, optional
        Specify the order of the array.  If order is 'C' (default), then the
        array will be in C-contiguous order (last-index varies the
        fastest).  If order is 'F', then the returned array
        will be in Fortran-contiguous order (first-index varies the
        fastest).  If order is 'A', then the returned array may
        be in any order (either C-, Fortran-contiguous, or even
        discontiguous).
    """
    if isinstance(obj, (bytes, str)):
        if unicode is None:
            if isinstance(obj, str):
                unicode = True
            else:
                unicode = False

        if itemsize is None:
            itemsize = len(obj)
        # Number of `itemsize`-character chunks the string is cut into.
        shape = len(obj) // itemsize

        return chararray(shape, itemsize=itemsize, unicode=unicode,
                         buffer=obj, order=order)

    if isinstance(obj, (list, tuple)):
        obj = numpy.asarray(obj)

    if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
        # If we just have a vanilla chararray, create a chararray
        # view around it.
        if not isinstance(obj, chararray):
            obj = obj.view(chararray)

        if itemsize is None:
            itemsize = obj.itemsize
            # itemsize is in 8-bit chars, so for Unicode, we need
            # to divide by the size of a single Unicode character,
            # which for NumPy is always 4
            if issubclass(obj.dtype.type, unicode_):
                itemsize //= 4

        if unicode is None:
            if issubclass(obj.dtype.type, unicode_):
                unicode = True
            else:
                unicode = False

        if unicode:
            dtype = unicode_
        else:
            dtype = string_

        if order is not None:
            # NOTE(review): numpy.asarray returns a base-class ndarray, so
            # the chararray view made above appears to be dropped on this
            # path -- confirm whether that is intended.
            obj = numpy.asarray(obj, order=order)
        # Convert when a copy is requested, the item size differs, or the
        # requested character kind differs from the array's own.  The kind
        # has to be read from the dtype: `obj` is an ndarray, so testing
        # `isinstance(obj, unicode_)` / `isinstance(obj, string_)` on it
        # could never be true and a mismatching kind was silently kept.
        if (copy or
                (itemsize != obj.itemsize) or
                (not unicode and issubclass(obj.dtype.type, unicode_)) or
                (unicode and issubclass(obj.dtype.type, string_))):
            obj = obj.astype((dtype, int(itemsize)))
        return obj

    # NOTE(review): `object` here is the Python builtin, which every dtype
    # type subclasses, so this branch is taken for any ndarray that reaches
    # it; `object_` may have been intended -- confirm before changing, since
    # it would alter how non-object arrays are sized.
    if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
        if itemsize is None:
            # Since no itemsize was specified, convert the input array to
            # a list so the ndarray constructor will automatically
            # determine the itemsize for us.
            obj = obj.tolist()
            # Fall through to the default case

    if unicode:
        dtype = unicode_
    else:
        dtype = string_

    if itemsize is None:
        val = narray(obj, dtype=dtype, order=order, subok=True)
    else:
        val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
    return val.view(chararray)
def asarray(obj, itemsize=None, unicode=None, order=None):
    """
    Return `obj` as a `chararray`, avoiding a copy whenever possible.

    This is a thin wrapper that forwards every argument to `array`
    with ``copy=False``.

    Compared with a plain NumPy array of type `str` or `unicode`, a
    `chararray` offers the following extras:

      1) trailing whitespace is stripped from values when they are
         indexed

      2) comparison operators ignore trailing whitespace when
         comparing values

      3) vectorized string operations are available as methods
         (e.g. `str.endswith`) and as infix operators
         (e.g. ``+``, ``*``, ``%``)

    Parameters
    ----------
    obj : array of str or unicode-like
        The input to convert.

    itemsize : int, optional
        Number of characters per scalar in the resulting array.  When
        `itemsize` is None and `obj` is an object array or a Python
        list, the `itemsize` is determined automatically.  When
        `itemsize` is given and `obj` is itself a str or unicode
        object, the string is split into chunks of `itemsize`
        characters each.

    unicode : bool, optional
        If true, the resulting `chararray` can hold Unicode
        characters; if false, only 8-bit characters.  When `unicode`
        is None and `obj` is one of:

          - a `chararray`,
          - an ndarray of type `str` or `unicode`,
          - a Python str or unicode object,

        the unicode setting of the result is determined
        automatically from `obj`.

    order : {'C', 'F'}, optional
        Requested memory layout of the array.  'C' asks for
        C-contiguous order (last index varies the fastest) and 'F'
        asks for Fortran-contiguous order (first index varies the
        fastest).  The default, None, is forwarded to `array`
        unchanged.
    """
    # Delegate to `array`; copy=False is the only difference between
    # the two entry points.
    return array(obj, itemsize, unicode=unicode, order=order, copy=False)