Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/scipy/sparse/csr.py : 23%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Compressed Sparse Row matrix format"""
3__docformat__ = "restructuredtext en"
5__all__ = ['csr_matrix', 'isspmatrix_csr']
7import numpy as np
9from .base import spmatrix
10from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks,
11 get_csr_submatrix)
12from .sputils import upcast, get_index_dtype
14from .compressed import _cs_matrix
17class csr_matrix(_cs_matrix):
18 """
19 Compressed Sparse Row matrix
21 This can be instantiated in several ways:
22 csr_matrix(D)
23 with a dense matrix or rank-2 ndarray D
25 csr_matrix(S)
26 with another sparse matrix S (equivalent to S.tocsr())
28 csr_matrix((M, N), [dtype])
29 to construct an empty matrix with shape (M, N)
30 dtype is optional, defaulting to dtype='d'.
32 csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
33 where ``data``, ``row_ind`` and ``col_ind`` satisfy the
34 relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
36 csr_matrix((data, indices, indptr), [shape=(M, N)])
37 is the standard CSR representation where the column indices for
38 row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
39 corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
40 If the shape parameter is not supplied, the matrix dimensions
41 are inferred from the index arrays.
43 Attributes
44 ----------
45 dtype : dtype
46 Data type of the matrix
47 shape : 2-tuple
48 Shape of the matrix
49 ndim : int
50 Number of dimensions (this is always 2)
51 nnz
52 Number of stored values, including explicit zeros
53 data
54 CSR format data array of the matrix
55 indices
56 CSR format index array of the matrix
57 indptr
58 CSR format index pointer array of the matrix
59 has_sorted_indices
60 Whether indices are sorted
62 Notes
63 -----
65 Sparse matrices can be used in arithmetic operations: they support
66 addition, subtraction, multiplication, division, and matrix power.
68 Advantages of the CSR format
69 - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
70 - efficient row slicing
71 - fast matrix vector products
73 Disadvantages of the CSR format
74 - slow column slicing operations (consider CSC)
75 - changes to the sparsity structure are expensive (consider LIL or DOK)
77 Examples
78 --------
80 >>> import numpy as np
81 >>> from scipy.sparse import csr_matrix
82 >>> csr_matrix((3, 4), dtype=np.int8).toarray()
83 array([[0, 0, 0, 0],
84 [0, 0, 0, 0],
85 [0, 0, 0, 0]], dtype=int8)
87 >>> row = np.array([0, 0, 1, 2, 2, 2])
88 >>> col = np.array([0, 2, 2, 0, 1, 2])
89 >>> data = np.array([1, 2, 3, 4, 5, 6])
90 >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
91 array([[1, 0, 2],
92 [0, 0, 3],
93 [4, 5, 6]])
95 >>> indptr = np.array([0, 2, 3, 6])
96 >>> indices = np.array([0, 2, 2, 0, 1, 2])
97 >>> data = np.array([1, 2, 3, 4, 5, 6])
98 >>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
99 array([[1, 0, 2],
100 [0, 0, 3],
101 [4, 5, 6]])
103 Duplicate entries are summed together:
105 >>> row = np.array([0, 1, 2, 0])
106 >>> col = np.array([0, 1, 1, 0])
107 >>> data = np.array([1, 2, 4, 8])
108 >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
109 array([[9, 0, 0],
110 [0, 2, 0],
111 [0, 4, 0]])
113 As an example of how to construct a CSR matrix incrementally,
114 the following snippet builds a term-document matrix from texts:
116 >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
117 >>> indptr = [0]
118 >>> indices = []
119 >>> data = []
120 >>> vocabulary = {}
121 >>> for d in docs:
122 ... for term in d:
123 ... index = vocabulary.setdefault(term, len(vocabulary))
124 ... indices.append(index)
125 ... data.append(1)
126 ... indptr.append(len(indices))
127 ...
128 >>> csr_matrix((data, indices, indptr), dtype=int).toarray()
129 array([[2, 1, 0, 0],
130 [0, 1, 1, 1]])
132 """
133 format = 'csr'
135 def transpose(self, axes=None, copy=False):
136 if axes is not None:
137 raise ValueError(("Sparse matrices do not support "
138 "an 'axes' parameter because swapping "
139 "dimensions is the only logical permutation."))
141 M, N = self.shape
143 from .csc import csc_matrix
144 return csc_matrix((self.data, self.indices,
145 self.indptr), shape=(N, M), copy=copy)
147 transpose.__doc__ = spmatrix.transpose.__doc__
149 def tolil(self, copy=False):
150 from .lil import lil_matrix
151 lil = lil_matrix(self.shape,dtype=self.dtype)
153 self.sum_duplicates()
154 ptr,ind,dat = self.indptr,self.indices,self.data
155 rows, data = lil.rows, lil.data
157 for n in range(self.shape[0]):
158 start = ptr[n]
159 end = ptr[n+1]
160 rows[n] = ind[start:end].tolist()
161 data[n] = dat[start:end].tolist()
163 return lil
165 tolil.__doc__ = spmatrix.tolil.__doc__
167 def tocsr(self, copy=False):
168 if copy:
169 return self.copy()
170 else:
171 return self
173 tocsr.__doc__ = spmatrix.tocsr.__doc__
175 def tocsc(self, copy=False):
176 idx_dtype = get_index_dtype((self.indptr, self.indices),
177 maxval=max(self.nnz, self.shape[0]))
178 indptr = np.empty(self.shape[1] + 1, dtype=idx_dtype)
179 indices = np.empty(self.nnz, dtype=idx_dtype)
180 data = np.empty(self.nnz, dtype=upcast(self.dtype))
182 csr_tocsc(self.shape[0], self.shape[1],
183 self.indptr.astype(idx_dtype),
184 self.indices.astype(idx_dtype),
185 self.data,
186 indptr,
187 indices,
188 data)
190 from .csc import csc_matrix
191 A = csc_matrix((data, indices, indptr), shape=self.shape)
192 A.has_sorted_indices = True
193 return A
195 tocsc.__doc__ = spmatrix.tocsc.__doc__
197 def tobsr(self, blocksize=None, copy=True):
198 from .bsr import bsr_matrix
200 if blocksize is None:
201 from .spfuncs import estimate_blocksize
202 return self.tobsr(blocksize=estimate_blocksize(self))
204 elif blocksize == (1,1):
205 arg1 = (self.data.reshape(-1,1,1),self.indices,self.indptr)
206 return bsr_matrix(arg1, shape=self.shape, copy=copy)
208 else:
209 R,C = blocksize
210 M,N = self.shape
212 if R < 1 or C < 1 or M % R != 0 or N % C != 0:
213 raise ValueError('invalid blocksize %s' % blocksize)
215 blks = csr_count_blocks(M,N,R,C,self.indptr,self.indices)
217 idx_dtype = get_index_dtype((self.indptr, self.indices),
218 maxval=max(N//C, blks))
219 indptr = np.empty(M//R+1, dtype=idx_dtype)
220 indices = np.empty(blks, dtype=idx_dtype)
221 data = np.zeros((blks,R,C), dtype=self.dtype)
223 csr_tobsr(M, N, R, C,
224 self.indptr.astype(idx_dtype),
225 self.indices.astype(idx_dtype),
226 self.data,
227 indptr, indices, data.ravel())
229 return bsr_matrix((data,indices,indptr), shape=self.shape)
231 tobsr.__doc__ = spmatrix.tobsr.__doc__
233 # these functions are used by the parent class (_cs_matrix)
234 # to remove redudancy between csc_matrix and csr_matrix
235 def _swap(self, x):
236 """swap the members of x if this is a column-oriented matrix
237 """
238 return x
240 def __iter__(self):
241 indptr = np.zeros(2, dtype=self.indptr.dtype)
242 shape = (1, self.shape[1])
243 i0 = 0
244 for i1 in self.indptr[1:]:
245 indptr[1] = i1 - i0
246 indices = self.indices[i0:i1]
247 data = self.data[i0:i1]
248 yield csr_matrix((data, indices, indptr), shape=shape, copy=True)
249 i0 = i1
251 def getrow(self, i):
252 """Returns a copy of row i of the matrix, as a (1 x n)
253 CSR matrix (row vector).
254 """
255 M, N = self.shape
256 i = int(i)
257 if i < 0:
258 i += M
259 if i < 0 or i >= M:
260 raise IndexError('index (%d) out of range' % i)
261 indptr, indices, data = get_csr_submatrix(
262 M, N, self.indptr, self.indices, self.data, i, i + 1, 0, N)
263 return csr_matrix((data, indices, indptr), shape=(1, N),
264 dtype=self.dtype, copy=False)
266 def getcol(self, i):
267 """Returns a copy of column i of the matrix, as a (m x 1)
268 CSR matrix (column vector).
269 """
270 M, N = self.shape
271 i = int(i)
272 if i < 0:
273 i += N
274 if i < 0 or i >= N:
275 raise IndexError('index (%d) out of range' % i)
276 indptr, indices, data = get_csr_submatrix(
277 M, N, self.indptr, self.indices, self.data, 0, M, i, i + 1)
278 return csr_matrix((data, indices, indptr), shape=(M, 1),
279 dtype=self.dtype, copy=False)
281 def _get_intXarray(self, row, col):
282 return self.getrow(row)._minor_index_fancy(col)
284 def _get_intXslice(self, row, col):
285 if col.step in (1, None):
286 return self._get_submatrix(row, col, copy=True)
287 # TODO: uncomment this once it's faster:
288 # return self.getrow(row)._minor_slice(col)
290 M, N = self.shape
291 start, stop, stride = col.indices(N)
293 ii, jj = self.indptr[row:row+2]
294 row_indices = self.indices[ii:jj]
295 row_data = self.data[ii:jj]
297 if stride > 0:
298 ind = (row_indices >= start) & (row_indices < stop)
299 else:
300 ind = (row_indices <= start) & (row_indices > stop)
302 if abs(stride) > 1:
303 ind &= (row_indices - start) % stride == 0
305 row_indices = (row_indices[ind] - start) // stride
306 row_data = row_data[ind]
307 row_indptr = np.array([0, len(row_indices)])
309 if stride < 0:
310 row_data = row_data[::-1]
311 row_indices = abs(row_indices[::-1])
313 shape = (1, int(np.ceil(float(stop - start) / stride)))
314 return csr_matrix((row_data, row_indices, row_indptr), shape=shape,
315 dtype=self.dtype, copy=False)
317 def _get_sliceXint(self, row, col):
318 if row.step in (1, None):
319 return self._get_submatrix(row, col, copy=True)
320 return self._major_slice(row)._get_submatrix(minor=col)
322 def _get_sliceXarray(self, row, col):
323 return self._major_slice(row)._minor_index_fancy(col)
325 def _get_arrayXint(self, row, col):
326 return self._major_index_fancy(row)._get_submatrix(minor=col)
328 def _get_arrayXslice(self, row, col):
329 if col.step not in (1, None):
330 col = np.arange(*col.indices(self.shape[1]))
331 return self._get_arrayXarray(row, col)
332 return self._major_index_fancy(row)._get_submatrix(minor=col)
335def isspmatrix_csr(x):
336 """Is x of csr_matrix type?
338 Parameters
339 ----------
340 x
341 object to check for being a csr matrix
343 Returns
344 -------
345 bool
346 True if x is a csr matrix, False otherwise
348 Examples
349 --------
350 >>> from scipy.sparse import csr_matrix, isspmatrix_csr
351 >>> isspmatrix_csr(csr_matrix([[5]]))
352 True
354 >>> from scipy.sparse import csc_matrix, csr_matrix, isspmatrix_csc
355 >>> isspmatrix_csr(csc_matrix([[5]]))
356 False
357 """
358 return isinstance(x, csr_matrix)