Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/PyPDF2/utils.py : 36%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Copyright (c) 2006, Mathieu Fenniak
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8# * Redistributions of source code must retain the above copyright notice,
9# this list of conditions and the following disclaimer.
10# * Redistributions in binary form must reproduce the above copyright notice,
11# this list of conditions and the following disclaimer in the documentation
12# and/or other materials provided with the distribution.
13# * The name of the author may not be used to endorse or promote products
14# derived from this software without specific prior written permission.
15#
16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26# POSSIBILITY OF SUCH DAMAGE.
28"""
29Utility functions for PDF library.
30"""
31__author__ = "Mathieu Fenniak"
32__author_email__ = "biziqe@mathieu.fenniak.net"
35import sys
37try:
38 import __builtin__ as builtins
39except ImportError: # Py3
40 import builtins
# Python 2/3 compatibility aliases. Each getattr() resolves to the Python 2
# builtin when it exists and falls back to the Python 3 equivalent otherwise.
_basestring = getattr(builtins, "basestring", str)
string_type = getattr(builtins, "unicode", str)
xrange_fn = getattr(builtins, "xrange", range)
bytes_type = bytes  # the same object as type(bytes()) on Python 2.X and 3.X
if sys.version_info[0] < 3:
    int_types = (int, long)
else:
    int_types = (int,)
51# Make basic type tests more consistent
def isString(s):
    """Return True if s is an instance of the module's base string type.

    Works on both Python 2 and Python 3.
    """
    string_base = _basestring
    return isinstance(s, string_base)
def isInt(n):
    """Return True if n is an instance of one of the integer types.

    Works on both Python 2 and Python 3.
    """
    accepted = int_types
    return isinstance(n, accepted)
def isBytes(b):
    """Return True if b is an instance of the bytes type.

    Works on both Python 2 and Python 3.
    """
    accepted = bytes_type
    return isinstance(b, accepted)
67#custom implementation of warnings.formatwarning
def formatWarning(message, category, filename, lineno, line=None):
    """
    Custom replacement for warnings.formatwarning.

    Produces "<CategoryName>: <message> [<file name>:<lineno>]" followed by
    a newline, where the file name is the last path component of filename.
    Both "/" and "\\" are treated as path separators.  The line argument is
    accepted for signature compatibility and is not used.
    """
    # Take the last component ([-1]) rather than [1]: a filename without any
    # separator splits into a single-element list, which used to raise
    # IndexError.
    base_name = filename.replace("/", "\\").rsplit("\\", 1)[-1]
    return "%s: %s [%s:%s]\n" % (category.__name__, message, base_name, lineno)
def readUntilWhitespace(stream, maxchars=None):
    """
    Collects characters from the stream one at a time and returns them.

    Reading ends when a whitespace character is read (it is consumed but not
    returned), when the stream yields nothing more, or as soon as the
    collected length equals maxchars.
    """
    collected = b_("")
    while True:
        char = stream.read(1)
        if char.isspace() or not char:
            return collected
        collected += char
        if len(collected) == maxchars:
            return collected
def readNonWhitespace(stream):
    """
    Reads single characters until one is not in WHITESPACES and returns it.

    An empty read (nothing left in the stream) is not in WHITESPACES, so it
    ends the loop and is returned as-is.
    """
    char = stream.read(1)
    while char in WHITESPACES:
        char = stream.read(1)
    return char
def skipOverWhitespace(stream):
    """
    Similar to readNonWhitespace, but returns a Boolean instead of the
    character: True if at least one whitespace character was skipped before
    a non-whitespace read, False otherwise.

    The first non-whitespace character is consumed from the stream.
    """
    # Count every read, including the final non-whitespace one, so a count
    # above one means some whitespace was skipped.
    reads = 1
    while stream.read(1) in WHITESPACES:
        reads += 1
    return reads > 1
def skipOverComment(stream):
    """
    If the next character in the stream is '%', consumes characters up to
    and including the first newline or carriage return.  Otherwise the
    stream position is left unchanged.

    Reaching the end of the stream also ends the comment.
    """
    tok = stream.read(1)
    # Step back only over what was actually read; at end-of-stream nothing
    # was consumed, so there is nothing to undo.
    stream.seek(-len(tok), 1)
    if tok == b_('%'):
        while tok not in (b_('\n'), b_('\r')):
            tok = stream.read(1)
            if not tok:
                # End of stream inside a comment: without this check the
                # loop would keep reading empty results forever.
                break
def readUntilRegex(stream, regex, ignore_eof=False):
    """
    Reads until the regular expression pattern matches and returns everything
    read before the match.  The match itself is not consumed: the stream is
    left positioned at the start of the match.

    :param stream: stream supporting read() and relative seek().
    :param regex: compiled pattern; only its search() method is used.
    :param bool ignore_eof: If true, ignore end-of-file and return what has
        been read so far instead of raising.
    :raises PdfStreamError: on premature end-of-file when ignore_eof is false.
    """
    # NOTE: the pattern is searched in each 16-byte chunk independently, so a
    # match that straddles two chunks is not detected.
    chunks = []
    while True:
        tok = stream.read(16)
        if not tok:
            # stream has truncated prematurely
            if ignore_eof:
                break
            raise PdfStreamError("Stream has ended unexpectedly")
        m = regex.search(tok)
        if m is not None:
            chunks.append(tok[:m.start()])
            # Rewind so the next read starts at the beginning of the match.
            stream.seek(m.start() - len(tok), 1)
            break
        chunks.append(tok)
    return b_('').join(chunks)
class ConvertFunctionsToVirtualList(object):
    """
    Read-only, list-like view backed by two callables: one returning the
    length and one returning the item at a given non-negative index.
    """

    def __init__(self, lengthFunction, getFunction):
        self.getFunction = getFunction
        self.lengthFunction = lengthFunction

    def __len__(self):
        return self.lengthFunction()

    def __getitem__(self, index):
        if isinstance(index, slice):
            # A slice yields another virtual list that maps its own positions
            # onto the selected indices of this one.
            selected = xrange_fn(*index.indices(len(self)))
            return type(self)(selected.__len__,
                              lambda pos: self[selected[pos]])
        if not isInt(index):
            raise TypeError("sequence indices must be integers")
        size = len(self)
        # support negative indexes
        position = size + index if index < 0 else index
        if not 0 <= position < size:
            raise IndexError("sequence index out of range")
        return self.getFunction(position)
def RC4_encrypt(key, plaintext):
    """
    Applies the RC4 stream cipher to plaintext using key and returns the
    result as a byte string of the same length.

    The elements of key and plaintext are converted with ord_(), so both
    integer byte values and one-character strings are accepted.
    """
    # Key scheduling: permute the 256-entry state table according to the key.
    S = list(range(256))
    key_length = len(key)
    j = 0
    for i in range(256):
        j = (j + S[i] + ord_(key[i % key_length])) % 256
        S[i], S[j] = S[j], S[i]
    # Keystream generation: XOR one keystream byte into each input byte.
    # A bytearray is used because repeated "+=" on an immutable byte string
    # copies the whole result on every iteration.
    i, j = 0, 0
    output = bytearray()
    for symbol in plaintext:
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        t = S[(S[i] + S[j]) % 256]
        output.append(ord_(symbol) ^ t)
    return bytes(output)
def matrixMultiply(a, b):
    """
    Returns the matrix product of a and b as a list of rows of floats.

    Both arguments are sequences of rows; every element is converted with
    float() before multiplying.
    """
    product = []
    for row in a:
        result_row = []
        for col in zip(*b):
            terms = [float(x) * float(y) for x, y in zip(row, col)]
            result_row.append(sum(terms))
        product.append(result_row)
    return product
def markLocation(stream):
    """
    Creates the file 'PyPDF2_pdfLocation.txt' showing the current stream
    location in context: up to RADIUS bytes before the current position, the
    marker HERE, then up to RADIUS bytes after it.

    The stream position is restored before returning.
    """
    # Mainly for debugging
    RADIUS = 5000
    here = stream.tell()
    # Clamp at the start of the stream: seeking to a negative offset fails
    # when fewer than RADIUS bytes precede the current position.
    start = max(here - RADIUS, 0)
    try:
        stream.seek(start, 0)
        # Binary mode: the stream yields bytes, which cannot be written to a
        # text-mode file on Python 3.  The with-block closes the file even if
        # a read or write fails.
        with open('PyPDF2_pdfLocation.txt', 'wb') as outputDoc:
            outputDoc.write(stream.read(here - start))
            outputDoc.write(b'HERE')
            outputDoc.write(stream.read(RADIUS))
    finally:
        stream.seek(here, 0)
class PyPdfError(Exception):
    """Base class of the exception types defined in this module."""


class PdfReadError(PyPdfError):
    """PyPdfError subclass; PdfStreamError derives from it."""


class PageSizeNotDefinedError(PyPdfError):
    """PyPdfError subclass (by its name, for a missing page size)."""


class PdfReadWarning(UserWarning):
    """Warning category derived from UserWarning."""


class PdfStreamError(PdfReadError):
    """PdfReadError subclass raised when a stream ends unexpectedly."""
if sys.version_info[0] < 3:
    def b_(s):
        """Return s unchanged (Python 2 str is already a byte string)."""
        return s
else:
    # Cache of encoded results for strings shorter than two characters.
    B_CACHE = {}

    def b_(s):
        """
        Return s as bytes.

        bytes instances (including subclasses) are returned unchanged; any
        other value is encoded with latin-1.  Results for strings shorter
        than two characters are kept in B_CACHE.
        """
        # Check the type before touching the cache so that bytes values are
        # never used as keys in a dictionary keyed by str.
        if isinstance(s, bytes):
            return s
        cached = B_CACHE.get(s)
        if cached is not None:
            return cached
        encoded = s.encode('latin-1')
        if len(s) < 2:
            B_CACHE[s] = encoded
        return encoded
def u_(s):
    """
    Return s as a text string.

    On Python 2 the value is decoded with the 'unicode_escape' codec; on
    Python 3 it is returned unchanged.
    """
    if sys.version_info[0] >= 3:
        return s
    return unicode(s, 'unicode_escape')
def str_(b):
    """
    Return b as a native str.

    On Python 2 the value is returned unchanged.  On Python 3, bytes
    instances (including subclasses) are decoded with latin-1 and anything
    else is returned unchanged.
    """
    if sys.version_info[0] < 3:
        return b
    # isinstance rather than an exact type comparison, so that bytes
    # subclasses are decoded too.
    if isinstance(b, bytes):
        return b.decode('latin-1')
    return b
def ord_(b):
    """
    Return the integer code for b.

    On Python 2, and for str values on Python 3, this is ord(b); any other
    value on Python 3 is returned unchanged.
    """
    if sys.version_info[0] < 3:
        return ord(b)
    if type(b) == str:
        return ord(b)
    return b
def chr_(c):
    """
    Return c unchanged on Python 2 and chr(c) on Python 3.
    """
    if sys.version_info[0] >= 3:
        return chr(c)
    return c
def barray(b):
    """
    Return b unchanged on Python 2 and bytearray(b) on Python 3.
    """
    if sys.version_info[0] >= 3:
        return bytearray(b)
    return b
def hexencode(b):
    """
    Return the hexadecimal encoding of the byte string b.

    Python 2 uses the 'hex' string codec; Python 3 goes through the
    'hex_codec' encoder from the codecs module and returns bytes.
    """
    if sys.version_info[0] >= 3:
        import codecs
        encoded, _consumed = codecs.getencoder('hex_codec')(b)
        return encoded
    return b.encode('hex')
def hexStr(num):
    """
    Return hex(num) with every 'L' character removed (Python 2 appends an
    'L' suffix to the hex form of long integers).
    """
    text = hex(num)
    return text.replace('L', '')
# Byte-string forms of the characters treated as whitespace by the
# stream-reading helpers: space, newline, carriage return, tab and NUL.
WHITESPACES = [b_(char) for char in (' ', '\n', '\r', '\t', '\x00')]