Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

"""Implementation of JSONDecoder 

""" 

import re 

import sys 

import struct 

 

from json import scanner 

try: 

    from _json import scanstring as c_scanstring 

except ImportError: 

    c_scanstring = None 

 

# Only the decoder class is exported by ``from <module> import *``.
__all__ = ['JSONDecoder']

# Flags shared by every regular expression compiled in this module
# (STRINGCHUNK and WHITESPACE).  DOTALL lets ``.`` match newlines, which
# matters when scanning string contents in non-strict mode.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

 

def _floatconstants():
    """Return the IEEE 754 special values as a ``(nan, inf, -inf)`` tuple.

    The values are built with ``float()`` rather than by unpacking a
    hex-encoded byte string: the ``'hex'`` str codec only exists on
    Python 2, and ``float('nan')`` / ``float('inf')`` need no manual
    byte-order handling.
    """
    nan = float('nan')
    inf = float('inf')
    return nan, inf, -inf


# Module-level float constants backing the non-standard JSON literals
# ``NaN``, ``Infinity`` and ``-Infinity``.
NaN, PosInf, NegInf = _floatconstants()

 

 

def linecol(doc, pos):
    """Translate the character offset ``pos`` in ``doc`` into a 1-based
    ``(line, column)`` pair.
    """
    # rfind() yields -1 when no newline precedes ``pos``; subtracting it
    # gives ``pos + 1``, i.e. the 1-based column on the first line.
    previous_newline = doc.rfind('\n', 0, pos)
    line = doc.count('\n', 0, pos) + 1
    return line, pos - previous_newline

 

 

def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location of an error in ``doc``.

    ``pos`` is the character offset of the error.  When ``end`` is given,
    the message describes the range ``pos`` .. ``end`` instead of a
    single position.  Line and column numbers are 1-based.
    """
    # Note that this function is called from _json
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = ('{0}: line {1} column {2} - line {3} column {4} '
                    '(char {5} - {6})')
        return template.format(
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    template = '{0}: line {1} column {2} (char {3})'
    return template.format(msg, start_line, start_col, pos)

 

 

# Non-standard JSON literals mapped to their float values.  The decoder
# uses this table's ``__getitem__`` as the default ``parse_constant``.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Matches (lazily) a run of ordinary characters followed by the first
# "interesting" character: a double quote, a backslash, or a control
# character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes (the character after a backslash) mapped to
# the unicode character they stand for.  ``\uXXXX`` is handled separately.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding assumed for byte strings when the caller passes ``encoding=None``.
DEFAULT_ENCODING = "utf-8"

 

def _decode_hex4(digits):
    """Return the value of exactly four hexadecimal digits, or None.

    ``int(x, 16)`` on its own is too permissive for JSON: it also accepts
    a ``0x`` prefix, a sign and surrounding whitespace.  Every character
    is therefore checked explicitly before converting.
    """
    if len(digits) != 4:
        return None
    for ch in digits:
        if ch not in '0123456789abcdefABCDEF':
            return None
    return int(digits, 16)


def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` is used to decode chunks of ``s`` that are not already
    ``unicode``; it defaults to DEFAULT_ENCODING.  ``_b`` and ``_m`` are
    default-argument bindings of the escape table and the chunk matcher
    and are not meant to be passed by callers.

    Raises ValueError for an unterminated string, a literal control
    character in strict mode, an unknown backslash escape, a ``\\uXXXX``
    escape whose four characters are not all hex digits, and (on UCS-4
    builds) a high surrogate that is not followed by a valid low
    surrogate escape.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated string" errors
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence: "u" followed by four hex digits
            uni = _decode_hex4(s[end + 1:end + 5])
            next_end = end + 5
            if uni is None:
                msg = "Invalid \\uXXXX escape"
                raise ValueError(errmsg(msg, s, end))
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise ValueError(errmsg(msg, s, end))
                uni2 = _decode_hex4(s[end + 7:end + 11])
                # The second escape must be a genuine low surrogate;
                # otherwise the combination below would silently yield
                # an unrelated code point.
                if uni2 is None or not 0xdc00 <= uni2 <= 0xdfff:
                    raise ValueError(errmsg(msg, s, end))
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end

 

 

# Use speedup if available: prefer the C implementation imported from
# ``_json`` and fall back to the pure-Python scanner when it is missing
# (``c_scanstring`` is None in that case).
scanstring = c_scanstring or py_scanstring

# Regex matching a (possibly empty) run of JSON whitespace, and the same
# character set as a plain string for cheap ``in`` membership tests.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'

 

def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object and return ``(result, end)``.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index at
    which the first key (or the closing ``}``) is expected -- presumably
    just past the opening ``{``; the caller is not visible here.
    ``encoding`` and ``strict`` are forwarded to ``scanstring`` for keys.
    ``scan_once(s, idx)`` must return ``(value, end)`` and raise
    StopIteration when no value starts at ``idx``.

    The key/value pairs are collected in document order.  If
    ``object_pairs_hook`` is not None it is called with the list of pairs
    and its result is returned; otherwise a dict is built and, if
    ``object_hook`` is not None, passed through it.  The returned ``end``
    is the index just after the closing ``}``.

    Raises ValueError for a missing or unquoted property name, a missing
    ``:`` or ``,`` delimiter, or a missing value.  ``_w`` and ``_ws`` are
    default-argument bindings of the whitespace helpers.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        # Note: an empty ``nextchar`` (end of input) also passes this
        # test, since '' is "in" any string.
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end))
    # Step over the opening quote of the first key
    end += 1
    while True:
        # ``end`` points just after the key's opening quote here
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting ':' delimiter", s, end))
        end += 1

        # Skip whitespace before the value; a single whitespace character
        # is handled without a regex call.  Running off the end of the
        # input is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs_append((key, value))

        # Read the character after the value (skipping whitespace); an
        # empty string stands for end of input.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        # ``end`` now points just past ``nextchar``
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            # ``end - 1`` is the position of the offending character
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))

        # After a comma the next non-whitespace character must open a key
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        # Step over the key's opening quote (validated just below)
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end - 1))
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end

 

def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array and return ``(values, end)``.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index at
    which the first element (or the closing ``]``) is expected.
    ``scan_once(s, idx)`` must return ``(value, end)`` and raise
    StopIteration when no value starts at ``idx``.

    Returns the list of decoded values and the index just after the
    closing ``]``.  Raises ValueError when a value or a ``,`` delimiter
    is missing.  ``_w`` and ``_ws`` are default-argument bindings of the
    whitespace helpers.
    """
    s, end = s_and_end
    values = []
    # Slices are used instead of indexing so that running off the end of
    # the input yields '' rather than raising IndexError.
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        # ``end`` now points just past ``nextchar``
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # Report the position of the offending character itself
            # (``end`` was already advanced past it), matching JSONObject.
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
        # Skip whitespace before the next element; a single whitespace
        # character is handled without a regex call.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end

 

class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default JSON values are translated to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    As an extension to the JSON spec, ``NaN``, ``Infinity`` and
    ``-Infinity`` are also accepted and decoded to the corresponding
    ``float`` values.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure the decoder.

        ``encoding`` is the encoding used to interpret ``str`` input
        (utf-8 by default); it has no effect on ``unicode`` input.  Only
        encodings that are a superset of ASCII currently work; text in
        any other encoding should be passed in as ``unicode``.

        ``object_hook``, if given, is called with the ``dict`` produced
        for every decoded JSON object, and its return value replaces that
        ``dict``.  This allows custom deserializations (e.g. JSON-RPC
        class hinting).

        ``object_pairs_hook``, if given, is called with an ordered list
        of ``(key, value)`` pairs for every decoded JSON object, and its
        return value is used instead of the ``dict``.  Use it when the
        order of the pairs matters (for example with
        collections.OrderedDict).  When both hooks are given,
        ``object_pairs_hook`` takes priority.

        ``parse_float``, if given, is called with the string of every
        JSON float; the default is equivalent to float(num_str).  This
        can be used to substitute another type or parser (e.g.
        decimal.Decimal).

        ``parse_int``, if given, is called with the string of every JSON
        int; the default is equivalent to int(num_str).  This can be used
        to substitute another type or parser (e.g. float).

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN.  This can be used to raise an exception
        when such invalid JSON numbers are encountered.

        If ``strict`` is false (true is the default), control characters
        are allowed inside strings.  Control characters in this context
        are those with character codes in the 0-31 range, including
        ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        # Caller-supplied options
        self.encoding = encoding
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook

        # Value converters, falling back to the built-in behaviour
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.parse_int = parse_int or int
        self.parse_float = parse_float or float

        # Parsers for the structured JSON types
        self.parse_string = scanstring
        self.parse_array = JSONArray
        self.parse_object = JSONObject

        # Built last: the scanner is handed this fully configured instance
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Decode ``s`` (a ``str`` or ``unicode`` instance holding exactly
        one JSON document) and return its Python representation.

        Leading and trailing whitespace is ignored; anything else after
        the document raises ValueError.

        """
        start = _w(s, 0).end()
        value, stop = self.raw_decode(s, idx=start)
        stop = _w(s, stop).end()
        if stop == len(s):
            return value
        raise ValueError(errmsg("Extra data", s, stop, len(s)))

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that starts at index ``idx`` of ``s``
        (a ``str`` or ``unicode``) and return a 2-tuple of its Python
        representation and the index in ``s`` where the document ended.

        Unlike ``decode``, extraneous data after the document is allowed,
        so this can be used on strings that hold more than the document.

        """
        try:
            value, stop = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return value, stop