#! python 

#-*- coding: utf-8 -*- 

# 

# Copyright 2013-2015 European Commission (JRC); 

# Licensed under the EUPL (the 'Licence'); 

# You may not use this work except in compliance with the Licence. 

# You may obtain a copy of the Licence at: http://ec.europa.eu/idabc/eupl 

 

""" 

A :dfn:`pandas-model` is a tree of strings, numbers, sequences, dicts, pandas instances and resolvable 

URI-references, implemented by :class:`Pandel`. 

""" 

 

from __future__ import division, unicode_literals 

 

import abc 

import binascii 

from collections import OrderedDict, namedtuple

try:  # Py3: the collection ABCs moved to `collections.abc`.

    from collections.abc import Mapping, Sequence

except ImportError:  # Py2 fallback.

    from collections import Mapping, Sequence

from json.decoder import JSONDecoder 

from json.encoder import JSONEncoder 

import numbers 

import pickle 

import re 

 

from jsonschema import Draft3Validator, Draft4Validator, ValidationError 

import jsonschema 

from jsonschema.exceptions import SchemaError, RefResolutionError 

from pandas.core.generic import NDFrame 

from past.builtins import basestring 

 

import numpy as np 

import pandas as pd 

 

 

try: 

    from unittest.mock import MagicMock 

except ImportError: 

    from mock import MagicMock  # @UnusedImport 

 

 

try: 

    from urllib.parse import urljoin 

except ImportError: 

    from urlparse import urljoin 

 

__commit__ = "" 

 

_value_with_units_regex = re.compile(r'''^\s* 

                                        (?P<name>[^[<]*?)   # column-name 

                                        \s* 

                                        (?P<units>          # start parenthesized-units optional-group 

                                            \[              # units enclosed in [] 

                                                [^\]]* 

                                            \] 

                                            | 

                                            <              # units enclosed in <> 

                                                [^>]*

                                            > 

                                        )?                  # end parenthesized-units 

                                        \s*$''', re.X) 

_units_cleaner_regex = re.compile(r'^[[<]|[\]>]$') 

 

 

"""An item-descriptor with units, i.e. used as a table-column header.""" 

_U = namedtuple('United', ('name', 'units')) 

 

 

def parse_value_with_units(arg): 

    """ 

    Parses *name-units* pairs (i.e. used as a table-column header). 

 

    :return:    a United(name, units) named-tuple, or `None` if bad syntax; 

                note that ``name=''`` but ``units=None`` when missing. 

 

    Examples:: 

 

        >>> parse_value_with_units('value [units]') 

        United(name='value', units='units') 

 

        >>> parse_value_with_units('foo   bar  <bar/krow>') 

        United(name='foo   bar', units='bar/krow') 

 

        >>> parse_value_with_units('no units') 

        United(name='no units', units=None) 

 

        >>> parse_value_with_units('') 

        United(name='', units=None) 

 

    But notice:: 

 

        >>> assert parse_value_with_units('ok but [bad units') is None 

 

        >>> parse_value_with_units('<only units>') 

        United(name='', units='only units') 

 

        >>> parse_value_with_units(None) 

        Traceback (most recent call last): 

        TypeError: expected string or buffer 

 

    """ 

 

    m = _value_with_units_regex.match(arg) 

    if m: 

        res = m.groupdict() 

        units = res['units'] 

        if units: 

            res['units'] = _units_cleaner_regex.sub('', units) 

        return _U(**res) 

 

 

class ModelOperations(namedtuple('ModelOperations', 'inp out conv')): 

 

    """ 

    Customization functions for traversing, I/O, and converting self-or-descendant branch (sub)model values. 

    """ 

    def __new__(cls, inp=None, out=None, conv=None): 

        """ 

 

        :param list inp:    the `args-list` to :meth:`Pandel._read_branch()` 

 

        :param out:         The args to :meth:`Pandel._write_branch()`, that may be specified either as: 

 

                            * an `args-list`, that will apply for all model data-types (lists, dicts & pandas), 

                            * a map of ``type`` --> ``args-list``, where the ``None`` key is the *catch-all* case, 

                            * a function returning the `args-list` for some branch-value, 

                              with signature: ``def get_write_branch_args(branch)``. 

 

        :param conv:        The conversion-functions (:dfn:`convertors`) for the various model's data-types. 

                            The convertors have signature ``def convert(branch)``, and they may be 

                            specified either as: 

 

                            * a map of ``(from_type, to_type)`` --> ``conversion_func()``, where the ``None`` key 

                              is the *catch-all* case, 

                            * a "master-switch" function returning the appropriate convertor 

                              depending on the requested conversion. 

                              The master-function's signature is ``def get_convertor(from_branch, to_branch)``. 

 

                            The minimum convertors demanded by :class:`Pandel` are (at least, check the code for more): 

 

                            * DataFrame  <--> dict 

                            * Series     <--> dict 

                            * ndarray    <--> list 
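
                            An illustrative (hypothetical) conversions-map covering these could look like::

                                conv = {

                                    (dict, pd.DataFrame):   pd.DataFrame,

                                    (pd.DataFrame, dict):   lambda df: df.to_dict('list'),

                                    None:                   lambda branch: branch,   # catch-all

                                }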

        """ 

 

        return super(ModelOperations, cls).__new__(cls, inp, out, conv) 

 

    def choose_out_args(self, branch): 

        pass 

 

    def choose_convertor(self, from_type, to_type): 

        pass 

 

 

# Workaround https://github.com/Julian/jsonschema/issues/178 

ValidatorBase = jsonschema.validators.create({}) 

 

 

class PandelVisitor(ValidatorBase): 

 

    """ 

    A customized :class:`Draft4Validator` supporting instance-trees with pandas and numpy objects, natively.

 

    Any pandas or numpy instance (for example ``obj``) is treated like that: 

 

    +----------------------------+-----------------------------------------+ 

    |        Python Type         |     JSON Equivalence                    | 

    +============================+=========================================+ 

    | :class:`pandas.DataFrame`  | as ``object`` *json-type*, with         | 

    |                            | ``obj.columns`` as *keys*, and          | 

    |                            | ``obj[col].values`` as *values*         | 

    +----------------------------+-----------------------------------------+ 

    | :class:`pandas.Series`     | as ``object`` *json-type*, with         | 

    |                            | ``obj.index`` as *keys*, and            | 

    |                            | ``obj.values`` as *values*              | 

    +----------------------------+-----------------------------------------+ 

    | :class:`np.ndarray`,       | as ``array`` *json-type*                | 

    | :class:`list`,             |                                         | 

    | :class:`tuple`             |                                         | 

    +----------------------------+-----------------------------------------+ 

 

    Note that the value of each DataFrame column is an ``ndarray`` instance.

 

    The simplest validation of an object or a pandas-instance looks like this:

 

        >>> import pandas as pd 

 

        >>> schema = { 

        ...     'type': 'object', 

        ... } 

        >>> pv = PandelVisitor(schema) 

 

        >>> pv.validate({'foo': 'bar'}) 

        >>> pv.validate(pd.Series({'foo': 1})) 
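
        >>> pv.validate(pd.DataFrame({'foo': [1, 2]}))               ## DataFrames count as json 'objects' too (illustrative).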

        >>> pv.validate([1,2])                                       ## A sequence is invalid here. 

        Traceback (most recent call last): 

        ... 

        jsonschema.exceptions.ValidationError: [1, 2] is not of type 'object' 

        <BLANKLINE> 

        Failed validating 'type' in schema: 

            {'type': 'object'} 

        <BLANKLINE> 

        On instance: 

            [1, 2] 

 

 

    Or demanding specific properties with ``required`` and no ``additionalProperties``: 

 

        >>> schema = { 

        ...     'type':     'object', 

        ...     'required': ['foo'], 

        ...    'additionalProperties': False, 

        ...    'properties': { 

        ...        'foo': {} 

        ...    } 

        ... } 

        >>> pv = PandelVisitor(schema) 

 

        >>> pv.validate(pd.Series({'foo': 1})) 

        >>> pv.validate(pd.Series({'foo': 1, 'bar': 2}))             ## Additional 'bar' is present! 

        Traceback (most recent call last): 

        ... 

        jsonschema.exceptions.ValidationError: Additional properties are not allowed ('bar' was unexpected) 

        <BLANKLINE> 

        Failed validating 'additionalProperties' in schema: 

            {'additionalProperties': False, 

             'properties': {'foo': {}}, 

             'required': ['foo'], 

             'type': 'object'} 

        <BLANKLINE> 

        On instance: 

            bar    2 

            foo    1 

            dtype: int64 

 

        >>> pv.validate(pd.Series({}))                               ## Required 'foo' missing! 

        Traceback (most recent call last): 

        ... 

        jsonschema.exceptions.ValidationError: 'foo' is a required property 

        <BLANKLINE> 

        Failed validating 'required' in schema: 

            {'additionalProperties': False, 

             'properties': {'foo': {}}, 

             'required': ['foo'], 

             'type': 'object'} 

        <BLANKLINE> 

        On instance: 

            Series([], dtype: float64) 

 

    """ 

 

    def __init__(self, schema, types=(), resolver=None, format_checker=None, skip_meta_validation=False): 

        super(PandelVisitor, self).__init__( 

            schema, types, resolver, format_checker) 

 

        self._types.update({ 

            # type(np.nan) == builtins.float! FIXME, are numpy-numbers --> 

            # json-types OK?? 

            "number":   (numbers.Number, np.number), 

            "integer":  (int, np.integer), 

            "boolean":  (bool, np.bool_),  # , np.bool8), 

            "array":    (list, tuple, np.ndarray), 

            "object":  (dict, pd.DataFrame, pd.Series) 

        }) 

 

        # Setup Draft4/3 validation 

        # 

        # Meta-validate schema 

        #    with original validators (and not self) 

        #    because this class inherits an empty (schema/rules) validator. 

        # Falls back to 'Draft4' if no `$schema` exists. 

        validator_class = jsonschema.validators.validator_for(schema) 

        self.VALIDATORS = validator_class.VALIDATORS.copy() 

        self.META_SCHEMA = validator_class.META_SCHEMA 

        self.VALIDATORS.update({ 

            'items':                PandelVisitor._rule_items, 

            'additionalProperties': PandelVisitor._rule_additionalProperties, 

            'additionalItems':      PandelVisitor._rule_additionalItems, 

        }) 

        if validator_class == Draft3Validator: 

            self.VALIDATORS.update({ 

                'properties':           PandelVisitor._rule_properties_draft3, 

            }) 

        else: 

            self.VALIDATORS.update({ 

                'properties':           PandelVisitor._rule_properties_draft4, 

                'required':             PandelVisitor._rule_required_draft4, 

            }) 

 

        self.old_scopes = [] 

 

        # Cannot use ``validator_class.check_schema()`` because 

        #    need to relay my args to ``validator_class.__init__()``. 

        # Even better, use myself, since I'm faster (kind of...).

        if not skip_meta_validation: 

            for error in self.iter_errors(schema, validator_class.META_SCHEMA): 

                raise SchemaError.create_from(error) 

 

    ################################## 

    ############ Visiting ########### 

    ################################## 

 

    def _get_iprop(self, instance, prop): 

        val = instance[prop] 

        if isinstance(val, NDFrame): 

            val = val.values 

        return val 

 

    def _is_iprop_in(self, instance, prop): 

        return prop in instance.keys() 

 

    def _iter_iprop_names(self, instance): 

        return instance.keys() 

 

    def _iter_iprop_pairs(self, instance): 

        if isinstance(instance, pd.DataFrame): 

            return ((k, v.values) for k, v in instance.iteritems()) 

        if isinstance(instance, pd.Series): 

            return instance.iteritems() 

        return instance.items() 

 

    def _iter_iitems(self, instance): 

        return instance 

 

    def iter_errors(self, instance, _schema=None): 

        if _schema is None: 

            _schema = self.schema 

 

        scope = _schema.get("id") 

        if scope: 

            self.resolver.push_scope(scope) 

 

        ref = _schema.get("$ref") 

        if ref is not None: 

            validators = [("$ref", ref)] 

        else: 

            validators = self._iter_iprop_pairs(_schema) 

 

        for k, v in validators: 

            validator = self.VALIDATORS.get(k) 

            if validator is None: 

                continue 

 

            errors = validator(self, v, instance, _schema) or () 

            for error in errors: 

                # set details if not already set by the called fn 

                error._set( 

                    validator=k, 

                    validator_value=v, 

                    instance=instance, 

                    schema=_schema, 

                ) 

                if k != "$ref": 

                    error.schema_path.appendleft(k) 

                yield error 

 

        if scope: 

            self.resolver.pop_scope() 

 

    ################################## 

    ############# Rules ############## 

    ################################## 

 

    def _rule_properties_draft4(self, sprops, instance, schema): 

        if not self.is_type(instance, "object"): 

            return 

 

        iprops = set(self._iter_iprop_names(instance)) 

        for prop in iprops & set(sprops.keys()): 

            subschema = sprops[prop] 

            for error in self.descend( 

                self._get_iprop(instance, prop), 

                subschema, 

                path=prop, 

                schema_path=prop, 

            ): 

                yield error 

 

    def _rule_properties_draft3(self, properties, instance, schema): 

        if not self.is_type(instance, "object"): 

            return 

 

        for prop, subschema in self._iter_iprop_pairs(properties): 

            if self._is_iprop_in(instance, prop): 

                for error in self.descend( 

                    self._get_iprop(instance, prop), 

                    subschema, 

                    path=prop, 

                    schema_path=prop, 

                ): 

                    yield error 

            elif subschema.get("required", False): 

                error = ValidationError("%r is a required prop" % prop) 

                error._set( 

                    validator="required", 

                    validator_value=subschema["required"], 

                    instance=instance, 

                    schema=schema, 

                ) 

                error.path.appendleft(prop) 

                error.schema_path.extend([prop, "required"]) 

                yield error 

 

    def _rule_items(self, items, instance, schema): 

        if not self.is_type(instance, "array"): 

            return 

 

        if self.is_type(items, "object"): 

            for index, item in enumerate(self._iter_iitems(instance)): 

                for error in self.descend(item, items, path=index): 

                    yield error 

        else: 

            for (index, item), subschema in zip(enumerate(self._iter_iitems(instance)), items): 

                for error in self.descend( 

                    item, subschema, path=index, schema_path=index, 

                ): 

                    yield error 

 

    def _rule_additionalProperties(self, aP, instance, schema): 

        if not self.is_type(instance, 'object'): 

            return 

 

        sprops = schema.get("properties", {}) 

        patterns = "|".join(schema.get("patternProperties", {})) 

        extras = set() 

        for iprop in self._iter_iprop_names(instance): 

            if iprop not in sprops and (

                    not patterns or not re.search(patterns, iprop)):

                extras.add(iprop) 

 

        if extras: 

            if self.is_type(aP, "object"): 

                for extra in extras: 

                    for error in self.descend(self._get_iprop(instance, extra), aP, path=extra): 

                        yield error 

            elif not aP: 

                yield ValidationError( 

                    "Additional properties are not allowed (%s %s unexpected)" % 

                    jsonschema._utils.extras_msg(extras)) 

 

    def _rule_additionalItems(self, aI, instance, schema): 

        if ( 

            not self.is_type(instance, "array") or 

            self.is_type(schema.get("items", {}), "object") 

        ): 

            return 

 

        len_items = len(schema.get("items", [])) 

        if self.is_type(aI, "object"): 

            for index, item in enumerate(instance[len_items:], start=len_items): 

                for error in self.descend(item, aI, path=index): 

                    yield error 

        elif not aI and len(instance) > len_items: 

            yield ValidationError( 

                "Additional items are not allowed (%s %s unexpected)" % 

                jsonschema._utils.extras_msg( 

                    instance[len(schema.get("items", [])):]) 

            ) 

 

    def _rule_required_draft4(self, required, instance, schema): 

        if self.is_type(instance, 'object'): 

            for sprop in required: 

                if not self._is_iprop_in(instance, sprop): 

                    yield ValidationError("%r is a required property" % sprop) 

 

 

def trees_equal(t1, t2):

    # FIXME: stub comparator (always equal); relied upon only by the JSONCodec doctest below.
    return True

 

 

class Pandel(object): 

 

    """ 

    Builds, validates and stores a *pandas-model*, a mergeable stack of JSON-schema abiding trees of 

    strings and numbers, assembled with 

 

    * sequences, 

    * dictionaries, 

    * :class:`pandas.DataFrame`, 

    * :class:`pandas.Series`, and 

    * URI-references to other model-trees. 

 

 

 

    .. _pandel-overview: 

 

    **Overview** 

 

    The **making of a model** involves, among others, schema-validating, reading :dfn:`subtree-branches` 

    from URIs, cloning, converting and merging multiple :dfn:`sub-models` into a single :dfn:`unified-model` tree,

    without side-effecting given input. 

    All these happen in 4+1 steps:: 

 

                       ....................... Model Construction ................. 

          ------------ :  _______    ___________                                  : 

         / top_model /==>|Resolve|->|PreValidate|-+                               : 

         -----------'  : |___0___|  |_____1_____| |                               : 

          ------------ :  _______    ___________  |   _____    ________    ______ :   -------- 

         / base-model/==>|Resolve|->|PreValidate|-+->|Merge|->|Validate|->|Curate|==>/ model / 

         -----------'  : |___0___|  |_____1_____|    |_ 2__|  |___3____|  |__4+__|:  -------' 

                       ............................................................ 

 

    All steps are executed "lazily" using generators (with :keyword:`yield`). 

    Before proceeding to the next step, the previous one must have completed successfully. 

    That way, any ad-hoc code in building-step-5(*curation*), for instance, will not suffer a horrible death 

    due to badly-formed data. 

 

    [TODO] The **storing of a model** simply involves distributing model parts into different files and/or formats, 

    again without side-effecting the unified-model. 

 

 

 

    .. _pandel-building-model: 

 

    **Building model** 

 

    Here is a detailed description of each building-step: 

 

    1.  :meth:`_resolve` and substitute any `json-references <http://tools.ietf.org/html/draft-pbryan-zyp-json-ref-03>`_ 

        present in the submodels with content-fragments fetched from the referred URIs. 

        The submodels are **cloned** first, to avoid side-effecting them. 

 

        Although by default a combination of *JSON* and *CSV* files is expected, this can be customized, 

        either by the content in the json-ref, within the model (see below), or 

        as :ref:`explained  <pandel-customization>` below. 

 

        The **extended json-refs syntax** supported provides for passing arguments into :meth:`_read_branch()` 

        and :meth:`_write_branch()` methods.  The syntax is easier to explain by showing what 

        the default :attr:`_global_cntxt` corresponds to, for a ``DataFrame``:: 

 

            { 

              "$ref": "http://example.com/example.json#/foo/bar", 

              "$inp": ["AUTO"], 

              "$out": ["CSV", "encoding=UTF-8"] 

            } 

 

        And here is what is required to read and (later) store into an HDF5 local file with a predefined name::

 

            { 

              "$ref": "file://./filename.hdf5", 

              "$inp": ["AUTO"], 

              "$out": ["HDF5"] 

            } 

 

        .. Warning:: Step NOT IMPLEMENTED YET! 

 

 

    2.  Loosely :meth:`_prevalidate` each sub-model separately with :term:`json-schema`, 

        where any pandas-instances (DataFrames and Series) are left as is. 

        It is the duty of the developer to ensure that the prevalidation-schema is *loose enough* that 

        it allows for various submodel-forms, prior to merging, to pass. 

 

 

    3.  Recursively **clone**  and :meth:`_merge` sub-models in a single unified-model tree. 

        Branches from sub-models higher in the stack override the respective ones from the sub-models below, 

        recursively.  Different object types need to be **converted** appropriately (i.e. merging

        a ``dict`` with a ``DataFrame`` results in a ``DataFrame``, so the dictionary has to be

        converted to a DataFrame).

 

        The required **conversions** into pandas classes can be customized as :ref:`explained  <pandel-customization>` 

        below.  Series and DataFrames cannot merge together, and Sequences do not merge

        with any other object-type (themselves included); they just "overwrite".

 

        The default convertor-functions defined both for submodels and models are listed in the following table: 

 

        ============ ========== ========================================= 

            From:       To:                  Method: 

        ============ ========== ========================================= 

         dict        DataFrame  ``pd.DataFrame``  (the constructor) 

         DataFrame   dict       ``lambda df: df.to_dict('list')`` 

         dict        Series     ``pd.Series``     (the constructor) 

         Series      dict       ``lambda sr: sr.to_dict()``

        ============ ========== ========================================= 

 

 

    4.  Strictly json-:meth:`_validate` the unified-model (ie enforcing ``required`` schema-rules). 

 

        The required **conversions** from pandas classes can be customized as :ref:`explained  <pandel-customization>` 

        below. 

 

        The default convertor-functions are the same as above. 

 

 

    5.  (Optionally) Apply the :meth:`_curate` functions on the model to enforce dependencies and/or any

        ad-hoc generation-rules among the data.  You can think of bash-like expansion patterns, 

        like ``${/some/path:=$HOME}`` or expressions like ``%len(../other/path)``. 

 

 

 

    .. _pandel-storing: 

 

    **Storing model** 

 

    When storing model-parts, if unspecified, the filenames to write into will be deduced from the jsonpointer-path 

    of the ``$out``'s parent, by substituting "strange" chars with underscores (``_``).

 

    .. Warning:: Functionality NOT IMPLEMENTED YET! 

 

 

 

    .. _pandel-customization: 

 

    **Customization** 

 

    Some operations within steps (namely *conversion* and *IO*) can be customized by the following means 

    (from lower to higher precedence):

 

    a.  The global-default :class:`ModelOperations` instance on the :attr:`_global_cntxt`, 

        applied on both submodels and unified-model. 

 

        For example to channel the whole reading/writing of models through 

        `HDF5 <http://pandas.pydata.org/pandas-docs/stable/io.html#io-hdf5>`_ data-format, it would suffice 

        to modify the :attr:`_global_cntxt` like that:: 

 

            pm = FooPandelModel()                        ## some concrete model-maker 

            io_args = ["HDF5"] 

            pm.mod_global_operations(inp=io_args, out=io_args) 

 

    b.  [TODO] Extra-properties on the json-schema applied on both submodels and unified-model for the specific path defined. 

        The supported properties are the non-functional properties of :class:`ModelOperations`. 

 

    c.  Specific-properties regarding *IO* operations within each submodel - see the *resolve* building-step,

        above. 

 

    d.  Context-maps of ``json_paths`` --> :class:`ModelOperations` instances, installed by :meth:`add_submodel()` and

        :attr:`unified_contexts` on the model-maker.  They apply to the self-or-descendant subtree of each model.

 

        The `json_path` is a string obeying a simplified :term:`json-pointer` syntax (no char-normalizations yet),

        i.e. ``/some/foo/1/pointer``.  An empty string (``''``) matches the whole model.

 

        When multiple convertors match for a model-value, the selected convertor to be used is the most specific one 

        (the one with longest prefix).  For instance, on the model:: 

 

            [ { "foo": { "bar": 0 } } ] 

 

 

        all of the following would match the ``0`` value: 

 

        - the global-default :attr:`_global_cntxt`, 

        - ``/``, and 

        - ``/0/foo`` 

 

        but only the last's context-props will be applied. 

 

 

 

    .. _Attributes: 

 

    **Attributes**

 

    .. Attribute:: model 

 

        The model-tree that will receive the merged submodels after :meth:`build()` has been invoked. 

        Depending on the submodels, the top-value can be any of the supported model data-types. 

 

 

    .. Attribute:: _submodel_tuples 

 

        The stack of (``submodel``, ``path_ops``) tuples. The list's 1st element is the :dfn:`base-model`, 

        the last one, the :dfn:`top-model`.  Use the :meth:`add_submodel()` to build this list. 

 

 

    .. Attribute:: _global_cntxt 

 

        A :class:`ModelOperations` instance acting as the global-default context for the unified-model and all submodels. 

        Use :meth:`mod_global_operations()` to modify it. 

 

 

    .. Attribute:: _curate_funcs 

 

        The sequence of *curate* functions to be executed as the final step by :meth:`_curate()`. 

        They are "normal" functions (not generators) with signature:: 

 

            def curate_func(model_maker): 

                pass      ## ie: modify ``model_maker.model``. 

 

        It is better to specify this list of functions at construction time.

 

 

    .. Attribute:: _errored 

 

            An internal boolean flag that becomes ``True`` if any build-step has failed, 

            to halt proceeding to the next one.  It is ``None`` if build has not started yet. 

 

 

    .. _pandel-examples: 

 

    **Examples** 

 

    The basic usage requires subclassing your own model-maker, just so that a *json-schema*

    is provided for both validation-steps, 2 & 4: 

 

        >>> from collections import OrderedDict as od                           ## Json is better with stable keys-order 

 

        >>> class MyModel(Pandel): 

        ...     def _get_json_schema(self, is_prevalidation): 

        ...         return {                                                    ## Define the json-schema. 

        ...             '$schema': 'http://json-schema.org/draft-04/schema#', 

        ...             'required': [] if is_prevalidation else ['a', 'b'],     ## Prevalidation is more loose. 

        ...             'properties': { 

        ...                 'a': {'type': 'string'}, 

        ...                 'b': {'type': 'number'}, 

        ...                 'c': {'type': 'number'}, 

        ...             } 

        ...         } 

 

 

    Then you can instantiate it and add your submodels:

 

        >>> mm = MyModel() 

        >>> mm.add_submodel(od(a='foo', b=1))                                   ## submodel-1 (base) 

        >>> mm.add_submodel(pd.Series(od(a='bar', c=2)))                        ## submodel-2 (top-model) 

 

 

    You then have to build the final unified-model (any validation errors would be reported at this point): 

 

        >>> mdl = mm.build() 

 

    Note that you can also access the unified-model in the :attr:`model` attribute. 

    You can now interrogate it:

 

        >>> mdl['a'] == 'bar'                       ## Value overridden by top-model 

        True 

        >>> mdl['b'] == 1                           ## Value left intact from base-model 

        True 

        >>> mdl['c'] == 2                           ## New value from top-model 

        True 

 

 

    Let's try to build with invalid submodels:

 

        >>> mm = MyModel() 

        >>> mm.add_submodel({'a': 1})               ## According to the schema, this should have been a string, 

        >>> mm.add_submodel({'b': 'string'})        ## and this one, a number. 

 

        >>> sorted(mm.build_iter(), key=lambda ex: ex.message)    ## Fetch a list with all validation errors. # doctest: +NORMALIZE_WHITESPACE 

        [<ValidationError: "'string' is not of type 'number'">, 

         <ValidationError: "1 is not of type 'string'">, 

         <ValidationError: 'Gave-up building model after step 1.prevalidate (out of 4).'>] 

 

        >>> mdl = mm.model 

        >>> mdl is None                                     ## No model constructed, failed before merging. 

        True 

 

 

    And let's try to build with valid submodels but an invalid merged one:

 

        >>> mm = MyModel() 

        >>> mm.add_submodel({'a': 'a str'}) 

        >>> mm.add_submodel({'c': 1}) 

 

        >>> sorted(mm.build_iter(), key=lambda ex: ex.message)  # doctest: +NORMALIZE_WHITESPACE 

        [<ValidationError: "'b' is a required property">, 

         <ValidationError: 'Gave-up building model after step 3.validate (out of 4).'>] 

 

    """ 

 

    __metaclass__ = abc.ABCMeta 

 

    def __init__(self, curate_funcs=()): 

        """ 

 

        :param sequence curate_funcs:   See :attr:`_curate_funcs`. 

        """ 

 

        self.model = None 

        self._errored = None 

        self._submodel_tuples = [] 

        self._curate_funcs = curate_funcs 

        self._global_cntxt = ModelOperations()  # Global fall-back context (see :attr:`_global_cntxt`).

        self._unified_contexts = None 

 

    def mod_global_operations(self, operations=None, **cntxt_kwargs): 

        """ 

 

        Modifies the global-default :class:`ModelOperations` context (:attr:`_global_cntxt`).

        Since it is the fall-back for *conversion* and *IO* operations, it must exist and have

        all its props well-defined for the class to work correctly. 

 

        :param ModelOperations operations:  Replaces values of the installed context with 

                                            non-empty values from this one. 

        :param cntxt_kwargs:                Replaces the keyworded-values on the existing `operations`. 

                                            See :class:`ModelOperations` for supported keywords. 

        """ 

        if operations: 

            assert isinstance( 

                operations, ModelOperations), (type(operations), operations) 

            self._global_cntxt = operations 

        self._global_cntxt = self._global_cntxt._replace(**cntxt_kwargs)  # ``_replace()`` returns a new namedtuple.

 

    @property 

    def unified_contexts(self): 

        """ 

        A map of ``json_paths`` --> :class:`ModelOperations` instances acting on the unified-model. 
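
        Example (an illustrative sketch, assuming ``MyModel`` subclasses :class:`Pandel`)::

            mm = MyModel()

            mm.unified_contexts = {'/df': ModelOperations(out=['CSV'])}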

        """ 

        return self._unified_contexts 

 

    @unified_contexts.setter 

    def unified_contexts(self, path_ops): 

        assert isinstance(path_ops, Mapping), (type(path_ops), path_ops) 

        self._unified_contexts = path_ops 

 

    def _select_context(self, path, branch): 

        """ 

        Finds which context to use while visiting model-nodes, by enforcing the precedence-rules described

        in the :ref:`Customizations  <pandel-customization>`. 

 

        :param str path:    the branch's jsonpointer-path 

        :param branch:      the actual branch's node

        :return:            the selected :class:`ModelOperations` 

        """ 

        pass 

 

    def _read_branch(self): 

        """ 

        Reads model-branches during *resolve* step. 

        """ 

        pass  # TODO: impl read_branch() 

 

    def _write_branch(self): 

        """ 

        Writes model-branches during *distribute* step. 

        """ 

        pass  # TODO: impl write_branch() 

 

    def _get_json_schema(self, is_prevalidation): 

        """ 

        :return: a json-schema, looser when ``is_prevalidation`` is true

        :rtype: dictionary 

        """ 

        # TODO: Make it a factory o 

        pass 

 

    def _rule_AdditionalProperties(self, validator, aP, required, instance, schema): 

        properties = schema.get("properties", {}) 

        patterns = "|".join(schema.get("patternProperties", {})) 

        extras = set() 

        for prop in instance: 

            if prop not in properties: 

                if patterns and re.search(patterns, prop): 

                    continue 

                extras.add(prop) 

 

        if validator.is_type(aP, "object"): 

            for extra in extras: 

                for error in validator.descend(instance[extra], aP, path=extra): 

                    yield error 

        elif not aP and extras: 

            error = "Additional properties are not allowed (%s %s unexpected)" 

            yield ValidationError(error % jsonschema._utils.extras_msg(extras)) 

 

    def _rule_Required(self, validator, required, instance, schema): 

        if (validator.is_type(instance, "object") or 

            validator.is_type(instance, "DataFrame") or 

                validator.is_type(instance, "Series")): 

            for prop in required: 

                if prop not in instance: 

                    yield ValidationError("%r is a required property" % prop) 

 

    def _get_model_validator(self, schema): 

 

        validator = Draft4Validator(schema) 

        validator._types.update( 

            {"ndarray": np.ndarray, "DataFrame": pd.DataFrame, 'Series': pd.Series}) 

        validator.VALIDATORS['DataFrame'] = self._rule_Required 

 

        return validator 

 

    def _validate_json_model(self, schema, mdl): 

        validator = self._get_model_validator(schema) 

        for err in validator.iter_errors(mdl): 

            self._errored = True 

            yield err 

 

    def _clone_and_merge_submodels(self, a, b, path=''): 

        """' Recursively merge b into a, cloning both. """ 

 

        if isinstance(a, pd.DataFrame) or isinstance(b, pd.DataFrame): 

            a = pd.DataFrame() if a is None else pd.DataFrame(a) 

            b = pd.DataFrame() if b is None else pd.DataFrame(b) 

 

            a.update(b)  # , 'outer') NOT IMPL YET 

            extra_b_items = list(set(b.columns) - set(a.columns)) 

            a[extra_b_items] = b[extra_b_items] 

 

        elif isinstance(a, pd.Series) or isinstance(b, pd.Series): 

            a = pd.Series() if a is None else pd.Series(a) 

            b = pd.Series() if b is None else pd.Series(b) 

            # a.update(b) # DOES NOT append extra keys! 

            a = b.combine_first(a) 

 

        elif isinstance(a, Mapping) or isinstance(b, Mapping): 

            a = OrderedDict() if a is None else OrderedDict(a) 

            b = OrderedDict() if b is None else OrderedDict(b) 

 

            for key in b: 

                b_val = b[key] 

                if key in a: 

                    val = self._clone_and_merge_submodels( 

                        a[key], b_val, '%s/%s' % (path, key)) 

                else: 

                    val = b_val 

                a[key] = val 

 

        elif (isinstance(a, Sequence) and not isinstance(a, basestring)) or \ 

                (isinstance(b, Sequence) and not isinstance(b, basestring)): 

            if b is not None: 

                val = b 

            else: 

                val = a 

 

            l = list() 

            for (i, item) in enumerate(val): 

                l.append( 

                    self._clone_and_merge_submodels(item, None, '%s[%i]' % (path, i))) 

            a = l 

 

        elif a is None and b is None: 

            raise ValidationError("Cannot merge Nones at path(%s)!" % path) 

 

        else: 

            if b is not None: 

                a = b 

 

        return a 

 

    def _resolve(self): 

        "Step-1" 

        if False: 

            yield 

 

    def _prevalidate(self): 

        "Step-1" 

        for (mdl, path_ops) in self._submodel_tuples: 

            schema = self._get_json_schema(is_prevalidation=True) 

            for err in self._validate_json_model(schema, mdl): 

                yield err 

 

    def _merge(self): 

        "Step-2" 

        for (mdl, path_ops) in self._submodel_tuples: 

            self.model = self._clone_and_merge_submodels(self.model, mdl) 

        if False: 

            yield  # Just mark method as generator. 

 

    def _validate(self): 

        "Step-3" 

        schema = self._get_json_schema(is_prevalidation=False) 

        for err in self._validate_json_model(schema, self.model): 

            yield err 

 

    def _curate(self): 

        "Step-4:  Invokes any curate-functions found in :attr:`_curate_funcs`." 

        if False: 

            yield  # To be overriden, just mark method as generator. 

        for curfunc in self._curate_funcs: 

            curfunc(self) 

 

    def add_submodel(self, model, path_ops=None): 

        """ 

        Pushes on top a submodel, along with its context-map. 

 

        :param model:               the model-tree (sequence, mapping, pandas-types) 

        :param dict path_ops:       A map of ``json_paths`` --> :class:`ModelOperations` instances acting on the 

                                    unified-model.  The `path_ops` may often be empty. 

 

        **Examples** 

 

        To change the default DataFrame --> dictionary convertor for a submodel, use the following: 

 

            >>> mdl = {'foo': 'bar'} 

            >>> submdl = ModelOperations(mdl, conv={(pd.DataFrame, dict): lambda df: df.to_dict('records')})

 

        """ 

 

        if path_ops: 

            assert isinstance(path_ops, Mapping), (type(path_ops), path_ops) 

 

        return self._submodel_tuples.append((model, path_ops)) 

 

    def build_iter(self): 

        """ 

        Iteratively build model, yielding any problems as :class:`ValidationError` instances. 

 

        For debugging, the unified model at :attr:`model` may contain intermediate results at any time,

        even if construction has failed.  Check the :attr:`_errored` flag if necessary.

        """ 

 

        steps = [ 

            (self._prevalidate, 'prevalidate'), 

            (self._merge,       'merge'), 

            (self._validate,    'validate'), 

            (self._curate,      'curate'), 

        ] 

        self._errored = False 

        self.model = None 

 

        for (i, (step, step_name)) in enumerate(steps, start=1): 

            try: 

                for err in step(): 

                    yield err 

            except ValidationError as ex: 

                self._errored = True 

                yield ex 

 

            except Exception as ex: 

                self._errored = True 

 

                nex = ValidationError( 

                    'Model step-%i(%s) failed due to: %s' % (i, step_name, ex)) 

                nex.cause = ex 

 

                yield nex 

 

            if self._errored: 

                yield ValidationError('Gave-up building model after step %i.%s (out of %i).' % (i, step_name, len(steps))) 

                break 

 

    def build(self): 

        """ 

        Attempts to build the model by exhausting :meth:`build_iter()`, or raises its 1st error. 

 

        Use this method when you do not want to waste time getting the full list of errors. 

        """ 

 

        err = next(self.build_iter(), None) 

        if err: 

            raise err 

 

        return self.model 

 

    def get(self, path, **kws): 

        return resolve_jsonpointer(self.model, path, **kws)

 

 

def escape_jsonpointer_part(part): 
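    """

    Escapes the jsonpointer special chars ``~`` and ``/`` (an illustrative doctest)::

        >>> escape_jsonpointer_part('a/b~c')

        'a~1b~0c'

    """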

    return part.replace("~", "~0").replace("/", "~1") 

 

 

def unescape_jsonpointer_part(part): 
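    """

    Reverses :func:`escape_jsonpointer_part()` (an illustrative doctest)::

        >>> unescape_jsonpointer_part('a~1b~0c')

        'a/b~c'

    """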

    return part.replace("~1", "/").replace("~0", "~") 

 

 

def iter_jsonpointer_parts(jsonpath): 

    """ 

    Generates the ``jsonpath`` parts according to jsonpointer spec. 

 

    :param str jsonpath:  a jsonpath to resolve within document 

    :return:              The parts of the path (as a generator), without

                          converting any step to int, and None if None. 

 

    :author: Julian Berman, ankostis 

 

    Examples:: 

 

        >>> list(iter_jsonpointer_parts('/a/b')) 

        ['a', 'b'] 

 

        >>> list(iter_jsonpointer_parts('/a//b')) 

        ['a', '', 'b'] 

 

        >>> list(iter_jsonpointer_parts('/')) 

        [''] 

 

        >>> list(iter_jsonpointer_parts('')) 

        [] 

 

 

    But paths are strings beginning (NOT_IMPL: but not ending) with a slash ('/')::

 

        >>> list(iter_jsonpointer_parts(None)) 

        Traceback (most recent call last): 

        AttributeError: 'NoneType' object has no attribute 'split' 

 

        >>> list(iter_jsonpointer_parts('a')) 

        Traceback (most recent call last): 

        jsonschema.exceptions.RefResolutionError: Jsonpointer-path(a) must start with '/'! 

 

        #>>> list(iter_jsonpointer_parts('/a/')) 

        #Traceback (most recent call last): 

        #jsonschema.exceptions.RefResolutionError: Jsonpointer-path(a) must NOT ends with '/'! 

 

    """ 

 

#     if jsonpath.endswith('/'): 

#         msg = "Jsonpointer-path({}) must NOT finish with '/'!" 

#         raise RefResolutionError(msg.format(jsonpath)) 

    parts = jsonpath.split("/") 

    if parts.pop(0) != '': 

        msg = "Jsonpointer-path({}) must start with '/'!" 

        raise RefResolutionError(msg.format(jsonpath)) 

 

    for part in parts: 

        part = unescape_jsonpointer_part(part) 

 

        yield part 

 

 

def iter_jsonpointer_parts_relaxed(jsonpointer): 

    """ 

    Like :func:`iter_jsonpointer_parts()` but accepting also non-absolute paths. 

 

    The 1st step of absolute-paths is always ''. 

 

    Examples:: 

 

        >>> list(iter_jsonpointer_parts_relaxed('a')) 

        ['a'] 

        >>> list(iter_jsonpointer_parts_relaxed('a/')) 

        ['a', ''] 

        >>> list(iter_jsonpointer_parts_relaxed('a/b')) 

        ['a', 'b'] 

 

        >>> list(iter_jsonpointer_parts_relaxed('/a')) 

        ['', 'a'] 

        >>> list(iter_jsonpointer_parts_relaxed('/a/')) 

        ['', 'a', ''] 

 

        >>> list(iter_jsonpointer_parts_relaxed('/')) 

        ['', ''] 

 

        >>> list(iter_jsonpointer_parts_relaxed('')) 

        [''] 

 

    """ 

    for part in jsonpointer.split("/"): 

        yield unescape_jsonpointer_part(part) 

 

_scream = object() 

 

 

def resolve_jsonpointer(doc, jsonpointer, default=_scream): 

    """ 

    Resolve a ``jsonpointer`` within the referenced ``doc``. 

 

    :param doc: the referent document

    :param str jsonpointer: a jsonpointer to resolve within document 

    :return: the resolved doc-item or raises :class:`RefResolutionError` 

    :raises: RefResolutionError (if cannot resolve jsonpointer path) 

 

    Examples: 

 

        >>> dt = { 

        ...     'pi':3.14, 

        ...     'foo':'bar', 

        ...     'df': pd.DataFrame(np.ones((3,2)), columns=list('VN')), 

        ...     'sub': { 

        ...         'sr': pd.Series({'abc':'def'}), 

        ...     } 

        ... } 

        >>> resolve_jsonpointer(dt, '/pi', default=_scream) 

        3.14 

 

        >>> resolve_jsonpointer(dt, '/pi/BAD') 

        Traceback (most recent call last): 

        jsonschema.exceptions.RefResolutionError: Unresolvable JSON pointer('/pi/BAD')@(BAD) 

 

        >>> resolve_jsonpointer(dt, '/pi/BAD', 'Hi!') 

        'Hi!' 

 

    :author: Julian Berman, ankostis 

    """ 

    for part in iter_jsonpointer_parts(jsonpointer): 

        if isinstance(doc, Sequence): 

            # Array indexes should be turned into integers 

            try: 

                part = int(part) 

            except ValueError: 

                pass 

        try: 

            doc = doc[part] 

        except (TypeError, LookupError): 

            if default is _scream: 

                raise RefResolutionError( 

                    "Unresolvable JSON pointer(%r)@(%s)" % (jsonpointer, part)) 

            else: 

                return default 

 

    return doc 

 

 

def set_jsonpointer(doc, jsonpointer, value, object_factory=OrderedDict): 

    """ 

    Set the value of the node referenced by ``jsonpointer`` within ``doc``, creating any missing branches.

    :param doc: the referent document

    :param str jsonpointer: a jsonpointer to the node to modify

    :raises: RefResolutionError (if jsonpointer is empty, unresolvable, or points into invalid content)
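
    Example (an illustrative sketch; ``resolve_jsonpointer()`` just shows the effect)::

        >>> doc = {}

        >>> set_jsonpointer(doc, '/a/b', 1)

        >>> resolve_jsonpointer(doc, '/a/b')

        1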

    """ 

 

    parts = list(iter_jsonpointer_parts(jsonpointer)) 

 

    # Will scream if used on 1st iteration. 

    # 

    pdoc = None 

    ppart = None 

    for i, part in enumerate(parts): 

        if isinstance(doc, Sequence) and not isinstance(doc, basestring): 

            # Array indexes should be turned into integers 

            # 

            doclen = len(doc) 

            if part == '-': 

                part = doclen 

            else: 

                try: 

                    part = int(part) 

                except ValueError: 

                    raise RefResolutionError( 

                        "Expected numeric index(%s) for sequence at (%r)[%i]" % (part, jsonpointer, i)) 

                else: 

                    if part > doclen: 

                        raise RefResolutionError( 

                            "Index(%s) out of bounds(%i) of (%r)[%i]" % (part, doclen, jsonpointer, i)) 

        try: 

            ndoc = doc[part] 

        except (LookupError): 

            break  # Branch-extension needed. 

        except (TypeError):  # Maybe indexing a string... 

            ndoc = object_factory() 

            pdoc[ppart] = ndoc 

            doc = ndoc 

            break  # Branch-extension needed. 

 

        doc, pdoc, ppart = ndoc, doc, part 

    else: 

        doc = pdoc  # If loop exhausted, cancel last assignment. 

 

    # Build branch with value-leaf. 

    # 

    nbranch = value 

    for part2 in reversed(parts[i + 1:]): 

        ndoc = object_factory() 

        ndoc[part2] = nbranch 

        nbranch = ndoc 

 

    # Attach new-branch. 

    try: 

        doc[part] = nbranch 

    # Inserting last sequence-element raises IndexError("list assignment index 

    # out of range") 

    except IndexError: 

        doc.append(nbranch) 

 

#    except (IndexError, TypeError) as ex: 

# if isinstance(ex, IndexError) or 'list indices must be integers' in str(ex): 

#        raise RefResolutionError("Incompatible content of JSON pointer(%r)@(%s)" % (jsonpointer, part)) 

#        else: 

#            doc = {} 

#            parent_doc[parent_part] = doc 

#            doc[part] = value 

 

 

def build_all_jsonpaths(schema): 
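    """

    Collects the json-pointer paths of all *leaf* ``properties`` in a json-schema (descending into ``oneOf``/``anyOf``/``allOf``).

    Example (an illustrative sketch)::

        >>> schema = {'properties': {'a': {}, 'b': {'properties': {'c': {}}}}}

        >>> sorted(build_all_jsonpaths(schema))

        ['/a', '/b/c']

    """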

    # Totally quick and dirty; TODO: Use json-validator to build all json-paths.

    forks = ['oneOf', 'anyOf', 'allOf'] 

 

    def _visit(schema, path, paths): 

        for f in forks: 

            objlist = schema.get(f) 

            if objlist: 

                for obj in objlist: 

                    _visit(obj, path, paths) 

 

        props = schema.get('properties') 

        if props: 

            for p, obj in props.items(): 

                _visit(obj, path + '/' + p, paths) 

        else: 

            paths.append(path) 

 

    paths = [] 

    _visit(schema, '', paths) 

 

    return paths 

 

 

_NONE = object() 

"""Denotes non-existent json-schema attribute in :class:`JSchema`.""" 

 

 

class JSchema(object): 

 

    """ 

    Facilitates the construction of json-schema-v4 nodes on :class:`PStep` code. 

 

    It does just a rudimentary args-name check.   Further validations

    should be applied using a proper json-schema validator.

 

    :param type: if omitted, derived as 'object' if it has children 

    :param kws:  for all the rest see http://json-schema.org/latest/json-schema-validation.html 
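
    Example (an illustrative sketch; attributes are simply set and then collected by :meth:`todict()`)::

        >>> js = JSchema()

        >>> js.title = 'x-coordinate'

        >>> js.type = 'number'

        >>> sorted(js.todict().items())

        [('title', 'x-coordinate'), ('type', 'number')]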

 

    """ 

    type = _NONE  # @ReservedAssignment

    items = _NONE  # @ReservedAssignment

    required = _NONE

    title = _NONE

    description = _NONE

    minimum = _NONE

    exclusiveMinimum = _NONE

    maximum = _NONE

    exclusiveMaximum = _NONE

    patternProperties = _NONE

    pattern = _NONE

    enum = _NONE

    allOf = _NONE

    oneOf = _NONE

    anyOf = _NONE

 

    def todict(self): 

        return {k: v for k, v in vars(self).items() if v is not _NONE} 

 

 

class JSONCodec(): 

 

    """ 

    Json encoder/decoder capable of handling (almost) all Python objects, by pickling them.

 

    Example:: 

 

        >>> import json 

        >>> obj_list = [ 

        ...    3.14, 

        ...    { 

        ...         'aa': pd.DataFrame([]), 

        ...         2: np.array([]), 

        ...         33: {'foo': 'bar'}, 

        ...     }, 

        ...     pd.DataFrame(np.random.randn(10, 2)), 

        ...     ('b', pd.Series({})), 

        ... ] 

        >>> for o in obj_list + [obj_list]: 

        ...     s = json.dumps(o, cls=JSONCodec.Encoder) 

        ...     oo = json.loads(s, cls=JSONCodec.Decoder) 

        ...     assert trees_equal(o, oo) 

        ... 

 

    .. seealso:: 

        For pickle-limitations: https://docs.python.org/3.4/library/pickle.html#pickle-picklable 

    """ 

    _ver_key = '_ver' 

    _ver = '0' 

    _obj = '$qpickle' 

 

    class Encoder(JSONEncoder): 

 

        def encode(self, o): 

            pickle_bytes = pickle.dumps(o) 

            pickle_str = binascii.b2a_qp(pickle_bytes).decode(encoding='utf8') 

            o = {JSONCodec._obj: pickle_str, 

                 JSONCodec._ver_key: JSONCodec._ver} 

            return JSONEncoder.encode(self, o) 

 

    class Decoder(JSONDecoder): 

 

        def decode(self, s):

            o = JSONDecoder.decode(self, s) 

            pickle_str = o.get(JSONCodec._obj, None) 

            if pickle_str: 

                #file_ver = o[JSONCodec._ver_key] 

                # if file_ver != JSONCodec._ver: 

                #     msg = 'Unsopported json-encoded version(%s != %s)!' 

                #     raise ValueError(msg % (file_ver, JSONCodec._ver)) 

                pickle_bytes = binascii.a2b_qp(pickle_str.encode( 

                    encoding='utf8')) 

                o = pickle.loads(pickle_bytes) 

            return o 

 

 

if __name__ == '__main__':  # pragma: no cover 

    raise NotImplementedError