Coverage for jbank/parsers.py : 88%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import logging
2import re
3from datetime import time, datetime, date
4from decimal import Decimal
5from typing import Any, Tuple, Optional, Dict, Sequence, Union
6from django.core.exceptions import ValidationError
7from django.utils.translation import gettext as _
# Fixed-width field whose width is the number of repeated type characters,
# e.g. "XX" or "999". The back-reference keeps group(1) a single type character
# while rejecting mixed specs such as "X9" or "XX9".
REGEX_SIMPLE_FIELD = re.compile(r"^(X|9)\1*$")

# Field with an explicit width in parentheses, e.g. "X(14)" or "9(6)".
REGEX_VARIABLE_FIELD = re.compile(r"^(X|9)\((\d+)\)$")

logger = logging.getLogger(__name__)


def parse_record_format(fmt: str) -> Tuple[str, int]:
    """
    Parse a field format specification into its data type and width.

    Two notations are understood: a run of one repeated type character
    ("XX", "999"), whose width is the length of the run, and a type character
    followed by an explicit width in parentheses ("X(14)", "9(6)").

    :param fmt: Data format used in .TO files
    :return: Data type ('X' or '9'), data length (number of characters)
    :raises ValueError: if fmt matches neither notation or declares zero width
    """
    # fullmatch (rather than match) so a trailing newline cannot slip past "$"
    # and inflate the width computed from len(fmt).
    simple = REGEX_SIMPLE_FIELD.fullmatch(fmt)
    if simple:
        return simple.group(1), len(fmt)

    variable = REGEX_VARIABLE_FIELD.fullmatch(fmt)
    if variable:
        data_len = int(variable.group(2))
        if data_len:  # "X(0)" is not a usable field
            return variable.group(1), data_len

    raise ValueError("Failed to parse data format {}".format(fmt))
def parse_record_value(data_type, data_len, data, name: str, line_number: int) -> str:
    """
    Cut one field off the front of ``data`` and validate it.

    :param data_type: 'X' (any characters) or '9' (characters 0-9 only)
    :param data_len: Number of characters the field occupies
    :param data: Remaining line content, starting at this field
    :param name: Field name, used in error messages
    :param line_number: Line number, used in error messages
    :return: The first ``data_len`` characters of ``data``, unmodified
    :raises ValidationError: if ``data`` is shorter than ``data_len``, the data
        type is unknown, or a '9' field contains a character other than 0-9
    """
    field_text = data[:data_len]

    def invalid_field():
        # Every failure mode reports the same message.
        return ValidationError(
            _('Line {line}: Invalid field "{field}" value "{value}"').format(
                line=line_number, field=name, value=field_text
            )
        )

    if len(field_text) != data_len:
        raise invalid_field()
    if data_type == "X":
        return field_text
    if data_type != "9":
        raise invalid_field()
    if any(ch not in "0123456789" for ch in field_text):
        raise invalid_field()
    return field_text
def parse_records(
    line: str,
    specs: Sequence[Tuple[str, str, str]],
    line_number: int,
    check_record_length: bool = True,
    record_length: Optional[int] = None,
) -> Dict[str, Union[int, str]]:
    """
    Split one fixed-width line into named fields.

    Fields are read left to right in the order given by ``specs``. Each value
    is stored with surrounding whitespace stripped. Whatever follows the last
    field is stored under "extra_data", and ``line_number`` under
    "line_number".

    :param line: The raw line to parse
    :param specs: (name, format, requirement) triples; only name and format
        are used here
    :param line_number: Line number, stored in the result and used in errors
    :param check_record_length: Whether to verify the parsed length against
        the declared record length
    :param record_length: Declared record length to use when the record has no
        "record_length" field of its own
    :return: Dict of parsed field values plus "line_number" and "extra_data"
    :raises ValidationError: if a field is invalid, or if the parsed length
        differs from the declared record length and non-blank data is left over
    """
    data: Dict[str, Union[int, str]] = {"line_number": line_number}
    offset = 0
    # "_req" rather than "_": in this module "_" is the gettext function.
    for name, fmt, _req in specs:
        data_type, data_len = parse_record_format(fmt)
        value = parse_record_value(
            data_type, data_len, line[offset : offset + data_len], name=name, line_number=line_number
        )
        data[name] = str(value).strip()
        offset += data_len
    data["extra_data"] = line[offset:]

    declared_len = data.get("record_length", record_length)
    if check_record_length and declared_len:
        extra_data = str(data["extra_data"]).strip()
        data["extra_data"] = extra_data
        # A length read from the record itself is a string such as "322".
        # Compare numerically; int != str is always true and would make the
        # length test meaningless.
        try:
            expected_len: Optional[int] = int(declared_len)
        except (TypeError, ValueError):
            expected_len = None  # not a number: can never equal the parsed length
        if extra_data and offset != expected_len:
            raise ValidationError(
                _(
                    "Line {line}: Record length ({record_length}) does not match length of "
                    'parsed data ({data_length}). Extra data: "{extra_data}"'
                ).format(
                    line=line_number,
                    data_length=offset + len(extra_data),
                    record_length=declared_len,
                    extra_data=extra_data,
                )
            )
    return data
def convert_date(v: Optional[str], field_name: str, date_fmt: str = "YYMMDD") -> date:
    """
    Convert a six-digit date string to a ``date``.

    The two-digit year is interpreted as 2000 + YY.

    :param v: Raw field value, e.g. "210315"
    :param field_name: Field name, used in error messages
    :param date_fmt: Date layout; only "YYMMDD" is supported
    :return: The parsed date
    :raises ValidationError: if the value is missing, is not exactly six
        digits, is "000000", does not name a real calendar date, or
        ``date_fmt`` is unsupported
    """
    if v is None:
        raise ValidationError(_("Date field missing: {}").format(field_name))
    # Digits only: int() would otherwise accept signs and padding ("-10101").
    if len(v) != 6 or v == "000000" or not re.fullmatch(r"\d{6}", v):
        raise ValidationError(_("Date format error in field {}: {}").format(field_name, v))
    if date_fmt != "YYMMDD":
        raise ValidationError(_("Unsupported date format"))
    try:
        return date(year=2000 + int(v[0:2]), month=int(v[2:4]), day=int(v[4:6]))
    except ValueError as err:
        # Month or day out of range (e.g. "211332"): report it like any other
        # malformed date instead of leaking a bare ValueError.
        raise ValidationError(_("Date format error in field {}: {}").format(field_name, v)) from err
def convert_date_opt(v: Optional[str], field_name: str, date_fmt: str = "YYMMDD") -> Optional[date]:
    """
    Convert an optional date field.

    :param v: Raw field value, or None
    :param field_name: Field name, used in error messages
    :param date_fmt: Date layout passed through to ``convert_date``
    :return: None when the value is None or "000000", otherwise the result of
        ``convert_date``
    """
    is_blank = v is None or v == "000000"
    return None if is_blank else convert_date(v, field_name, date_fmt)
def convert_time(v: Optional[str], field_name: str) -> time:
    """
    Convert a four-digit HHMM string to a ``time``.

    :param v: Raw field value, e.g. "2359"
    :param field_name: Field name, used in error messages
    :return: The parsed time, with seconds set to zero
    :raises ValidationError: if the value is missing, is not exactly four
        digits, or the hour/minute is out of range
    """
    if v is None:
        raise ValidationError(_("Time field missing: {}").format(field_name))
    # fullmatch: "^...$" with match() would also accept "1234\n".
    if not re.fullmatch(r"\d\d\d\d", v):
        raise ValidationError(_("Time format error in field {}: {}").format(field_name, v))
    try:
        return time(int(v[0:2]), int(v[2:4]))
    except ValueError as err:
        # Hour > 23 or minute > 59: same error as any other malformed time.
        raise ValidationError(_("Time format error in field {}: {}").format(field_name, v)) from err
def convert_date_fields(
    data: dict, date_fields: Sequence[Union[str, Tuple[str, str]]], tz: Any, date_fmt: str = "YYMMDD"
) -> None:
    """
    Convert raw date strings in ``data`` in place.

    Each entry of ``date_fields`` is either:

    * a field name: the value is replaced by a ``date``, or by None when it is
      missing or "000000";
    * a (date_field, time_field) pair: when either value is non-empty, both
      are parsed and the date field is replaced by a timezone-aware
      ``datetime``. The time field is removed from ``data`` in every case.

    Entries of any other type are ignored.

    :param data: Parsed record, modified in place
    :param date_fields: Field names and/or (date_field, time_field) pairs
    :param tz: Time zone for combined values. Objects with a ``localize``
        method (pytz) are asked to localize the value; anything else is
        attached as ``tzinfo`` directly, so None leaves the datetime naive
    :param date_fmt: Date layout passed through to the date converters
    :raises ValidationError: if a tuple entry is not a pair, or a value is
        missing, malformed, or not a string
    """
    for k in date_fields:
        if isinstance(k, str):
            data[k] = convert_date_opt(data.get(k), k, date_fmt)
            continue
        if not isinstance(k, tuple):
            continue
        if len(k) != 2:
            raise ValidationError(_("Date format error in field {}").format(k))

        k_date, k_time = k
        v_date, v_time = data.get(k_date), data.get(k_time)
        if v_date or v_time:
            # Explicit check instead of assert, which disappears under -O.
            for raw in (v_date, v_time):
                if raw is not None and not isinstance(raw, str):
                    raise ValidationError(_("Date format error in field {}").format(k))
            v_datetime = datetime.combine(
                convert_date(v_date, k_date, date_fmt),
                convert_time(v_time, k_time),
            )
            # pytz zones must go through localize(); stdlib tzinfo objects
            # (datetime.timezone, zoneinfo) are attached directly.
            localize = getattr(tz, "localize", None)
            data[k_date] = localize(v_datetime) if callable(localize) else v_datetime.replace(tzinfo=tz)
        # The time component now lives in the date field (or was empty);
        # pop() tolerates records that never had the time key.
        data.pop(k_time, None)
def convert_decimal_fields(data: dict, decimal_fields: Sequence[Union[Tuple[str, str], str]], neg_sign_val: str = "-"):
    """
    Convert integer amounts in ``data`` to ``Decimal`` values in place.

    Each raw value is multiplied by 0.01, so "12345" becomes 123.45. Each
    entry of ``decimal_fields`` is either:

    * a field name: the value is converted if present;
    * a (number_field, sign_field) pair: the number is converted and negated
      when the sign field equals ``neg_sign_val``. The sign field is removed
      from ``data`` in every case; a missing sign field means "not negative".

    :param data: Parsed record, modified in place
    :param decimal_fields: Field names and/or (number_field, sign_field) pairs
    :param neg_sign_val: Sign-field value that marks a negative amount
    :raises ValidationError: if an entry is neither a string nor a pair
    """
    hundredth = Decimal("0.01")  # loop-invariant scale factor
    for field in decimal_fields:
        if isinstance(field, str):
            v_number = data.get(field)
            if v_number is not None:
                data[field] = Decimal(v_number) * hundredth
        elif isinstance(field, tuple) and len(field) == 2:
            k_number, k_sign = field
            v_number, v_sign = data.get(k_number), data.get(k_sign)
            if v_number is not None:
                amount = Decimal(v_number) * hundredth
                data[k_number] = -amount if v_sign == neg_sign_val else amount
            # The sign is folded into the number; pop() tolerates records that
            # never had the sign key.
            data.pop(k_sign, None)
        else:
            raise ValidationError(_("Invalid decimal field format: {}").format(field))
def parse_filename_suffix(filename: str) -> str:
    """
    Return the part of ``filename`` after its last dot.

    :param filename: File name, with or without a suffix
    :return: Text following the last "."; the whole name when it has no dot
    """
    _head, _dot, suffix = filename.rpartition(".")
    return suffix