Coverage for jbank/parsers.py: 66%
151 statements
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-27 13:36 +0700
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-27 13:36 +0700
1import logging
2import re
3from datetime import time, datetime, date
4from decimal import Decimal
5from typing import Any, Tuple, Optional, Dict, Sequence, Union, List
6from django.core.exceptions import ValidationError
7from django.utils.translation import gettext as _
8from pytz import timezone
# Field format spelled out one character per position, e.g. "XXX" or "999".
REGEX_SIMPLE_FIELD = re.compile(r"^(X|9)+$")
# Field format with an explicit length, e.g. "X(24)" or "9(14)": group 1 is the type, group 2 the length.
REGEX_VARIABLE_FIELD = re.compile(r"^(X|9)\((\d+)\)$")
# Module-level logger.
logger = logging.getLogger(__name__)
def parse_record_format(fmt: str) -> Tuple[str, int]:
    """
    Split a field format specifier into its data type and length.

    Two spellings are recognised: a run of type characters such as "XXX" (the length is the
    length of the specifier itself) and the explicit-length form such as "9(14)".

    :param fmt: Data format used in .TO files
    :return: Data type ('X' or '9'), data length (number of characters)
    :raises Exception: If the specifier matches neither spelling or yields a zero length
    """
    simple = REGEX_SIMPLE_FIELD.match(fmt)
    if simple:
        kind, length = simple.group(1), len(fmt)
    else:
        variable = REGEX_VARIABLE_FIELD.match(fmt)
        if variable:
            kind, length = variable.group(1), int(variable.group(2))
        else:
            kind, length = None, None

    # A zero length (e.g. "X(0)") is rejected the same way as an unparseable specifier.
    if not kind or not length:
        raise Exception("Failed to parse data format {}".format(fmt))
    return kind, length
def parse_record_value(data_type, data_len, data, name: str, line_number: int) -> str:
    """
    Cut one fixed-width field from the start of the data and validate it.

    :param data_type: 'X' (any characters) or '9' (ASCII digits only)
    :param data_len: Number of characters the field occupies
    :param data: Remaining line data, starting at the field
    :param name: Field name, used in error messages
    :param line_number: Line number, used in error messages
    :return: The raw (unstripped) field value
    :raises ValidationError: If the data is shorter than the field, a '9' field contains a
        non-digit character, or the data type is unknown
    """
    value = data[:data_len]

    def invalid_field():
        # Every failure mode of this function reports the same message.
        return ValidationError(_('Line {line}: Invalid field "{field}" value "{value}"').format(line=line_number, field=name, value=value))

    if len(value) != data_len:
        raise invalid_field()
    if data_type == "X":
        return value
    if data_type == "9":
        if any(ch not in "0123456789" for ch in value):
            raise invalid_field()
        return value
    raise invalid_field()
def parse_records(
    line: str,
    specs: Sequence[Tuple[str, str, str]],
    line_number: int,
    check_record_length: bool = True,
    record_length: Optional[int] = None,
) -> Dict[str, Union[int, str]]:
    """
    Parse one fixed-width line into a dict according to the field specs.

    :param line: Line to parse
    :param specs: Sequence of (field name, field format, third element); the third element is not used here
    :param line_number: Line number, stored under "line_number" and used in error messages
    :param check_record_length: If True, verify the parsed length when a record length is known
    :param record_length: Expected record length, used when the line has no "record_length" field
    :return: Dict of stripped field values plus "line_number" and "extra_data" (whatever follows the last field)
    :raises ValidationError: If a field is invalid, or the record length check fails
    """
    data: Dict[str, Union[int, str]] = {"line_number": line_number}
    offset = 0
    for name, fmt, _unused in specs:
        data_type, data_len = parse_record_format(fmt)
        raw = parse_record_value(data_type, data_len, line[offset:], name=name, line_number=line_number)
        data[name] = str(raw).strip()
        offset += data_len
    data["extra_data"] = line[offset:]

    # A "record_length" field parsed from the line takes precedence over the argument.
    rec_len = data.get("record_length", record_length)
    if not (check_record_length and rec_len):
        return data

    extra = str(data["extra_data"]).strip()
    data["extra_data"] = extra
    # NOTE(review): a parsed "record_length" is stored as str, so comparing it with the int offset
    # is always unequal and the check then reduces to "extra data is non-empty" - confirm intent.
    if offset != rec_len and extra != "":
        raise ValidationError(
            _("Line {line}: Record length ({record_length}) does not match length of " 'parsed data ({data_length}). Extra data: "{extra_data}"').format(
                line=line_number,
                data_length=offset + len(extra),
                record_length=rec_len,
                extra_data=extra,
            )
        )
    return data
def convert_date(v: Optional[str], field_name: str, date_fmt: str = "YYMMDD") -> date:
    """
    Convert a six-character date string to a date.

    :param v: Date value; two-digit years are interpreted as 20YY
    :param field_name: Field name, used in error messages
    :param date_fmt: Date format; only "YYMMDD" is supported
    :return: Parsed date
    :raises ValidationError: If the value is missing, is not six characters long, is "000000",
        or the date format is unsupported
    :raises ValueError: If the value is not numeric or is not a valid calendar date
    """
    if v is None:
        raise ValidationError(_("Date field missing: {}").format(field_name))
    if v == "000000" or len(v) != 6:
        raise ValidationError(_("Date format error in field {}: {}").format(field_name, v))
    if date_fmt != "YYMMDD":
        raise ValidationError(_("Unsupported date format"))

    yy, mm, dd = v[0:2], v[2:4], v[4:6]
    return date(year=2000 + int(yy), month=int(mm), day=int(dd))
def convert_date_opt(v: Optional[str], field_name: str, date_fmt: str = "YYMMDD") -> Optional[date]:
    """
    Convert an optional date string to a date.

    :param v: Date value; None and "000000" both mean "no date"
    :param field_name: Field name, used in error messages
    :param date_fmt: Date format passed on to convert_date
    :return: Parsed date, or None if the value is absent
    """
    is_absent = v is None or v == "000000"
    return None if is_absent else convert_date(v, field_name, date_fmt)
def convert_time(v: Optional[str], field_name: str) -> time:
    """
    Convert a four-digit HHMM string to a time.

    :param v: Time value
    :param field_name: Field name, used in error messages
    :return: Parsed time (seconds are zero)
    :raises ValidationError: If the value is missing or is not four digits
    :raises ValueError: If the hour or minute is out of range
    """
    if v is None:
        raise ValidationError(_("Time field missing: {}").format(field_name))
    if re.match(r"^\d\d\d\d$", v) is None:
        raise ValidationError(_("Time format error in field {}: {}").format(field_name, v))
    hours, minutes = int(v[0:2]), int(v[2:4])
    return time(hours, minutes)
def convert_date_fields(data: dict, date_fields: Sequence[Union[str, Tuple[str, str]]], tz: Any, date_fmt: str = "YYMMDD"):
    """
    Convert date and date/time fields of a parsed record in place.

    :param data: Parsed record; modified in place
    :param date_fields: Fields to convert. A string names a date field, which is replaced by a date
        or None (see convert_date_opt). A (date field, time field) tuple names a pair that is merged
        into one datetime stored under the date field name. Entries of any other type are ignored.
    :param tz: Time zone object providing localize(), applied to the merged naive datetime
    :param date_fmt: Date format passed on to the date converters
    """
    for k in date_fields:
        # logger.debug('%s = %s (%s)', k, data.get(k), type(data.get(k)))
        if isinstance(k, str):
            data[k] = convert_date_opt(data.get(k), k, date_fmt)
        elif isinstance(k, tuple):
            if len(k) != 2:
                raise ValidationError(_("Date format error in field {}").format(k))
            k_date, k_time = k
            v_date, v_time = data.get(k_date), data.get(k_time)
            # Only convert when at least one half is non-empty; a half-filled pair then fails
            # inside convert_date / convert_time.
            if v_date or v_time:
                assert v_date is None or isinstance(v_date, str)
                assert v_time is None or isinstance(v_time, str)
                v_date = convert_date(v_date, k_date, date_fmt)
                v_time = convert_time(v_time, k_time)
                v_datetime = datetime.combine(v_date, v_time)
                data[k_date] = tz.localize(v_datetime)
            # NOTE(review): the listing this was recovered from had lost its indentation; the del is
            # assumed to run for every tuple entry (outside the `if` above) - confirm against the repository.
            del data[k_time]
        # logger.debug('%s = %s (%s)', k, data.get(k), type(data.get(k)))
def convert_decimal_fields(data: dict, decimal_fields: Sequence[Union[Tuple[str, str], str]], neg_sign_val: str = "-"):
    """
    Convert numeric string fields of a parsed record to Decimal in place.

    Values have commas removed and are multiplied by 0.01.

    :param data: Parsed record; modified in place
    :param decimal_fields: Fields to convert. A string names an unsigned number field. A
        (number field, sign field) tuple names a number whose sign is stored in a separate field.
    :param neg_sign_val: Sign field value that marks the number as negative
    :raises ValidationError: If an entry is neither a string nor a 2-tuple
    """
    for field in decimal_fields:
        if isinstance(field, str):
            v_number = data.get(field)
            if v_number is not None:
                v = Decimal(v_number.replace(",", "")) * Decimal("0.01")
                # logger.info('jbank.parsers.convert_decimal_fields: {} = {}'.format(field, v))
                data[field] = v
        elif isinstance(field, tuple) and len(field) == 2:
            k_number, k_sign = field
            v_number, v_sign = data.get(k_number), data.get(k_sign)
            if v_number is not None:
                v = Decimal(v_number.replace(",", "")) * Decimal("0.01")
                if v_sign == neg_sign_val:
                    v = -v
                data[k_number] = v
                # logger.info('jbank.parsers.convert_decimal_fields: {} = {}'.format(k_number, v))
                # NOTE(review): the listing this was recovered from had lost its indentation; the del is
                # assumed to run only when the number was converted - confirm against the repository.
                del data[k_sign]
        else:
            raise ValidationError(_("Invalid decimal field format: {}").format(field))
def parse_filename_suffix(filename: str) -> str:
    """
    Return the part of the filename after the last dot.

    :param filename: File name
    :return: Suffix without the dot; the whole filename if it contains no dot
    """
    _stem, _dot, suffix = filename.rpartition(".")
    return suffix
def parse_nordea_balance_query(content: str) -> Dict[str, Any]:
    """
    Parse the first non-blank line of a Nordea balance query (SALDO) response.

    :param content: Whole response content as text; must start with the format identifier "1"
    :return: Parsed fields. The balance, available balance and credit limit are signed Decimals
        (their sign fields are removed), and "record_datetime" is a datetime localized to
        Europe/Helsinki built from the date and time fields.
    :raises Exception: If the content is empty or does not start with "1"
    :raises ValidationError: If a field of the record is invalid
    """
    if not content:
        raise Exception("No Nordea balance query content to parse")
    if content[0] != "1":
        raise Exception("Invalid file format (not matching expected Nordea SALDO)")

    # (field name, field format, third element); parse_records does not use the third element.
    SALDO_FIELDS = (
        ("file_format_identifier", "9(1)", "P"),
        ("account_number", "9(14)", "P"),
        ("pad_1", "X(15)", "P"),
        ("balance_sign", "X(1)", "P"),
        ("balance", "9(14)", "P"),
        ("available_balance_sign", "X(1)", "P"),
        ("available_balance", "9(14)", "P"),
        ("record_datetime", "9(6)", "P"),
        ("record_time", "9(4)", "P"),
        ("credit_limit_sign", "X(1)", "P"),
        ("credit_limit", "9(14)", "P"),
        ("currency", "X(3)", "P"),
        ("pad_2", "X(2)", "P"),
    )
    SALDO_DATE_FIELDS = (("record_datetime", "record_time"),)
    SALDO_DECIMAL_FIELDS = (
        ("balance", "balance_sign"),
        ("available_balance", "available_balance_sign"),
        ("credit_limit", "credit_limit_sign"),
    )
    tz = timezone("Europe/Helsinki")

    for line_number, line in enumerate(content.split("\n"), start=1):
        if not line.strip():
            continue
        # Parse the line itself rather than the whole content, so that any following lines
        # do not end up in the record's "extra_data".
        res = parse_records(line, SALDO_FIELDS, line_number=line_number)
        convert_date_fields(res, SALDO_DATE_FIELDS, tz)
        convert_decimal_fields(res, SALDO_DECIMAL_FIELDS)
        return res
    return {}
def parse_samlink_real_time_statement(content: str) -> Dict[str, Any]:
    """
    Parse a Samlink real time statement (.RA) file.

    The first line is parsed as the header, the second as the balance line, and every
    following non-blank line as a transaction record.

    :param content: Whole file content as text, one record per line
    :return: Header and balance fields merged into one dict (balance fields win on name clashes),
        plus "record_datetime" (header date and balance time localized to Europe/Helsinki) and
        "records" (list of parsed transaction dicts)
    :raises Exception: If the content is empty or has fewer than three lines
    :raises ValidationError: If a field of any record is invalid
    """
    if not content:
        raise Exception("No Samlink real time statement (.RA) content to parse")

    # (field name, field format, third element); parse_records does not use the third element.
    RA_HEADER_FIELDS = (
        ("heading", "X(24)", "P"),
        ("currency_unit", "X(1)", "P"),  # "1" == euro
        ("account_number", "9(14)", "P"),
        ("record_date", "9(6)", "P"),
    )
    RA_BALANCE_FIELDS = (
        ("pad_1", "9(1)", "P"),
        ("record_time", "9(4)", "P"),
        ("balance", "X(16)", "P"),
        ("balance_sign", "X(1)", "P"),
        ("available_balance", "X(16)", "P"),
        ("available_balance_sign", "X(1)", "P"),
    )
    RA_TRANSACTION_FIELDS = (
        ("const_1", "X(1)", "P"),
        ("record_date", "9(6)", "P"),
        ("record_number", "X(3)", "P"),
        ("currency_unit", "X(1)", "P"),  # "1" == euro
        ("record_code", "X(3)", "P"),
        ("amount", "X(16)", "P"),
        ("amount_sign", "X(1)", "P"),
        ("remittance_info", "X(20)", "P"),
        ("payer_name", "X(20)", "P"),
        ("record_description", "X(12)", "P"),
    )

    lines = content.split("\n")
    if len(lines) < 3:
        raise Exception("Invalid Samlink real time statement (.RA) content, less than 3 lines")
    tz = timezone("Europe/Helsinki")

    header = parse_records(lines[0], RA_HEADER_FIELDS, line_number=1)
    convert_date_fields(header, ["record_date"], tz)

    balance = parse_records(lines[1], RA_BALANCE_FIELDS, line_number=2)
    balance["record_time"] = convert_time(balance.get("record_time"), "record_time")  # type: ignore
    convert_decimal_fields(balance, [("available_balance", "available_balance_sign"), ("balance", "balance_sign")])

    records: List[Dict[str, Any]] = []
    # Transactions start on the third line of the file.
    for line_number, line in enumerate(lines[2:], start=3):
        if not line.strip():
            continue
        record = parse_records(line, RA_TRANSACTION_FIELDS, line_number)
        convert_decimal_fields(record, [("amount", "amount_sign")])
        convert_date_fields(record, ["record_date"], tz)
        records.append(record)

    # Passing a pytz zone as tzinfo= attaches the zone's LMT offset rather than the offset in
    # force on that date, so localize the naive datetime instead (as convert_date_fields does).
    naive_datetime = datetime.combine(header["record_date"], balance["record_time"])  # type: ignore
    return {
        **header,
        **balance,
        "record_datetime": tz.localize(naive_datetime),
        "records": records,
    }