# Source code for pydateparser.date_parser

"""
Date Parser Adapter.

"""

import attr
from ._loggers import logger
from collections import namedtuple
from .date_formats import DateFormats
from ._errors import DateParserException
from ._utils import _date_format_handler
from ._core_date_parser import CoreDateParser
from ._validators import _positive_integer_validator
from ._validators import _date_format_type_validator, _end_year_validator


# Field specification handed to ``attr.s(these=...)`` on ``DateParser``.
# Each entry is one ``attr.ib``; they are created in the order listed here.
_attributes = dict(
    # Source text; must be a ``str``.
    text=attr.ib(validator=attr.validators.instance_of(str)),
    # First year of the search window; an ``int`` that also has to pass
    # the positive-integer validator.
    start_year=attr.ib(
        validator=[
            attr.validators.instance_of(int),
            _positive_integer_validator,
        ],
    ),
    # Last year of the search window; an ``int`` checked by the end-year
    # validator and then by the positive-integer validator.
    end_year=attr.ib(
        validator=[
            attr.validators.instance_of(int),
            _end_year_validator,
            _positive_integer_validator,
        ],
    ),
    # Optional date-format selector; defaults to ``None`` and is passed
    # through ``_date_format_handler`` as its converter.
    locale=attr.ib(
        default=None,
        validator=_date_format_type_validator,
        converter=_date_format_handler,
    ),
)


# Record type for one extracted date. Built once here instead of on every
# ``_format_date`` call, so all results share a single ``DATE`` type.
_DATE = namedtuple("DATE", ["date", "token_span", "token_index", "format"])


@attr.s(slots=True, these=_attributes)
class DateParser:
    """
    CoreDateParser Adapter class.

    Parameters
    ----------
    text: str
        a string/text document from which we can extract dates.
    start_year: int
        define the start year from which to look for the date.
    end_year: int
        define the end year up to which to look for the date.
    locale: None, str, list
        define the type of dateformat (currently supports 'USA', 'EU'),
        default is None. or pass your own list of patterns.

    Returns
    -------
    list
        list of `DATE` objects, available through the `date` property.
        `None` is returned instead when extraction raises an exception
        (the exception is logged).

    Note
    ----
    DATE is a namedtuple, which gives out the actual extracted `date`,
    `token_span`, `token_index` and `format` (matched format) items.
    """

    @staticmethod
    def _format_date(date_object):
        """
        Convert one raw parser result into a `DATE` namedtuple.

        Parameters
        ----------
        date_object: sequence
            indexable result with at least six items; item 0 becomes
            `date`, items 1 and 2 become `token_span`, items 4 and 5
            become `token_index`, and item 3 becomes `format`.

        Returns
        -------
        DATE
            namedtuple with `date`, `token_span`, `token_index`, `format`.
        """
        return _DATE(date_object[0],
                     (date_object[1], date_object[2]),
                     (date_object[4], date_object[5]),
                     date_object[3])

    @staticmethod
    def _parser(text, start_year, end_year, locale, formatter):
        """
        Run `CoreDateParser` over `text` and format every result.

        Parameters
        ----------
        text: str
            text handed to `CoreDateParser.parse_string`.
        start_year: int
            forwarded to `CoreDateParser` as `start_year`.
        end_year: int
            forwarded to `CoreDateParser` as `end_year`.
        locale: None, str, list
            forwarded to `CoreDateParser` as its first argument.
        formatter: callable
            applied to each item yielded by the parser.

        Returns
        -------
        list or None
            formatted results, or `None` if parsing/formatting raised.
        """
        # Constructed outside the ``try`` on purpose: a failure to build the
        # core parser propagates to the caller, exactly as before.
        core_parser = CoreDateParser(locale,
                                     start_year=start_year,
                                     end_year=end_year)
        try:
            logger.info('Extracting dates from the text.')
            parsed = core_parser.parse_string(text)
            return [formatter(item) for item in parsed]
        except Exception:
            # Best-effort contract is kept (callers still receive ``None``),
            # but the failure is no longer discarded without a trace.
            # NOTE(review): assumes ``logger`` is a standard
            # ``logging.Logger`` exposing ``exception`` - confirm in
            # ``._loggers``.
            logger.exception('Failed to extract dates from the text.')
            return None

    @property
    def date(self):
        """
        Dates extracted from `text` using this instance's settings.

        Returns
        -------
        list or None
            list of `DATE` namedtuples, or `None` if extraction raised.
        """
        return self._parser(self.text,
                            self.start_year,
                            self.end_year,
                            self.locale,
                            self._format_date)