# Source code for schematics.types.net

# -*- coding: utf-8 -*-

from __future__ import unicode_literals, absolute_import

import encodings.idna
import random
import re

try: # PY3
    from urllib.request import urlopen
    from urllib.parse import urlunsplit, quote as urlquote
    from urllib.error import URLError
except ImportError: # PY2
    from urllib2 import urlopen, URLError
    from urlparse import urlunsplit
    from urllib import quote as urlquote

from ..common import * # pylint: disable=redefined-builtin
from ..exceptions import ValidationError, StopValidationError

from .base import StringType, fill_template


### Character ranges

# Fragments intended to be placed inside a regex character class (``[...]``).
# Only upper-case ranges are spelled out; the patterns built from HEX in this
# module are all compiled with ``re.I``, so lower-case input matches as well.
# ALPHA and ALPHANUM are not referenced anywhere else in this section.
HEX      = '0-9A-F'
ALPHA    = 'A-Z'
ALPHANUM = 'A-Z0-9'


### IP address patterns

# NOTE: the blanks inside the patterns below are layout only. Every regex in
# this module that embeds them is compiled with ``re.X`` (verbose mode).

# One component of a dotted quad: 250-255, 200-249, or one or two digits
# optionally preceded by a 0 or 1 (so leading zeros such as "007" match too).
IPV4_OCTET = '( 25[0-5] | 2[0-4][0-9] | [0-1]?[0-9]{1,2} )'
# Exactly four octets separated by literal dots.
IPV4 = r'( ((%(oct)s\.){3} %(oct)s) )' % {'oct': IPV4_OCTET}

# One colon-separated IPv6 group: one to four hex digits.
IPV6_H16 = '[%s]{1,4}' % HEX
# Tail of an IPv6 address: either two hex groups or an embedded IPv4 address.
IPV6_L32 = '(%(h16)s:%(h16)s|%(ipv4)s)' % {'h16': IPV6_H16, 'ipv4': IPV4}
# Complete IPv6 address. The first alternative is the uncompressed form; each
# following alternative covers one possible position of the ``::`` elision,
# allowing one more group before it and one fewer after it.
# NOTE(review): the layout looks like the ``IPv6address`` rule of RFC 3986 --
# confirm against the RFC before changing any alternative.
IPV6 = r"""(
                                    (%(h16)s:){6}%(l32)s  |
                                ::  (%(h16)s:){5}%(l32)s  |
    (               %(h16)s )?  ::  (%(h16)s:){4}%(l32)s  |
    ( (%(h16)s:){,1}%(h16)s )?  ::  (%(h16)s:){3}%(l32)s  |
    ( (%(h16)s:){,2}%(h16)s )?  ::  (%(h16)s:){2}%(l32)s  |
    ( (%(h16)s:){,3}%(h16)s )?  ::  (%(h16)s:){1}%(l32)s  |
    ( (%(h16)s:){,4}%(h16)s )?  ::               %(l32)s  |
    ( (%(h16)s:){,5}%(h16)s )?  ::               %(h16)s  |
    ( (%(h16)s:){,6}%(h16)s )?  :: )""" % {'h16': IPV6_H16,
                                           'l32': IPV6_L32}


class IPAddressType(StringType):
    """A field that stores a valid IPv4 or IPv6 address.

    Subclasses narrow the accepted family by overriding ``VERSION`` (used
    only in the error message) and ``REGEX`` (used for the actual check).
    """

    # Suffix inserted into the error message ('v4' / 'v6'); None means "any".
    VERSION = None
    # The alternation must be grouped: in '^A|B$' the '^' binds only to A and
    # the '$' only to B, which let inputs such as '1.2.3.4xyz' pass because
    # the IPv4 branch was never anchored at the end.
    REGEX = re.compile('^(%s|%s)$' % (IPV4, IPV6), re.I + re.X)

    @classmethod
    def valid_ip(cls, value):
        """Return True if ``value`` matches this class's ``REGEX`` in full."""
        return bool(cls.REGEX.match(value))

    def validate_(self, value, context=None):
        """Raise ``ValidationError`` unless ``value`` is a valid address.

        ``context`` is accepted for signature compatibility and is not used.
        """
        if not self.valid_ip(value):
            raise ValidationError('Invalid IP%s address' % (self.VERSION or ''))


class IPv4Type(IPAddressType):
    """String field restricted to dotted-quad IPv4 addresses."""

    VERSION = 'v4'
    REGEX = re.compile('^%s$' % IPV4, re.I | re.X)

    def _mock(self, context=None):
        """Return a random dotted quad; ``context`` is not used."""
        octets = [str(random.randrange(256)) for _ in range(4)]
        return '.'.join(octets)


class IPv6Type(IPAddressType):
    """A field that stores a valid IPv6 address."""

    VERSION = 'v6'
    REGEX = re.compile('^%s$' % IPV6, re.I + re.X)

    def _mock(self, context=None):
        """Return a random, syntactically valid IPv6 address.

        The previous implementation produced a dotted-quad IPv4 string, which
        this class's own ``REGEX`` rejects. The value is now eight
        colon-separated hex groups: the fixed ``2001:db8`` prefix followed by
        six random groups, which matches the uncompressed alternative of the
        IPv6 pattern. ``context`` is not used.
        """
        groups = ('%x' % random.randrange(1 << 16) for _ in range(6))
        return '2001:db8:' + ':'.join(groups)


### URI patterns

# Sets of single characters used to build per-component character classes.
# NOTE(review): the names look like the RFC 3986 grammar terms (gen-delims,
# sub-delims, unreserved, pchar) -- confirm against the RFC.
GEN_DELIMS = set(':/?#[]@')
SUB_DELIMS = set('!$&\'()*+,;=')
UNRESERVED = set('-_.~')
PCHAR = SUB_DELIMS | UNRESERVED | set('%:@')
QUERY_EXTRAS = set('[]') # nonstandard: additionally allowed in the query component

# Every special character considered at all; ``_chrcls`` subtracts a
# component's allowed set from this to find what must be excluded.
VALID_CHARS = GEN_DELIMS | SUB_DELIMS | UNRESERVED | set('%')
# The same characters as one string; passed as the ``safe`` argument when the
# URL is quoted in ``URLType.validate_``. ``py_native_string`` is not defined
# in this file (presumably provided by the ``..common`` star import).
VALID_CHAR_STRING = py_native_string(str.join('', VALID_CHARS))
# Character-class fragment that ``_chrcls`` places right after the negating
# '^', so these characters are rejected in every URL component.
# NOTE(review): this is not a raw string, so ``\\^`` becomes a backslash
# followed by a caret, which a regex character class reads as an escaped '^'.
# A literal backslash therefore does not appear to be excluded -- confirm
# whether that is intended.
UNSAFE_CHAR_STRING = '\x00-\x20<>{}|"`\\^\x7F-\x9F'

def _chrcls(allowed_chars):
    """
    Build the body of a negated regex character class for one URL component.

    ``allowed_chars`` is the subset of the URL-compatible special characters
    ``!#$%&'()*+,-./:;=?@[]_~`` that the component may contain. The returned
    string starts with ``^`` and lists the always-unsafe characters followed
    by every special character that is *not* in ``allowed_chars``; wrapped in
    ``[...]`` it matches any URL-compatible character the component permits.
    """
    excluded = str.join('', VALID_CHARS - allowed_chars)
    # Escape what would otherwise be special inside ``[...]`` or in a later
    # %-interpolation; the order of the substitutions is kept as-is.
    for plain, escaped in (('%', '%%'), (']', r'\]'), ('-', r'\-')):
        excluded = excluded.replace(plain, escaped)
    return '^' + UNSAFE_CHAR_STRING + excluded

# Regex fragment for each URL component, keyed by the name of the group it
# fills in ``URLType.URL_REGEX``.
URI_PATTERNS = {
    'scheme' : r'[%s]+' % ('A-Z0-9.+-'),
    'user'   : r'[%s]+' % _chrcls(UNRESERVED | SUB_DELIMS | set('%:')),
    # Two to five digits: a single-digit port does not match this fragment.
    'port'   : r'[0-9]{2,5}',
    'host4'  : IPV4,
    # Loose pre-filter only (hex digits and colons); the strict check is done
    # in ``URLType.valid_url`` via ``IPv6Type.valid_ip``.
    'host6'  : r'[%s]+' % (HEX + ':'),
    'hostn'  : r'[%s]+' % _chrcls(set('.-')),
    'path'   : r'[%s]*' % _chrcls(PCHAR | set('/')),
    'query'  : r'[%s]*' % _chrcls(PCHAR | set('/?') | QUERY_EXTRAS),
    'frag'   : r'[%s]*' % _chrcls(PCHAR | set('/?')),
}


class URLType(StringType):

    """A field that validates the input as a URL.

    If ``verify_exists=True``, the validation function will make sure
    the URL is accessible (server responds with HTTP 2xx).

    :param fqdn:
        When true (the default), a named host must have at least two labels
        and its last label must match ``TLD_REGEX``.
    :param verify_exists:
        When true, ``validate_`` additionally tries to open the URL.
    """

    MESSAGES = {
        'invalid_url': "Not a well-formed URL.",
        'not_found': "URL could not be retrieved.",
    }

    URL_REGEX = re.compile(r"""^(
            (?P<scheme> %(scheme)s ) ://
        (   (?P<user>   %(user)s   ) @   )?
        (\[ (?P<host6>  %(host6)s  ) ]
          | (?P<host4>  %(host4)s  )
          | (?P<hostn>  %(hostn)s  )     )
        ( : (?P<port>   %(port)s   )     )?
            (?P<path> / %(path)s   )?
        (\? (?P<query>  %(query)s  )     )?
        (\# (?P<frag>   %(frag)s   )     )?)$
        """ % URI_PATTERNS, re.I + re.X)

    TLD_REGEX = re.compile(r'^( ([a-z]{2,}) | (xn--[a-z0-9]{4,}) )$', re.I + re.X)

    def __init__(self, fqdn=True, verify_exists=False, **kwargs):
        self.schemes = ['http', 'https']
        self.fqdn = fqdn
        self.verify_exists = verify_exists
        super(URLType, self).__init__(**kwargs)

    def _mock(self, context=None):
        """Return a mock URL built from the field's length limits."""
        return fill_template('http://a%s.ZZ', self.min_length, self.max_length)

    def valid_url(self, value):
        """Parse and check ``value``.

        Returns the dict of named groups from ``URL_REGEX`` when the URL is
        acceptable, otherwise ``False``. For named hosts the dict also gets a
        ``'hostn_enc'`` entry holding the ASCII (IDNA-encoded if necessary)
        hostname without a trailing dot.
        """
        match = self.URL_REGEX.match(value)
        if not match:
            return False
        url = match.groupdict()

        if url['scheme'].lower() not in self.schemes:
            return False
        if url['host6']:
            # The regex only pre-filters the bracket contents; validate them
            # strictly here.
            if IPv6Type.valid_ip(url['host6']):
                return url
            return False
        if url['host4']:
            return url

        try:
            hostname = url['hostn'].encode('ascii').decode('ascii')
        except UnicodeError:
            # Non-ASCII hostname: fall back to its IDNA form.
            try:
                hostname = url['hostn'].encode('idna').decode('ascii')
            except UnicodeError:
                return False

        # A single trailing dot is tolerated and ignored for the checks below.
        if hostname[-1] == '.':
            hostname = hostname[:-1]
        if len(hostname) > 253:
            return False

        labels = hostname.split('.')
        for label in labels:
            if not 0 < len(label) < 64:
                return False
            if '-' in (label[0], label[-1]):
                return False
        if self.fqdn:
            if len(labels) == 1 \
              or not self.TLD_REGEX.match(labels[-1]):
                return False

        url['hostn_enc'] = hostname

        return url

    @staticmethod
    def _netloc(url):
        """Build the ``host[:port]`` part of a URL from a ``valid_url`` dict."""
        if url['host6']:
            # The regex captures only the bracket contents; the brackets must
            # be restored or the address's colons get confused with the port.
            host = '[%s]' % url['host6']
        else:
            host = url['host4'] or url['hostn_enc']
        if url['port']:
            return '%s:%s' % (host, url['port'])
        return host

    def validate_(self, value, context=None):
        """Validate ``value`` and, if requested, check that it can be opened.

        Raises ``StopValidationError`` with the ``invalid_url`` message when
        the URL is malformed, or with the ``not_found`` message when
        ``verify_exists`` is set and opening the URL raises ``URLError``.
        ``context`` is not used. The user-info part of the URL is not
        included in the request, as before.
        """
        url = self.valid_url(value)
        if not url:
            raise StopValidationError(self.messages['invalid_url'])
        if self.verify_exists:
            # Optional groups are None when absent; ``urlunsplit`` needs
            # strings (a None path makes it raise TypeError).
            url_string = urlquote(urlunsplit((
                url['scheme'],
                self._netloc(url),
                url['path'] or '',
                url['query'] or '',
                url['frag'] or '')
                ).encode('utf-8'), safe=VALID_CHAR_STRING)
            try:
                response = urlopen(url_string)
            except URLError:
                raise StopValidationError(self.messages['not_found'])
            # Only reachability matters; release the connection right away.
            response.close()


class EmailType(StringType):

    """A string field whose value must look like an e-mail address.

    The check is purely syntactic: the value has to match ``EMAIL_REGEX``
    (a dot-atom or quoted-string local part, ``@``, and a dotted domain
    ending in an alphabetic label).
    """

    MESSAGES = {
        'email': "Not a well-formed email address."
    }

    EMAIL_REGEX = re.compile(
        r"""^(
        ( ( [%(atext)s]+ (\.[%(atext)s]+)* ) | ("( [%(qtext)s\s] | \\[%(vchar)s\s] )*") )
        @((?!-)[A-Z0-9-]{1,63}(?<!-)\.)+[A-Z]{2,63})$""" % {
            'atext': '-A-Z0-9!#$%&\'*+/=?^_`{|}~',
            'qtext': '\x21\x23-\x5B\\\x5D-\x7E',
            'vchar': '\x21-\x7E'
        },
        re.I | re.X)

    def _mock(self, context=None):
        """Return a mock address sized by the field's length limits."""
        template = '%s@example.com'
        return fill_template(template, self.min_length, self.max_length)

    def validate_email(self, value, context=None):
        """Raise ``StopValidationError`` if ``value`` is not well-formed."""
        if EmailType.EMAIL_REGEX.match(value):
            return
        raise StopValidationError(self.messages['email'])


# Public names of this module, computed by ``module_exports`` from the module
# name. ``module_exports`` is not defined in this file (presumably it comes
# from the ``..common`` star import -- confirm).
__all__ = module_exports(__name__)