Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

from collections import namedtuple 

 

from ..exceptions import LocationParseError 

 

 

class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
    """
    Datastructure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`.

    Every field defaults to ``None``, so a partial URL can be built by
    passing only the components that are known.
    """
    # Must be spelled ``__slots__``: an empty slots tuple prevents a
    # per-instance ``__dict__`` so instances stay as small as a plain tuple.
    __slots__ = ()

    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None):
        return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment)

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string."""
        # An empty or missing path is represented as the root path.
        uri = self.path or '/'

        # ``is not None`` (rather than truthiness) keeps a trailing '?' when
        # the query is present but empty.
        if self.query is not None:
            uri += '?' + self.query

        return uri

    @property
    def netloc(self):
        """Network location including host and port"""
        if self.port:
            return '%s:%d' % (self.host, self.port)
        return self.host

 

 

def split_first(s, delims):
    """
    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string, the second
    part is the empty string and the delimiter is None.

    Delimiters may be longer than one character; the whole matched
    delimiter is removed from the result.

    Example: ::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)
        >>> split_first('foo::bar', ['::'])
        ('foo', 'bar', '::')

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    min_idx = None
    min_delim = None
    for d in delims:
        idx = s.find(d)
        if idx < 0:
            # This delimiter does not occur in the string.
            continue

        # Keep whichever delimiter appears earliest; on a tie the one seen
        # first in ``delims`` wins because the comparison is strict.
        if min_idx is None or idx < min_idx:
            min_idx = idx
            min_delim = d

    if min_idx is None:
        return s, '', None

    # Skip the full length of the matched delimiter, not just one character.
    return s[:min_idx], s[min_idx + len(min_delim):], min_delim

 

 

def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.

    Partly backwards-compatible with :mod:`urlparse`.

    Raises :class:`LocationParseError` when the port is not a valid integer
    or when an IPv6 literal is missing its closing bracket.

    Example: ::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """

    # While this code has overlap with stdlib's urlparse, it is much
    # simplified for our needs and less annoying.
    # Additionally, this implementation does silly things to be optimal
    # on CPython.

    scheme = None
    auth = None
    host = None
    port = None
    path = None
    fragment = None
    query = None

    # Scheme
    if '://' in url:
        scheme, url = url.split('://', 1)

    # Find the earliest Authority Terminator
    # (http://tools.ietf.org/html/rfc3986#section-3.2)
    url, path_, delim = split_first(url, ['/', '?', '#'])

    if delim:
        # Reassemble the path
        path = delim + path_

    # Auth
    if '@' in url:
        # Last '@' denotes end of auth part
        auth, url = url.rsplit('@', 1)

    # IPv6
    if url and url[0] == '[':
        # An unterminated literal is a malformed location; report it as such
        # instead of failing on tuple unpacking.
        if ']' not in url:
            raise LocationParseError(url)
        host, url = url.split(']', 1)
        host += ']'

    # Port
    if ':' in url:
        _host, port = url.split(':', 1)

        # Do not clobber a host already extracted from an IPv6 literal.
        if not host:
            host = _host

        if port:
            # If given, ports must be integers.
            if not port.isdigit():
                raise LocationParseError(url)
            # ``isdigit`` also accepts characters such as superscript digits
            # which ``int`` rejects; treat those as a parse failure too.
            try:
                port = int(port)
            except ValueError:
                raise LocationParseError(url)
        else:
            # Blank ports are cool, too. (rfc3986#section-3.2.3)
            port = None

    elif not host and url:
        host = url

    if not path:
        return Url(scheme, auth, host, port, path, query, fragment)

    # Fragment
    if '#' in path:
        path, fragment = path.split('#', 1)

    # Query
    if '?' in path:
        path, query = path.split('?', 1)

    return Url(scheme, auth, host, port, path, query, fragment)

 

 

def get_host(url):
    """
    Deprecated. Use :func:`.parse_url` instead.

    Returns a ``(scheme, hostname, port)`` tuple for the given url, with the
    scheme falling back to ``'http'`` when the url does not carry one.
    """
    parsed = parse_url(url)
    scheme = parsed.scheme or 'http'
    return scheme, parsed.hostname, parsed.port