Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

"""
Module for generating and serializing XML and HTML structures
by using simple Python objects.

(c) holger krekel, holger at merlinux eu. 2009
"""

7import sys, re 

8 

# Text compatibility helpers: ``u`` produces the interpreter's text type
# from a literal and ``unicode`` converts an arbitrary object to text.
if sys.version_info[0] < 3:
    def u(s):
        """Return ``s`` converted with the builtin ``unicode``."""
        return unicode(s)

    # keep the builtin reachable under this module's own name
    unicode = unicode
else:
    def u(s):
        """Return ``s`` unchanged."""
        return s

    def unicode(x, errors=None):
        """Return the text form of ``x``.

        A ``__unicode__`` method on ``x`` is preferred over ``str(x)``.
        ``errors`` is accepted but not used.
        """
        if not hasattr(x, '__unicode__'):
            return str(x)
        return x.__unicode__()

20 

21 

class NamespaceMetaclass(type):
    """Metaclass that creates tag classes on first attribute access."""

    def __getattr__(self, name):
        """Create, store and return the tag class called ``name``.

        Only reached when regular class attribute lookup fails.

        Raises AttributeError for names starting with an underscore and
        for names missing from a non-None ``__tagspec__``; raises
        ValueError when called on the ``Namespace`` base class itself.
        """
        if name.startswith('_'):
            raise AttributeError(name)
        if self == Namespace:
            raise ValueError("Namespace class is abstract")
        allowed = self.__tagspec__
        if not (allowed is None or name in allowed):
            raise AttributeError(name)
        # a "sticky" namespace records the requested name on the new class
        namespace = {'xmlname': name} if self.__stickyname__ else {}
        tagclass = type(name, (self.__tagclass__,), namespace)
        # store it so later lookups find the class without coming back here
        setattr(self, name, tagclass)
        return tagclass

37 

class Tag(list):
    """A tag node: a list of children plus attributes kept in ``self.attr``."""

    class Attr(object):
        """Attribute holder; each keyword argument becomes an attribute."""

        def __init__(self, **kwargs):
            self.__dict__.update(kwargs)

    def __init__(self, *args, **kwargs):
        """Store positional arguments as children, keywords as attributes."""
        super(Tag, self).__init__(args)
        self.attr = self.Attr(**kwargs)

    def __unicode__(self):
        """Return the serialized tag without indentation."""
        return self.unicode(indent=0)

    __str__ = __unicode__

    def unicode(self, indent=2):
        """Return the tag and its children serialized by SimpleUnicodeVisitor."""
        chunks = []
        visitor = SimpleUnicodeVisitor(chunks.append, indent)
        visitor.visit(self)
        return u("").join(chunks)

    def __repr__(self):
        """Return a short description with the class name and object id."""
        return "<%r tag object %d>" % (self.__class__.__name__, id(self))

59 

# Abstract base of all tag namespaces, built by calling the metaclass
# directly; subclasses override the three configuration attributes.
Namespace = NamespaceMetaclass(
    'Namespace',
    (object,),
    dict(__tagspec__=None, __tagclass__=Tag, __stickyname__=False),
)

65 

class HtmlTag(Tag):
    """Tag that is serialized through HtmlVisitor."""

    def unicode(self, indent=2):
        """Return the serialized tag, using HtmlVisitor with shortempty=False."""
        pieces = []
        visitor = HtmlVisitor(pieces.append, indent, shortempty=False)
        visitor.visit(self)
        return u("").join(pieces)

71 

# exported plain html namespace
class html(Namespace):
    """HTML namespace: tag classes are HtmlTag subclasses limited to the
    names listed in ``__tagspec__``."""

    __tagclass__ = HtmlTag
    __stickyname__ = True
    # allowed tag names, stored as the keys of a dict
    __tagspec__ = dict.fromkeys(filter(None, (
        'a,abbr,acronym,address,applet,area,article,aside,audio,b,'
        'base,basefont,bdi,bdo,big,blink,blockquote,body,br,button,'
        'canvas,caption,center,cite,code,col,colgroup,command,comment,'
        'datalist,dd,del,details,dfn,dir,div,dl,dt,em,embed,'
        'fieldset,figcaption,figure,footer,font,form,frame,frameset,h1,'
        'h2,h3,h4,h5,h6,head,header,hgroup,hr,html,i,iframe,img,input,'
        'ins,isindex,kbd,keygen,label,legend,li,link,listing,map,mark,'
        'marquee,menu,meta,meter,multicol,nav,nobr,noembed,noframes,'
        'noscript,object,ol,optgroup,option,output,p,param,pre,progress,'
        'q,rp,rt,ruby,s,samp,script,section,select,small,source,span,'
        'strike,strong,style,sub,summary,sup,table,tbody,td,textarea,'
        'tfoot,th,thead,time,title,tr,track,tt,u,ul,xmp,var,video,wbr'
    ).split(',')), 1)

    class Style(object):
        """Style holder; underscores in keyword names are stored as dashes."""

        def __init__(self, **kw):
            for key, value in kw.items():
                setattr(self, key.replace('_', '-'), value)

96 

97 

class raw(object):
    """Wrapper around a unicode string that is kept in ``uniobj`` and is
    meant to be included in the output as-is."""

    def __init__(self, uniobj):
        self.uniobj = uniobj

103 

class SimpleUnicodeVisitor(object):
    """Recursive visitor that serializes a node tree through ``write``.

    ``write`` is called with successive text fragments.  ``indent`` is the
    number of spaces added per nesting level, ``curindent`` the starting
    indentation and ``shortempty`` selects ``<name/>`` instead of
    ``<name></name>`` for tags without children.
    """

    def __init__(self, write, indent=0, curindent=0, shortempty=True):
        self.write = write
        self.cache = {}  # node class -> visit method
        self.visited = {}  # for detection of recursion
        self.indent = indent
        self.curindent = curindent
        self.parents = []  # tags whose children are being serialized
        self.shortempty = shortempty  # short empty tags or not

    def visit(self, node):
        """Dispatch ``node`` to the method named after its class or a base.

        The classes in the node's MRO are tried in order; when no method
        of a matching name exists the private fallback handler is used.
        The chosen method is cached per node class.
        """
        cls = node.__class__
        try:
            visitmethod = self.cache[cls]
        except KeyError:
            for subclass in cls.__mro__:
                visitmethod = getattr(self, subclass.__name__, None)
                if visitmethod is not None:
                    break
            else:
                visitmethod = self.__object
            self.cache[cls] = visitmethod
        visitmethod(node)

    def _mark_visited(self, node):
        """Record ``node`` as serialized, rejecting a second visit.

        Raises AssertionError when the same object is visited twice.  The
        exception is raised explicitly so that the check also runs under
        ``python -O``, where ``assert`` statements are removed and a
        self-containing structure would otherwise recurse without bound.
        """
        key = id(node)
        if key in self.visited:
            raise AssertionError(
                "object %d was already visited" % key)
        self.visited[key] = 1

    # the default fallback handler is marked private
    # to avoid clashes with the tag name object
    def __object(self, obj):
        """Write the escaped text form of ``obj``."""
        self.write(escape(unicode(obj)))

    def raw(self, obj):
        """Write the content of a ``raw`` object without escaping."""
        self.write(obj.uniobj)

    def list(self, obj):
        """Visit every element of a list; raises AssertionError on re-visit."""
        self._mark_visited(obj)
        for elem in obj:
            self.visit(elem)

    def Tag(self, tag):
        """Write ``tag`` with its attributes and children.

        Sets ``tag.parent`` to the enclosing tag (None for the outermost
        one).  Raises AssertionError if the same tag object is visited
        twice by this visitor.
        """
        self._mark_visited(tag)
        tag.parent = self.parents[-1] if self.parents else None
        tagname = getattr(tag, 'xmlname', tag.__class__.__name__)
        if self.curindent and not self._isinline(tagname):
            self.write("\n" + u(' ') * self.curindent)
        if tag:
            self.curindent += self.indent
            self.write(u('<%s%s>') % (tagname, self.attributes(tag)))
            self.parents.append(tag)
            for x in tag:
                self.visit(x)
            self.parents.pop()
            self.write(u('</%s>') % tagname)
            self.curindent -= self.indent
        else:
            # no children: short form or explicit start/end tag pair
            nameattr = tagname + self.attributes(tag)
            if self._issingleton(tagname):
                self.write(u('<%s/>') % (nameattr,))
            else:
                self.write(u('<%s></%s>') % (nameattr, tagname))

    def attributes(self, tag):
        """Return the serialized attributes of ``tag`` followed by its style.

        Attributes are emitted in sorted name order.
        """
        parts = []
        for name in sorted(dir(tag.attr)):
            res = self.repr_attribute(tag.attr, name)
            if res is not None:
                parts.append(res)
        parts.extend(self.getstyle(tag))
        return u("").join(parts)

    def repr_attribute(self, attrs, name):
        """Return `` name="value"`` for one attribute.

        Returns None for names starting with two underscores.  A trailing
        underscore is dropped from the name (``class_`` is written as
        ``class``).  ``raw`` values are inserted as-is, everything else is
        converted to text and escaped.
        """
        if name[:2] == '__':
            return None
        value = getattr(attrs, name)
        if name.endswith('_'):
            name = name[:-1]
        if isinstance(value, raw):
            insert = value.uniobj
        else:
            insert = escape(unicode(value))
        return ' %s="%s"' % (name, insert)

    def getstyle(self, tag):
        """Return attribute list suitable for styling.

        The result is empty when ``tag`` has no ``style`` attribute,
        otherwise a one-element list holding the `` style="..."`` text.
        """
        try:
            styledict = tag.style.__dict__
        except AttributeError:
            return []
        # Convert values to text first so that non-string values such as
        # numbers do not break the concatenation.
        # NOTE: names and values are inserted without escaping.
        stylelist = [x + ': ' + unicode(y) for x, y in styledict.items()]
        return [u(' style="%s"') % u('; ').join(stylelist)]

    def _issingleton(self, tagname):
        """can (and will) be overridden in subclasses"""
        return self.shortempty

    def _isinline(self, tagname):
        """can (and will) be overridden in subclasses"""
        return False

211 

class HtmlVisitor(SimpleUnicodeVisitor):
    """Visitor that applies HTML rules for empty and inline elements."""

    # Elements written as ``<name/>`` when they have no children.  In
    # addition to the HTML 4 names this lists the void elements embed,
    # source, track and wbr, which the ``html`` namespace also provides
    # and which must not be written with an end tag.
    single = dict.fromkeys(
        ('br,img,area,param,col,hr,meta,link,base,'
         'input,frame,embed,source,track,wbr').split(','), 1)

    # Elements that are not preceded by a newline and indentation.
    inline = dict.fromkeys(
        ('a abbr acronym b basefont bdo big br cite code dfn em font '
         'i img input kbd label q s samp select small span strike '
         'strong sub sup textarea tt u var').split(' '), 1)

    def repr_attribute(self, attrs, name):
        """Serialize one attribute, omitting ``class_`` when it is None."""
        if name == 'class_' and getattr(attrs, name) is None:
            return None
        return super(HtmlVisitor, self).repr_attribute(attrs, name)

    def _issingleton(self, tagname):
        """Return whether an empty ``tagname`` is written as ``<tagname/>``."""
        return tagname in self.single

    def _isinline(self, tagname):
        """Return whether ``tagname`` is listed as an inline element."""
        return tagname in self.inline

234 

235 

236class _escape: 

237 def __init__(self): 

238 self.escape = { 

239 u('"') : u('&quot;'), u('<') : u('&lt;'), u('>') : u('&gt;'), 

240 u('&') : u('&amp;'), u("'") : u('&apos;'), 

241 } 

242 self.charef_rex = re.compile(u("|").join(self.escape.keys())) 

243 

244 def _replacer(self, match): 

245 return self.escape[match.group(0)] 

246 

247 def __call__(self, ustring): 

248 """ xml-escape the given unicode string. """ 

249 try: 

250 ustring = unicode(ustring) 

251 except UnicodeDecodeError: 

252 ustring = unicode(ustring, 'utf-8', errors='replace') 

253 return self.charef_rex.sub(self._replacer, ustring) 

254 

# Shared module-level escaper instance; call ``escape(text)`` to
# xml-escape ``text``.
escape = _escape()