-
Notifications
You must be signed in to change notification settings - Fork 102
/
Copy pathexcerpt.py
109 lines (91 loc) · 4.02 KB
/
excerpt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""
Generate an excerpt from either the summary or the content of an entry.

Parameters:
  width:  maximum number of characters in the excerpt. Default: 500
  omit:   whitespace delimited list of html tags to remove. Default: none
  target: name of element created. Default: planet:excerpt

Notes:
 * if 'img' is in the list of tags to be omitted, <img> tags are replaced
   with hypertext links that point at the image's 'src' and use the value of
   the 'alt' attribute as link text.  If there is no alt attribute value,
   the literal text <img> is used instead.  If the parent element of the img
   tag is already an <a> tag (or the image has no 'src'), no additional
   hypertext link is added and only the text is emitted.
"""
import sys, xml.dom.minidom, textwrap
from xml.dom import Node, minidom
# XML namespace URIs.  These must match the namespace declarations in the
# feed byte-for-byte, otherwise the namespace-aware element lookups below
# find nothing.
atomNS = 'http://www.w3.org/2005/Atom'
planetNS = 'http://planet.intertwingly.net/'

# Command line is a flat list of "--name value" pairs: odd positions are the
# option names (leading dashes stripped), even positions are their values.
args = dict(zip([name.lstrip('-') for name in sys.argv[1::2]], sys.argv[2::2]))

# Wrapper whose first wrapped line determines where the excerpt is cut.
wrapper = textwrap.TextWrapper(width=int(args.get('width', '500')))

# Tag names whose elements are dropped (their children are still copied).
omit = args.get('omit', '').split()

# Qualified name of the element that will hold the excerpt.
target = args.get('target', 'planet:excerpt')
class copy:
    """Recursively copy a source node to a target, up to a given width.

    Constructing an instance performs the copy immediately: the children of
    ``source`` are appended to ``target`` until the accumulated text would
    no longer fit on a single wrapped line, at which point the text is cut,
    an ellipsis is appended and copying stops.

    Parameters:
      dom:          document used to create the new nodes
      source:       node whose children are copied
      target:       node that receives the copies
      text_wrapper: object with a ``wrap(text)`` method returning a list of
                    lines; defaults to the module-level ``wrapper``
      omit_tags:    collection of element names to drop; defaults to the
                    module-level ``omit``
    """

    def __init__(self, dom, source, target, text_wrapper=None, omit_tags=None):
        self.dom = dom
        self.full = False   # becomes True once the excerpt has been cut
        self.text = []      # stripped text fragments copied so far
        self.textlen = 0    # length of the single wrapped line so far
        # Fall back to the module-level settings when none are supplied.
        self.wrapper = wrapper if text_wrapper is None else text_wrapper
        self.omit = omit if omit_tags is None else omit_tags
        self.copyChildren(source, target)

    def copyChildren(self, source, target):
        """ copy child nodes of a source to the target """
        for child in source.childNodes:
            if child.nodeType == Node.ELEMENT_NODE:
                self.copyElement(child, target)
            elif child.nodeType == Node.TEXT_NODE:
                self.copyText(child.data, target)
            # Stop as soon as any (possibly nested) copy hit the width limit.
            if self.full:
                break

    def copyElement(self, source, target):
        """ copy source element to the target """
        # check the omit list
        if source.nodeName in self.omit:
            if source.nodeName == 'img':
                return self.elideImage(source, target)
            # Drop the element itself but keep its content.
            return self.copyChildren(source, target)

        # copy element, attributes, and children
        child = self.dom.createElementNS(source.namespaceURI, source.nodeName)
        target.appendChild(child)
        for i in range(source.attributes.length):
            attr = source.attributes.item(i)
            child.setAttributeNS(attr.namespaceURI, attr.name, attr.value)
        self.copyChildren(source, child)

    def elideImage(self, source, target):
        """ copy an elided form of the image element to the target """
        alt = source.getAttribute('alt') or '<img>'
        src = source.getAttribute('src')

        if target.nodeName == 'a' or not src:
            # Already inside a link (or nothing to link to): text only.
            self.copyText(alt, target)
        else:
            child = self.dom.createElement('a')
            child.setAttribute('href', src)
            self.copyText(alt, child)
            target.appendChild(child)

    def copyText(self, source, target):
        """ copy text to the target, until the point where it would wrap """
        stripped = source.strip()
        if stripped:
            self.text.append(stripped)

        lines = self.wrapper.wrap(' '.join(self.text))
        if len(lines) == 1:
            # Everything collected so far still fits: copy the text verbatim.
            target.appendChild(self.dom.createTextNode(source))
            self.textlen = len(lines[0])
        elif lines:
            # Text would wrap: keep only as much of this fragment as extends
            # the first line, mark the cut with an ellipsis and stop copying.
            excerpt = source[:len(lines[0]) - self.textlen] + u' \u2026'
            target.appendChild(self.dom.createTextNode(excerpt))
            self.full = True
# select summary or content element (summary wins when both are present)
dom = minidom.parse(sys.stdin)
source = dom.getElementsByTagNameNS(atomNS, 'summary')
if not source:
    source = dom.getElementsByTagNameNS(atomNS, 'content')

# if present, recursively copy it to a planet:excerpt element
if source:
    if target.startswith('planet:'):
        # Make sure the planet prefix is declared on the document element.
        dom.documentElement.setAttribute('xmlns:planet', planetNS)
    if target.startswith('atom:'):
        # Atom is addressed without a prefix here: keep the local name only.
        target = target.split(':', 1)[1]
    excerpt = dom.createElementNS(planetNS, target)
    source[0].parentNode.appendChild(excerpt)
    copy(dom, source[0], excerpt)
    # When the excerpt has the same name as its source element it is meant
    # to replace it, so drop the original.
    if source[0].nodeName == excerpt.nodeName:
        source[0].parentNode.removeChild(source[0])

# print out results
# toxml('utf-8') yields encoded bytes; write them to the binary stdout
# stream when there is one (Python 3) and to sys.stdout itself otherwise
# (Python 2), followed by the newline the print statement used to add.
output = dom.toxml('utf-8')
stream = getattr(sys.stdout, 'buffer', sys.stdout)
stream.write(output + b'\n')