blob: 490e82b41521488e94e9350d3880513322bec540 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pelican import signals
import typogrify.filters as filters
import re
# Source of a regex matching one HTML/XML tag, including its attributes
# (double-quoted, single-quoted or bare values).
_TAG_PATTERN = r'</?\w+((\s+\w+(\s*=\s*(?:".*?"|\'.*?\'|[^\'">\s]+))?)+\s*|\s*)/?>'

# Splits markup into "optional tag, text without '<', optional tag" chunks so
# that only the text part is rewritten and tag attributes stay untouched.
_INTRA_TAG_FINDER = re.compile(
    r'(?P<prefix>(%s)?)(?P<text>([^<]*))(?P<suffix>(%s)?)'
    % (_TAG_PATTERN, _TAG_PATTERN)
)

# Markup substituted for every space that must not allow a line break.
_NNBSP = u'<span style="white-space:nowrap"> </span>'

# Matches only the space itself.  The neighbouring characters are checked with
# lookbehind/lookahead instead of being consumed, so two candidate spaces that
# share a neighbour (``1 2 3`` or an opening guillemet, one letter, a closing
# guillemet) are both found.  re.UNICODE lets ``\w`` match accented letters on
# Python 2 as well; on Python 3 it is already the default for str patterns.
_SPACE_FINDER = re.compile(r"""
      (?<=\w)[ ](?=[:;!?\xbb])     # before ':', ';', '!', '?' or a closing guillemet
    | (?<=\xab)[ ](?=\w)           # after an opening guillemet
    | (?<=[0-9])[ ](?=[0-9%])      # between two digits, or between a digit and '%'
    """, re.VERBOSE | re.UNICODE)


def french_insecable(text):
    """Protect the spaces required by French typography against line breaks.

    Every plain space that sits

    * between a word character and one of ``: ; ! ?`` or a closing guillemet
      (U+00BB),
    * between an opening guillemet (U+00AB) and a word character,
    * between two digits, or between a digit and ``%``,

    is replaced by a ``<span style="white-space:nowrap">`` element wrapping
    the space.  Other whitespace characters (tab, newline, ...) are left as
    they are.

    >>> print(french_insecable('Foo !'))
    Foo<span style="white-space:nowrap"> </span>!
    >>> print(french_insecable('Foo : bar'))
    Foo<span style="white-space:nowrap"> </span>: bar
    >>> print(french_insecable('123 456'))
    123<span style="white-space:nowrap"> </span>456
    >>> print(french_insecable('123 %'))
    123<span style="white-space:nowrap"> </span>%

    Spaces inside tag attributes are preserved:

    >>> print(french_insecable('<a title="foo !">'))
    <a title="foo !">

    :param text: HTML fragment (or plain text) to process.
    :returns: the fragment with the relevant spaces wrapped in no-wrap spans.
    """
    def _protect_text(chunk):
        # 'prefix' and 'suffix' always take part in the match (possibly as
        # empty strings), so they can be concatenated back unchanged.
        # A callable replacement is used so the span markup is inserted
        # verbatim, without replacement-template escape processing.
        protected = _SPACE_FINDER.sub(lambda _match: _NNBSP, chunk.group('text'))
        return chunk.group('prefix') + protected + chunk.group('suffix')

    return _INTRA_TAG_FINDER.sub(_protect_text, text)
def typogrify(text, ignore_tags=None):
    """Run the typography filters, then the French no-break-space pass.

    ``filters.process_ignores`` splits ``text`` into ``(fragment, flag)``
    pairs.  Fragments whose flag is true go through ``filters.applyfilters``
    and then ``french_insecable``; the others are copied through unchanged.

    :param text: markup to process.
    :param ignore_tags: forwarded as-is to ``filters.process_ignores``.
    :returns: the reassembled text after a final ``filters.widont`` pass.
    """
    pieces = []
    for fragment, processable in filters.process_ignores(text, ignore_tags):
        if processable:
            fragment = french_insecable(filters.applyfilters(fragment))
        pieces.append(fragment)

    # widont runs last, on the whole document, because it is expected to
    # cope with tags by itself.
    return filters.widont("".join(pieces))
def apply(data):
    """Typogrify the body and the title of a content object, in place.

    Nothing happens when ``data._content`` is empty or ``None``.  Otherwise
    ``data._content`` and ``data.metadata['title']`` are both replaced by
    their ``typogrify`` result.
    """
    body = data._content
    if body:
        data._content = typogrify(body)
        meta = data.metadata
        meta['title'] = typogrify(meta['title'])
def register():
    """Plugin entry point: connect ``apply`` to ``content_object_init``."""
    content_signal = signals.content_object_init
    content_signal.connect(apply)
|