aboutsummaryrefslogtreecommitdiff
path: root/plugins/my_typogrify/mytypogrify.py
blob: 490e82b41521488e94e9350d3880513322bec540 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from pelican import signals
import typogrify.filters as filters
import re

# Regex source matching one HTML/XML tag (opening, closing or self-closing).
# Tag and attribute names may contain "-", ":" and "." so that tags carrying
# attributes such as data-* or aria-* are still recognised as tags.
_TAG_PATTERN = (
    r'</?\w[\w:.-]*'
    r'((\s+[\w:.-]+(\s*=\s*(?:".*?"|\'.*?\'|[^\'">\s]+))?)+\s*|\s*)'
    r'/?>'
)

# Splits the input into "optional tag, text without '<', optional tag" runs so
# that only the text part is ever rewritten.
_INTRA_TAG_FINDER = re.compile(
    r'(?P<prefix>(%s)?)(?P<text>([^<]*))(?P<suffix>(%s)?)'
    % (_TAG_PATTERN, _TAG_PATTERN)
)

# Markup emitted in place of a plain space: a thin space that cannot wrap.
_NNBSP = u'<span style="white-space:nowrap">&thinsp;</span>'

# Matches only the space itself; its context is checked with look-arounds so
# that a character shared by two contexts (the "a" in "<< a >>", the "2" in
# "1 2 3") does not prevent the second space from being matched.
_SPACE_FINDER = re.compile(r"""
    (?<=\w)[ ](?=[:;!?\xbb])    # before : ; ! ? or a closing guillemet
  | (?<=\xab)[ ](?=\w)          # after an opening guillemet
  | (?<=[0-9])[ ](?=[0-9%])     # between two digits, or a digit and %
""", re.VERBOSE)


def french_insecable(text):
    r"""Insert the thin non-breaking spaces required by French typography.

    A plain space is replaced by
    ``<span style="white-space:nowrap">&thinsp;</span>`` when it sits:

    * between a word character and one of ``: ; ! ?`` or a closing guillemet;
    * between an opening guillemet and a word character;
    * between two digits, or between a digit and ``%``.

    Only text located outside HTML tags is rewritten, so spaces inside
    attribute values are preserved.

    :param text: HTML fragment to process.
    :returns: the fragment with the relevant spaces replaced.

    >>> print(french_insecable('Foo !'))
    Foo<span style="white-space:nowrap">&thinsp;</span>!

    >>> print(french_insecable('Foo ?'))
    Foo<span style="white-space:nowrap">&thinsp;</span>?

    >>> print(french_insecable('Foo : bar'))
    Foo<span style="white-space:nowrap">&thinsp;</span>: bar

    >>> print(french_insecable('Foo ; bar'))
    Foo<span style="white-space:nowrap">&thinsp;</span>; bar

    >>> print(french_insecable('123 456'))
    123<span style="white-space:nowrap">&thinsp;</span>456

    >>> print(french_insecable('123 %'))
    123<span style="white-space:nowrap">&thinsp;</span>%

    Both spaces around a one-letter word between guillemets are handled:

    >>> expected = u'\xab' + _NNBSP + u'a' + _NNBSP + u'\xbb'
    >>> french_insecable(u'\xab a \xbb') == expected
    True

    Spaces inside attributes are preserved, including attributes whose
    name contains a hyphen:

    >>> print(french_insecable('<a title="foo !">'))
    <a title="foo !">

    >>> print(french_insecable('<a data-tip="foo !">bar !</a>'))
    <a data-tip="foo !">bar<span style="white-space:nowrap">&thinsp;</span>!</a>
    """

    def _protect_spaces(match):
        """Rewrite the text of one run, leaving its surrounding tags as is."""
        return (match.group('prefix')
                + _SPACE_FINDER.sub(_NNBSP, match.group('text'))
                + match.group('suffix'))

    return _INTRA_TAG_FINDER.sub(_protect_spaces, text)

def typogrify(text, ignore_tags=None):
    """Apply the typogrify filters and the French spacing rules to ``text``.

    ``filters.process_ignores`` splits the text into ``(chunk, flag)``
    pairs. Flagged chunks go through ``filters.applyfilters`` and then
    ``french_insecable``; the other chunks are copied through unchanged.

    :param text: markup to process.
    :param ignore_tags: forwarded as-is to ``filters.process_ignores``.
    :returns: the result of ``filters.widont`` on the reassembled text.
    """

    def _render(chunk, wanted):
        # Chunks flagged as not-to-process are emitted verbatim.
        if not wanted:
            return chunk
        return french_insecable(filters.applyfilters(chunk))

    pieces = [
        _render(chunk, wanted)
        for chunk, wanted in filters.process_ignores(text, ignore_tags)
    ]

    # widont runs last, on the whole text, as it is already smart about
    # tags. Hopefully.
    return filters.widont("".join(pieces))

def apply(data):
    """Typogrify the body and the title of a content object, in place.

    Objects whose ``_content`` is empty or ``None`` are left untouched.
    ``data._content`` is replaced by its typogrified version, and so is
    ``data.metadata['title']`` when a non-empty title is present.

    :param data: object exposing ``_content`` and a ``metadata`` mapping.
    """
    if not data._content:
        return

    data._content = typogrify(data._content)

    # A missing title must not abort the handler with a KeyError; there is
    # simply nothing to typogrify in that case.
    # NOTE(review): only the metadata entry is updated here, not any
    # attribute derived from it earlier -- confirm this is what templates read.
    metadata = data.metadata
    title = metadata.get('title')
    if title:
        metadata['title'] = typogrify(title)

def register():
    """Connect ``apply`` to Pelican's ``content_object_init`` signal."""
    content_object_init = signals.content_object_init
    content_object_init.connect(apply)