aboutsummaryrefslogtreecommitdiff
path: root/plugins/my_typogrify/mytypogrify.py
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/my_typogrify/mytypogrify.py')
-rwxr-xr-xplugins/my_typogrify/mytypogrify.py96
1 files changed, 96 insertions, 0 deletions
diff --git a/plugins/my_typogrify/mytypogrify.py b/plugins/my_typogrify/mytypogrify.py
new file mode 100755
index 0000000..490e82b
--- /dev/null
+++ b/plugins/my_typogrify/mytypogrify.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from pelican import signals
+import typogrify.filters as filters
+import re
+
# Thin space wrapped in a no-wrap span: rendered as a thin non-breaking space.
_NNBSP = u'<span style="white-space:nowrap">&thinsp;</span>'

# One HTML tag, including quoted or bare attribute values.
_TAG_PATTERN = r"""</?\w+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>"""

# An optional tag, the run of text up to the next '<', then an optional tag.
_INTRA_TAG_FINDER = re.compile(
    r'(?P<prefix>(%s)?)(?P<text>([^<]*))(?P<suffix>(%s)?)'
    % (_TAG_PATTERN, _TAG_PATTERN))

# Only the space itself is consumed; its neighbours are checked with
# lookbehind/lookahead so that a character shared by two adjacent spaces
# (as in '1 000 000' or a one-letter word between guillemets) does not hide
# the second space from the search.  re.UNICODE makes \w cover accented
# letters on Python 2 too (it is already the default for str on Python 3).
_SPACE_FINDER = re.compile(r"""
      (?<=\w)[ ](?=[:;!?\xbb])    # word char, space, ':' ';' '!' '?' or closing guillemet
    | (?<=\xab)[ ](?=\w)          # opening guillemet, space, word char
    | (?<=[0-9])[ ](?=[0-9%])     # digit, space, digit or percent sign
    """, re.VERBOSE | re.UNICODE)


def _insecable_outside_tags(match):
    """Rewrite the text part of an ``_INTRA_TAG_FINDER`` match.

    The surrounding tags (if any) are passed through unchanged so spaces
    inside attribute values are never touched.
    """
    prefix = match.group('prefix') or ''
    body = _SPACE_FINDER.sub(_NNBSP, match.group('text'))
    suffix = match.group('suffix') or ''
    return prefix + body + suffix


def french_insecable(text):
    r"""Replace the space next to French "double" punctuation by a thin
    non-breaking space.

    This conforms with French typographic rules.  A single space is
    replaced when it sits:

    * between a word character and one of ``: ; ! ?`` or a closing guillemet,
    * between an opening guillemet and a word character,
    * between two digits, or between a digit and ``%``.

    Text inside HTML tags (e.g. attribute values) is left untouched.

    >>> nnbsp = u'<span style="white-space:nowrap">&thinsp;</span>'

    >>> french_insecable(u'Foo !') == u'Foo' + nnbsp + u'!'
    True

    >>> french_insecable(u'Foo ?') == u'Foo' + nnbsp + u'?'
    True

    >>> french_insecable(u'Foo : bar') == u'Foo' + nnbsp + u': bar'
    True

    >>> french_insecable(u'Foo ; bar') == u'Foo' + nnbsp + u'; bar'
    True

    >>> french_insecable(u'\xab bar \xbb') == u'\xab' + nnbsp + u'bar' + nnbsp + u'\xbb'
    True

    >>> french_insecable(u'123 456') == u'123' + nnbsp + u'456'
    True

    >>> french_insecable(u'123 %') == u'123' + nnbsp + u'%'
    True

    Adjacent candidates that share a character are all handled:

    >>> french_insecable(u'1 000 000') == u'1' + nnbsp + u'000' + nnbsp + u'000'
    True

    Space inside attributes is preserved:

    >>> french_insecable(u'<a title="foo !">') == u'<a title="foo !">'
    True
    """
    return _INTRA_TAG_FINDER.sub(_insecable_outside_tags, text)
+
def typogrify(text, ignore_tags=None):
    """Run the full typography pipeline over ``text``.

    ``filters.process_ignores`` splits the input into
    ``(fragment, should_process)`` pairs.  Fragments flagged for processing
    go through ``filters.applyfilters`` and then ``french_insecable``; the
    others are copied verbatim.  The reassembled string is finally passed
    to ``filters.widont``.
    """
    fragments = [
        french_insecable(filters.applyfilters(fragment)) if wanted else fragment
        for fragment, wanted in filters.process_ignores(text, ignore_tags)
    ]

    # widont runs last, on the whole document, since it copes with tags itself.
    return filters.widont("".join(fragments))
+
def apply(data):
    """Typogrify a content object's body and title in place.

    Does nothing when ``data._content`` is empty.  Otherwise the body is
    replaced by its typogrified version, and so is ``data.metadata['title']``
    when such an entry exists.
    """
    if not data._content:
        return

    data._content = typogrify(data._content)

    metadata = data.metadata
    # A content object with a body but no 'title' entry must not make the
    # handler blow up with KeyError; just leave its metadata alone.
    if 'title' in metadata:
        metadata['title'] = typogrify(metadata['title'])
+
def register():
    """Plugin entry point: hook ``apply`` onto ``content_object_init``."""
    content_signal = signals.content_object_init
    content_signal.connect(apply)