#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pelican import signals
import typogrify.filters as filters
import re
# U+202F NARROW NO-BREAK SPACE. Written as an escape so the otherwise
# invisible character cannot be lost or normalised by an editor or a copy/paste.
_NNBSP = u'\u202f'

# One opening, closing or self-closing HTML tag, attributes included.
_TAG_PATTERN = (
    r'</?\w+((\s+\w+(\s*=\s*(?:".*?"|\'.*?\'|[^\'">\s]+))?)+\s*|\s*)/?>'
)

# "optional tag, run of text without '<', optional tag". Only the ``text``
# group is rewritten, which keeps spaces inside tag attributes untouched.
_INTRA_TAG_FINDER = re.compile(
    r'(?P<prefix>(%s)?)(?P<text>([^<]*))(?P<suffix>(%s)?)'
    % (_TAG_PATTERN, _TAG_PATTERN)
)

# Three-character fragments whose middle whitespace must not allow a line break.
_SPACE_FINDER = re.compile(r"""(?:
    (\w\s[:;!\?\xbb])|   # space before : ; ! ? or a closing guillemet
    ([\xab]\s\w)|        # space after an opening guillemet
    ([0-9]\s[0-9])|      # space between two digits
    ([0-9]\s\%)          # space between a digit and a percent sign
    )""", re.VERBOSE)


def french_insecable(text):
    r"""Replace the space next to double punctuation by a narrow no-break space.

    This follows the French typographic rules: the space before ``: ; ! ?``
    and a closing guillemet, after an opening guillemet, between two digits
    and between a digit and ``%`` becomes U+202F.

    :param text: HTML or plain text to process.
    :returns: the text with the relevant ASCII spaces replaced.

    >>> french_insecable(u'Foo !') == u'Foo\u202f!'
    True
    >>> french_insecable(u'Foo ?') == u'Foo\u202f?'
    True
    >>> french_insecable(u'Foo : bar') == u'Foo\u202f: bar'
    True
    >>> french_insecable(u'Foo ; bar') == u'Foo\u202f; bar'
    True
    >>> french_insecable(u'\xab bar \xbb') == u'\xab\u202fbar\u202f\xbb'
    True
    >>> french_insecable(u'123 456') == u'123\u202f456'
    True
    >>> french_insecable(u'123 %') == u'123\u202f%'
    True

    Space inside attributes should be preserved:

    >>> french_insecable(u'<a title="foo !">bar</a>') == u'<a title="foo !">bar</a>'
    True
    """
    def _fix_spaces(match):
        # The pattern accepts any whitespace (\s) in the middle position, but
        # only an ASCII space is swapped; tabs and newlines are left as they are.
        return match.group(0).replace(" ", _NNBSP)

    def _fix_text_between_tags(match):
        # Tags on either side are copied verbatim; only the text run between
        # them goes through the space substitution.
        prefix = match.group('prefix') or ''
        suffix = match.group('suffix') or ''
        return prefix + _SPACE_FINDER.sub(_fix_spaces, match.group('text')) + suffix

    return _INTRA_TAG_FINDER.sub(_fix_text_between_tags, text)
def typogrify(text, ignore_tags=None):
    """Run the typography filters, then the French spacing pass, over *text*.

    ``filters.process_ignores`` splits the input into ``(chunk, flag)``
    pairs. Chunks flagged for processing go through
    ``filters.applyfilters`` and ``french_insecable``; the others are
    copied through unchanged.

    :param text: the markup to process.
    :param ignore_tags: forwarded as-is to ``filters.process_ignores``.
    :returns: the reassembled text after a final ``filters.widont`` pass.
    """
    def _render(chunk, wanted):
        if not wanted:
            return chunk
        return french_insecable(filters.applyfilters(chunk))

    pieces = [
        _render(chunk, wanted)
        for chunk, wanted in filters.process_ignores(text, ignore_tags)
    ]
    # widont runs last, over the whole string: it is expected to cope with
    # tags on its own, hopefully.
    return filters.widont("".join(pieces))
def apply(data):
    """Typogrify the body and, when present, the title of a content object.

    Objects whose ``_content`` is empty or ``None`` are left untouched.

    :param data: object exposing ``_content`` and a ``metadata`` mapping;
        both are updated in place.
    :returns: ``None``.
    """
    if not data._content:
        return

    data._content = typogrify(data._content)

    # Content without a title would raise KeyError here, so the title is
    # only rewritten when the key is actually there.
    metadata = data.metadata
    if 'title' in metadata:
        metadata['title'] = typogrify(metadata['title'])
def register():
    """Connect ``apply`` to the ``content_object_init`` signal."""
    content_signal = signals.content_object_init
    content_signal.connect(apply)