# HG changeset patch # User Goffi # Date 1516865667 -3600 # Node ID d3ac6fb10fd51d6c9284ed8820a14d5d5612db37 # Parent 6c98c0baa0386c97b679e566d12c2daa32ef5a3b pages (common/blog): transform special characters to their ascii equivalent diff -r 6c98c0baa038 -r d3ac6fb10fd5 src/pages/common/blog/page_meta.py --- a/src/pages/common/blog/page_meta.py Thu Jan 25 08:17:29 2018 +0100 +++ b/src/pages/common/blog/page_meta.py Thu Jan 25 08:34:27 2018 +0100 @@ -9,6 +9,7 @@ from sat.core.log import getLogger from sat.tools.common.template import safe from libervia.server import utils +import unicodedata import re import cgi log = getLogger('pages/common/blog') @@ -18,7 +19,7 @@ template = u"blog/articles.html" uri_handlers = {(u'pubsub', u'microblog'): 'microblog_uri'} -RE_TEXT_URL = re.compile(ur'[^a-zA-Zéèêôà,_]+') +RE_TEXT_URL = re.compile(ur'[^a-zA-Z,_]+') TEXT_MAX_LEN = 60 TEXT_WORD_MIN_LENGHT = 4 URL_LIMIT_MARK = 90 # if canonical URL is longer than that, text will not be appended @@ -221,6 +222,8 @@ # we add text from title or body at the end of URL # to make it more readable text = item.title or item.content + # we change special chars to ascii one, trick found at https://stackoverflow.com/a/3194567 + text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore') text = RE_TEXT_URL.sub(u' ', text).lower() text = u'-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT]) while len(text) > TEXT_MAX_LEN: