comparison src/pages/common/blog/page_meta.py @ 1051:d3ac6fb10fd5

pages (common/blog): transform special characters to their ASCII equivalent
author Goffi <goffi@goffi.org>
date Thu, 25 Jan 2018 08:34:27 +0100
parents d4290178662c
children 50ba8947a6e8
comparison
equal deleted inserted replaced
1050:6c98c0baa038 1051:d3ac6fb10fd5
7 from libervia.server import session_iface 7 from libervia.server import session_iface
8 from sat.core.i18n import _ 8 from sat.core.i18n import _
9 from sat.core.log import getLogger 9 from sat.core.log import getLogger
10 from sat.tools.common.template import safe 10 from sat.tools.common.template import safe
11 from libervia.server import utils 11 from libervia.server import utils
12 import unicodedata
12 import re 13 import re
13 import cgi 14 import cgi
14 log = getLogger('pages/common/blog') 15 log = getLogger('pages/common/blog')
15 16
16 """generic blog (with service/node provided)""" 17 """generic blog (with service/node provided)"""
17 name = u'blog' 18 name = u'blog'
18 template = u"blog/articles.html" 19 template = u"blog/articles.html"
19 uri_handlers = {(u'pubsub', u'microblog'): 'microblog_uri'} 20 uri_handlers = {(u'pubsub', u'microblog'): 'microblog_uri'}
20 21
21 RE_TEXT_URL = re.compile(ur'[^a-zA-Zéèêôà,_]+') 22 RE_TEXT_URL = re.compile(ur'[^a-zA-Z,_]+')
22 TEXT_MAX_LEN = 60 23 TEXT_MAX_LEN = 60
23 TEXT_WORD_MIN_LENGHT = 4 24 TEXT_WORD_MIN_LENGHT = 4
24 URL_LIMIT_MARK = 90 # if canonical URL is longer than that, text will not be appended 25 URL_LIMIT_MARK = 90 # if canonical URL is longer than that, text will not be appended
25 26
26 def microblog_uri(self, uri_data): 27 def microblog_uri(self, uri_data):
219 blog_url = blog_canonical_url 220 blog_url = blog_canonical_url
220 else: 221 else:
221 # we add text from title or body at the end of URL 222 # we add text from title or body at the end of URL
222 # to make it more readable 223 # to make it more readable
223 text = item.title or item.content 224 text = item.title or item.content
225 # we change special chars to their ASCII equivalents, trick found at https://stackoverflow.com/a/3194567
226 text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore')
224 text = RE_TEXT_URL.sub(u' ', text).lower() 227 text = RE_TEXT_URL.sub(u' ', text).lower()
225 text = u'-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT]) 228 text = u'-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT])
226 while len(text) > TEXT_MAX_LEN: 229 while len(text) > TEXT_MAX_LEN:
227 if u'-' in text: 230 if u'-' in text:
228 text = text.rsplit(u'-', 1)[0] 231 text = text.rsplit(u'-', 1)[0]