Mercurial > libervia-web

--- a/libervia/pages/blog/view/page_meta.py	Fri Apr 16 18:40:17 2021 +0200
+++ b/libervia/pages/blog/view/page_meta.py	Fri Apr 16 18:44:49 2021 +0200
@@ -1,7 +1,5 @@
 #!/usr/bin/env python3

-import unicodedata
-import re
 import html
 from libervia.server.constants import Const as C
 from twisted.words.protocols.jabber import jid
@@ -9,9 +7,10 @@
 from sat.tools.common.template import safe
 from sat.tools.common import uri
 from sat.tools.common import data_format
+from sat.tools.common import regex
+from sat.core.log import getLogger
 from libervia.server import utils
 from libervia.server.utils import SubPage
-from sat.core.log import getLogger

 log = getLogger(__name__)

@@ -20,9 +19,6 @@
 template = "blog/articles.html"
 uri_handlers = {('pubsub', 'microblog'): 'microblog_uri'}

-RE_TEXT_URL = re.compile(r'[^a-zA-Z,_]+')
-TEXT_MAX_LEN = 60
-TEXT_WORD_MIN_LENGHT = 4
 URL_LIMIT_MARK = 90  # if canonical URL is longer than that, text will not be appended


@@ -247,23 +243,20 @@
         blog_canonical_url = '/'.join([blog_base_url_item, utils.quote(item['id'])])
         if len(blog_canonical_url) > URL_LIMIT_MARK:
             blog_url = blog_canonical_url
-        else:
+        elif '-' not in item['id']:
             # we add text from title or body at the end of URL
             # to make it more human readable
-            text = item.get('title', item['content'])
-            # we change special chars to ascii one, trick found at https://stackoverflow.com/a/3194567
-            text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('utf-8')
-            text = RE_TEXT_URL.sub(' ', text).lower()
-            text = '-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT])
-            while len(text) > TEXT_MAX_LEN:
-                if '-' in text:
-                    text = text.rsplit('-', 1)[0]
-                else:
-                    text = text[:TEXT_MAX_LEN]
+            # We do it only if there is no "-" in the ID, as a "-" probably means
+            # that the item's ID is already user-friendly.
+            # TODO: to be removed; this is only kept for a transition period until
+            #   user-friendly item IDs are more common.
+            text = regex.urlFriendlyText(item.get('title', item['content']))
             if text:
                 blog_url = blog_canonical_url + '/' + text
             else:
                 blog_url = blog_canonical_url
+        else:
+            blog_url = blog_canonical_url

         items_http_uri[item['id']] = self.host.getExtBaseURL(request, blog_url)
         for tag in item['tags']: