changeset 1406:cffa3ae4d0aa

pages (blog/view): move URL friendly code to backend tools: - the code to render text URL friendly is now in `sat.tools.common.regex` - user friendly extra text is now only appended to the URL when no `-` is found in the item ID. This is a temporary transition behaviour because new blog item IDs are now user friendly by default, and thus the extra text is not wanted anymore. For older IDs it is still needed though, and the presence of `-` is used to guess whether an ID is user friendly or not.
author Goffi <goffi@goffi.org>
date Fri, 16 Apr 2021 18:44:49 +0200
parents 3dff555fe691
children 5a132b85e1ac
files libervia/pages/blog/view/page_meta.py
diffstat 1 files changed, 10 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/libervia/pages/blog/view/page_meta.py	Fri Apr 16 18:40:17 2021 +0200
+++ b/libervia/pages/blog/view/page_meta.py	Fri Apr 16 18:44:49 2021 +0200
@@ -1,7 +1,5 @@
 #!/usr/bin/env python3
 
-import unicodedata
-import re
 import html
 from libervia.server.constants import Const as C
 from twisted.words.protocols.jabber import jid
@@ -9,9 +7,10 @@
 from sat.tools.common.template import safe
 from sat.tools.common import uri
 from sat.tools.common import data_format
+from sat.tools.common import regex
+from sat.core.log import getLogger
 from libervia.server import utils
 from libervia.server.utils import SubPage
-from sat.core.log import getLogger
 
 log = getLogger(__name__)
 
@@ -20,9 +19,6 @@
 template = "blog/articles.html"
 uri_handlers = {('pubsub', 'microblog'): 'microblog_uri'}
 
-RE_TEXT_URL = re.compile(r'[^a-zA-Z,_]+')
-TEXT_MAX_LEN = 60
-TEXT_WORD_MIN_LENGHT = 4
 URL_LIMIT_MARK = 90  # if canonical URL is longer than that, text will not be appended
 
 
@@ -247,23 +243,20 @@
         blog_canonical_url = '/'.join([blog_base_url_item, utils.quote(item['id'])])
         if len(blog_canonical_url) > URL_LIMIT_MARK:
             blog_url = blog_canonical_url
-        else:
+        elif '-' not in item['id']:
             # we add text from title or body at the end of URL
             # to make it more human readable
-            text = item.get('title', item['content'])
-            # we change special chars to ascii one, trick found at https://stackoverflow.com/a/3194567
-            text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('utf-8')
-            text = RE_TEXT_URL.sub(' ', text).lower()
-            text = '-'.join([t for t in text.split() if t and len(t)>=TEXT_WORD_MIN_LENGHT])
-            while len(text) > TEXT_MAX_LEN:
-                if '-' in text:
-                    text = text.rsplit('-', 1)[0]
-                else:
-                    text = text[:TEXT_MAX_LEN]
+            # we do it only if there is no "-", as a "-" probably means that
+            # item's id is already user friendly.
+            # TODO: to be removed; this is only kept for a transition period until
+            #   user friendly item IDs are more common.
+            text = regex.urlFriendlyText(item.get('title', item['content']))
             if text:
                 blog_url = blog_canonical_url + '/' + text
             else:
                 blog_url = blog_canonical_url
+        else:
+            blog_url = blog_canonical_url
 
         items_http_uri[item['id']] = self.host.getExtBaseURL(request, blog_url)
         for tag in item['tags']: