comparison sat/plugins/plugin_blog_import_dokuwiki.py @ 3028:ab2696e34d29

Python 3 port: /!\ this is a huge commit /!\ starting from this commit, SàT needs Python 3.6+ /!\ SàT may be unstable or some features may not work anymore; this will improve with time. This patch ports backend, bridge and frontends to Python 3. Roughly this has been done this way: - the 2to3 tool has been applied (with python 3.7) - all references to python2 have been replaced with python3 (notably shebangs) - fixed files not handled by 2to3 (notably the shell script) - several manual fixes - fixed issues reported by Python 3 that were not handled in Python 2 - replaced "async" with "async_" when needed (it's a reserved word from Python 3.7) - replaced zope's "implements" with @implementer decorator - temporary hack to handle data pickled in database, as str or bytes may be returned, to be checked later - fixed hash comparison for password - removed some code which is not needed anymore with Python 3 - deactivated some code which needs to be checked (notably certificate validation) - tested with jp, fixed reported issues until some basic commands worked - ported Primitivus (after porting dependencies like urwid satext) - more manual fixes
author Goffi <goffi@goffi.org>
date Tue, 13 Aug 2019 19:08:41 +0200
parents 003b8b4b56a7
children 9d0df638c8b4
comparison
equal deleted inserted replaced
3027:ff5bcb12ae60 3028:ab2696e34d29
1 #!/usr/bin/env python2 1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
3 3
4 # SàT plugin to import dokuwiki blogs 4 # SàT plugin to import dokuwiki blogs
5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org) 5 # Copyright (C) 2009-2019 Jérôme Poisson (goffi@goffi.org)
6 # Copyright (C) 2013-2016 Adrien Cossa (souliane@mailoo.org) 6 # Copyright (C) 2013-2016 Adrien Cossa (souliane@mailoo.org)
26 from sat.core import exceptions 26 from sat.core import exceptions
27 from sat.tools import xml_tools 27 from sat.tools import xml_tools
28 from twisted.internet import threads 28 from twisted.internet import threads
29 from collections import OrderedDict 29 from collections import OrderedDict
30 import calendar 30 import calendar
31 import urllib 31 import urllib.request, urllib.parse, urllib.error
32 import urlparse 32 import urllib.parse
33 import tempfile 33 import tempfile
34 import re 34 import re
35 import time 35 import time
36 import os.path 36 import os.path
37 37
38 try: 38 try:
39 from dokuwiki import DokuWiki, DokuWikiError # this is a new dependency 39 from dokuwiki import DokuWiki, DokuWikiError # this is a new dependency
40 except ImportError: 40 except ImportError:
41 raise exceptions.MissingModule( 41 raise exceptions.MissingModule(
42 u'Missing module dokuwiki, please install it with "pip install dokuwiki"' 42 'Missing module dokuwiki, please install it with "pip install dokuwiki"'
43 ) 43 )
44 try: 44 try:
45 from PIL import Image # this is already needed by plugin XEP-0054 45 from PIL import Image # this is already needed by plugin XEP-0054
46 except: 46 except:
47 raise exceptions.MissingModule( 47 raise exceptions.MissingModule(
48 u"Missing module pillow, please download/install it from https://python-pillow.github.io" 48 "Missing module pillow, please download/install it from https://python-pillow.github.io"
49 ) 49 )
50 50
51 PLUGIN_INFO = { 51 PLUGIN_INFO = {
52 C.PI_NAME: "Dokuwiki import", 52 C.PI_NAME: "Dokuwiki import",
53 C.PI_IMPORT_NAME: "IMPORT_DOKUWIKI", 53 C.PI_IMPORT_NAME: "IMPORT_DOKUWIKI",
56 C.PI_MAIN: "DokuwikiImport", 56 C.PI_MAIN: "DokuwikiImport",
57 C.PI_HANDLER: "no", 57 C.PI_HANDLER: "no",
58 C.PI_DESCRIPTION: _("""Blog importer for Dokuwiki blog engine."""), 58 C.PI_DESCRIPTION: _("""Blog importer for Dokuwiki blog engine."""),
59 } 59 }
60 60
61 SHORT_DESC = D_(u"import posts from Dokuwiki blog engine") 61 SHORT_DESC = D_("import posts from Dokuwiki blog engine")
62 62
63 LONG_DESC = D_( 63 LONG_DESC = D_(
64 u"""This importer handle Dokuwiki blog engine. 64 """This importer handle Dokuwiki blog engine.
65 65
66 To use it, you need an admin access to a running Dokuwiki website 66 To use it, you need an admin access to a running Dokuwiki website
67 (local or on the Internet). The importer retrieves the data using 67 (local or on the Internet). The importer retrieves the data using
68 the XMLRPC Dokuwiki API. 68 the XMLRPC Dokuwiki API.
69 69
127 """Return a unique and constant post id 127 """Return a unique and constant post id
128 128
129 @param post(dict): parsed post data 129 @param post(dict): parsed post data
130 @return (unicode): post unique item id 130 @return (unicode): post unique item id
131 """ 131 """
132 return unicode(post["id"]) 132 return str(post["id"])
133 133
134 def getPostUpdated(self, post): 134 def getPostUpdated(self, post):
135 """Return the update date. 135 """Return the update date.
136 136
137 @param post(dict): parsed post data 137 @param post(dict): parsed post data
138 @return (unicode): update date 138 @return (unicode): update date
139 """ 139 """
140 return unicode(post["mtime"]) 140 return str(post["mtime"])
141 141
142 def getPostPublished(self, post): 142 def getPostPublished(self, post):
143 """Try to parse the date from the message ID, else use "mtime". 143 """Try to parse the date from the message ID, else use "mtime".
144 144
145 The date can be extracted if the message ID looks like one of: 145 The date can be extracted if the message ID looks like one of:
146 - namespace:YYMMDD_short_title 146 - namespace:YYMMDD_short_title
147 - namespace:YYYYMMDD_short_title 147 - namespace:YYYYMMDD_short_title
148 @param post (dict): parsed post data 148 @param post (dict): parsed post data
149 @return (unicode): publication date 149 @return (unicode): publication date
150 """ 150 """
151 id_, default = unicode(post["id"]), unicode(post["mtime"]) 151 id_, default = str(post["id"]), str(post["mtime"])
152 try: 152 try:
153 date = id_.split(":")[-1].split("_")[0] 153 date = id_.split(":")[-1].split("_")[0]
154 except KeyError: 154 except KeyError:
155 return default 155 return default
156 try: 156 try:
158 except ValueError: 158 except ValueError:
159 try: 159 try:
160 time_struct = time.strptime(date, "%Y%m%d") 160 time_struct = time.strptime(date, "%Y%m%d")
161 except ValueError: 161 except ValueError:
162 return default 162 return default
163 return unicode(calendar.timegm(time_struct)) 163 return str(calendar.timegm(time_struct))
164 164
165 def processPost(self, post, profile_jid): 165 def processPost(self, post, profile_jid):
166 """Process a single page. 166 """Process a single page.
167 167
168 @param post (dict): parsed post data 168 @param post (dict): parsed post data
233 self.processPost(page, profile_jid) 233 self.processPost(page, profile_jid)
234 count += 1 234 count += 1
235 if count >= self.limit: 235 if count >= self.limit:
236 break 236 break
237 237
238 return (self.posts_data.itervalues(), len(self.posts_data)) 238 return (iter(self.posts_data.values()), len(self.posts_data))
239 239
240 def processContent(self, text, backlinks, profile_jid): 240 def processContent(self, text, backlinks, profile_jid):
241 """Do text substitutions and file copy. 241 """Do text substitutions and file copy.
242 242
243 @param text (unicode): message content 243 @param text (unicode): message content
244 @param backlinks (list[unicode]): list of backlinks 244 @param backlinks (list[unicode]): list of backlinks
245 """ 245 """
246 text = text.strip(u"\ufeff") # this is at the beginning of the file (BOM) 246 text = text.strip("\ufeff") # this is at the beginning of the file (BOM)
247 247
248 for backlink in backlinks: 248 for backlink in backlinks:
249 src = '/doku.php?id=%s"' % backlink 249 src = '/doku.php?id=%s"' % backlink
250 tgt = '/blog/%s/%s" target="#"' % (profile_jid.user, backlink) 250 tgt = '/blog/%s/%s" target="#"' % (profile_jid.user, backlink)
251 text = text.replace(src, tgt) 251 text = text.replace(src, tgt)
259 if re.match(r"^\w*://", link): # absolute URL to link directly 259 if re.match(r"^\w*://", link): # absolute URL to link directly
260 continue 260 continue
261 if self.media_repo: 261 if self.media_repo:
262 self.moveMedia(link, subs) 262 self.moveMedia(link, subs)
263 elif link not in subs: 263 elif link not in subs:
264 subs[link] = urlparse.urljoin(self.url, link) 264 subs[link] = urllib.parse.urljoin(self.url, link)
265 265
266 for url, new_url in subs.iteritems(): 266 for url, new_url in subs.items():
267 text = text.replace(url, new_url) 267 text = text.replace(url, new_url)
268 return text 268 return text
269 269
270 def moveMedia(self, link, subs): 270 def moveMedia(self, link, subs):
271 """Move a media from the DokuWiki host to the new repository. 271 """Move a media from the DokuWiki host to the new repository.
272 272
273 This also updates the hyperlinks to internal media files. 273 This also updates the hyperlinks to internal media files.
274 @param link (unicode): media link 274 @param link (unicode): media link
275 @param subs (dict): substitutions data 275 @param subs (dict): substitutions data
276 """ 276 """
277 url = urlparse.urljoin(self.url, link) 277 url = urllib.parse.urljoin(self.url, link)
278 user_media = re.match(r"(/lib/exe/\w+.php\?)(.*)", link) 278 user_media = re.match(r"(/lib/exe/\w+.php\?)(.*)", link)
279 thumb_width = None 279 thumb_width = None
280 280
281 if user_media: # media that has been added by the user 281 if user_media: # media that has been added by the user
282 params = urlparse.parse_qs(urlparse.urlparse(url).query) 282 params = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
283 try: 283 try:
284 media = params["media"][0] 284 media = params["media"][0]
285 except KeyError: 285 except KeyError:
286 log.warning("No media found in fetch URL: %s" % user_media.group(2)) 286 log.warning("No media found in fetch URL: %s" % user_media.group(2))
287 return 287 return
293 except KeyError: 293 except KeyError:
294 pass 294 pass
295 295
296 filename = media.replace(":", "/") 296 filename = media.replace(":", "/")
297 # XXX: avoid "precondition failed" error (only keep the media parameter) 297 # XXX: avoid "precondition failed" error (only keep the media parameter)
298 url = urlparse.urljoin(self.url, "/lib/exe/fetch.php?media=%s" % media) 298 url = urllib.parse.urljoin(self.url, "/lib/exe/fetch.php?media=%s" % media)
299 299
300 elif link.startswith("/lib/plugins/"): 300 elif link.startswith("/lib/plugins/"):
301 # other link added by a plugin or something else 301 # other link added by a plugin or something else
302 filename = link[13:] 302 filename = link[13:]
303 else: # fake alert... there's no media (or we don't handle it yet) 303 else: # fake alert... there's no media (or we don't handle it yet)
322 """ 322 """
323 dirname = os.path.dirname(dest) 323 dirname = os.path.dirname(dest)
324 if not os.path.exists(dest): 324 if not os.path.exists(dest):
325 if not os.path.exists(dirname): 325 if not os.path.exists(dirname):
326 os.makedirs(dirname) 326 os.makedirs(dirname)
327 urllib.urlretrieve(source, dest) 327 urllib.request.urlretrieve(source, dest)
328 log.debug("DokuWiki media file copied to %s" % dest) 328 log.debug("DokuWiki media file copied to %s" % dest)
329 329
330 def createThumbnail(self, source, dest, width): 330 def createThumbnail(self, source, dest, width):
331 """Create a thumbnail. 331 """Create a thumbnail.
332 332