comparison src/plugins/plugin_blog_import_dokuwiki.py @ 1842:9fd517248dc8

plugin blog_import_dokuwiki: refactor to make it look more similar to blog_import_dotclear
author souliane <souliane@mailoo.org>
date Thu, 04 Feb 2016 17:36:22 +0100
parents 7717975b3ec3
children a51355982f11
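
The refactor splits the old monolithic process() into small helpers (getPostId, getPostUpdated, getPostPublished, processPost) so that the plugin's structure matches blog_import_dotclear. The one helper with real logic is getPostPublished: when the DokuWiki page ID ends in a YYMMDD_... or YYYYMMDD_... segment, the publication date is derived from it, otherwise the page's mtime is used. The snippet below is a minimal standalone sketch of that heuristic, not part of the changeset: the function name and the example page IDs are invented for illustration, and the order in which the two date formats are tried is an assumption (the plugin's own try/except chain is partly elided in this view).

    import calendar
    import time

    def published_timestamp(page_id, mtime):
        """Turn 'namespace:YYMMDD_title' or 'namespace:YYYYMMDD_title' into a
        Unix timestamp, falling back to the page's mtime when the ID carries
        no parsable date (same idea as getPostPublished)."""
        date = page_id.split(":")[-1].split("_")[0]
        for fmt in ("%y%m%d", "%Y%m%d"):  # two-digit year form first, then four-digit
            try:
                return calendar.timegm(time.strptime(date, fmt))
            except ValueError:
                continue
        return mtime

    # hypothetical page IDs, for illustration only
    print(published_timestamp("blog:20160204_hello_world", 0))      # 1454544000 (2016-02-04, parsed from the ID)
    print(published_timestamp("blog:160204_hello_world", 0))        # same timestamp, two-digit year form
    print(published_timestamp("blog:about_this_site", 1454603782))  # no date in the ID: falls back to mtime

In the changeset itself the equivalent code is a method and wraps the result in unicode(), like the other getPost* helpers.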
comparison of 1841:7717975b3ec3 (old, lines marked "-") and 1842:9fd517248dc8 (new, lines marked "+")
 117 117         self.temp_dir = None
 118 118         if self.media_repo:
 119 119             self.temp_dir = tempfile.mkdtemp()
 120 120             self.info_msg = _("DokuWiki media files will be downloaded to %s - to finish the import you will need to upload them to %s" % (self.temp_dir, self.media_repo))
 121 121         else:
-122                 self.info_msg = _("DokuWiki media files will stay on %s - some of them may be protected by DokuWiki ACL and will not be accessible from XMPP." % url)
+    122             self.info_msg = _("DokuWiki media files will stay on %s - some of them may be protected by DokuWiki ACL and will not be accessible." % url)
 123 123         self.limit = limit
 124 124         self.posts_data = OrderedDict()
 125 125
-126     def process(self, client, namespace=DEFAULT_NAMESPACE):
-127         """Process a namespace or a single page.
-128
-129         @param namespace (unicode): DokuWiki namespace (or page) to import
-130         """
-131         profile_jid = client.jid
-132         log.info("Importing data from DokuWiki %s" % self.version)
-133         try:
-134             pages_list = self.pages.list(namespace)
-135         except DokuWikiError:
-136             log.warning('Could not list Dokuwiki pages: please turn the "display_errors" setting to "Off" in the php.ini of the webserver hosting DokuWiki.')
-137             return
-138
-139         if not pages_list:  # namespace is actually a page?
-140             names = namespace.split(":")
-141             real_namespace = ":".join(names[0:-1])
-142             pages_list = self.pages.list(real_namespace)
-143             pages_list = [page for page in pages_list if page["id"] == namespace]
-144             namespace = real_namespace
-145
-146         count = 0
-147         for page in pages_list:
-148
-149             # get main information
-150             id_, updated, published = unicode(page['id']), unicode(page['mtime']), self.getOriginalDate(page)
-151
-152             # manage links
-153             backlinks = self.pages.backlinks(id_)
-154             for link in self.pages.links(id_):
-155                 if link["type"] != "extern":
-156                     assert link["type"] == "local"
-157                     page = link["page"]
-158                     backlinks.append(page[1:] if page.startswith(":") else page)
-159
-160             content = self.pages.get(id_)
-161             content_xhtml = self.processContent(self.pages.html(id_), backlinks, profile_jid)
-162
-163             # XXX: title is already in content_xhtml and difficult to remove, so leave it
-164             # title = content.split("\n")[0].strip(u"\ufeff= ")
-165
-166             # build the extra data dictionary
-167             mb_data = {"id": id_,
-168                        "published": published,
-169                        "updated": updated,
-170                        "author": profile_jid.user,
-171                        # "content": content,  # when passed, it is displayed in Libervia instead of content_xhtml
-172                        "content_xhtml": content_xhtml,
-173                        # "title": title,
-174                        "allow_comments": "true",
-175                        }
-176
-177             # find out if the message access is public or restricted
-178             namespace = id_.split(":")[0]
-179             if namespace and namespace.lower() not in ("public", "/"):
-180                 mb_data["group"] = namespace  # roster group must exist
-181
-182             self.posts_data[id_] = {'blog': mb_data, 'comments': [[]]}
-183
-184             count += 1
-185             if count >= self.limit:
-186                 break
-187
-188         return (self.posts_data.itervalues(), len(self.posts_data))
-189
-190     def getOriginalDate(self, page):
-191         """Try to parse the date from the message ID, else use "mtime".
-192
-193         The date can be extracted if the message ID looks like one of:
-194         - namespace:YYMMDD_short_title
-195         - namespace:YYYYMMDD_short_title
-196         @param page (dict): message page
-197         @return unicode
-198         """
-199         id_, default = unicode(page["id"]), unicode(page["mtime"])
-200         try:
-201             date = id_.split(":")[-1].split("_")[0]
-202         except KeyError:
-203             return default
-204         try:
-208             time_struct = time.strptime(date, "%Y%m%d")
-209         except ValueError:
-210             return default
-211         return unicode(calendar.timegm(time_struct))
-212
+    126     def getPostId(self, post):
+    127         """Return a unique and constant post id
+    128
+    129         @param post (dict): parsed post data
+    130         @return (unicode): post unique item id
+    131         """
+    132         return unicode(post['id'])
+    133
+    134     def getPostUpdated(self, post):
+    135         """Return the update date.
+    136
+    137         @param post (dict): parsed post data
+    138         @return (unicode): update date
+    139         """
+    140         return unicode(post['mtime'])
+    141
+    142     def getPostPublished(self, post):
+    143         """Try to parse the date from the message ID, else use "mtime".
+    144
+    145         The date can be extracted if the message ID looks like one of:
+    146         - namespace:YYMMDD_short_title
+    147         - namespace:YYYYMMDD_short_title
+    148         @param post (dict): parsed post data
+    149         @return (unicode): publication date
+    150         """
+    151         id_, default = unicode(post["id"]), unicode(post["mtime"])
+    152         try:
+    153             date = id_.split(":")[-1].split("_")[0]
+    154         except KeyError:
+    155             return default
+    156         try:
+    160             time_struct = time.strptime(date, "%Y%m%d")
+    161         except ValueError:
+    162             return default
+    163         return unicode(calendar.timegm(time_struct))
+    164
+    165     def processPost(self, post, profile_jid):
+    166         """Process a single page.
+    167
+    168         @param post (dict): parsed post data
+    169         @param profile_jid
+    170         """
+    171         # get main information
+    172         id_ = self.getPostId(post)
+    173         updated = self.getPostUpdated(post)
+    174         published = self.getPostPublished(post)
+    175
+    176         # manage links
+    177         backlinks = self.pages.backlinks(id_)
+    178         for link in self.pages.links(id_):
+    179             if link["type"] != "extern":
+    180                 assert link["type"] == "local"
+    181                 page = link["page"]
+    182                 backlinks.append(page[1:] if page.startswith(":") else page)
+    183
+    184         content = self.pages.get(id_)
+    185         content_xhtml = self.processContent(self.pages.html(id_), backlinks, profile_jid)
+    186
+    187         # XXX: title is already in content_xhtml and difficult to remove, so leave it
+    188         # title = content.split("\n")[0].strip(u"\ufeff= ")
+    189
+    190         # build the extra data dictionary
+    191         mb_data = {"id": id_,
+    192                    "published": published,
+    193                    "updated": updated,
+    194                    "author": profile_jid.user,
+    195                    # "content": content,  # when passed, it is displayed in Libervia instead of content_xhtml
+    196                    "content_xhtml": content_xhtml,
+    197                    # "title": title,
+    198                    "allow_comments": "true",
+    199                    }
+    200
+    201         # find out if the message access is public or restricted
+    202         namespace = id_.split(":")[0]
+    203         if namespace and namespace.lower() not in ("public", "/"):
+    204             mb_data["group"] = namespace  # roster group must exist
+    205
+    206         self.posts_data[id_] = {'blog': mb_data, 'comments': [[]]}
+    207
+    208     def process(self, client, namespace=DEFAULT_NAMESPACE):
+    209         """Process a namespace or a single page.
+    210
+    211         @param namespace (unicode): DokuWiki namespace (or page) to import
+    212         """
+    213         profile_jid = client.jid
+    214         log.info("Importing data from DokuWiki %s" % self.version)
+    215         try:
+    216             pages_list = self.pages.list(namespace)
+    217         except DokuWikiError:
+    218             log.warning('Could not list Dokuwiki pages: please turn the "display_errors" setting to "Off" in the php.ini of the webserver hosting DokuWiki.')
+    219             return
+    220
+    221         if not pages_list:  # namespace is actually a page?
+    222             names = namespace.split(":")
+    223             real_namespace = ":".join(names[0:-1])
+    224             pages_list = self.pages.list(real_namespace)
+    225             pages_list = [page for page in pages_list if page["id"] == namespace]
+    226             namespace = real_namespace
+    227
+    228         count = 0
+    229         for page in pages_list:
+    230             self.processPost(page, profile_jid)
+    231             count += 1
+    232             if count >= self.limit:
+    233                 break
+    234
+    235         return (self.posts_data.itervalues(), len(self.posts_data))
 213 236
 214 237     def processContent(self, text, backlinks, profile_jid):
 215 238         """Do text substitutions and file copy.
 216 239
 217 240         @param text (unicode): message content