comparison sat/memory/sqlite.py @ 2721:4aaa47f62d8d

core (memory/sqlite): fixed v7 update performance issue: Performance issue was due to bad ordering in table dropping, this has been fixed. "infos" message are also deleted by this update as they are containing only presence data and take a lot of space for a barely useful data. A config option may be available in the future to store presence data in logs.
author Goffi <goffi@goffi.org>
date Tue, 11 Dec 2018 23:53:27 +0100
parents 453a12ff6f51
children 14e1db0c1383
comparison
equal deleted inserted replaced
2720:453a12ff6f51 2721:4aaa47f62d8d
1191 1191
1192 @defer.inlineCallbacks 1192 @defer.inlineCallbacks
1193 def update_v7(self): 1193 def update_v7(self):
1194 """Update database from v6 to v7 (history unique constraint change)""" 1194 """Update database from v6 to v7 (history unique constraint change)"""
1195 log.info(u"Database update to v7, this may take a while") 1195 log.info(u"Database update to v7, this may take a while")
1196
1197 log.info(u"some cleaning first")
1198 # we need to fix duplicate stanza_id, as it can result in conflicts with the new schema
1199 # normally database should not contain any, but better safe than sorry.
1200 rows = yield self.dbpool.runQuery(
1201 u"SELECT stanza_id, COUNT(*) as c FROM history WHERE stanza_id is not NULL "
1202 u"GROUP BY stanza_id HAVING c>1")
1203 if rows:
1204 count = sum([r[1] for r in rows]) - len(rows)
1205 log.info(u"{count} duplicate stanzas found, cleaning".format(count=count))
1206 for stanza_id, count in rows:
1207 log.info(u"cleaning duplicate stanza {stanza_id}".format(stanza_id=stanza_id))
1208 row_uids = yield self.dbpool.runQuery(
1209 "SELECT uid FROM history WHERE stanza_id = ? LIMIT ?",
1210 (stanza_id, count-1))
1211 uids = [r[0] for r in row_uids]
1212 yield self.dbpool.runQuery(
1213 "DELETE FROM history WHERE uid IN ({})".format(u",".join(u"?"*len(uids))),
1214 uids)
1215
1216 log.info(u'Deleting "info" messages (this can take a while, please be patient)')
1217 yield self.dbpool.runQuery(u"DELETE FROM history where type='info'");
1218 log.info(u"Cleaning done")
1219
1196 # we have to rename table we will replace 1220 # we have to rename table we will replace
1197 # tables referencing history need to be replaced to, else reference would 1221 # tables referencing history need to be replaced to, else reference would
1198 # be to the old table (which will be dropped at the end). This buggy behaviour 1222 # be to the old table (which will be dropped at the end). This buggy behaviour
1199 # seems to be fixed in new version of Sqlite 1223 # seems to be fixed in new version of Sqlite
1200 yield self.dbpool.runQuery("ALTER TABLE history RENAME TO history_old") 1224 yield self.dbpool.runQuery("ALTER TABLE history RENAME TO history_old")
1201 yield self.dbpool.runQuery("ALTER TABLE message RENAME TO message_old") 1225 yield self.dbpool.runQuery("ALTER TABLE message RENAME TO message_old")
1202 yield self.dbpool.runQuery("ALTER TABLE subject RENAME TO subject_old") 1226 yield self.dbpool.runQuery("ALTER TABLE subject RENAME TO subject_old")
1203 yield self.dbpool.runQuery("ALTER TABLE thread RENAME TO thread_old") 1227 yield self.dbpool.runQuery("ALTER TABLE thread RENAME TO thread_old")
1204
1205 # we need to fix duplicate stanza_id, as it can result in conflicts with the new schema
1206 # normally database should not contain any, but better safe than sorry.
1207 rows = yield self.dbpool.runQuery(
1208 u"SELECT stanza_id, COUNT(*) as c FROM history_old WHERE stanza_id is not NULL "
1209 u"GROUP BY stanza_id HAVING c>1")
1210 if rows:
1211 count = sum([r[1] for r in rows]) - len(rows)
1212 log.info(u"{count} duplicate stanzas found, cleaning".format(count=count))
1213 for stanza_id, count in rows:
1214 log.info(u"cleaning duplicate stanza {stanza_id}".format(stanza_id=stanza_id))
1215 row_uids = yield self.dbpool.runQuery(
1216 "SELECT uid FROM history_old WHERE stanza_id = ? LIMIT ?",
1217 (stanza_id, count-1))
1218 uids = [r[0] for r in row_uids]
1219 yield self.dbpool.runQuery(
1220 "DELETE FROM history_old WHERE uid IN ({})".format(u",".join(u"?"*len(uids))),
1221 uids)
1222 log.info(u"Cleaning done")
1223 1228
1224 # history 1229 # history
1225 query = (u"CREATE TABLE history (uid TEXT PRIMARY KEY, stanza_id TEXT, " 1230 query = (u"CREATE TABLE history (uid TEXT PRIMARY KEY, stanza_id TEXT, "
1226 u"update_uid TEXT, profile_id INTEGER, source TEXT, dest TEXT, " 1231 u"update_uid TEXT, profile_id INTEGER, source TEXT, dest TEXT, "
1227 u"source_res TEXT, dest_res TEXT, timestamp DATETIME NOT NULL, " 1232 u"source_res TEXT, dest_res TEXT, timestamp DATETIME NOT NULL, "
1247 query = (u"CREATE TABLE thread (id INTEGER PRIMARY KEY ASC, history_uid INTEGER" 1252 query = (u"CREATE TABLE thread (id INTEGER PRIMARY KEY ASC, history_uid INTEGER"
1248 u", thread_id TEXT, parent_id TEXT, FOREIGN KEY(history_uid) REFERENCES " 1253 u", thread_id TEXT, parent_id TEXT, FOREIGN KEY(history_uid) REFERENCES "
1249 u"history(uid) ON DELETE CASCADE)") 1254 u"history(uid) ON DELETE CASCADE)")
1250 yield self.dbpool.runQuery(query) 1255 yield self.dbpool.runQuery(query)
1251 1256
1252 log.info(u"Now transfering old data to new tables, this can be really long " 1257 log.info(u"Now transfering old data to new tables, please be patient.")
1253 u"depending on your history size, please be patient.")
1254 1258
1255 log.info(u"\nTransfering table history") 1259 log.info(u"\nTransfering table history")
1256 query = (u"INSERT INTO history (uid, stanza_id, update_uid, profile_id, source, " 1260 query = (u"INSERT INTO history (uid, stanza_id, update_uid, profile_id, source, "
1257 u"dest, source_res, dest_res, timestamp, received_timestamp, type, extra" 1261 u"dest, source_res, dest_res, timestamp, received_timestamp, type, extra"
1258 u") SELECT uid, stanza_id, update_uid, profile_id, source, dest, " 1262 u") SELECT uid, stanza_id, update_uid, profile_id, source, dest, "
1273 log.info(u"\nTransfering table thread") 1277 log.info(u"\nTransfering table thread")
1274 query = (u"INSERT INTO thread (id, history_uid, thread_id, parent_id) SELECT id" 1278 query = (u"INSERT INTO thread (id, history_uid, thread_id, parent_id) SELECT id"
1275 u", history_uid, thread_id, parent_id FROM thread_old") 1279 u", history_uid, thread_id, parent_id FROM thread_old")
1276 yield self.dbpool.runQuery(query) 1280 yield self.dbpool.runQuery(query)
1277 1281
1282 # because of foreign keys, tables referencing history_old
1283 # must be deleted first
1284 yield self.dbpool.runQuery("DROP TABLE thread_old")
1285 yield self.dbpool.runQuery("DROP TABLE subject_old")
1286 yield self.dbpool.runQuery("DROP TABLE message_old")
1278 yield self.dbpool.runQuery("DROP TABLE history_old") 1287 yield self.dbpool.runQuery("DROP TABLE history_old")
1279 yield self.dbpool.runQuery("DROP TABLE message_old")
1280 yield self.dbpool.runQuery("DROP TABLE subject_old")
1281 yield self.dbpool.runQuery("DROP TABLE thread_old")
1282 log.info(u"Database update done :)") 1288 log.info(u"Database update done :)")
1283 1289
1284 @defer.inlineCallbacks 1290 @defer.inlineCallbacks
1285 def update_v3(self): 1291 def update_v3(self):
1286 """Update database from v2 to v3 (message refactoring)""" 1292 """Update database from v2 to v3 (message refactoring)"""