diff sat/memory/sqlite.py @ 2721:4aaa47f62d8d

core (memory/sqlite): fixed v7 update performance issue: Performance issue was due to bad ordering in table dropping, this has been fixed. "infos" message are also deleted by this update as they are containing only presence data and take a lot of space for a barely useful data. A config option may be available in the future to store presence data in logs.
author Goffi <goffi@goffi.org>
date Tue, 11 Dec 2018 23:53:27 +0100
parents 453a12ff6f51
children 14e1db0c1383
line wrap: on
line diff
--- a/sat/memory/sqlite.py	Mon Dec 10 21:06:20 2018 +0100
+++ b/sat/memory/sqlite.py	Tue Dec 11 23:53:27 2018 +0100
@@ -1193,6 +1193,30 @@
     def update_v7(self):
         """Update database from v6 to v7 (history unique constraint change)"""
         log.info(u"Database update to v7, this may take a while")
+
+        log.info(u"some cleaning first")
+        # we need to fix duplicate stanza_id, as it can result in conflicts with the new schema
+        # normally database should not contain any, but better safe than sorry.
+        rows = yield self.dbpool.runQuery(
+            u"SELECT stanza_id, COUNT(*) as c FROM history WHERE stanza_id is not NULL "
+            u"GROUP BY stanza_id HAVING c>1")
+        if rows:
+            count = sum([r[1] for r in rows]) - len(rows)
+            log.info(u"{count} duplicate stanzas found, cleaning".format(count=count))
+            for stanza_id, count in rows:
+                log.info(u"cleaning duplicate stanza {stanza_id}".format(stanza_id=stanza_id))
+                row_uids = yield self.dbpool.runQuery(
+                    "SELECT uid FROM history WHERE stanza_id = ? LIMIT ?",
+                    (stanza_id, count-1))
+                uids = [r[0] for r in row_uids]
+                yield self.dbpool.runQuery(
+                    "DELETE FROM history WHERE uid IN ({})".format(u",".join(u"?"*len(uids))),
+                    uids)
+
+        log.info(u'Deleting "info" messages (this can take a while, please be patient)')
+        yield self.dbpool.runQuery(u"DELETE FROM history where type='info'");
+        log.info(u"Cleaning done")
+
         # we have to rename table we will replace
         # tables referencing history need to be replaced to, else reference would
         # be to the old table (which will be dropped at the end). This buggy behaviour
@@ -1202,25 +1226,6 @@
         yield self.dbpool.runQuery("ALTER TABLE subject RENAME TO subject_old")
         yield self.dbpool.runQuery("ALTER TABLE thread RENAME TO thread_old")
 
-        # we need to fix duplicate stanza_id, as it can result in conflicts with the new schema
-        # normally database should not contain any, but better safe than sorry.
-        rows = yield self.dbpool.runQuery(
-            u"SELECT stanza_id, COUNT(*) as c FROM history_old WHERE stanza_id is not NULL "
-            u"GROUP BY stanza_id HAVING c>1")
-        if rows:
-            count = sum([r[1] for r in rows]) - len(rows)
-            log.info(u"{count} duplicate stanzas found, cleaning".format(count=count))
-            for stanza_id, count in rows:
-                log.info(u"cleaning duplicate stanza {stanza_id}".format(stanza_id=stanza_id))
-                row_uids = yield self.dbpool.runQuery(
-                    "SELECT uid FROM history_old WHERE stanza_id = ? LIMIT ?",
-                    (stanza_id, count-1))
-                uids = [r[0] for r in row_uids]
-                yield self.dbpool.runQuery(
-                    "DELETE FROM history_old WHERE uid IN ({})".format(u",".join(u"?"*len(uids))),
-                    uids)
-            log.info(u"Cleaning done")
-
         # history
         query = (u"CREATE TABLE history (uid TEXT PRIMARY KEY, stanza_id TEXT, "
                  u"update_uid TEXT, profile_id INTEGER, source TEXT, dest TEXT, "
@@ -1249,8 +1254,7 @@
                  u"history(uid) ON DELETE CASCADE)")
         yield self.dbpool.runQuery(query)
 
-        log.info(u"Now transfering old data to new tables, this can be really long "
-                 u"depending on your history size, please be patient.")
+        log.info(u"Now transfering old data to new tables, please be patient.")
 
         log.info(u"\nTransfering table history")
         query = (u"INSERT INTO history (uid, stanza_id, update_uid, profile_id, source, "
@@ -1275,10 +1279,12 @@
                  u", history_uid, thread_id, parent_id FROM thread_old")
         yield self.dbpool.runQuery(query)
 
-        yield self.dbpool.runQuery("DROP TABLE history_old")
+        # because of foreign keys, tables referencing history_old
+        # must be deleted first
+        yield self.dbpool.runQuery("DROP TABLE thread_old")
+        yield self.dbpool.runQuery("DROP TABLE subject_old")
         yield self.dbpool.runQuery("DROP TABLE message_old")
-        yield self.dbpool.runQuery("DROP TABLE subject_old")
-        yield self.dbpool.runQuery("DROP TABLE thread_old")
+        yield self.dbpool.runQuery("DROP TABLE history_old")
         log.info(u"Database update done :)")
 
     @defer.inlineCallbacks