libervia-backend: sat/memory/sqlite.py comparison

comparison sat/memory/sqlite.py @ 2787:298408833ec2

memory (sqlite): optimizations. Indexes were missing on the foreign keys of the "message", "subject" and "thread" tables, seriously impacting performance. In addition to those indexes, two indexes have been added to speed up ordering by timestamp on "history", and indexes have been added on the "files" table. history.rowid is not used anymore, as there is now an index on (profile_id, received_timestamp) which speeds up the query. The column order of the primary keys has been changed so that the automatic index is used in the most common cases (filtering by profile_id, then namespace).

author	Goffi <goffi@goffi.org>
date	Sat, 19 Jan 2019 22:49:32 +0100
parents	003b8b4b56a7
children	a425c1ca51d0

comparison

equal deleted inserted replaced

-:be8405795e09
+:298408833ec2
 import cPickle as pickle
 import hashlib
 import sqlite3
 import json
-CURRENT_DB_VERSION = 7
+CURRENT_DB_VERSION = 8
 # XXX: DATABASE schemas are used in the following way:
 #      - 'current' key is for the actual database schema, for a new base
 #      - x(int) is for the update needed between x-1 and x. All numbers between y and z are needed to do an update from y to z
 #        e.g.: if CURRENT_DB_VERSION is 6, 'current' is the actual DB, and to update from version 3, numbers 4, 5 and 6 are needed
 #      a 'current' data dict can contain the keys:
 #      - 'CREATE': it contains an OrderedDict with the tables to create as keys, and a 2-tuple as value, where value[0] holds the columns definitions and value[1] holds the table constraints
 #      - 'INSERT': it contains an OrderedDict with the tables where values have to be inserted as keys, and tuples containing the values to insert in the order of the rows (#TODO: manage named columns)
+#      - 'INDEX': it contains a tuple of (table, columns data) pairs, where columns data is a tuple with one item per index to create on the table: either a column name, or a tuple of column names for a multi-column index
 #      an update data dict (the ones with a number) can contain the keys 'create', 'delete', 'cols create', 'cols delete', 'cols modify', 'insert', 'index' or 'specific'. See Updater.generateUpdateData for more info. This method can be used to autogenerate update_data, to ease the work of the developers.
 # TODO: indexes need to be improved
 DATABASE_SCHEMAS = {
 "current": {'CREATE': OrderedDict((
 ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))),
 ('subject',        (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "subject TEXT", "language TEXT"),
 ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))),
 ('thread',          (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "thread_id TEXT", "parent_id TEXT"),("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))),
 ('param_gen',       (("category TEXT", "name TEXT", "value TEXT"),
-("PRIMARY KEY (category,name)",))),
+("PRIMARY KEY (category, name)",))),
 ('param_ind',       (("category TEXT", "name TEXT", "profile_id INTEGER", "value TEXT"),
-("PRIMARY KEY (category,name,profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
+("PRIMARY KEY (profile_id, category, name)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
 ('private_gen',     (("namespace TEXT", "key TEXT", "value TEXT"),
 ("PRIMARY KEY (namespace, key)",))),
 ('private_ind',     (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value TEXT"),
-("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
+("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
 ('private_gen_bin', (("namespace TEXT", "key TEXT", "value BLOB"),
 ("PRIMARY KEY (namespace, key)",))),
 ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"),
-("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
+("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
 ('files',           (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL",
 "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format(
 file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY),
 "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER",
 "namespace TEXT", "mime_type TEXT",
 ("'headline'",),
 ("'normal'",),
 ("'info'",) # info is not standard, but used to keep track of info like join/leave in a MUC
 )),
 )),
+'INDEX': (('history', (('profile_id', 'timestamp'),
+('profile_id', 'received_timestamp'))),
+('message', ('history_uid',)),
+('subject', ('history_uid',)),
+('thread', ('history_uid',)),
+('files', ('profile_id', 'mime_type', 'owner', 'parent'))),
 },
+8:         {'specific': 'update_v8'
+},
 7:         {'specific': 'update_v7'
 },
 6:         {'cols create': {'history': ('stanza_id TEXT',)},
 },
 5:         {'create': {'files': (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL",
 def getNewBaseSql():
 log.info(_("The database is new, creating the tables"))
 database_creation = ["PRAGMA user_version=%d" % CURRENT_DB_VERSION]
 database_creation.extend(Updater.createData2Raw(DATABASE_SCHEMAS['current']['CREATE']))
 database_creation.extend(Updater.insertData2Raw(DATABASE_SCHEMAS['current']['INSERT']))
+database_creation.extend(Updater.indexData2Raw(DATABASE_SCHEMAS['current']['INDEX']))
 return database_creation
 def getUpdateSql():
 updater = Updater(self, sat_version)
 return updater.checkUpdates()
 # while we were offline, using MAM (XEP-0313).
 # It must be set after all other filters, because it contains an ORDER BY
 if (filters[u'last_stanza_id'] is not True
 or limit != 1):
 raise ValueError(u"Unexpected values for last_stanza_id filter")
-query_parts.append(u"AND stanza_id IS NOT NULL ORDER BY history.rowid DESC")
+query_parts.append(u"AND stanza_id IS NOT NULL ORDER BY history.received_timestamp DESC")
 order = True
 if not order:
-# timestamp may be identical for 2 close message (specially when delay is used)
+# timestamp may be identical for 2 close messages (especially when delay is
-# that's why we order ties by rowid (which is in the same order as received_timestamp
+# used) that's why we order ties by received_timestamp
-# but has an index so is quick to order).
 # We'll reverse the order in sqliteHistoryToList
-query_parts.append(u"ORDER BY timestamp DESC, history.rowid DESC")
+query_parts.append(u"ORDER BY timestamp DESC, history.received_timestamp DESC")
 # we use DESC here so LIMIT keep the last messages
 if limit is not None:
 query_parts.append(u"LIMIT ?")
 values.append(limit)
 class Updater(object):
 stmnt_regex = re.compile(r"[\w/' ]+(?:\(.*?\))?[^,]*")
 clean_regex = re.compile(r"^ +|(?<= ) +|(?<=,) +| +$")
 CREATE_SQL = "CREATE TABLE %s (%s)"
 INSERT_SQL = "INSERT INTO %s VALUES (%s)"
+INDEX_SQL = "CREATE INDEX %s ON %s(%s)"
 DROP_SQL = "DROP TABLE %s"
 ALTER_SQL = "ALTER TABLE %s ADD COLUMN %s"
 RENAME_TABLE_SQL = "ALTER TABLE %s RENAME TO %s"
 CONSTRAINTS = ('PRIMARY', 'UNIQUE', 'CHECK', 'FOREIGN')
 def checkUpdates(self):
 """ Check if a database schema/content update is needed, according to DATABASE_SCHEMAS
 @return: deferred which fire a list of SQL update statements, or None if no update is needed
 """
+# TODO: only the "table" type (i.e. "CREATE" statements) is checked,
+#       the "index" type should be checked too.
+#       This may not be relevant if we move to a higher level library (alchimia?)
 local_version = yield self.getLocalVersion()
 raw_local_sch = yield self.getLocalSchema()
 local_sch = self.rawStatements2data(raw_local_sch)
 current_sch = DATABASE_SCHEMAS['current']['CREATE']
 local_hash = self.statementHash(local_sch)
 current_hash = self.statementHash(current_sch)
 # Force the update if the schemas are unchanged but a specific update is needed
 force_update = local_hash == current_hash and local_version < CURRENT_DB_VERSION \
-and 'specific' in DATABASE_SCHEMAS[CURRENT_DB_VERSION]
+and {'index', 'specific'}.intersection(DATABASE_SCHEMAS[CURRENT_DB_VERSION])
 if local_hash == current_hash and not force_update:
 if local_version != CURRENT_DB_VERSION:
 log.warning(_("Your local schema is up-to-date, but database versions mismatch, fixing it..."))
 yield self._setLocalVersion(CURRENT_DB_VERSION)
 for values in values_tuple:
 assert isinstance(values, tuple)
 ret.append(Updater.INSERT_SQL % (table, ', '.join(values)))
 return ret
@staticmethod
def indexData2Raw(data):
    """Build the "CREATE INDEX" statements described by index data

    @param data(tuple): pairs of (table name, columns data), where columns data
        is a tuple with one item per index to create on the table: either a
        column name, or a tuple of column names for a multi-column index
    @return (list): raw SQL statements, one per index to create
    @raise exceptions.InternalError: an item of columns data is neither a
        string nor a tuple
    """
    assert isinstance(data, tuple)
    statements = []
    for table, col_data in data:
        assert isinstance(table, basestring)
        assert isinstance(col_data, tuple)
        for cols in col_data:
            if isinstance(cols, basestring):
                # single column index
                indexed_cols = cols
            elif isinstance(cols, tuple):
                # multi-column index
                assert all(isinstance(col, basestring) for col in cols)
                indexed_cols = u','.join(cols)
            else:
                raise exceptions.InternalError(u"unexpected index columns value")
            # the index is named after its table and its columns,
            # e.g. "history__profile_id_timestamp"
            index_name = table + u'__' + indexed_cols.replace(u',', u'_')
            statements.append(Updater.INDEX_SQL % (index_name, table, indexed_cols))
    return statements
 def statementHash(self, data):
 """ Generate hash of template data
 useful to compare schemas
 @param data: dictionary of "CREATE" statement, with tables names as key,
 ret.append(self.DROP_SQL % self.TMP_TABLE)
 insert = update.get('insert', {})
 ret.extend(self.insertData2Raw(insert))
+index = update.get('index', tuple())
+ret.extend(self.indexData2Raw(index))
 specific = update.get('specific', None)
 if specific:
 cmds = yield getattr(self, specific)()
 ret.extend(cmds or [])
 defer.returnValue(ret)
def update_v8(self):
    """Update database from v7 to v8 (primary keys order changes + indexes)

    Each table listed below is renamed with an "_old" suffix, recreated with
    the v8 schema, filled from the renamed table, then the renamed table is
    dropped. The index creation statements are appended after the tables.
    @return (list): raw SQL statements, in the order they must be executed
    """
    log.info(u"Database update to v8")
    # The schemas and indexes below are copies of the v8 data: the "current"
    # schema can't be used here because it may change in a future version,
    # which would break this update.
    profile_fk = "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"
    tables_v8 = (
        ('param_gen', (
            ("category TEXT", "name TEXT", "value TEXT"),
            ("PRIMARY KEY (category, name)",))),
        ('param_ind', (
            ("category TEXT", "name TEXT", "profile_id INTEGER", "value TEXT"),
            ("PRIMARY KEY (profile_id, category, name)", profile_fk))),
        ('private_ind', (
            ("namespace TEXT", "key TEXT", "profile_id INTEGER", "value TEXT"),
            ("PRIMARY KEY (profile_id, namespace, key)", profile_fk))),
        ('private_ind_bin', (
            ("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"),
            ("PRIMARY KEY (profile_id, namespace, key)", profile_fk))),
    )
    indexes_v8 = (
        ('history', (('profile_id', 'timestamp'),
                     ('profile_id', 'received_timestamp'))),
        ('message', ('history_uid',)),
        ('subject', ('history_uid',)),
        ('thread', ('history_uid',)),
        ('files', ('profile_id', 'mime_type', 'owner', 'parent')),
    )

    sql = ["PRAGMA foreign_keys = OFF"]
    for table, definition in tables_v8:
        # the column name is the first word of each column definition
        col_names = u','.join(col_def.split()[0] for col_def in definition[0])
        sql.append("ALTER TABLE {0} RENAME TO {0}_old".format(table))
        sql.extend(Updater.createData2Raw({table: definition}))
        sql.append(u"INSERT INTO {table}({cols}) "
                   u"SELECT {cols} FROM {table}_old".format(
                       table=table,
                       cols=col_names))
        sql.append(u"DROP TABLE {}_old".format(table))
    sql.extend(Updater.indexData2Raw(indexes_v8))
    sql.append("PRAGMA foreign_keys = ON")
    return sql
 @defer.inlineCallbacks
 def update_v7(self):
 """Update database from v6 to v7 (history unique constraint change)"""
 log.info(u"Database update to v7, this may be long depending on your history "

Mercurial > libervia-backend

comparison sat/memory/sqlite.py @ 2787:298408833ec2