comparison sat/memory/sqlite.py @ 2787:298408833ec2

memory (sqlite): optimizations indexes were missing on foreign keys for "message", "subject" and "thread" tables, seriously impacting performances. In addition to those indexes, two indexes have been added to speed ordering by timestamp on "history", and one on "files" table. history.rowid is not used anymore as there is an index on (profile_id, received_timestamp) which will speed up the query. Primary keys order has been changed to use automatic index in most common cases (filtering by profile_id then namespace).
author Goffi <goffi@goffi.org>
date Sat, 19 Jan 2019 22:49:32 +0100
parents 003b8b4b56a7
children a425c1ca51d0
comparison
equal deleted inserted replaced
2786:be8405795e09 2787:298408833ec2
34 import cPickle as pickle 34 import cPickle as pickle
35 import hashlib 35 import hashlib
36 import sqlite3 36 import sqlite3
37 import json 37 import json
38 38
39 CURRENT_DB_VERSION = 7 39 CURRENT_DB_VERSION = 8
40 40
41 # XXX: DATABASE schemas are used in the following way: 41 # XXX: DATABASE schemas are used in the following way:
42 # - 'current' key is for the actual database schema, for a new base 42 # - 'current' key is for the actual database schema, for a new base
43 # - x(int) is for update needed between x-1 and x. All number are needed between y and z to do an update 43 # - x(int) is for update needed between x-1 and x. All number are needed between y and z to do an update
44 # e.g.: if CURRENT_DB_VERSION is 6, 'current' is the actuel DB, and to update from version 3, numbers 4, 5 and 6 are needed 44 # e.g.: if CURRENT_DB_VERSION is 6, 'current' is the actuel DB, and to update from version 3, numbers 4, 5 and 6 are needed
45 # a 'current' data dict can contains the keys: 45 # a 'current' data dict can contains the keys:
46 # - 'CREATE': it contains an Ordered dict with table to create as keys, and a len 2 tuple as value, where value[0] are the columns definitions and value[1] are the table constraints 46 # - 'CREATE': it contains an Ordered dict with table to create as keys, and a len 2 tuple as value, where value[0] are the columns definitions and value[1] are the table constraints
47 # - 'INSERT': it contains an Ordered dict with table where values have to be inserted, and many tuples containing values to insert in the order of the rows (#TODO: manage named columns) 47 # - 'INSERT': it contains an Ordered dict with table where values have to be inserted, and many tuples containing values to insert in the order of the rows (#TODO: manage named columns)
48 # - 'INDEX':
48 # an update data dict (the ones with a number) can contains the keys 'create', 'delete', 'cols create', 'cols delete', 'cols modify', 'insert' or 'specific'. See Updater.generateUpdateData for more infos. This method can be used to autogenerate update_data, to ease the work of the developers. 49 # an update data dict (the ones with a number) can contains the keys 'create', 'delete', 'cols create', 'cols delete', 'cols modify', 'insert' or 'specific'. See Updater.generateUpdateData for more infos. This method can be used to autogenerate update_data, to ease the work of the developers.
49 # TODO: indexes need to be improved 50 # TODO: indexes need to be improved
50 51
51 DATABASE_SCHEMAS = { 52 DATABASE_SCHEMAS = {
52 "current": {'CREATE': OrderedDict(( 53 "current": {'CREATE': OrderedDict((
66 ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))), 67 ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))),
67 ('subject', (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "subject TEXT", "language TEXT"), 68 ('subject', (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "subject TEXT", "language TEXT"),
68 ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))), 69 ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))),
69 ('thread', (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "thread_id TEXT", "parent_id TEXT"),("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))), 70 ('thread', (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "thread_id TEXT", "parent_id TEXT"),("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))),
70 ('param_gen', (("category TEXT", "name TEXT", "value TEXT"), 71 ('param_gen', (("category TEXT", "name TEXT", "value TEXT"),
71 ("PRIMARY KEY (category,name)",))), 72 ("PRIMARY KEY (category, name)",))),
72 ('param_ind', (("category TEXT", "name TEXT", "profile_id INTEGER", "value TEXT"), 73 ('param_ind', (("category TEXT", "name TEXT", "profile_id INTEGER", "value TEXT"),
73 ("PRIMARY KEY (category,name,profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), 74 ("PRIMARY KEY (profile_id, category, name)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
74 ('private_gen', (("namespace TEXT", "key TEXT", "value TEXT"), 75 ('private_gen', (("namespace TEXT", "key TEXT", "value TEXT"),
75 ("PRIMARY KEY (namespace, key)",))), 76 ("PRIMARY KEY (namespace, key)",))),
76 ('private_ind', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value TEXT"), 77 ('private_ind', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value TEXT"),
77 ("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), 78 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
78 ('private_gen_bin', (("namespace TEXT", "key TEXT", "value BLOB"), 79 ('private_gen_bin', (("namespace TEXT", "key TEXT", "value BLOB"),
79 ("PRIMARY KEY (namespace, key)",))), 80 ("PRIMARY KEY (namespace, key)",))),
80 ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"), 81 ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"),
81 ("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), 82 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
82 ('files', (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL", 83 ('files', (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL",
83 "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format( 84 "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format(
84 file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY), 85 file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY),
85 "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER", 86 "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER",
86 "namespace TEXT", "mime_type TEXT", 87 "namespace TEXT", "mime_type TEXT",
95 ("'headline'",), 96 ("'headline'",),
96 ("'normal'",), 97 ("'normal'",),
97 ("'info'",) # info is not standard, but used to keep track of info like join/leave in a MUC 98 ("'info'",) # info is not standard, but used to keep track of info like join/leave in a MUC
98 )), 99 )),
99 )), 100 )),
101 'INDEX': (('history', (('profile_id', 'timestamp'),
102 ('profile_id', 'received_timestamp'))),
103 ('message', ('history_uid',)),
104 ('subject', ('history_uid',)),
105 ('thread', ('history_uid',)),
106 ('files', ('profile_id', 'mime_type', 'owner', 'parent'))),
100 }, 107 },
108 8: {'specific': 'update_v8'
109 },
101 7: {'specific': 'update_v7' 110 7: {'specific': 'update_v7'
102 }, 111 },
103 6: {'cols create': {'history': ('stanza_id TEXT',)}, 112 6: {'cols create': {'history': ('stanza_id TEXT',)},
104 }, 113 },
105 5: {'create': {'files': (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL", 114 5: {'create': {'files': (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL",
199 def getNewBaseSql(): 208 def getNewBaseSql():
200 log.info(_("The database is new, creating the tables")) 209 log.info(_("The database is new, creating the tables"))
201 database_creation = ["PRAGMA user_version=%d" % CURRENT_DB_VERSION] 210 database_creation = ["PRAGMA user_version=%d" % CURRENT_DB_VERSION]
202 database_creation.extend(Updater.createData2Raw(DATABASE_SCHEMAS['current']['CREATE'])) 211 database_creation.extend(Updater.createData2Raw(DATABASE_SCHEMAS['current']['CREATE']))
203 database_creation.extend(Updater.insertData2Raw(DATABASE_SCHEMAS['current']['INSERT'])) 212 database_creation.extend(Updater.insertData2Raw(DATABASE_SCHEMAS['current']['INSERT']))
213 database_creation.extend(Updater.indexData2Raw(DATABASE_SCHEMAS['current']['INDEX']))
204 return database_creation 214 return database_creation
205 215
206 def getUpdateSql(): 216 def getUpdateSql():
207 updater = Updater(self, sat_version) 217 updater = Updater(self, sat_version)
208 return updater.checkUpdates() 218 return updater.checkUpdates()
601 # while we were offline, using MAM (XEP-0313). 611 # while we were offline, using MAM (XEP-0313).
602 # It must be set after all other filters, because it contains an ORDER BY 612 # It must be set after all other filters, because it contains an ORDER BY
603 if (filters[u'last_stanza_id'] is not True 613 if (filters[u'last_stanza_id'] is not True
604 or limit != 1): 614 or limit != 1):
605 raise ValueError(u"Unexpected values for last_stanza_id filter") 615 raise ValueError(u"Unexpected values for last_stanza_id filter")
606 query_parts.append(u"AND stanza_id IS NOT NULL ORDER BY history.rowid DESC") 616 query_parts.append(u"AND stanza_id IS NOT NULL ORDER BY history.received_timestamp DESC")
607 order = True 617 order = True
608 618
609 619
610 if not order: 620 if not order:
611 # timestamp may be identical for 2 close message (specially when delay is used) 621 # timestamp may be identical for 2 close messages (specially when delay is
612 # that's why we order ties by rowid (which is in the same order as received_timestamp 622 # used) that's why we order ties by received_timestamp
613 # but has an index so is quick to order).
614
615 # We'll reverse the order in sqliteHistoryToList 623 # We'll reverse the order in sqliteHistoryToList
616 query_parts.append(u"ORDER BY timestamp DESC, history.rowid DESC") 624 query_parts.append(u"ORDER BY timestamp DESC, history.received_timestamp DESC")
617 # we use DESC here so LIMIT keep the last messages 625 # we use DESC here so LIMIT keep the last messages
618 if limit is not None: 626 if limit is not None:
619 query_parts.append(u"LIMIT ?") 627 query_parts.append(u"LIMIT ?")
620 values.append(limit) 628 values.append(limit)
621 629
937 class Updater(object): 945 class Updater(object):
938 stmnt_regex = re.compile(r"[\w/' ]+(?:\(.*?\))?[^,]*") 946 stmnt_regex = re.compile(r"[\w/' ]+(?:\(.*?\))?[^,]*")
939 clean_regex = re.compile(r"^ +|(?<= ) +|(?<=,) +| +$") 947 clean_regex = re.compile(r"^ +|(?<= ) +|(?<=,) +| +$")
940 CREATE_SQL = "CREATE TABLE %s (%s)" 948 CREATE_SQL = "CREATE TABLE %s (%s)"
941 INSERT_SQL = "INSERT INTO %s VALUES (%s)" 949 INSERT_SQL = "INSERT INTO %s VALUES (%s)"
950 INDEX_SQL = "CREATE INDEX %s ON %s(%s)"
942 DROP_SQL = "DROP TABLE %s" 951 DROP_SQL = "DROP TABLE %s"
943 ALTER_SQL = "ALTER TABLE %s ADD COLUMN %s" 952 ALTER_SQL = "ALTER TABLE %s ADD COLUMN %s"
944 RENAME_TABLE_SQL = "ALTER TABLE %s RENAME TO %s" 953 RENAME_TABLE_SQL = "ALTER TABLE %s RENAME TO %s"
945 954
946 CONSTRAINTS = ('PRIMARY', 'UNIQUE', 'CHECK', 'FOREIGN') 955 CONSTRAINTS = ('PRIMARY', 'UNIQUE', 'CHECK', 'FOREIGN')
982 def checkUpdates(self): 991 def checkUpdates(self):
983 """ Check if a database schema/content update is needed, according to DATABASE_SCHEMAS 992 """ Check if a database schema/content update is needed, according to DATABASE_SCHEMAS
984 993
985 @return: deferred which fire a list of SQL update statements, or None if no update is needed 994 @return: deferred which fire a list of SQL update statements, or None if no update is needed
986 """ 995 """
996 # TODO: only "table" type (i.e. "CREATE" statements) is checked,
997 # "index" should be checked too.
998 # This may be not relevant is we move to a higher level library (alchimia?)
987 local_version = yield self.getLocalVersion() 999 local_version = yield self.getLocalVersion()
988 raw_local_sch = yield self.getLocalSchema() 1000 raw_local_sch = yield self.getLocalSchema()
989 1001
990 local_sch = self.rawStatements2data(raw_local_sch) 1002 local_sch = self.rawStatements2data(raw_local_sch)
991 current_sch = DATABASE_SCHEMAS['current']['CREATE'] 1003 current_sch = DATABASE_SCHEMAS['current']['CREATE']
992 local_hash = self.statementHash(local_sch) 1004 local_hash = self.statementHash(local_sch)
993 current_hash = self.statementHash(current_sch) 1005 current_hash = self.statementHash(current_sch)
994 1006
995 # Force the update if the schemas are unchanged but a specific update is needed 1007 # Force the update if the schemas are unchanged but a specific update is needed
996 force_update = local_hash == current_hash and local_version < CURRENT_DB_VERSION \ 1008 force_update = local_hash == current_hash and local_version < CURRENT_DB_VERSION \
997 and 'specific' in DATABASE_SCHEMAS[CURRENT_DB_VERSION] 1009 and {'index', 'specific'}.intersection(DATABASE_SCHEMAS[CURRENT_DB_VERSION])
998 1010
999 if local_hash == current_hash and not force_update: 1011 if local_hash == current_hash and not force_update:
1000 if local_version != CURRENT_DB_VERSION: 1012 if local_version != CURRENT_DB_VERSION:
1001 log.warning(_("Your local schema is up-to-date, but database versions mismatch, fixing it...")) 1013 log.warning(_("Your local schema is up-to-date, but database versions mismatch, fixing it..."))
1002 yield self._setLocalVersion(CURRENT_DB_VERSION) 1014 yield self._setLocalVersion(CURRENT_DB_VERSION)
1071 for values in values_tuple: 1083 for values in values_tuple:
1072 assert isinstance(values, tuple) 1084 assert isinstance(values, tuple)
1073 ret.append(Updater.INSERT_SQL % (table, ', '.join(values))) 1085 ret.append(Updater.INSERT_SQL % (table, ', '.join(values)))
1074 return ret 1086 return ret
1075 1087
1088 @staticmethod
1089 def indexData2Raw(data):
1090 """ Generate SQL statements from statements data
1091
1092 @param data: dictionary with table as key, and statements data in tuples as value
1093 @return: list of strings with raw statements
1094 """
1095 ret = []
1096 assert isinstance(data, tuple)
1097 for table, col_data in data:
1098 assert isinstance(table, basestring)
1099 assert isinstance(col_data, tuple)
1100 for cols in col_data:
1101 if isinstance(cols, tuple):
1102 assert all([isinstance(c, basestring) for c in cols])
1103 indexed_cols = u','.join(cols)
1104 elif isinstance(cols, basestring):
1105 indexed_cols = cols
1106 else:
1107 raise exceptions.InternalError(u"unexpected index columns value")
1108 index_name = table + u'__' + indexed_cols.replace(u',', u'_')
1109 ret.append(Updater.INDEX_SQL % (index_name, table, indexed_cols))
1110 return ret
1111
1076 def statementHash(self, data): 1112 def statementHash(self, data):
1077 """ Generate hash of template data 1113 """ Generate hash of template data
1078 1114
1079 useful to compare schemas 1115 useful to compare schemas
1080 @param data: dictionary of "CREATE" statement, with tables names as key, 1116 @param data: dictionary of "CREATE" statement, with tables names as key,
1220 ret.append(self.DROP_SQL % self.TMP_TABLE) 1256 ret.append(self.DROP_SQL % self.TMP_TABLE)
1221 1257
1222 insert = update.get('insert', {}) 1258 insert = update.get('insert', {})
1223 ret.extend(self.insertData2Raw(insert)) 1259 ret.extend(self.insertData2Raw(insert))
1224 1260
1261 index = update.get('index', tuple())
1262 ret.extend(self.indexData2Raw(index))
1263
1225 specific = update.get('specific', None) 1264 specific = update.get('specific', None)
1226 if specific: 1265 if specific:
1227 cmds = yield getattr(self, specific)() 1266 cmds = yield getattr(self, specific)()
1228 ret.extend(cmds or []) 1267 ret.extend(cmds or [])
1229 defer.returnValue(ret) 1268 defer.returnValue(ret)
1269
1270 def update_v8(self):
1271 """Update database from v7 to v8 (primary keys order changes + indexes)"""
1272 log.info(u"Database update to v8")
1273 statements = ["PRAGMA foreign_keys = OFF"]
1274
1275 # here is a copy of create and index data, we can't use "current" table
1276 # because it may change in a future version, which would break the update
1277 # when doing v8
1278 create = {
1279 'param_gen': (
1280 ("category TEXT", "name TEXT", "value TEXT"),
1281 ("PRIMARY KEY (category, name)",)),
1282 'param_ind': (
1283 ("category TEXT", "name TEXT", "profile_id INTEGER", "value TEXT"),
1284 ("PRIMARY KEY (profile_id, category, name)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE")),
1285 'private_ind': (
1286 ("namespace TEXT", "key TEXT", "profile_id INTEGER", "value TEXT"),
1287 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE")),
1288 'private_ind_bin': (
1289 ("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"),
1290 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE")),
1291 }
1292 index = (
1293 ('history', (('profile_id', 'timestamp'),
1294 ('profile_id', 'received_timestamp'))),
1295 ('message', ('history_uid',)),
1296 ('subject', ('history_uid',)),
1297 ('thread', ('history_uid',)),
1298 ('files', ('profile_id', 'mime_type', 'owner', 'parent')))
1299
1300 for table in ('param_gen', 'param_ind', 'private_ind', 'private_ind_bin'):
1301 statements.append("ALTER TABLE {0} RENAME TO {0}_old".format(table))
1302 schema = {table: create[table]}
1303 cols = [d.split()[0] for d in schema[table][0]]
1304 statements.extend(Updater.createData2Raw(schema))
1305 statements.append(u"INSERT INTO {table}({cols}) "
1306 u"SELECT {cols} FROM {table}_old".format(
1307 table=table,
1308 cols=u','.join(cols)))
1309 statements.append(u"DROP TABLE {}_old".format(table))
1310
1311 statements.extend(Updater.indexData2Raw(index))
1312 statements.append("PRAGMA foreign_keys = ON")
1313 return statements
1230 1314
1231 @defer.inlineCallbacks 1315 @defer.inlineCallbacks
1232 def update_v7(self): 1316 def update_v7(self):
1233 """Update database from v6 to v7 (history unique constraint change)""" 1317 """Update database from v6 to v7 (history unique constraint change)"""
1234 log.info(u"Database update to v7, this may be long depending on your history " 1318 log.info(u"Database update to v7, this may be long depending on your history "