Mercurial > libervia-backend
comparison sat/memory/sqlite.py @ 2787:298408833ec2
memory (sqlite): optimizations
indexes were missing on foreign keys for "message", "subject" and "thread" tables, seriously impacting performances.
In addition to those indexes, two indexes have been added to speed ordering by timestamp on "history", and one on "files" table.
history.rowid is not used anymore as there is an index on (profile_id, received_timestamp) which will speed up the query.
Primary keys order has been changed to use automatic index in most common cases (filtering by profile_id then namespace).
author | Goffi <goffi@goffi.org> |
---|---|
date | Sat, 19 Jan 2019 22:49:32 +0100 |
parents | 003b8b4b56a7 |
children | a425c1ca51d0 |
comparison
equal
deleted
inserted
replaced
2786:be8405795e09 | 2787:298408833ec2 |
---|---|
34 import cPickle as pickle | 34 import cPickle as pickle |
35 import hashlib | 35 import hashlib |
36 import sqlite3 | 36 import sqlite3 |
37 import json | 37 import json |
38 | 38 |
39 CURRENT_DB_VERSION = 7 | 39 CURRENT_DB_VERSION = 8 |
40 | 40 |
41 # XXX: DATABASE schemas are used in the following way: | 41 # XXX: DATABASE schemas are used in the following way: |
42 # - 'current' key is for the actual database schema, for a new base | 42 # - 'current' key is for the actual database schema, for a new base |
43 # - x(int) is for update needed between x-1 and x. All number are needed between y and z to do an update | 43 # - x(int) is for update needed between x-1 and x. All number are needed between y and z to do an update |
44 # e.g.: if CURRENT_DB_VERSION is 6, 'current' is the actuel DB, and to update from version 3, numbers 4, 5 and 6 are needed | 44 # e.g.: if CURRENT_DB_VERSION is 6, 'current' is the actuel DB, and to update from version 3, numbers 4, 5 and 6 are needed |
45 # a 'current' data dict can contains the keys: | 45 # a 'current' data dict can contains the keys: |
46 # - 'CREATE': it contains an Ordered dict with table to create as keys, and a len 2 tuple as value, where value[0] are the columns definitions and value[1] are the table constraints | 46 # - 'CREATE': it contains an Ordered dict with table to create as keys, and a len 2 tuple as value, where value[0] are the columns definitions and value[1] are the table constraints |
47 # - 'INSERT': it contains an Ordered dict with table where values have to be inserted, and many tuples containing values to insert in the order of the rows (#TODO: manage named columns) | 47 # - 'INSERT': it contains an Ordered dict with table where values have to be inserted, and many tuples containing values to insert in the order of the rows (#TODO: manage named columns) |
48 # - 'INDEX': | |
48 # an update data dict (the ones with a number) can contains the keys 'create', 'delete', 'cols create', 'cols delete', 'cols modify', 'insert' or 'specific'. See Updater.generateUpdateData for more infos. This method can be used to autogenerate update_data, to ease the work of the developers. | 49 # an update data dict (the ones with a number) can contains the keys 'create', 'delete', 'cols create', 'cols delete', 'cols modify', 'insert' or 'specific'. See Updater.generateUpdateData for more infos. This method can be used to autogenerate update_data, to ease the work of the developers. |
49 # TODO: indexes need to be improved | 50 # TODO: indexes need to be improved |
50 | 51 |
51 DATABASE_SCHEMAS = { | 52 DATABASE_SCHEMAS = { |
52 "current": {'CREATE': OrderedDict(( | 53 "current": {'CREATE': OrderedDict(( |
66 ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))), | 67 ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))), |
67 ('subject', (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "subject TEXT", "language TEXT"), | 68 ('subject', (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "subject TEXT", "language TEXT"), |
68 ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))), | 69 ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))), |
69 ('thread', (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "thread_id TEXT", "parent_id TEXT"),("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))), | 70 ('thread', (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "thread_id TEXT", "parent_id TEXT"),("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))), |
70 ('param_gen', (("category TEXT", "name TEXT", "value TEXT"), | 71 ('param_gen', (("category TEXT", "name TEXT", "value TEXT"), |
71 ("PRIMARY KEY (category,name)",))), | 72 ("PRIMARY KEY (category, name)",))), |
72 ('param_ind', (("category TEXT", "name TEXT", "profile_id INTEGER", "value TEXT"), | 73 ('param_ind', (("category TEXT", "name TEXT", "profile_id INTEGER", "value TEXT"), |
73 ("PRIMARY KEY (category,name,profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), | 74 ("PRIMARY KEY (profile_id, category, name)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), |
74 ('private_gen', (("namespace TEXT", "key TEXT", "value TEXT"), | 75 ('private_gen', (("namespace TEXT", "key TEXT", "value TEXT"), |
75 ("PRIMARY KEY (namespace, key)",))), | 76 ("PRIMARY KEY (namespace, key)",))), |
76 ('private_ind', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value TEXT"), | 77 ('private_ind', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value TEXT"), |
77 ("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), | 78 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), |
78 ('private_gen_bin', (("namespace TEXT", "key TEXT", "value BLOB"), | 79 ('private_gen_bin', (("namespace TEXT", "key TEXT", "value BLOB"), |
79 ("PRIMARY KEY (namespace, key)",))), | 80 ("PRIMARY KEY (namespace, key)",))), |
80 ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"), | 81 ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"), |
81 ("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), | 82 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), |
82 ('files', (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL", | 83 ('files', (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL", |
83 "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format( | 84 "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format( |
84 file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY), | 85 file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY), |
85 "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER", | 86 "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER", |
86 "namespace TEXT", "mime_type TEXT", | 87 "namespace TEXT", "mime_type TEXT", |
95 ("'headline'",), | 96 ("'headline'",), |
96 ("'normal'",), | 97 ("'normal'",), |
97 ("'info'",) # info is not standard, but used to keep track of info like join/leave in a MUC | 98 ("'info'",) # info is not standard, but used to keep track of info like join/leave in a MUC |
98 )), | 99 )), |
99 )), | 100 )), |
101 'INDEX': (('history', (('profile_id', 'timestamp'), | |
102 ('profile_id', 'received_timestamp'))), | |
103 ('message', ('history_uid',)), | |
104 ('subject', ('history_uid',)), | |
105 ('thread', ('history_uid',)), | |
106 ('files', ('profile_id', 'mime_type', 'owner', 'parent'))), | |
100 }, | 107 }, |
108 8: {'specific': 'update_v8' | |
109 }, | |
101 7: {'specific': 'update_v7' | 110 7: {'specific': 'update_v7' |
102 }, | 111 }, |
103 6: {'cols create': {'history': ('stanza_id TEXT',)}, | 112 6: {'cols create': {'history': ('stanza_id TEXT',)}, |
104 }, | 113 }, |
105 5: {'create': {'files': (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL", | 114 5: {'create': {'files': (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL", |
199 def getNewBaseSql(): | 208 def getNewBaseSql(): |
200 log.info(_("The database is new, creating the tables")) | 209 log.info(_("The database is new, creating the tables")) |
201 database_creation = ["PRAGMA user_version=%d" % CURRENT_DB_VERSION] | 210 database_creation = ["PRAGMA user_version=%d" % CURRENT_DB_VERSION] |
202 database_creation.extend(Updater.createData2Raw(DATABASE_SCHEMAS['current']['CREATE'])) | 211 database_creation.extend(Updater.createData2Raw(DATABASE_SCHEMAS['current']['CREATE'])) |
203 database_creation.extend(Updater.insertData2Raw(DATABASE_SCHEMAS['current']['INSERT'])) | 212 database_creation.extend(Updater.insertData2Raw(DATABASE_SCHEMAS['current']['INSERT'])) |
213 database_creation.extend(Updater.indexData2Raw(DATABASE_SCHEMAS['current']['INDEX'])) | |
204 return database_creation | 214 return database_creation |
205 | 215 |
206 def getUpdateSql(): | 216 def getUpdateSql(): |
207 updater = Updater(self, sat_version) | 217 updater = Updater(self, sat_version) |
208 return updater.checkUpdates() | 218 return updater.checkUpdates() |
601 # while we were offline, using MAM (XEP-0313). | 611 # while we were offline, using MAM (XEP-0313). |
602 # It must be set after all other filters, because it contains an ORDER BY | 612 # It must be set after all other filters, because it contains an ORDER BY |
603 if (filters[u'last_stanza_id'] is not True | 613 if (filters[u'last_stanza_id'] is not True |
604 or limit != 1): | 614 or limit != 1): |
605 raise ValueError(u"Unexpected values for last_stanza_id filter") | 615 raise ValueError(u"Unexpected values for last_stanza_id filter") |
606 query_parts.append(u"AND stanza_id IS NOT NULL ORDER BY history.rowid DESC") | 616 query_parts.append(u"AND stanza_id IS NOT NULL ORDER BY history.received_timestamp DESC") |
607 order = True | 617 order = True |
608 | 618 |
609 | 619 |
610 if not order: | 620 if not order: |
611 # timestamp may be identical for 2 close message (specially when delay is used) | 621 # timestamp may be identical for 2 close messages (specially when delay is |
612 # that's why we order ties by rowid (which is in the same order as received_timestamp | 622 # used) that's why we order ties by received_timestamp |
613 # but has an index so is quick to order). | |
614 | |
615 # We'll reverse the order in sqliteHistoryToList | 623 # We'll reverse the order in sqliteHistoryToList |
616 query_parts.append(u"ORDER BY timestamp DESC, history.rowid DESC") | 624 query_parts.append(u"ORDER BY timestamp DESC, history.received_timestamp DESC") |
617 # we use DESC here so LIMIT keep the last messages | 625 # we use DESC here so LIMIT keep the last messages |
618 if limit is not None: | 626 if limit is not None: |
619 query_parts.append(u"LIMIT ?") | 627 query_parts.append(u"LIMIT ?") |
620 values.append(limit) | 628 values.append(limit) |
621 | 629 |
937 class Updater(object): | 945 class Updater(object): |
938 stmnt_regex = re.compile(r"[\w/' ]+(?:\(.*?\))?[^,]*") | 946 stmnt_regex = re.compile(r"[\w/' ]+(?:\(.*?\))?[^,]*") |
939 clean_regex = re.compile(r"^ +|(?<= ) +|(?<=,) +| +$") | 947 clean_regex = re.compile(r"^ +|(?<= ) +|(?<=,) +| +$") |
940 CREATE_SQL = "CREATE TABLE %s (%s)" | 948 CREATE_SQL = "CREATE TABLE %s (%s)" |
941 INSERT_SQL = "INSERT INTO %s VALUES (%s)" | 949 INSERT_SQL = "INSERT INTO %s VALUES (%s)" |
950 INDEX_SQL = "CREATE INDEX %s ON %s(%s)" | |
942 DROP_SQL = "DROP TABLE %s" | 951 DROP_SQL = "DROP TABLE %s" |
943 ALTER_SQL = "ALTER TABLE %s ADD COLUMN %s" | 952 ALTER_SQL = "ALTER TABLE %s ADD COLUMN %s" |
944 RENAME_TABLE_SQL = "ALTER TABLE %s RENAME TO %s" | 953 RENAME_TABLE_SQL = "ALTER TABLE %s RENAME TO %s" |
945 | 954 |
946 CONSTRAINTS = ('PRIMARY', 'UNIQUE', 'CHECK', 'FOREIGN') | 955 CONSTRAINTS = ('PRIMARY', 'UNIQUE', 'CHECK', 'FOREIGN') |
982 def checkUpdates(self): | 991 def checkUpdates(self): |
983 """ Check if a database schema/content update is needed, according to DATABASE_SCHEMAS | 992 """ Check if a database schema/content update is needed, according to DATABASE_SCHEMAS |
984 | 993 |
985 @return: deferred which fire a list of SQL update statements, or None if no update is needed | 994 @return: deferred which fire a list of SQL update statements, or None if no update is needed |
986 """ | 995 """ |
996 # TODO: only "table" type (i.e. "CREATE" statements) is checked, | |
997 # "index" should be checked too. | |
998 # This may be not relevant is we move to a higher level library (alchimia?) | |
987 local_version = yield self.getLocalVersion() | 999 local_version = yield self.getLocalVersion() |
988 raw_local_sch = yield self.getLocalSchema() | 1000 raw_local_sch = yield self.getLocalSchema() |
989 | 1001 |
990 local_sch = self.rawStatements2data(raw_local_sch) | 1002 local_sch = self.rawStatements2data(raw_local_sch) |
991 current_sch = DATABASE_SCHEMAS['current']['CREATE'] | 1003 current_sch = DATABASE_SCHEMAS['current']['CREATE'] |
992 local_hash = self.statementHash(local_sch) | 1004 local_hash = self.statementHash(local_sch) |
993 current_hash = self.statementHash(current_sch) | 1005 current_hash = self.statementHash(current_sch) |
994 | 1006 |
995 # Force the update if the schemas are unchanged but a specific update is needed | 1007 # Force the update if the schemas are unchanged but a specific update is needed |
996 force_update = local_hash == current_hash and local_version < CURRENT_DB_VERSION \ | 1008 force_update = local_hash == current_hash and local_version < CURRENT_DB_VERSION \ |
997 and 'specific' in DATABASE_SCHEMAS[CURRENT_DB_VERSION] | 1009 and {'index', 'specific'}.intersection(DATABASE_SCHEMAS[CURRENT_DB_VERSION]) |
998 | 1010 |
999 if local_hash == current_hash and not force_update: | 1011 if local_hash == current_hash and not force_update: |
1000 if local_version != CURRENT_DB_VERSION: | 1012 if local_version != CURRENT_DB_VERSION: |
1001 log.warning(_("Your local schema is up-to-date, but database versions mismatch, fixing it...")) | 1013 log.warning(_("Your local schema is up-to-date, but database versions mismatch, fixing it...")) |
1002 yield self._setLocalVersion(CURRENT_DB_VERSION) | 1014 yield self._setLocalVersion(CURRENT_DB_VERSION) |
1071 for values in values_tuple: | 1083 for values in values_tuple: |
1072 assert isinstance(values, tuple) | 1084 assert isinstance(values, tuple) |
1073 ret.append(Updater.INSERT_SQL % (table, ', '.join(values))) | 1085 ret.append(Updater.INSERT_SQL % (table, ', '.join(values))) |
1074 return ret | 1086 return ret |
1075 | 1087 |
1088 @staticmethod | |
1089 def indexData2Raw(data): | |
1090 """ Generate SQL statements from statements data | |
1091 | |
1092 @param data: dictionary with table as key, and statements data in tuples as value | |
1093 @return: list of strings with raw statements | |
1094 """ | |
1095 ret = [] | |
1096 assert isinstance(data, tuple) | |
1097 for table, col_data in data: | |
1098 assert isinstance(table, basestring) | |
1099 assert isinstance(col_data, tuple) | |
1100 for cols in col_data: | |
1101 if isinstance(cols, tuple): | |
1102 assert all([isinstance(c, basestring) for c in cols]) | |
1103 indexed_cols = u','.join(cols) | |
1104 elif isinstance(cols, basestring): | |
1105 indexed_cols = cols | |
1106 else: | |
1107 raise exceptions.InternalError(u"unexpected index columns value") | |
1108 index_name = table + u'__' + indexed_cols.replace(u',', u'_') | |
1109 ret.append(Updater.INDEX_SQL % (index_name, table, indexed_cols)) | |
1110 return ret | |
1111 | |
1076 def statementHash(self, data): | 1112 def statementHash(self, data): |
1077 """ Generate hash of template data | 1113 """ Generate hash of template data |
1078 | 1114 |
1079 useful to compare schemas | 1115 useful to compare schemas |
1080 @param data: dictionary of "CREATE" statement, with tables names as key, | 1116 @param data: dictionary of "CREATE" statement, with tables names as key, |
1220 ret.append(self.DROP_SQL % self.TMP_TABLE) | 1256 ret.append(self.DROP_SQL % self.TMP_TABLE) |
1221 | 1257 |
1222 insert = update.get('insert', {}) | 1258 insert = update.get('insert', {}) |
1223 ret.extend(self.insertData2Raw(insert)) | 1259 ret.extend(self.insertData2Raw(insert)) |
1224 | 1260 |
1261 index = update.get('index', tuple()) | |
1262 ret.extend(self.indexData2Raw(index)) | |
1263 | |
1225 specific = update.get('specific', None) | 1264 specific = update.get('specific', None) |
1226 if specific: | 1265 if specific: |
1227 cmds = yield getattr(self, specific)() | 1266 cmds = yield getattr(self, specific)() |
1228 ret.extend(cmds or []) | 1267 ret.extend(cmds or []) |
1229 defer.returnValue(ret) | 1268 defer.returnValue(ret) |
1269 | |
1270 def update_v8(self): | |
1271 """Update database from v7 to v8 (primary keys order changes + indexes)""" | |
1272 log.info(u"Database update to v8") | |
1273 statements = ["PRAGMA foreign_keys = OFF"] | |
1274 | |
1275 # here is a copy of create and index data, we can't use "current" table | |
1276 # because it may change in a future version, which would break the update | |
1277 # when doing v8 | |
1278 create = { | |
1279 'param_gen': ( | |
1280 ("category TEXT", "name TEXT", "value TEXT"), | |
1281 ("PRIMARY KEY (category, name)",)), | |
1282 'param_ind': ( | |
1283 ("category TEXT", "name TEXT", "profile_id INTEGER", "value TEXT"), | |
1284 ("PRIMARY KEY (profile_id, category, name)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE")), | |
1285 'private_ind': ( | |
1286 ("namespace TEXT", "key TEXT", "profile_id INTEGER", "value TEXT"), | |
1287 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE")), | |
1288 'private_ind_bin': ( | |
1289 ("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"), | |
1290 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE")), | |
1291 } | |
1292 index = ( | |
1293 ('history', (('profile_id', 'timestamp'), | |
1294 ('profile_id', 'received_timestamp'))), | |
1295 ('message', ('history_uid',)), | |
1296 ('subject', ('history_uid',)), | |
1297 ('thread', ('history_uid',)), | |
1298 ('files', ('profile_id', 'mime_type', 'owner', 'parent'))) | |
1299 | |
1300 for table in ('param_gen', 'param_ind', 'private_ind', 'private_ind_bin'): | |
1301 statements.append("ALTER TABLE {0} RENAME TO {0}_old".format(table)) | |
1302 schema = {table: create[table]} | |
1303 cols = [d.split()[0] for d in schema[table][0]] | |
1304 statements.extend(Updater.createData2Raw(schema)) | |
1305 statements.append(u"INSERT INTO {table}({cols}) " | |
1306 u"SELECT {cols} FROM {table}_old".format( | |
1307 table=table, | |
1308 cols=u','.join(cols))) | |
1309 statements.append(u"DROP TABLE {}_old".format(table)) | |
1310 | |
1311 statements.extend(Updater.indexData2Raw(index)) | |
1312 statements.append("PRAGMA foreign_keys = ON") | |
1313 return statements | |
1230 | 1314 |
1231 @defer.inlineCallbacks | 1315 @defer.inlineCallbacks |
1232 def update_v7(self): | 1316 def update_v7(self): |
1233 """Update database from v6 to v7 (history unique constraint change)""" | 1317 """Update database from v6 to v7 (history unique constraint change)""" |
1234 log.info(u"Database update to v7, this may be long depending on your history " | 1318 log.info(u"Database update to v7, this may be long depending on your history " |