comparison sat/memory/sqlite.py @ 3288:780fb8dd07ef

core (memory/sqlite): new database schema (v9): this new schema brings the new `public_id` column, which will be used to serve files via HTTP, splits `mime_type` in 2 (`media_type` and `media_subtype`) to make indexing/media filtering easier, and fixes indexes for `files` table.
author Goffi <goffi@goffi.org>
date Fri, 29 May 2020 21:50:49 +0200
parents 7255286a298a
children 83f25da66bec
comparison
equal deleted inserted replaced
3287:a4b8c9bcfb57 3288:780fb8dd07ef
36 import sqlite3 36 import sqlite3
37 import json 37 import json
38 38
39 log = getLogger(__name__) 39 log = getLogger(__name__)
40 40
41 CURRENT_DB_VERSION = 8 41 CURRENT_DB_VERSION = 9
42 42
43 # XXX: DATABASE schemas are used in the following way: 43 # XXX: DATABASE schemas are used in the following way:
44 # - 'current' key is for the actual database schema, for a new base 44 # - 'current' key is for the actual database schema, for a new base
45 # - x(int) is for update needed between x-1 and x. All number are needed between y and z to do an update 45 # - x(int) is for update needed between x-1 and x. All number are needed between y and z to do an update
46 # e.g.: if CURRENT_DB_VERSION is 6, 'current' is the actuel DB, and to update from version 3, numbers 4, 5 and 6 are needed 46 # e.g.: if CURRENT_DB_VERSION is 6, 'current' is the actuel DB, and to update from version 3, numbers 4, 5 and 6 are needed
80 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), 80 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
81 ('private_gen_bin', (("namespace TEXT", "key TEXT", "value BLOB"), 81 ('private_gen_bin', (("namespace TEXT", "key TEXT", "value BLOB"),
82 ("PRIMARY KEY (namespace, key)",))), 82 ("PRIMARY KEY (namespace, key)",))),
83 ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"), 83 ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"),
84 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), 84 ("PRIMARY KEY (profile_id, namespace, key)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
85 ('files', (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL", 85 ('files', (("id TEXT NOT NULL", "public_id TEXT", "version TEXT NOT NULL",
86 "parent TEXT NOT NULL",
86 "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format( 87 "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format(
87 file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY), 88 file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY),
88 "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER", 89 "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER",
89 "namespace TEXT", "mime_type TEXT", 90 "namespace TEXT", "media_type TEXT", "media_subtype TEXT",
90 "created DATETIME NOT NULL", "modified DATETIME", 91 "created DATETIME NOT NULL", "modified DATETIME",
91 "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"), 92 "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"),
92 ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))), 93 ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE",
94 "UNIQUE (public_id)"))),
93 )), 95 )),
94 'INSERT': OrderedDict(( 96 'INSERT': OrderedDict((
95 ('message_types', (("'chat'",), 97 ('message_types', (("'chat'",),
96 ("'error'",), 98 ("'error'",),
97 ("'groupchat'",), 99 ("'groupchat'",),
103 'INDEX': (('history', (('profile_id', 'timestamp'), 105 'INDEX': (('history', (('profile_id', 'timestamp'),
104 ('profile_id', 'received_timestamp'))), 106 ('profile_id', 'received_timestamp'))),
105 ('message', ('history_uid',)), 107 ('message', ('history_uid',)),
106 ('subject', ('history_uid',)), 108 ('subject', ('history_uid',)),
107 ('thread', ('history_uid',)), 109 ('thread', ('history_uid',)),
108 ('files', ('profile_id', 'mime_type', 'owner', 'parent'))), 110 ('files', (('profile_id', 'owner', 'media_type', 'media_subtype'),
111 ('profile_id', 'owner', 'parent'))),
112 )
109 }, 113 },
114 9: {'specific': 'update_v9'
115 },
110 8: {'specific': 'update_v8' 116 8: {'specific': 'update_v8'
111 }, 117 },
112 7: {'specific': 'update_v7' 118 7: {'specific': 'update_v7'
113 }, 119 },
114 6: {'cols create': {'history': ('stanza_id TEXT',)}, 120 6: {'cols create': {'history': ('stanza_id TEXT',)},
832 ## Files 838 ## Files
833 839
834 @defer.inlineCallbacks 840 @defer.inlineCallbacks
835 def getFiles(self, client, file_id=None, version='', parent=None, type_=None, 841 def getFiles(self, client, file_id=None, version='', parent=None, type_=None,
836 file_hash=None, hash_algo=None, name=None, namespace=None, mime_type=None, 842 file_hash=None, hash_algo=None, name=None, namespace=None, mime_type=None,
837 owner=None, access=None, projection=None, unique=False): 843 public_id=None, owner=None, access=None, projection=None, unique=False):
838 """retrieve files with with given filters 844 """retrieve files with with given filters
839 845
840 @param file_id(unicode, None): id of the file 846 @param file_id(unicode, None): id of the file
841 None to ignore 847 None to ignore
842 @param version(unicode, None): version of the file 848 @param version(unicode, None): version of the file
854 query_parts = ["SELECT"] 860 query_parts = ["SELECT"]
855 if unique: 861 if unique:
856 query_parts.append('DISTINCT') 862 query_parts.append('DISTINCT')
857 if projection is None: 863 if projection is None:
858 projection = ['id', 'version', 'parent', 'type', 'file_hash', 'hash_algo', 'name', 864 projection = ['id', 'version', 'parent', 'type', 'file_hash', 'hash_algo', 'name',
859 'size', 'namespace', 'mime_type', 'created', 'modified', 'owner', 865 'size', 'namespace', 'media_type', 'media_subtype', 'public_id', 'created', 'modified', 'owner',
860 'access', 'extra'] 866 'access', 'extra']
861 query_parts.append(','.join(projection)) 867 query_parts.append(','.join(projection))
862 query_parts.append("FROM files WHERE") 868 query_parts.append("FROM files WHERE")
863 filters = ['profile_id=?'] 869 if client is not None:
864 args = [self.profiles[client.profile]] 870 filters = ['profile_id=?']
871 args = [self.profiles[client.profile]]
872 else:
873 if public_id is None:
874 raise exceptions.InternalError(
875 "client can only be omitted when public_id is set")
876 filters = []
877 args = []
865 878
866 if file_id is not None: 879 if file_id is not None:
867 filters.append('id=?') 880 filters.append('id=?')
868 args.append(file_id) 881 args.append(file_id)
869 if version is not None: 882 if version is not None:
886 args.append(name) 899 args.append(name)
887 if namespace is not None: 900 if namespace is not None:
888 filters.append('namespace=?') 901 filters.append('namespace=?')
889 args.append(namespace) 902 args.append(namespace)
890 if mime_type is not None: 903 if mime_type is not None:
891 filters.append('mime_type=?') 904 if '/' in mime_type:
892 args.append(mime_type) 905 filters.extend('media_type=?', 'media_subtype=?')
906 args.extend(mime_type.split('/', 1))
907 else:
908 filters.append('media_type=?')
909 args.append(mime_type)
910 if public_id is not None:
911 filters.append('public_id=?')
912 args.append(public_id)
893 if owner is not None: 913 if owner is not None:
894 filters.append('owner=?') 914 filters.append('owner=?')
895 args.append(owner.full()) 915 args.append(owner.full())
896 if access is not None: 916 if access is not None:
897 raise NotImplementedError('Access check is not implemented yet') 917 raise NotImplementedError('Access check is not implemented yet')
915 file_data['owner'] = jid.JID(owner) 935 file_data['owner'] = jid.JID(owner)
916 defer.returnValue(files_data) 936 defer.returnValue(files_data)
917 937
918 def setFile(self, client, name, file_id, version='', parent=None, type_=C.FILE_TYPE_FILE, 938 def setFile(self, client, name, file_id, version='', parent=None, type_=C.FILE_TYPE_FILE,
919 file_hash=None, hash_algo=None, size=None, namespace=None, mime_type=None, 939 file_hash=None, hash_algo=None, size=None, namespace=None, mime_type=None,
920 created=None, modified=None, owner=None, access=None, extra=None): 940 public_id=None, created=None, modified=None, owner=None, access=None, extra=None):
921 """set a file metadata 941 """set a file metadata
922 942
923 @param client(SatXMPPClient): client owning the file 943 @param client(SatXMPPClient): client owning the file
924 @param name(unicode): name of the file (must not contain "/") 944 @param name(str): name of the file (must not contain "/")
925 @param file_id(unicode): unique id of the file 945 @param file_id(str): unique id of the file
926 @param version(unicode): version of this file 946 @param version(str): version of this file
927 @param parent(unicode): id of the directory containing this file 947 @param parent(str): id of the directory containing this file
928 None if it is a root file/directory 948 None if it is a root file/directory
929 @param type_(unicode): one of: 949 @param type_(str): one of:
930 - file 950 - file
931 - directory 951 - directory
932 @param file_hash(unicode): unique hash of the payload 952 @param file_hash(str): unique hash of the payload
933 @param hash_algo(unicode): algorithm used for hashing the file (usually sha-256) 953 @param hash_algo(str): algorithm used for hashing the file (usually sha-256)
934 @param size(int): size in bytes 954 @param size(int): size in bytes
935 @param namespace(unicode, None): identifier (human readable is better) to group files 955 @param namespace(str, None): identifier (human readable is better) to group files
936 for instance, namespace could be used to group files in a specific photo album 956 for instance, namespace could be used to group files in a specific photo album
937 @param mime_type(unicode): MIME type of the file, or None if not known/guessed 957 @param mime_type(str): media type of the file, or None if not known/guessed
958 @param public_id(str): ID used to server the file publicly via HTTP
938 @param created(int): UNIX time of creation 959 @param created(int): UNIX time of creation
939 @param modified(int,None): UNIX time of last modification, or None to use created date 960 @param modified(int,None): UNIX time of last modification, or None to use created date
940 @param owner(jid.JID, None): jid of the owner of the file (mainly useful for component) 961 @param owner(jid.JID, None): jid of the owner of the file (mainly useful for component)
941 @param access(dict, None): serialisable dictionary with access rules. See [memory.memory] for details 962 @param access(dict, None): serialisable dictionary with access rules. See [memory.memory] for details
942 @param extra(dict, None): serialisable dictionary of any extra data 963 @param extra(dict, None): serialisable dictionary of any extra data
943 will be encoded to json in database 964 will be encoded to json in database
944 """ 965 """
945 if extra is not None: 966 if extra is not None:
946 assert isinstance(extra, dict) 967 assert isinstance(extra, dict)
968
969 if mime_type is None:
970 media_type = media_subtype = None
971 elif '/' in mime_type:
972 media_type, media_subtype = mime_type.split('/', 1)
973 else:
974 media_type, media_subtype = mime_type, None
975
947 query = ('INSERT INTO files(id, version, parent, type, file_hash, hash_algo, name, size, namespace, ' 976 query = ('INSERT INTO files(id, version, parent, type, file_hash, hash_algo, name, size, namespace, '
948 'mime_type, created, modified, owner, access, extra, profile_id) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)') 977 'media_type, media_subtype, public_id, created, modified, owner, access, extra, profile_id) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)')
949 d = self.dbpool.runQuery(query, (file_id, version.strip(), parent, type_, 978 d = self.dbpool.runQuery(query, (file_id, version.strip(), parent, type_,
950 file_hash, hash_algo, 979 file_hash, hash_algo,
951 name, size, namespace, 980 name, size, namespace,
952 mime_type, created, modified, 981 media_type, media_subtype, public_id, created, modified,
953 owner.full() if owner is not None else None, 982 owner.full() if owner is not None else None,
954 json.dumps(access) if access else None, 983 json.dumps(access) if access else None,
955 json.dumps(extra) if extra else None, 984 json.dumps(extra) if extra else None,
956 self.profiles[client.profile])) 985 self.profiles[client.profile]))
957 d.addErrback(lambda failure: log.error(_("Can't save file metadata for [{profile}]: {reason}".format(profile=client.profile, reason=failure)))) 986 d.addErrback(lambda failure: log.error(_("Can't save file metadata for [{profile}]: {reason}".format(profile=client.profile, reason=failure))))
1355 if specific: 1384 if specific:
1356 cmds = yield getattr(self, specific)() 1385 cmds = yield getattr(self, specific)()
1357 ret.extend(cmds or []) 1386 ret.extend(cmds or [])
1358 defer.returnValue(ret) 1387 defer.returnValue(ret)
1359 1388
1389 def update_v9(self):
1390 """Update database from v8 to v9
1391
1392 (public_id on file with UNIQUE constraint, files indexes fix, media_type split)
1393 """
1394 # we have to do a specific update because we can't set UNIQUE constraint when adding a column
1395 # (see https://sqlite.org/lang_altertable.html#alter_table_add_column)
1396 log.info("Database update to v9")
1397
1398 create = {
1399 'files': (("id TEXT NOT NULL", "public_id TEXT", "version TEXT NOT NULL",
1400 "parent TEXT NOT NULL",
1401 "type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format(
1402 file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY),
1403 "file_hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER",
1404 "namespace TEXT", "media_type TEXT", "media_subtype TEXT",
1405 "created DATETIME NOT NULL", "modified DATETIME",
1406 "owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"),
1407 ("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE",
1408 "UNIQUE (public_id)")),
1409
1410 }
1411 index = tuple({'files': (('profile_id', 'owner', 'media_type', 'media_subtype'),
1412 ('profile_id', 'owner', 'parent'))}.items())
1413 # XXX: Sqlite doc recommends to do the other way around (i.e. create new table,
1414 # copy, drop old table then rename), but the RENAME would then add
1415 # "IF NOT EXISTS" which breaks the (admittely fragile) schema comparison.
1416 # TODO: rework sqlite update management, don't try to automatically detect
1417 # update, the database version is now enough.
1418 statements = ["ALTER TABLE files RENAME TO files_old"]
1419 statements.extend(Updater.createData2Raw(create))
1420 cols = ','.join([col_stmt.split()[0] for col_stmt in create['files'][0] if "public_id" not in col_stmt])
1421 old_cols = cols[:]
1422 # we need to split mime_type to the new media_type and media_subtype
1423 old_cols = old_cols.replace(
1424 'media_type,media_subtype',
1425 "substr(mime_type, 0, instr(mime_type,'/')),substr(mime_type, instr(mime_type,'/')+1)"
1426 )
1427 statements.extend([
1428 f"INSERT INTO files({cols}) SELECT {old_cols} FROM files_old",
1429 f"DROP TABLE files_old",
1430 ])
1431 statements.extend(Updater.indexData2Raw(index))
1432 return statements
1433
1360 def update_v8(self): 1434 def update_v8(self):
1361 """Update database from v7 to v8 (primary keys order changes + indexes)""" 1435 """Update database from v7 to v8 (primary keys order changes + indexes)"""
1362 log.info("Database update to v8") 1436 log.info("Database update to v8")
1363 statements = ["PRAGMA foreign_keys = OFF"] 1437 statements = ["PRAGMA foreign_keys = OFF"]
1364 1438