libervia-backend: src/memory/sqlite.py comparison

comparison src/memory/sqlite.py @ 2500:898b6e1fdc7a

memory (sqlite): files handling: /!\ new database schema version - added a table to handle files metadata and hierarchy. - new methods getFiles and setFile allow respectively to retrieve files corresponding to filters, and to set metadata for one file. - fixed stmnt_regex to detect correcly new schema (during schema validation/update on init)

author	Goffi <goffi@goffi.org>
date	Wed, 28 Feb 2018 18:28:39 +0100
parents	0046283a285d
children	4c45df43ea44

comparison

equal deleted inserted replaced

-:af4a38ebf52a
+:898b6e1fdc7a
 log = getLogger(__name__)
 from sat.memory.crypto import BlockCipher, PasswordHasher
 from sat.tools.config import fixConfigOption
 from twisted.enterprise import adbapi
 from twisted.internet import defer
+from twisted.words.protocols.jabber import jid
 from twisted.python import failure
 from collections import OrderedDict
 import re
 import os.path
 import cPickle as pickle
 import hashlib
 import sqlite3
+import json
-CURRENT_DB_VERSION = 4
+CURRENT_DB_VERSION = 5
 # XXX: DATABASE schemas are used in the following way:
 #      - 'current' key is for the actual database schema, for a new base
 #      - x(int) is for update needed between x-1 and x. All number are needed between y and z to do an update
 #        e.g.: if CURRENT_DB_VERSION is 6, 'current' is the actuel DB, and to update from version 3, numbers 4, 5 and 6 are needed
 #      a 'current' data dict can contains the keys:
 #      - 'CREATE': it contains an Ordered dict with table to create as keys, and a len 2 tuple as value, where value[0] are the columns definitions and value[1] are the table constraints
 #      - 'INSERT': it contains an Ordered dict with table where values have to be inserted, and many tuples containing values to insert in the order of the rows (#TODO: manage named columns)
 #      an update data dict (the ones with a number) can contains the keys 'create', 'delete', 'cols create', 'cols delete', 'cols modify', 'insert' or 'specific'. See Updater.generateUpdateData for more infos. This method can be used to autogenerate update_data, to ease the work of the developers.
-# TODO: this database currently doesn't use indexes, it should
+# TODO: indexes need to be improved
 DATABASE_SCHEMAS = {
 "current": {'CREATE': OrderedDict((
 ('profiles',        (("id INTEGER PRIMARY KEY ASC", "name TEXT"),
 ("UNIQUE (name)",))),
 ('components',      (("profile_id INTEGER PRIMARY KEY", "entry_point TEXT NOT NULL"),
 ("FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE",))),
 ('message_types',   (("type TEXT PRIMARY KEY",),
-tuple())),
+())),
 ('history',         (("uid TEXT PRIMARY KEY", "update_uid TEXT", "profile_id INTEGER", "source TEXT", "dest TEXT", "source_res TEXT", "dest_res TEXT",
 "timestamp DATETIME NOT NULL", "received_timestamp DATETIME", # XXX: timestamp is the time when the message was emitted. If received time stamp is not NULL, the message was delayed and timestamp is the declared value (and received_timestamp the time of reception)
 "type TEXT", "extra BLOB"),
 ("FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE", "FOREIGN KEY(type) REFERENCES message_types(type)",
-"UNIQUE (profile_id, timestamp, source, dest, source_res, dest_res)" # avoid storing 2 time the same message (specially for delayed cones)
+"UNIQUE (profile_id, timestamp, source, dest, source_res, dest_res)" # avoid storing 2 time the same message (specially for delayed ones)
 ))),
 ('message',        (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "message TEXT", "language TEXT"),
 ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))),
 ('subject',        (("id INTEGER PRIMARY KEY ASC", "history_uid INTEGER", "subject TEXT", "language TEXT"),
 ("FOREIGN KEY(history_uid) REFERENCES history(uid) ON DELETE CASCADE",))),
 ('private_ind',     (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value TEXT"),
 ("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
 ('private_gen_bin', (("namespace TEXT", "key TEXT", "value BLOB"),
 ("PRIMARY KEY (namespace, key)",))),
 ('private_ind_bin', (("namespace TEXT", "key TEXT", "profile_id INTEGER", "value BLOB"),
-("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE")))
+("PRIMARY KEY (namespace, key, profile_id)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
+('files',           (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL",
+"type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format(
+file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY),
+"hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER",
+"namespace TEXT", "mime_type TEXT",
+"created DATETIME NOT NULL", "modified DATETIME",
+"owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"),
+("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))),
 )),
 'INSERT': OrderedDict((
 ('message_types', (("'chat'",),
 ("'error'",),
 ("'groupchat'",),
 ("'normal'",),
 ("'info'",) # info is not standard, but used to keep track of info like join/leave in a MUC
 )),
 )),
 },
+5:         {'create': {'files': (("id TEXT NOT NULL", "version TEXT NOT NULL", "parent TEXT NOT NULL",
+"type TEXT CHECK(type in ('{file}', '{directory}')) NOT NULL DEFAULT '{file}'".format(
+file=C.FILE_TYPE_FILE, directory=C.FILE_TYPE_DIRECTORY),
+"hash TEXT", "hash_algo TEXT", "name TEXT NOT NULL", "size INTEGER",
+"namespace TEXT", "mime_type TEXT",
+"created DATETIME NOT NULL", "modified DATETIME",
+"owner TEXT", "access TEXT", "extra TEXT", "profile_id INTEGER"),
+("PRIMARY KEY (id, version)", "FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE"))},
+},
 4:         {'create': {'components': (('profile_id INTEGER PRIMARY KEY', 'entry_point TEXT NOT NULL'), ('FOREIGN KEY(profile_id) REFERENCES profiles(id) ON DELETE CASCADE',))}
 },
 3:         {'specific': 'update_v3'
 },
 2:         {'specific': 'update2raw_v2'
 def _updateDb(self, interaction, statements):
 for statement in statements:
 interaction.execute(statement)
-#Profiles
+## Profiles
 def _cacheComponentsAndProfiles(self, components_result):
 """Get components results and send requests profiles
 they will be both put in cache in _profilesCache
 d = self.dbpool.runInteraction(delete)
 d.addCallback(lambda ignore: log.info(_("Profile [%s] deleted") % name))
 d.addErrback(deletionError)
 return d
-#Params
+## Params
 def loadGenParams(self, params_gen):
 """Load general parameters
 @param params_gen: dictionary to fill
 @return: deferred
 d.addCallback(self.__getFirstResult)
 return d
 def setGenParam(self, category, name, value):
 """Save the general parameters in database
 @param category: category of the parameter
 @param name: name of the parameter
 @param value: value to set
 @return: deferred"""
 d = self.dbpool.runQuery("REPLACE INTO param_gen(category,name,value) VALUES (?,?,?)", (category, name, value))
 """
 d = self.dbpool.runQuery("REPLACE INTO param_ind(category,name,profile_id,value) VALUES (?,?,?,?)", (category, name, self.profiles[profile], value))
 d.addErrback(lambda ignore: log.error(_(u"Can't set individual parameter (%(category)s/%(name)s) for [%(profile)s] in database" % {"category": category, "name": name, "profile": profile})))
 return d
-#History
+## History
 def _addToHistoryCb(self, dummy, data):
 # Message metadata were successfuly added to history
 # now we can add message and subject
 uid = data['uid']
 d = self.dbpool.runQuery(u" ".join(query_parts), values)
 d.addCallback(self.sqliteHistoryToList)
 d.addCallback(self.listDict2listTuple)
 return d
-#Private values
+## Private values
 def _privateDataEb(self, failure_, operation, namespace, key=None, profile=None):
 """generic errback for data queries"""
 log.error(_(u"Can't {operation} data in database for namespace {namespace}{and_key}{for_profile}: {msg}").format(
 operation = operation,
 args.append(self.profiles[profile])
 d = self.dbpool.runQuery(u" ".join(query_parts), args)
 d.addErrback(self._privateDataEb, u"delete", namespace, key, profile=profile)
 return d
+## Files
+@defer.inlineCallbacks
+def getFiles(self, client, file_id=None, version=u'', parent=None, type_=None,
+file_hash=None, hash_algo=None, name=None, namespace=None, mime_type=None,
+owner=None, access=None, projection=None, unique=False):
+"""retrieve files with with given filters
+@param file_id(unicode, None): id of the file
+None to ignore
+@param version(unicode, None): version of the file
+None to ignore
+empty string to look for current version
+@param parent(unicode, None): id of the directory containing the files
+None to ignore
+empty string to look for root files/directories
+@param projection(list[unicode], None): name of columns to retrieve
+None to retrieve all
+@param unique(bool): if True will remove duplicates
+other params are the same as for [setFile]
+@return (list[dict]): files corresponding to filters
+"""
+query_parts = ["SELECT"]
+if unique:
+query_parts.append('DISTINCT')
+if projection is None:
+projection = ['id', 'version', 'parent', 'type', 'hash', 'hash_algo', 'name',
+'size', 'namespace', 'mime_type', 'created', 'modified', 'owner',
+'access', 'extra']
+query_parts.append(','.join(projection))
+query_parts.append("FROM files WHERE")
+filters = ['profile_id=?']
+args = [self.profiles[client.profile]]
+if file_id is not None:
+filters.append(u'id=?')
+args.append(file_id)
+if version is not None:
+filters.append(u'version=?')
+args.append(version)
+if parent is not None:
+filters.append(u'parent=?')
+args.append(parent)
+if type_ is not None:
+filters.append(u'type=?')
+args.append(type_)
+if file_hash is not None:
+filters.append(u'hash=?')
+args.append(file_hash)
+if hash_algo is not None:
+filters.append(u'hash_algo=?')
+args.append(hash_algo)
+if name is not None:
+filters.append(u'name=?')
+args.append(name)
+if namespace is not None:
+filters.append(u'namespace=?')
+args.append(namespace)
+if mime_type is not None:
+filters.append(u'mime_type=?')
+args.append(mime_type)
+if owner is not None:
+filters.append(u'owner=?')
+args.append(owner.full())
+if access is not None:
+raise NotImplementedError('Access check is not implemented yet')
+# a JSON comparison is needed here
+filters = u' AND '.join(filters)
+query_parts.append(filters)
+query = u' '.join(query_parts)
+result = yield self.dbpool.runQuery(query, args)
+files_data = [dict(zip(projection, row)) for row in result]
+to_parse = {'access', 'extra'}.intersection(projection)
+to_filter = {'owner'}.intersection(projection)
+if to_parse or to_filter:
+for file_data in files_data:
+for key in to_parse:
+value = file_data[key]
+file_data[key] = {} if value is None else json.loads(value)
+owner = file_data.get('owner')
+if owner is not None:
+file_data['owner'] = jid.JID(owner)
+defer.returnValue(files_data)
+def setFile(self, client, name, file_id, version=u'', parent=None, type_=C.FILE_TYPE_FILE,
+file_hash=None, hash_algo=None, size=None, namespace=None, mime_type=None,
+created=None, modified=None, owner=None, access=None, extra=None):
+"""set a file metadata
+@param client(SatXMPPClient): client owning the file
+@param name(unicode): name of the file (must not contain "/")
+@param file_id(unicode): unique id of the file
+@param version(unicode): version of this file
+@param parent(unicode): id of the directory containing this file
+None if it is a root file/directory
+@param type_(unicode): one of:
+- file
+- directory
+@param file_hash(unicode): unique hash of the payload
+@param hash_algo(unicode): algorithm used for hashing the file (usually sha-256)
+@param size(int): size in bytes
+@param namespace(unicode, None): identifier (human readable is better) to group files
+for instance, namespace could be used to group files in a specific photo album
+@param mime_type(unicode): MIME type of the file, or None if not known/guessed
+@param created(int): UNIX time of creation
+@param modified(int,None): UNIX time of last modification, or None to use created date
+@param owner(jid.JID, None): jid of the owner of the file (mainly useful for component)
+@param access(dict, None): serialisable dictionary with access rules. See [memory.memory] for details
+@param extra(dict, None): serialisable dictionary of any extra data
+will be encoded to json in database
+"""
+if extra is not None:
+assert isinstance(extra, dict)
+query = ('INSERT INTO files(id, version, parent, type, hash, hash_algo, name, size, namespace, '
+'mime_type, created, modified, owner, access, extra, profile_id) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)')
+d = self.dbpool.runQuery(query, (file_id, version.strip(), parent, type_,
+file_hash, hash_algo,
+name, size, namespace,
+mime_type, created, modified,
+owner.full() if owner is not None else None,
+json.dumps(access) if access else None,
+json.dumps(extra) if extra else None,
+self.profiles[client.profile]))
+d.addErrback(lambda failure: log.error(_(u"Can't save file metadata for [{profile}]: {reason}".format(profile=client.profile, reason=failure))))
+return d
 ##Helper methods##
 def __getFirstResult(self, result):
 """Return the first result of a database query
 Useful when we are looking for one specific value"""
 return None if not result else result[0][0]
 class Updater(object):
-stmnt_regex = re.compile(r"(?:[\w ]+(?:\([\w, ]+\))?)+")
+stmnt_regex = re.compile(r"[\w/' ]+(?:\(.*?\))?[^,]*")
 clean_regex = re.compile(r"^ +|(?<= ) +|(?<=,) +| +$")
 CREATE_SQL = "CREATE TABLE %s (%s)"
 INSERT_SQL = "INSERT INTO %s VALUES (%s)"
 DROP_SQL = "DROP TABLE %s"
 ALTER_SQL = "ALTER TABLE %s ADD COLUMN %s"
 @return: deferred which fire a list of SQL update statements, or None if no update is needed
 """
 local_version = yield self.getLocalVersion()
 raw_local_sch = yield self.getLocalSchema()
 local_sch = self.rawStatements2data(raw_local_sch)
 current_sch = DATABASE_SCHEMAS['current']['CREATE']
 local_hash = self.statementHash(local_sch)
 current_hash = self.statementHash(current_sch)

Mercurial > libervia-backend

comparison src/memory/sqlite.py @ 2500:898b6e1fdc7a