annotate libervia/backend/memory/migration/versions/fe3a02cb4bec_convert_legacypickle_columns_to_json.py @ 4212:5f2d496c633f

core: get rid of `pickle`: Use of `pickle` to serialise data was a technical legacy that was causing trouble for storage in the database, for updates (if a class was serialised, a change could break an update), and for security (pickle can lead to code execution). This patch removes all use of pickle in favour of JSON, notably: - for caching data, a Pydantic model is now used instead - for SQLAlchemy models, LegacyPickle is replaced by JSON serialisation - in XEP-0373 a class `PublicKeyMetadata` was serialised. New `from_dict` and `to_dict` methods have been implemented to do the serialisation. - new methods to (de)serialise data can now be specified with Identity data types. This is notably used to (de)serialise the `path` of avatars. A migration script has been created to convert data (for upgrade or downgrade), with special care for the XEP-0373 case. Depending on the size of the database, this migration script can take a long time to run. rel 443
author Goffi <goffi@goffi.org>
date Fri, 23 Feb 2024 13:31:04 +0100
parents
children 1a7a3e4b52a4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4212
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
1 """convert LegacyPickle columns to JSON
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
2
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
3 Revision ID: fe3a02cb4bec
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
4 Revises: 610345f77e75
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
5 Create Date: 2024-02-22 14:55:59.993983
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
6
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
7 """
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
8 from alembic import op
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
9 import sqlalchemy as sa
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
10 import pickle
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
11 import json
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
12 from libervia.backend.plugins.plugin_xep_0373 import PublicKeyMetadata
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
13
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
14 # revision identifiers, used by Alembic.
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
15 revision = "fe3a02cb4bec"
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
16 down_revision = "610345f77e75"
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
17 branch_labels = None
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
18 depends_on = None
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
19
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
20
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
def convert_pickle_to_json(value, table, primary_keys):
    """Convert a pickled column value to its JSON text representation.

    The conversion is best effort: any error is reported on stdout and ``None`` is
    returned, so the caller stores NULL instead of aborting the migration.

    :param value: pickled data read from the database (``bytes`` or ``str``), or
        ``None``.
    :param table: name of the table the value comes from; selects the
        table-specific handling below.
    :param primary_keys: key values of the row, in the order they were selected.
        For ``private_ind_bin`` the first two items are read as namespace and key.
    :return: the JSON string, or ``None`` when ``value`` is ``None``, when the
        conversion failed, or when an empty ``{}`` was found in ``history``.
    """
    if value is None:
        return None
    try:
        # some values are converted to bytes with LegacyPickle
        if isinstance(value, str):
            value = value.encode()
        # NOTE(security): unpickling can execute arbitrary code. Presumably the
        # data here only comes from the application's own database; never feed
        # this function with untrusted input.
        try:
            deserialized = pickle.loads(value, encoding="utf-8")
        except ModuleNotFoundError:
            # Retry with references to the ``sat.plugins`` package rewritten to
            # ``libervia.backend.plugins``.
            deserialized = pickle.loads(
                value.replace(b"sat.plugins", b"libervia.backend.plugins"),
                encoding="utf-8",
            )
        if (
            table == "private_ind_bin"
            and primary_keys[0] == "XEP-0373"
            and not primary_keys[1].startswith("/trust")
            and isinstance(deserialized, set)
            # An empty set must take this branch too: otherwise ``default=str``
            # below would store it as the JSON string "set()" instead of "[]",
            # which ``convert_json_to_pickle`` cannot turn back into a set.
            and (
                not deserialized
                or isinstance(next(iter(deserialized)), PublicKeyMetadata)
            )
        ):
            # XEP-0373 plugin was pickling an internal class, this can't be
            # converted directly to JSON, so we do a special treatment using the
            # `to_dict` and `from_dict` methods.
            deserialized = [pkm.to_dict() for pkm in deserialized]

        # ``default=str`` stringifies anything JSON can't represent natively.
        ret = json.dumps(deserialized, ensure_ascii=False, default=str)
        if table == "history" and ret == "{}":
            # For history, we can remove empty data, but for other tables it may
            # be significant.
            ret = None
        return ret
    except Exception as e:
        # Deliberately broad: unpickling may raise almost anything, and a single
        # bad row must not abort the whole migration. The row is identified in
        # the message so that the lost value can be traced.
        print(
            f"Warning: Failed to convert pickle to JSON for table {table!r}, "
            f"row {primary_keys!r}, using NULL instead. Error: {e}"
        )
        return None
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
60
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
61
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
def upgrade():
    """Rewrite the pickled values of the listed columns as JSON text.

    Every row with a non-NULL value in the listed columns is read, converted
    with ``convert_pickle_to_json`` and written back. Values that cannot be
    converted are replaced by NULL (a warning is printed by the converter).
    """
    print(
        "This migration may take very long, please be patient and don't stop the process."
    )
    connection = op.get_bind()

    # (table, column to convert, *columns identifying a row)
    tables_and_columns = [
        ("history", "extra", "uid"),
        ("private_gen_bin", "value", "namespace", "key"),
        ("private_ind_bin", "value", "namespace", "key", "profile_id"),
    ]

    for table, column, *primary_keys in tables_and_columns:
        primary_key_clause = " AND ".join(f"{pk} = :{pk}" for pk in primary_keys)
        # NULL values need no conversion, so they are filtered out by the query.
        select_stmt = sa.text(
            f"SELECT {', '.join(primary_keys)}, {column} FROM {table} "
            f"WHERE {column} IS NOT NULL"
        )
        update_stmt = sa.text(
            f"UPDATE {table} SET {column} = :{column} WHERE {primary_key_clause}"
        )

        # Fetch everything before updating: modifying a table while a SELECT on
        # the same connection is still being iterated gives undefined results on
        # SQLite, and a row seen a second time (already JSON) would fail to
        # unpickle and be overwritten with NULL.
        rows = connection.execute(select_stmt).fetchall()
        for row in rows:
            data = dict(zip(primary_keys, row))
            data[column] = convert_pickle_to_json(row[-1], table, row[:-1])
            connection.execute(update_stmt.bindparams(**data))
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
89
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
90
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
def convert_json_to_pickle(value, table, primary_keys):
    """Convert a JSON column value back to pickled data (protocol 0).

    The conversion is best effort: any error is reported on stdout and ``None`` is
    returned, so the caller stores NULL instead of aborting the migration.

    :param value: JSON text read from the database, or ``None``.
    :param table: name of the table the value comes from; selects the
        table-specific handling below.
    :param primary_keys: key values of the row, in the order they were selected.
        For ``private_ind_bin`` the first two items are read as namespace and key.
    :return: the pickled ``bytes``, or ``None`` when ``value`` is ``None`` or when
        the conversion failed.
    """
    if value is None:
        return None
    try:
        deserialized = json.loads(value)
        # Check for the specific table and primary key conditions that require
        # special handling
        if (
            table == "private_ind_bin"
            and primary_keys[0] == "XEP-0373"
            and not primary_keys[1].startswith("/trust")
        ):
            # Convert list of dicts back to set of PublicKeyMetadata objects
            if isinstance(deserialized, list):
                deserialized = {PublicKeyMetadata.from_dict(d) for d in deserialized}
        return pickle.dumps(deserialized, 0)
    except Exception as e:
        # Deliberately broad: a single bad row must not abort the whole
        # migration. The row is identified in the message so that the lost value
        # can be traced.
        print(
            f"Warning: Failed to convert JSON to pickle for table {table!r}, "
            f"row {primary_keys!r}, using NULL instead. Error: {e}"
        )
        return None
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
113
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
114
5f2d496c633f core: get rid of `pickle`:
Goffi <goffi@goffi.org>
parents:
diff changeset
def downgrade():
    """Rewrite the JSON values of the listed columns back to pickled data.

    Every row with a non-NULL value in the listed columns is read, converted
    with ``convert_json_to_pickle`` and written back. Values that cannot be
    converted are replaced by NULL (a warning is printed by the converter).
    """
    print(
        "Reverting JSON columns to LegacyPickle format. This may take a while, please be "
        "patient."
    )
    connection = op.get_bind()

    # (table, column to convert, *columns identifying a row)
    tables_and_columns = [
        ("history", "extra", "uid"),
        ("private_gen_bin", "value", "namespace", "key"),
        ("private_ind_bin", "value", "namespace", "key", "profile_id"),
    ]

    for table, column, *primary_keys in tables_and_columns:
        primary_key_clause = " AND ".join(f"{pk} = :{pk}" for pk in primary_keys)
        # NULL values need no conversion, so they are filtered out by the query.
        select_stmt = sa.text(
            f"SELECT {', '.join(primary_keys)}, {column} FROM {table} "
            f"WHERE {column} IS NOT NULL"
        )
        update_stmt = sa.text(
            f"UPDATE {table} SET {column} = :{column} WHERE {primary_key_clause}"
        )

        # Fetch everything before updating: modifying a table while a SELECT on
        # the same connection is still being iterated gives undefined results on
        # SQLite, and a row seen a second time (already pickled) would fail to
        # parse as JSON and be overwritten with NULL.
        rows = connection.execute(select_stmt).fetchall()
        for row in rows:
            data = dict(zip(primary_keys, row))
            data[column] = convert_json_to_pickle(row[-1], table, row[:-1])
            connection.execute(update_stmt.bindparams(**data))