Mercurial > libervia-backend
comparison libervia/backend/memory/migration/versions/fe3a02cb4bec_convert_legacypickle_columns_to_json.py @ 4212:5f2d496c633f
core: get rid of `pickle`:
Use of `pickle` to serialise data was a technical legacy that was causing trouble: for
storage in the database, for updates (if a class was serialised, a change could break the
update), and for security (pickle can lead to code execution).
This patch removes all use of pickle in favour of JSON, notably:
- for caching data, a Pydantic model is now used instead
- for SQLAlchemy model, the LegacyPickle is replaced by JSON serialisation
- in XEP-0373 a class `PublicKeyMetadata` was serialised. New `from_dict` and
  `to_dict` methods have been implemented to do the serialisation.
- new methods to (de)serialise data can now be specified with Identity data types. It is
notably used to (de)serialise `path` of avatars.
A migration script has been created to convert data (for upgrade or downgrade), with
special care for the XEP-0373 case. Depending on the size of the database, this migration
script can take a long time to run.
rel 443
author | Goffi <goffi@goffi.org> |
---|---|
date | Fri, 23 Feb 2024 13:31:04 +0100 |
parents | |
children | 1a7a3e4b52a4 |
comparison
equal
deleted
inserted
replaced
4211:be89ab1cbca4 | 4212:5f2d496c633f |
---|---|
1 """convert LegacyPickle columns to JSON | |
2 | |
3 Revision ID: fe3a02cb4bec | |
4 Revises: 610345f77e75 | |
5 Create Date: 2024-02-22 14:55:59.993983 | |
6 | |
7 """ | |
8 from alembic import op | |
9 import sqlalchemy as sa | |
10 import pickle | |
11 import json | |
12 from libervia.backend.plugins.plugin_xep_0373 import PublicKeyMetadata | |
13 | |
# Revision identifiers, used by Alembic to order migrations.
revision = "fe3a02cb4bec"
down_revision = "610345f77e75"
branch_labels = None
depends_on = None
19 | |
20 | |
def convert_pickle_to_json(value, table, primary_keys):
    """Convert pickled data to JSON, handling potential errors.

    @param value: raw pickled payload (bytes, or str to be encoded first), or None.
    @param table: name of the table the value comes from.
    @param primary_keys: primary key values of the row, used to detect XEP-0373 data.
    @return: JSON string, or None (empty ``history`` extra, or conversion failure).
    """
    if value is None:
        return None
    try:
        # LegacyPickle sometimes stored the payload as str instead of bytes.
        raw = value.encode() if isinstance(value, str) else value
        try:
            data = pickle.loads(raw, encoding="utf-8")
        except ModuleNotFoundError:
            # Data pickled before the "sat" → "libervia" rename references old
            # module paths; patch them and retry.
            renamed = raw.replace(b"sat.plugins", b"libervia.backend.plugins")
            data = pickle.loads(renamed, encoding="utf-8")

        is_xep_0373_pubkeys = (
            table == "private_ind_bin"
            and primary_keys[0] == "XEP-0373"
            and not primary_keys[1].startswith("/trust")
            and isinstance(data, set)
            and bool(data)
            and isinstance(next(iter(data)), PublicKeyMetadata)
        )
        if is_xep_0373_pubkeys:
            # XEP-0373 plugin was pickling an internal class; this can't be converted
            # directly to JSON, so we go through the dedicated `to_dict` method.
            data = [pkm.to_dict() for pkm in data]

        serialised = json.dumps(data, ensure_ascii=False, default=str)
        if serialised == "{}" and table == 'history':
            # For history, we can remove empty data, but for other tables it may be
            # significant.
            serialised = None
        return serialised
    except Exception as e:
        print(
            f"Warning: Failed to convert pickle to JSON, using NULL instead. Error: {e}"
        )
        return None
60 | |
61 | |
def upgrade():
    """Rewrite every LegacyPickle column as JSON, row by row."""
    print(
        "This migration may take very long, please be patient and don't stop the process."
    )
    connection = op.get_bind()

    # (table, data column, primary key columns…)
    targets = [
        ("history", "extra", "uid"),
        ("private_gen_bin", "value", "namespace", "key"),
        ("private_ind_bin", "value", "namespace", "key", "profile_id"),
    ]

    for table, column, *pk_cols in targets:
        where_clause = " AND ".join(f"{pk} = :{pk}" for pk in pk_cols)
        select_stmt = sa.text(f"SELECT {', '.join(pk_cols)}, {column} FROM {table}")
        update_stmt = sa.text(
            f"UPDATE {table} SET {column} = :{column} WHERE {where_clause}"
        )

        for row in connection.execute(select_stmt):
            *pk_values, value = row
            if value is None:
                # nothing to convert, leave the row untouched
                continue
            params = dict(zip(pk_cols, pk_values))
            params[column] = convert_pickle_to_json(value, table, pk_values)
            connection.execute(update_stmt.bindparams(**params))
89 | |
90 | |
def convert_json_to_pickle(value, table, primary_keys):
    """Convert JSON data back to pickled data, handling potential errors.

    @param value: JSON string, or None.
    @param table: name of the table the value comes from.
    @param primary_keys: primary key values of the row, used to detect XEP-0373 data.
    @return: protocol-0 pickle bytes, or None on failure.
    """
    if value is None:
        return None
    try:
        data = json.loads(value)
        # XEP-0373 public key metadata needs special handling: it was originally
        # stored as a set of PublicKeyMetadata instances, not plain JSON data.
        restore_pkm_set = (
            table == "private_ind_bin"
            and primary_keys[0] == "XEP-0373"
            and not primary_keys[1].startswith("/trust")
            and isinstance(data, list)
        )
        if restore_pkm_set:
            # Convert list of dicts back to set of PublicKeyMetadata objects.
            data = {PublicKeyMetadata.from_dict(item) for item in data}
        # Protocol 0 matches what LegacyPickle used to write.
        return pickle.dumps(data, 0)
    except Exception as e:
        print(
            f"Warning: Failed to convert JSON to pickle, using NULL instead. Error: {e}"
        )
        return None
113 | |
114 | |
def downgrade():
    """Rewrite every JSON column back to the LegacyPickle format, row by row."""
    print(
        "Reverting JSON columns to LegacyPickle format. This may take a while, please be "
        "patient."
    )
    connection = op.get_bind()

    # (table, data column, primary key columns…)
    targets = [
        ("history", "extra", "uid"),
        ("private_gen_bin", "value", "namespace", "key"),
        ("private_ind_bin", "value", "namespace", "key", "profile_id"),
    ]

    for table, column, *pk_cols in targets:
        where_clause = " AND ".join(f"{pk} = :{pk}" for pk in pk_cols)
        select_stmt = sa.text(f"SELECT {', '.join(pk_cols)}, {column} FROM {table}")
        update_stmt = sa.text(
            f"UPDATE {table} SET {column} = :{column} WHERE {where_clause}"
        )

        for row in connection.execute(select_stmt):
            *pk_values, value = row
            if value is None:
                # nothing to convert, leave the row untouched
                continue
            params = dict(zip(pk_cols, pk_values))
            params[column] = convert_json_to_pickle(value, table, pk_values)
            connection.execute(update_stmt.bindparams(**params))