This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

N + 3: Read from column full_user_id rather than user_id of tables profiles and user_filters #15649

Merged: 18 commits, merged on Jun 3, 2023
2 changes: 1 addition & 1 deletion synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-SCHEMA_VERSION = 77 # remember to update the list below when updating
+SCHEMA_VERSION = 78 # remember to update the list below when updating
@MadLittleMods (Contributor) commented on Jun 1, 2023:

The notes from #15691 are very helpful when trying to understand if this PR is the correct next step in the migration process 💡

And a breakdown of the PRs for this migration process so far:

  • #15458 (N + 1): Bump SCHEMA_VERSION to 76. Add the full_user_id column and start writing to both the old and new columns going forward
  • #15537 (N + 2): Bump SCHEMA_VERSION to 77 and SCHEMA_COMPAT_VERSION to 76, because we added a NOT VALID constraint that only ensures new inserts/updates always fill in full_user_id, which isn't backwards compatible: older Synapse versions don't write to the new full_user_id column. Also add a background update to backfill full_user_id for all rows
  • #15649 (N + 3): Bump SCHEMA_VERSION to 78, add a foreground update to finish off the backfill, turn the NOT VALID constraint into a validated one (full_user_id NOT NULL), and start reading from full_user_id (see the SQL sketch after this list)
  • Future PRs needed for N + 4 and N + 5, where we stop writing to user_id and then drop the column
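
For reference, a minimal sketch of the Postgres side of that NOT VALID constraint lifecycle for the profiles table. The VALIDATE statement matches the delta script in this PR; the ADD CONSTRAINT form is an assumption about what #15537 ran, not a copy of it, and the constraint name is taken from the scripts below.

# Hedged sketch of the NOT VALID / VALIDATE lifecycle; the ADD CONSTRAINT
# statement is assumed, not quoted from #15537.

# N + 2 (#15537): add the constraint without scanning existing rows.
# New inserts/updates must satisfy it immediately; old rows are left alone.
ADD_NOT_VALID_SQL = """
ALTER TABLE profiles
    ADD CONSTRAINT full_user_id_not_null
    CHECK (full_user_id IS NOT NULL) NOT VALID
"""

# N + 3 (this PR): once the backfill has filled full_user_id everywhere,
# a single table scan marks the constraint as validated.
VALIDATE_SQL = """
ALTER TABLE profiles VALIDATE CONSTRAINT full_user_id_not_null
"""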

@MadLittleMods (Contributor) commented on Jun 2, 2023:

✔️ Looks good based on the notes linked above, though I've never done this kind of migration before and I'm definitely not an authoritative source on it.

"""Represents the expectations made by the codebase about the database schema

This should be incremented whenever the codebase changes its requirements on the
@@ -0,0 +1,85 @@
# Copyright 2023 The Matrix.org Foundation C.I.C
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from synapse.config.homeserver import HomeServerConfig
from synapse.storage.database import LoggingTransaction
from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine


def run_upgrade(
    cur: LoggingTransaction,
    database_engine: BaseDatabaseEngine,
    config: HomeServerConfig,
) -> None:
    hostname = config.server.server_name

    if isinstance(database_engine, PostgresEngine):
        # check if the constraint can be validated
        check_sql = """
        SELECT user_id from profiles WHERE full_user_id IS NULL
        """
        cur.execute(check_sql)
        res = cur.fetchall()

        if res:
            # there are rows the background job missed, finish them here before we validate the constraint
            process_rows_sql = """
            UPDATE profiles
            SET full_user_id = '@' || user_id || ?
            WHERE user_id IN (
                SELECT user_id FROM profiles WHERE full_user_id IS NULL
            )
            """
            cur.execute(process_rows_sql, (f":{hostname}",))

        # Now we can validate
        validate_sql = """
        ALTER TABLE profiles VALIDATE CONSTRAINT full_user_id_not_null
        """
        cur.execute(validate_sql)

    else:
        # in SQLite we need to rewrite the table to add the constraint
        cur.execute("DROP TABLE IF EXISTS temp_profiles")

        create_sql = """
        CREATE TABLE temp_profiles (
            full_user_id text NOT NULL,
            user_id text,
            displayname text,
            avatar_url text,
            UNIQUE (full_user_id),
            UNIQUE (user_id)
        )
        """
        cur.execute(create_sql)

        copy_sql = """
        INSERT INTO temp_profiles (
            user_id,
            displayname,
            avatar_url,
            full_user_id)
            SELECT user_id, displayname, avatar_url, '@' || user_id || ':' || ? FROM profiles
        """
        cur.execute(copy_sql, (f"{hostname}",))

        drop_sql = """
        DROP TABLE profiles
        """
        cur.execute(drop_sql)

        rename_sql = """
        ALTER TABLE temp_profiles RENAME to profiles
        """
        cur.execute(rename_sql)
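
The SQLite branch has to copy and swap the whole table because SQLite's ALTER TABLE cannot add a NOT NULL constraint to an existing column. The following standalone sketch (not part of the PR) replays that copy-and-swap sequence against an in-memory database with made-up sample data, reusing the table and column names from the script above.

# Standalone illustration of the copy-and-swap approach in the SQLite branch.
# The shape of the "old" profiles table and the sample row are assumptions
# for the demo; only the rebuild steps mirror the migration.
import sqlite3

hostname = "example.com"  # stands in for config.server.server_name
conn = sqlite3.connect(":memory:")
cur = conn.cursor()

# Old shape: full_user_id exists (added in N + 1) but is still nullable.
cur.execute(
    """
    CREATE TABLE profiles (
        user_id text,
        displayname text,
        avatar_url text,
        full_user_id text,
        UNIQUE (user_id)
    )
    """
)
cur.execute("INSERT INTO profiles (user_id, displayname) VALUES ('alice', 'Alice')")

# New table with the NOT NULL constraint, then copy, drop, and rename,
# as in the migration's SQLite branch.
cur.execute(
    """
    CREATE TABLE temp_profiles (
        full_user_id text NOT NULL,
        user_id text,
        displayname text,
        avatar_url text,
        UNIQUE (full_user_id),
        UNIQUE (user_id)
    )
    """
)
cur.execute(
    """
    INSERT INTO temp_profiles (user_id, displayname, avatar_url, full_user_id)
    SELECT user_id, displayname, avatar_url, '@' || user_id || ':' || ? FROM profiles
    """,
    (hostname,),
)
cur.execute("DROP TABLE profiles")
cur.execute("ALTER TABLE temp_profiles RENAME TO profiles")

print(cur.execute("SELECT full_user_id FROM profiles").fetchall())
# prints [('@alice:example.com',)]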
@@ -0,0 +1,89 @@
# Copyright 2023 The Matrix.org Foundation C.I.C
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from synapse.config.homeserver import HomeServerConfig
from synapse.storage.database import LoggingTransaction
from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine


def run_upgrade(
    cur: LoggingTransaction,
    database_engine: BaseDatabaseEngine,
    config: HomeServerConfig,
) -> None:
    hostname = config.server.server_name

    if isinstance(database_engine, PostgresEngine):
        # check if the constraint can be validated
        check_sql = """
        SELECT user_id from user_filters WHERE full_user_id IS NULL
        """
        cur.execute(check_sql)
        res = cur.fetchall()

        if res:
            # there are rows the background job missed, finish them here before we validate constraint
            process_rows_sql = """
            UPDATE user_filters
            SET full_user_id = '@' || user_id || ?
            WHERE user_id IN (
                SELECT user_id FROM user_filters WHERE full_user_id IS NULL
            )
            """
            cur.execute(process_rows_sql, (f":{hostname}",))

        # Now we can validate
        validate_sql = """
        ALTER TABLE user_filters VALIDATE CONSTRAINT full_user_id_not_null
        """
        cur.execute(validate_sql)

    else:
        cur.execute("DROP TABLE IF EXISTS temp_user_filters")
        create_sql = """
        CREATE TABLE temp_user_filters (
            full_user_id text NOT NULL,
            user_id text NOT NULL,
            filter_id bigint NOT NULL,
            filter_json bytea NOT NULL,
            UNIQUE (full_user_id),
            UNIQUE (user_id)
        )
        """
        cur.execute(create_sql)

        index_sql = """
        CREATE UNIQUE INDEX IF NOT EXISTS user_filters_unique ON
            temp_user_filters (user_id, filter_id)
        """
        cur.execute(index_sql)

        copy_sql = """
        INSERT INTO temp_user_filters (
            user_id,
            filter_id,
            filter_json,
            full_user_id)
            SELECT user_id, filter_id, filter_json, '@' || user_id || ':' || ? FROM user_filters
        """
        cur.execute(copy_sql, (f"{hostname}",))

        drop_sql = """
        DROP TABLE user_filters
        """
        cur.execute(drop_sql)

        rename_sql = """
        ALTER TABLE temp_user_filters RENAME to user_filters
        """
        cur.execute(rename_sql)
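
Note that the two branches assemble full_user_id slightly differently: on Postgres the colon travels in the bound parameter (f":{hostname}"), while on SQLite it sits in the SQL and only the bare hostname is bound. Both produce the same Matrix user ID of the form @localpart:server, as this tiny standalone check (not part of the PR, made-up sample values) shows.

# Standalone check: both concatenation styles build the same Matrix user ID.
hostname = "example.com"  # stands in for config.server.server_name
localpart = "alice"       # what the legacy user_id column stores

postgres_style = "@" + localpart + f":{hostname}"     # SQL: '@' || user_id || ?          with (":example.com",)
sqlite_style = "@" + localpart + ":" + f"{hostname}"  # SQL: '@' || user_id || ':' || ?   with ("example.com",)

assert postgres_style == sqlite_style == "@alice:example.com"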