#!/bin/sh
# Do various clean-up tasks in the matrix-synapse PostgreSQL database.
# Intended to be run as a cron job every 6 hours or so.

# URL to connect to matrix
MATRIXURL="http://localhost:8008"
# matrix postgresql database credentials
SQLUSER=synapse
SQLPASSWD=password
SQLDB=synapse
# path to synapse_auto_compressor binary (has to be downloaded separately)
# see https://github.com/matrix-org/rust-synapse-compress-state
SYNAPSE_COMPRESSOR_PATH="/etc/matrix-synapse/synapse_auto_compressor"
# prefix for bridge users and rooms (hardcoded in bridge)
PREFIX="pp"
# homeserver
HOMESERVER="pixelplanet.fun"
# admin user of bridge channels
# (the bridge creates it automatically, just make sure to run the bridge
#  at least once before running this script)
ADMINID="@${PREFIX}_admin:${HOMESERVER}"
# path to synapse-find-unreferenced-state-groups
# see https://github.com/erikjohnston/synapse-find-unreferenced-state-groups
# (not needed if you will never use the "clean_states" argument)
SYNAPSE_UNREFERENCED_STATES="/etc/matrix-synapse/rust-synapse-find-unreferenced-state-groups"
# ANSI color codes (kept as literal backslash sequences; printf expands them)
R='\033[0;31m' # red
G='\033[0;32m' # green
Y='\033[1;33m' # yellow (was 1;32, which is bold green)
B='\033[0;34m' # blue
L='\033[0;36m' # cyan
NC='\033[0m'   # reset / no color
# file holding the PID of the running instance
PIDFILE="/var/run/matrixpurge.pid"

echo "----------CLEANING UP POSTGRESQL MATRIX DATABASE------------"
# Not fatal when the directory is missing; everything below uses absolute
# paths and network connections.
cd /var/lib/postgresql || echo "warning: could not cd to /var/lib/postgresql" 1>&2
# Get an access token for the bridge admin user, creating a new one if needed.
# Globals:
#   SQLUSER, SQLPASSWD, SQLDB, ADMINID (read)
#   TOKEN (written), TOKENID (written when a new token is inserted)
# The token is stored in access_tokens under the device id 'SQLCLEANER'.
get_admin_token() {
  echo "--Get token for admin user"
  # xargs trims the padding psql -t puts around the value
  TOKEN=$(psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "select token from access_tokens where user_id = '${ADMINID}' and device_id = 'SQLCLEANER' limit 1;" | xargs)
  if [ -z "${TOKEN}" ]; then
    echo "Non exists, generating new Token..."
    TOKEN=$(cat /proc/sys/kernel/random/uuid)
    # coalesce keeps the id numeric even when access_tokens is empty
    TOKENID=$(psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "select coalesce(max(id), 0) + 1 from access_tokens" | xargs)
    psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "insert into access_tokens(id, user_id, token, device_id, last_validated, used) values (${TOKENID}, '${ADMINID}', '${TOKEN}', 'SQLCLEANER', 1656788062940, 'f')"
  fi
}
# Purge (delete) a room via the Synapse admin API and wait until it finished.
# Arguments:
#   $1 - id of the room to purge
# Globals:
#   TOKEN, MATRIXURL (read)
#   ROOM, D_IDRET, D_ID, D_STATUS, D_STATUSRET (written)
# Returns:
#   1 if the API did not hand back a delete_id (its error is printed)
purge_room() {
  ROOM="${1}"
  D_IDRET=$(curl --silent --max-time 900 --insecure -XDELETE -H "Authorization: Bearer ${TOKEN}" -H "Content-Type: application/json" -d "{\"block\": false, \"purge\": true}" "${MATRIXURL}/_synapse/admin/v2/rooms/${ROOM}")
  D_ID=$(printf '%s\n' "${D_IDRET}" | jq -r '.delete_id')
  # empty means curl/jq produced nothing, "null" means the key is missing
  if [ -z "${D_ID}" ] || [ "${D_ID}" = "null" ]; then
    printf '%s\n' "${D_IDRET}" | jq -r '.error'
    return 1
  fi
  printf 'Waiting for deletion %s for %s' "${D_ID}" "${ROOM}"
  D_STATUS="active"
  D_STATUSRET=""
  sleep 4
  # poll until the status is neither "purging" nor "active"
  while [ "${D_STATUS}" = "purging" ] || [ "${D_STATUS}" = "active" ]; do
    D_STATUSRET=$(curl --silent --max-time 900 --insecure -XGET -H "Authorization: Bearer ${TOKEN}" "${MATRIXURL}/_synapse/admin/v2/rooms/delete_status/${D_ID}")
    D_STATUS=$(printf '%s\n' "${D_STATUSRET}" | jq -r '.status')
    printf "."
    sleep 5
  done
  # API output goes through %s so a '%' in it cannot act as a format directive
  printf '\nPurge finished %s\n' "${D_STATUSRET}"
  echo "You might want to additionally run:"
  echo "  ./matrixpurge.sh clean_states '${ROOM}'"
}
# Purge events older than 14 days in a room. This is done here instead of via
# auto retention because it allows us to time it.
# Arguments:
#   $1 - id of the room
# Globals:
#   TOKEN, MATRIXURL (read)
#   ROOM, TS, P_IDRET, P_ID, P_STATUS, P_STATUSRET (written)
# Returns:
#   1 if the API did not hand back a purge_id (its error is printed)
purge_room_history() {
  ROOM="${1}"
  # ms timestamp of 30 days ago
  #TS=$(( $(date +%s%3N) - 2592000000 ))
  # ms timestamp of 14 days ago
  TS=$(( $(date +%s%3N) - 1209600000 ))
  P_IDRET=$(curl --silent --max-time 900 --insecure -XPOST -H "Authorization: Bearer ${TOKEN}" -H "Content-Type: application/json" -d "{\"delete_local_events\": true, \"purge_up_to_ts\": ${TS}}" "${MATRIXURL}/_synapse/admin/v1/purge_history/${ROOM}")
  P_ID=$(printf '%s\n' "${P_IDRET}" | jq -r '.purge_id')
  # empty means curl/jq produced nothing, "null" means the key is missing
  if [ -z "${P_ID}" ] || [ "${P_ID}" = "null" ]; then
    printf '%s\n' "${P_IDRET}" | jq -r '.error'
    return 1
  fi
  printf 'Waiting for purge %s for %s' "${P_ID}" "${ROOM}"
  P_STATUS="active"
  P_STATUSRET=""
  sleep 4
  # poll while the purge is still reported as active
  while [ "${P_STATUS}" = "active" ]; do
    P_STATUSRET=$(curl --max-time 900 --silent --insecure -XGET -H "Authorization: Bearer ${TOKEN}" "${MATRIXURL}/_synapse/admin/v1/purge_history_status/${P_ID}")
    P_STATUS=$(printf '%s\n' "${P_STATUSRET}" | jq -r '.status')
    printf "."
    sleep 5
  done
  # API output goes through %s so a '%' in it cannot act as a format directive
  printf '\nPurge finished with %s\n' "${P_STATUSRET}"
}
# Walk over every room known to the server and purge what is not needed:
#   - federated rooms without local members are purged completely
#   - rooms without any members are purged completely
#   - rooms with more than 10 members randomly (~40% of the time) get their
#     history older than 14 days purged
# Globals:
#   TOKEN, MATRIXURL, HOMESERVER (read)
#   NEXT_BATCH, RET (written)
check_rooms() {
  echo "-- Checking individual rooms"
  NEXT_BATCH=0
  # stop on "null" (no further page) and on empty (request or parsing failed),
  # otherwise a failing curl would loop forever
  while [ -n "${NEXT_BATCH}" ] && [ "${NEXT_BATCH}" != "null" ]; do
    RET=$(curl --max-time 900 --silent --insecure -XGET -H "Authorization: Bearer ${TOKEN}" "${MATRIXURL}/_synapse/admin/v1/rooms?order_by=joined_members&from=${NEXT_BATCH}")
    # -r keeps JSON backslash escapes intact
    printf '%s\n' "${RET}" | jq -c '.rooms[]' | while IFS= read -r room; do
      amount=$(printf '%s\n' "${room}" | jq -r .joined_members)
      amount_local=$(printf '%s\n' "${room}" | jq -r .joined_local_members)
      id=$(printf '%s\n' "${room}" | jq -r .room_id)
      # everything after the first ':' of the room id is the server name
      server=$(printf '%s\n' "${id}" | sed -e 's/[^:]*://')
      #printf '=> %b%s = %b%s,%s %b%s%b\n' "${L}" "${id}" "${G}" "${amount}" "${amount_local}" "${R}" "${server}" "${NC}"
      printf '=> %s = %s,%s %s\n' "${id}" "${amount}" "${amount_local}" "${server}"
      if [ "${server}" != "${HOMESERVER}" ] && [ "${amount_local}" = "0" ]; then
        echo "Room ${id} is federated, but has no local users in it, purging..."
        purge_room "${id}"
      elif [ "${amount}" = "0" ]; then
        echo "Room ${id} has no members, purging..."
        purge_room "${id}"
      # numeric comparison; '>' inside [ ] would be a redirection to file "10"
      elif [ "${amount}" -gt 10 ]; then
        # randomly decide if purge should happen or not
        if [ "$(date +%1N)" -lt 4 ]; then
          echo "Room ${id} with >10 users, magic decided to delete history of >14d ago..."
          purge_room_history "${id}"
        fi
      fi
    done
    NEXT_BATCH=$(printf '%s\n' "${RET}" | jq -r '.next_batch')
  done
}
# Clear unreferenced state groups of one given room.
# Arguments:
#   $1 - id of the room
# Globals:
#   SYNAPSE_UNREFERENCED_STATES, SQLUSER, SQLPASSWD, SQLDB, L, NC (read)
#   ROOM (written)
# Stops matrix-synapse while the state groups are deleted and starts it again
# afterwards.
clear_state_groups_of_room() {
  ROOM="${1}"
  rm -f "/tmp/sgstmp.txt"
  printf '=> %b%s%b\n' "${L}" "${ROOM}" "${NC}"
  nice -n 10 "${SYNAPSE_UNREFERENCED_STATES}" -p "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -r "${ROOM}" -o "/tmp/sgstmp.txt"
  # only act when the tool wrote an output file
  if [ -f "/tmp/sgstmp.txt" ]; then
    # can't do that while synapse is running
    systemctl stop matrix-synapse
    psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "CREATE TEMPORARY TABLE unreffed(id BIGINT PRIMARY KEY); COPY unreffed FROM stdin WITH (FORMAT 'csv'); DELETE FROM state_groups_state WHERE state_group IN (SELECT id FROM unreffed); DELETE FROM state_group_edges WHERE state_group IN (SELECT id FROM unreffed); DELETE FROM state_groups WHERE id IN (SELECT id FROM unreffed);" < /tmp/sgstmp.txt
    systemctl start matrix-synapse
    rm -f "/tmp/sgstmp.txt"
  fi
}
# Collect the unreferenced state groups of the 20 largest rooms into one file.
# Arguments:
#   $1 - path of the file the state group ids get appended to
#        (removed first if it already exists)
# Globals:
#   TOKEN, MATRIXURL, SYNAPSE_UNREFERENCED_STATES, SQLUSER, SQLPASSWD, SQLDB,
#   L, NC (read)
#   files, RET (written)
check_state_groups() {
  files="${1}"
  echo "--- Checking largest room state groups"
  rm -f "${files}"
  rm -f "/tmp/sgstmp.txt"
  RET=$(curl --max-time 900 --silent --insecure -XGET -H "Authorization: Bearer ${TOKEN}" "${MATRIXURL}/_synapse/admin/v1/rooms?order_by=joined_members&limit=20")
  # -r keeps JSON backslash escapes intact
  printf '%s\n' "${RET}" | jq -c '.rooms[]' | while IFS= read -r room; do
    id=$(printf '%s\n' "${room}" | jq -r .room_id)
    printf '=> %b%s%b\n' "${L}" "${id}" "${NC}"
    nice -n 10 "${SYNAPSE_UNREFERENCED_STATES}" -p "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -r "${id}" -o "/tmp/sgstmp.txt"
    # append this room's result, if any, to the collection file
    if [ -f "/tmp/sgstmp.txt" ]; then
      cat "/tmp/sgstmp.txt" >> "${files}"
      rm -f "/tmp/sgstmp.txt"
    fi
  done
}
# Clear unreferenced state groups of the 20 largest rooms.
# The state groups are collected twice, 10 seconds apart; the deletion only
# happens when both runs agree. Stops matrix-synapse while deleting and
# reindexing and starts it again afterwards.
# Globals:
#   SQLUSER, SQLPASSWD, SQLDB (read)
# Exits the script with 1 if the state groups changed between the two runs.
clear_state_groups() {
  echo "--Clearing unreferenced state groups"
  check_state_groups "/tmp/sgs.txt"
  sleep 10
  check_state_groups "/tmp/sgs2.txt"
  # Neither run found anything: without this check diff would fail on the
  # missing files and wrongly report a change.
  if [ ! -s "/tmp/sgs.txt" ] && [ ! -s "/tmp/sgs2.txt" ]; then
    echo "No unreferenced state groups found, nothing to do."
    rm -f "/tmp/sgs.txt" "/tmp/sgs2.txt"
    return 0
  fi
  if ! diff "/tmp/sgs.txt" "/tmp/sgs2.txt" > /dev/null; then
    echo "State groups changed while checking, exiting..."
    rm -f "/tmp/sgs.txt" "/tmp/sgs2.txt"
    exit 1
  fi
  rm -f "/tmp/sgs2.txt"
  # can't run this while synapse is running
  systemctl stop matrix-synapse
  psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "CREATE TEMPORARY TABLE unreffed(id BIGINT PRIMARY KEY); COPY unreffed FROM stdin WITH (FORMAT 'csv'); DELETE FROM state_groups_state WHERE state_group IN (SELECT id FROM unreffed); DELETE FROM state_group_edges WHERE state_group IN (SELECT id FROM unreffed); DELETE FROM state_groups WHERE id IN (SELECT id FROM unreffed);" < /tmp/sgs.txt
  rm -f "/tmp/sgs.txt"
  psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "REINDEX (VERBOSE) DATABASE ${SQLDB}"
  systemctl start matrix-synapse
}
# Print a curl config that kicks all appservice (bridge) users from a room
# who did not send any message in the past 30 days and have no entry in
# user_ips.
# Arguments:
#   $1 - id of the room
# Globals:
#   SQLUSER, SQLPASSWD, SQLDB, PREFIX, HOMESERVER, ADMINID, MATRIXURL,
#   TOKEN (read)
#   room, ASUSERS, KICKURL, CNT (written)
# Outputs:
#   one curl config section per user on stdout, separated by "next"
# Returns:
#   0 if at least one user was printed, 1 if there is nobody to kick
get_curl_config() {
  room=${1}
  ASUSERS=$(psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "select user_id from users_in_public_rooms u where room_id = '${room}' and user_id like '@${PREFIX}_%:${HOMESERVER}' and user_id != '${ADMINID}' and not exists ( select from events where room_id = '${room}' and sender = u.user_id and to_timestamp(received_ts/1000) > now() - interval '30 DAYS' ) and not exists ( select from user_ips where user_id = u.user_id )")
  KICKURL="${MATRIXURL}/_matrix/client/v3/rooms/${room}/kick"
  # CNT doubles as "first iteration" flag and as return value
  CNT=1
  # intentionally unquoted: psql prints one user id per line
  for user in ${ASUSERS}; do
    if [ "${CNT}" -eq 1 ]; then
      CNT=0
    else
      # "next" starts a new transfer in a curl config file
      echo "next"
    fi
    # printf instead of echo, whose backslash handling differs between shells
    printf 'header="Authorization: Bearer %s"\n' "${TOKEN}"
    printf '%s\n' 'header="Content-Type: application/json"'
    printf 'data="{\\"user_id\\": \\"%s\\"}"\n' "${user}"
    printf 'url=%s\n' "${KICKURL}"
  done
  return "${CNT}"
}
# Clean up the appservice (bridge) rooms, i.e. every room with an alias
# starting with '#${PREFIX}_': delete push actions of logged-out bridge users
# and kick inactive bridge users.
# Globals:
#   SQLUSER, SQLPASSWD, SQLDB, PREFIX, HOMESERVER (read)
#   ROOMS, room, kickcfg (written)
# Returns:
#   1 if no temporary file could be created
check_api_rooms() {
  ROOMS=$(psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "select room_id from room_aliases where room_alias like '#${PREFIX}_%'")
  # intentionally unquoted: psql prints one room id per line
  for room in ${ROOMS}; do
    echo "=> Delete push_actions of logged-out users from ${room} and kick out inactive users..."
    # Clean event_push_actions of not-logged-in application service users
    # see https://github.com/matrix-org/synapse/issues/5569
    # This command can be really slow, if it takes too long, remove the
    # "not exists..." part and it won't care about if logged in or not
    psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "delete from event_push_actions u where room_id = '${room}' and user_id like '@${PREFIX}_%:${HOMESERVER}' and not exists ( select from user_ips where user_id = u.user_id and to_timestamp(last_seen/1000) > now() - interval '2 days' )"
    # The config contains the admin token, so use a private mktemp file
    # instead of a predictable name in /tmp.
    kickcfg=$(mktemp) || return 1
    # get_curl_config returns non-zero when there is nobody to kick
    if get_curl_config "${room}" > "${kickcfg}"; then
      curl --silent --parallel --parallel-immediate --parallel-max 10 --config "${kickcfg}" && echo ""
    fi
    rm -f "${kickcfg}"
  done
}
# Disable the ratelimit for the admin user via the Synapse admin API by
# overriding messages_per_second with 0.
# Globals:
#   ADMINID, MATRIXURL, TOKEN (read)
#   RATEURL (written)
disable_ratelimit() {
  echo "--Disabling ratelimit for admin user ${ADMINID}..."
  RATEURL="${MATRIXURL}/_synapse/admin/v1/users/${ADMINID}/override_ratelimit"
  curl --silent --insecure -XPOST -H "Authorization: Bearer ${TOKEN}" -H "Content-Type: application/json" -d "{\"messages_per_second\": 0}" "${RATEURL}"
}
# Reset rust-synapse-compress-state by dropping its bookkeeping tables
# (only done when requested with the "reset" argument).
# Globals:
#   SQLUSER, SQLPASSWD, SQLDB (read)
reset_state_compressor() {
  echo "--Reset rust-synapse-compress-state"
  psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "drop table state_compressor_state; drop table state_compressor_progress; drop table state_compressor_total_progress;"
}
# Run synapse_auto_compressor on the database at reduced priority.
# Globals:
#   SYNAPSE_COMPRESSOR_PATH, SQLUSER, SQLPASSWD, SQLDB (read)
compress_state() {
  echo "--Compress states..."
  # https://github.com/matrix-org/rust-synapse-compress-state
  nice -n 10 "${SYNAPSE_COMPRESSOR_PATH}" -p "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c 500 -n 100
}
# Delete old rows from cache_invalidation_stream_by_instance and run a full
# vacuum on the database.
# Globals:
#   SQLUSER, SQLPASSWD, SQLDB (read)
clean_db() {
  echo "--Clean up cache_invalidation_stream_by_instance"
  # see https://github.com/matrix-org/synapse/issues/8269
  # Delete entries OLDER than one month. The comparison used to be '>', which
  # removed the recent rows and kept the old ones forever.
  psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "delete from cache_invalidation_stream_by_instance where to_timestamp(invalidation_ts/1000) < now() - interval '1 months';"
  echo "--Vaccum..."
  time psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "VACUUM FULL"
}
# Print the total size of the database and its 20 largest relations.
# Globals:
#   SQLUSER, SQLPASSWD, SQLDB (read)
print_stats() {
  echo "--DONE. Current database size is..."
  # use the configured database instead of a hard-coded 'synapse'
  psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "SELECT pg_size_pretty( pg_database_size( '${SQLDB}' ) )"
  # the query below contains '!'; turn off history expansion when run by bash
  [ -n "${BASH_VERSION}" ] && set +H
  psql "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "SELECT nspname || '.' || relname AS \"relation\", pg_size_pretty(pg_total_relation_size(c.oid)) AS \"total_size\" FROM pg_class c LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace) WHERE nspname NOT IN ('pg_catalog', 'information_schema') AND c.relkind <> 'i' AND nspname !~ '^pg_toast' ORDER BY pg_total_relation_size(c.oid) DESC LIMIT 20;"
}
# ---------------------------------------------------------------------------
# Entry point
#   (no argument)        run the regular clean-up
#   reset                like the regular clean-up, but reset the state
#                        compressor first
#   clean_states [ROOM]  clear unreferenced state groups of ROOM, or of the
#                        20 largest rooms if no ROOM is given
#   delete ROOM          purge ROOM
# ---------------------------------------------------------------------------

# refuse to start while another instance recorded in the PID file is alive
if [ -f "${PIDFILE}" ] && ps -p "$(cat "${PIDFILE}")" > /dev/null; then
  echo "matrixpurge.sh already running, exiting."
  echo "If you think this is a mistake, delete this file: ${PIDFILE}"
  exit 1
fi
echo $$ > "${PIDFILE}"
# Remove the PID file on every way out, including the early exits below.
# The signal trap turns a signal into a regular exit so the EXIT trap fires.
trap 'rm -f "${PIDFILE}"' EXIT
trap 'exit 1' HUP INT TERM

get_admin_token

case "${1}" in
  clean_states)
    if [ "${2}" ]; then
      clear_state_groups_of_room "${2}"
    else
      clear_state_groups
    fi
    exit 0
    ;;
  delete)
    # "delete" without a room id is treated like an unknown argument
    if [ "${2}" != "" ]; then
      purge_room "${2}"
      exit 0
    fi
    echo "Unrecognized argument: ${1}" 1>&2
    exit 1
    ;;
  "" | reset)
    # regular run, handled below
    ;;
  *)
    echo "Unrecognized argument: ${1}" 1>&2
    exit 1
    ;;
esac

check_rooms
disable_ratelimit
check_api_rooms
if [ "${1}" = "reset" ]; then
  reset_state_compressor
fi
compress_state
clean_db
print_stats