matrixbridge/purge/matrixpurge.sh

296 lines
13 KiB
Bash
Executable File

#!/bin/sh
# Do various clean-up tasks in the matrix-synapse postgresql database.
# Meant to be run as a cron job every 6h or so.
#
# Usage:
#   matrixpurge.sh                       regular clean-up run
#   matrixpurge.sh reset                 regular run, resetting the state compressor first
#   matrixpurge.sh clean_states [ROOM]   clear unreferenced state groups of ROOM,
#                                        or of the 20 largest rooms if ROOM is omitted
#   matrixpurge.sh delete ROOM           purge a single room

# URL to connect to matrix
MATRIXURL="http://localhost:8008"
# matrix postgresql database credentials
SQLUSER=synapse
SQLPASSWD=password
SQLDB=synapse
# path to the synapse_auto_compressor binary (has to be downloaded separately)
# see https://github.com/matrix-org/rust-synapse-compress-state
SYNAPSE_COMPRESSOR_PATH="/etc/matrix-synapse/synapse_auto_compressor"
# prefix for bridge users and rooms (hardcoded in bridge)
PREFIX="pp"
# homeserver
HOMESERVER="pixelplanet.fun"
# admin user of bridge channels
# (the bridge creates it automatically, just make sure to run the bridge at
# least once before running this script)
ADMINID="@${PREFIX}_admin:${HOMESERVER}"
# path to synapse-find-unreferenced-state-groups
# see https://github.com/erikjohnston/synapse-find-unreferenced-state-groups
# (not needed if you will never use the "clean_states" argument)
SYNAPSE_UNREFERENCED_STATES="/etc/matrix-synapse/rust-synapse-find-unreferenced-state-groups"
# ANSI color codes (interpreted when used inside a printf format string)
R='\033[0;31m' # red
G='\033[0;32m' # green
Y='\033[1;33m' # yellow (was 1;32, which is bold green)
B='\033[0;34m' # blue
L='\033[0;36m' # cyan
NC='\033[0m'   # reset, no color
# pid file used to detect an already running instance
PIDFILE="/var/run/matrixpurge.pid"
echo "----------CLEANING UP POSTGRESQL MATRIX DATABASE------------"
# work from /var/lib/postgresql for the rest of the run
cd /var/lib/postgresql
# Get the admin access token for matrix-synapse, creating a new one if needed.
# Looks up the token of ${ADMINID} for device 'SQLCLEANER' in access_tokens and,
# if there is none, inserts a freshly generated one directly into the database.
# Globals: reads SQLUSER, SQLPASSWD, SQLDB, ADMINID
#          writes TOKEN (and TOKENID when a new token is created)
get_admin_token () {
  echo "--Get token for admin user"
  # xargs trims the padding and blank lines of 'psql -t' output
  TOKEN=$(psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "select token from access_tokens where user_id = '${ADMINID}' and device_id = 'SQLCLEANER' limit 1;" | xargs)
  if [ -z "${TOKEN}" ]; then
    echo "Non exists, generating new Token..."
    TOKEN=$(cat /proc/sys/kernel/random/uuid)
    # coalesce(): max(id) is NULL on an empty table, which would otherwise
    # leave the id empty and make the insert below invalid
    TOKENID=$(psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "select coalesce(max(id), 0) + 1 from access_tokens" | xargs)
    psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "insert into access_tokens(id, user_id, token, device_id, last_validated, used) values (${TOKENID}, '${ADMINID}', '${TOKEN}', 'SQLCLEANER', 1656788062940, 'f')"
  fi
}
# Purge a room via the synapse admin API (DELETE /_synapse/admin/v2/rooms/<room>
# with "block": false, "purge": true) and wait until the deletion is finished.
# Arguments: $1 - room id
# Globals:   reads TOKEN, MATRIXURL; writes ROOM, D_IDRET, D_ID, D_STATUS, D_STATUSRET
# Returns:   1 if the deletion could not be scheduled
purge_room () {
  ROOM="${1}"
  D_IDRET=$(curl --silent --max-time 900 --insecure -XDELETE -H "Authorization: Bearer ${TOKEN}" -H "Content-Type: application/json" -d "{\"block\": false, \"purge\": true}" "${MATRIXURL}/_synapse/admin/v2/rooms/${ROOM}")
  # printf instead of echo: echo may interpret backslashes inside the JSON
  D_ID=$(printf '%s\n' "${D_IDRET}" | jq -r '.delete_id')
  # an empty id means curl failed or the answer was not JSON; "null" means the
  # server answered without a delete_id (i.e. with an error)
  if [ -z "${D_ID}" ] || [ "${D_ID}" = "null" ]; then
    if [ -n "${D_IDRET}" ]; then
      printf '%s\n' "${D_IDRET}" | jq -r '.error'
    else
      echo "No response from ${MATRIXURL}"
    fi
    return 1
  fi
  printf 'Waiting for deletion %s for %s' "${D_ID}" "${ROOM}"
  D_STATUS="active"
  D_STATUSRET=""
  sleep 4
  # poll until the status is neither "purging" nor "active"
  while [ "${D_STATUS}" = "purging" ] || [ "${D_STATUS}" = "active" ]; do
    D_STATUSRET=$(curl --silent --max-time 900 --insecure -XGET -H "Authorization: Bearer ${TOKEN}" "${MATRIXURL}/_synapse/admin/v2/rooms/delete_status/${D_ID}")
    D_STATUS=$(printf '%s\n' "${D_STATUSRET}" | jq -r '.status')
    printf "."
    sleep 5
  done
  printf '\nPurge finished %s\n' "${D_STATUSRET}"
  echo "You might want to additionally run:"
  echo " ./matrixpurge.sh clean_states '${ROOM}'"
}
# Purge old events in a room - we do that here instead of per auto_retention,
# because it allows us to time it.
# Arguments: $1 - room id
#            $2 - optional: purge events older than this many days (default 14)
# Globals:   reads TOKEN, MATRIXURL
#            writes ROOM, P_DAYS, TS, P_IDRET, P_ID, P_STATUS, P_STATUSRET
# Returns:   1 if the purge could not be scheduled
purge_room_history () {
  ROOM="${1}"
  P_DAYS="${2:-14}"
  # ms timestamp of P_DAYS days ago; built from whole seconds so it does not
  # depend on the GNU-only %N format of date
  TS=$(( ( $(date +%s) - ${P_DAYS} * 86400 ) * 1000 ))
  P_IDRET=$(curl --silent --max-time 900 --insecure -XPOST -H "Authorization: Bearer ${TOKEN}" -H "Content-Type: application/json" -d "{\"delete_local_events\": true, \"purge_up_to_ts\": ${TS}}" "${MATRIXURL}/_synapse/admin/v1/purge_history/${ROOM}")
  # printf instead of echo: echo may interpret backslashes inside the JSON
  P_ID=$(printf '%s\n' "${P_IDRET}" | jq -r '.purge_id')
  # an empty id means curl failed or the answer was not JSON; "null" means the
  # server answered without a purge_id (i.e. with an error)
  if [ -z "${P_ID}" ] || [ "${P_ID}" = "null" ]; then
    if [ -n "${P_IDRET}" ]; then
      printf '%s\n' "${P_IDRET}" | jq -r '.error'
    else
      echo "No response from ${MATRIXURL}"
    fi
    return 1
  fi
  printf 'Waiting for purge %s for %s' "${P_ID}" "${ROOM}"
  P_STATUS="active"
  P_STATUSRET=""
  sleep 4
  # poll until the status is no longer "active"
  while [ "${P_STATUS}" = "active" ]; do
    P_STATUSRET=$(curl --max-time 900 --silent --insecure -XGET -H "Authorization: Bearer ${TOKEN}" "${MATRIXURL}/_synapse/admin/v1/purge_history_status/${P_ID}")
    P_STATUS=$(printf '%s\n' "${P_STATUSRET}" | jq -r '.status')
    printf "."
    sleep 5
  done
  printf '\nPurge finished with %s\n' "${P_STATUSRET}"
}
# Walk over every room known to the server (paginated admin room list) and
#  - purge federated rooms that have no local members,
#  - purge rooms without any members,
#  - for rooms with more than 10 members, randomly (about 4 out of 10 times)
#    purge the history older than 14 days.
# Globals: reads TOKEN, MATRIXURL, HOMESERVER; writes NEXT_BATCH, RET
check_rooms () {
  echo "-- Checking individual rooms"
  NEXT_BATCH=0
  # stop on "null" (no further page) and on an empty value (request failed or
  # answer was not JSON), otherwise a dead server would make this loop forever
  while [ -n "${NEXT_BATCH}" ] && [ "${NEXT_BATCH}" != "null" ]; do
    RET=$(curl --max-time 900 --silent --insecure -XGET -H "Authorization: Bearer ${TOKEN}" "${MATRIXURL}/_synapse/admin/v1/rooms?order_by=joined_members&from=${NEXT_BATCH}")
    # read -r / printf keep backslashes inside the JSON lines intact
    printf '%s\n' "${RET}" | jq -c '.rooms[]' | while IFS= read -r room
    do
      amount=$(printf '%s\n' "${room}" | jq -r .joined_members)
      amount_local=$(printf '%s\n' "${room}" | jq -r .joined_local_members)
      id=$(printf '%s\n' "${room}" | jq -r .room_id)
      # server part of the room id: everything after the first ':'
      server=${id#*:}
      printf '=> %s = %s,%s %s\n' "${id}" "${amount}" "${amount_local}" "${server}"
      if [ "${server}" != "${HOMESERVER}" ] && [ "${amount_local}" = "0" ]; then
        echo "Room ${id} is federated, but has no local users in it, purging..."
        purge_room "${id}"
      elif [ "${amount}" = "0" ]; then
        echo "Room ${id} has no members, purging..."
        purge_room "${id}"
      # numeric comparison; '>' inside [ ] would be a redirection to a file "10"
      elif [ "${amount}" -gt 10 ] 2>/dev/null; then
        # randomly decide if purge should happen or not:
        # first digit of the current nanoseconds is 0-3
        if [ "$(date +%1N)" -lt 4 ]; then
          echo "Room ${id} with >10 users, magic decided to delete history of >14d ago..."
          purge_room_history "${id}"
        fi
      fi
    done
    NEXT_BATCH=$(printf '%s\n' "${RET}" | jq -r '.next_batch')
  done
}
# Remove the unreferenced state groups of a single room.
# The finder tool writes the ids to /tmp/sgstmp.txt; if that file exists
# afterwards, synapse is stopped, the listed state groups are deleted from the
# database and synapse is started again.
# Arguments: $1 - room id
# Returns:   1 if the finder produced no output file
clear_state_groups_of_room () {
  ROOM="${1}"
  # drop leftovers of an earlier run
  if [ -f "/tmp/sgstmp.txt" ]; then
    rm "/tmp/sgstmp.txt"
  fi
  printf "=> ${L}${ROOM}${NC}\n"
  nice -n 10 ${SYNAPSE_UNREFERENCED_STATES} \
    -p postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB} \
    -r "${ROOM}" \
    -o "/tmp/sgstmp.txt"
  if [ ! -f "/tmp/sgstmp.txt" ]; then
    return 1
  fi
  # the deletion can't be done while synapse is running
  systemctl stop matrix-synapse
  psql -t postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB} -c "CREATE TEMPORARY TABLE unreffed(id BIGINT PRIMARY KEY); COPY unreffed FROM stdin WITH (FORMAT 'csv'); DELETE FROM state_groups_state WHERE state_group IN (SELECT id FROM unreffed); DELETE FROM state_group_edges WHERE state_group IN (SELECT id FROM unreffed); DELETE FROM state_groups WHERE id IN (SELECT id FROM unreffed);" < /tmp/sgstmp.txt
  systemctl start matrix-synapse
  rm "/tmp/sgstmp.txt"
}
# Collect the unreferenced state groups of the 20 largest rooms.
# Runs the finder tool once per room and appends whatever it wrote to
# /tmp/sgstmp.txt to the given result file.
# Arguments: $1 - file the collected ids are appended to (removed first)
check_state_groups () {
  files="${1}"
  echo "--- Checking largest room state groups"
  # start from a clean slate
  if [ -f "${files}" ]; then
    rm "${files}"
  fi
  if [ -f "/tmp/sgstmp.txt" ]; then
    rm "/tmp/sgstmp.txt"
  fi
  RET=$(curl --max-time 900 --silent --insecure -XGET -H "Authorization: Bearer ${TOKEN}" "${MATRIXURL}/_synapse/admin/v1/rooms?order_by=joined_members&limit=20")
  echo "$RET" | jq -c '.rooms[]' | while read entry
  do
    room_id=$(echo "$entry" | jq -r .room_id)
    printf "=> ${L}${room_id}${NC}\n"
    nice -n 10 ${SYNAPSE_UNREFERENCED_STATES} \
      -p postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB} \
      -r "${room_id}" \
      -o "/tmp/sgstmp.txt"
    # only rooms for which the finder wrote a file contribute to the result
    test -f "/tmp/sgstmp.txt" && {
      cat "/tmp/sgstmp.txt" >> "${files}"
      rm "/tmp/sgstmp.txt"
    }
  done
}
# Clear the unreferenced state groups of the largest rooms.
# The candidates are collected twice, 10 seconds apart; the deletion only
# happens if both runs agree. Synapse is stopped for the deletion and the
# following REINDEX, and started again afterwards.
# Exits the script with status 1 if the two runs differ.
clear_state_groups () {
  echo "--Clearing unreferenced state groups"
  check_state_groups "/tmp/sgs.txt"
  sleep 10
  check_state_groups "/tmp/sgs2.txt"
  # neither run produced a result file: nothing is unreferenced, so there is
  # no reason to stop synapse (diff on two missing files would fail and be
  # misreported as "changed")
  if [ ! -e "/tmp/sgs.txt" ] && [ ! -e "/tmp/sgs2.txt" ]; then
    echo "No unreferenced state groups found, nothing to clear."
    return 0
  fi
  if ! diff "/tmp/sgs.txt" "/tmp/sgs2.txt" > /dev/null; then
    echo "State groups changed while checking, exiting..."
    # don't leave the collected lists behind
    rm -f "/tmp/sgs.txt" "/tmp/sgs2.txt"
    exit 1
  fi
  rm "/tmp/sgs2.txt"
  # can't run this while synapse is running
  systemctl stop matrix-synapse
  psql -t postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB} -c "CREATE TEMPORARY TABLE unreffed(id BIGINT PRIMARY KEY); COPY unreffed FROM stdin WITH (FORMAT 'csv'); DELETE FROM state_groups_state WHERE state_group IN (SELECT id FROM unreffed); DELETE FROM state_group_edges WHERE state_group IN (SELECT id FROM unreffed); DELETE FROM state_groups WHERE id IN (SELECT id FROM unreffed);" < /tmp/sgs.txt
  rm "/tmp/sgs.txt"
  psql -t postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB} -c "REINDEX (VERBOSE) DATABASE ${SQLDB}"
  systemctl start matrix-synapse
}
# Get all appsocket users of a room that did not send any message in the past
# 30 days (and have no entry in user_ips), and print a curl config on stdout
# that kicks them, one request per user, separated by "next".
# Arguments: $1 - room id
# Globals:   reads SQLUSER, SQLPASSWD, SQLDB, PREFIX, HOMESERVER, ADMINID,
#            MATRIXURL, TOKEN; writes room, ASUSERS, KICKURL, CNT, user
# Returns:   0 if at least one user is to be kicked, 1 if there is none
get_curl_config () {
  room=${1}
  # users of the configured homeserver (was hard-coded to pixelplanet.fun)
  ASUSERS=$(psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "select user_id from users_in_public_rooms u where room_id = '${room}' and user_id like '@${PREFIX}_%:${HOMESERVER}' and user_id != '${ADMINID}' and not exists ( select from events where room_id = '${room}' and sender = u.user_id and to_timestamp(received_ts/1000) > now() - interval '30 DAYS' ) and not exists ( select from user_ips where user_id = u.user_id )")
  KICKURL="${MATRIXURL}/_matrix/client/v3/rooms/${room}/kick"
  # CNT stays 1 as long as no user was printed; it doubles as return status
  CNT=1
  # intentionally unquoted: split the psql output into one user id per word
  for user in ${ASUSERS}; do
    if [ "${CNT}" -eq 1 ]; then
      CNT=0
    else
      # "next" starts a new request within one curl config
      echo "next"
    fi
    printf 'header="Authorization: Bearer %s"\n' "${TOKEN}"
    printf 'header="Content-Type: application/json"\n'
    printf 'data="{\\"user_id\\": \\"%s\\"}"\n' "${user}"
    printf 'url=%s\n' "${KICKURL}"
  done
  return ${CNT}
}
# Do stuff for appsocket rooms (rooms with an alias starting with #${PREFIX}_):
# delete stale push actions of bridge users and kick inactive bridge users.
# Globals: reads SQLUSER, SQLPASSWD, SQLDB, PREFIX, HOMESERVER
#          writes ROOMS, room, KICKCFG
# Returns: 1 if no temporary file could be created
check_api_rooms () {
  ROOMS=$(psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "select room_id from room_aliases where room_alias like '#${PREFIX}_%'")
  # intentionally unquoted: split the psql output into one room id per word
  for room in ${ROOMS}
  do
    echo "=> Delete push_actions of logged-out users from ${room} and kick out inactive users..."
    # Clean event_push_actions of not-logged-in application service users
    # see https://github.com/matrix-org/synapse/issues/5569
    # This command can be really slow, if it takes too long, remove the "not exists..." part and it won't care about if logged in or not
    # (homeserver taken from the configuration, was hard-coded to pixelplanet.fun)
    psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "delete from event_push_actions u where room_id = '${room}' and user_id like '@${PREFIX}_%:${HOMESERVER}' and not exists ( select from user_ips where user_id = u.user_id and to_timestamp(last_seen/1000) > now() - interval '2 days' )"
    # the curl config contains the admin bearer token, so keep it in a private
    # mktemp file instead of a predictable, world-readable path in /tmp
    KICKCFG=$(mktemp) || return 1
    # get_curl_config returns non-zero when there is nobody to kick
    get_curl_config "${room}" > "${KICKCFG}" && curl --silent --parallel --parallel-immediate --parallel-max 10 --config "${KICKCFG}" && echo ""
    rm -f "${KICKCFG}"
  done
}
# Disable the ratelimit for the admin user by posting
# "messages_per_second": 0 to the override_ratelimit admin API.
# Globals: reads ADMINID, MATRIXURL, TOKEN; writes RATEURL
disable_ratelimit () {
  echo "--Disabling ratelimit for admin user ${ADMINID}..."
  RATEURL="${MATRIXURL}/_synapse/admin/v1/users/${ADMINID}/override_ratelimit"
  # --max-time like every other admin API call here, so a hanging server
  # can't block the run forever; URL quoted so it is always a single argument
  curl --silent --max-time 900 --insecure -XPOST -H "Authorization: Bearer ${TOKEN}" -H "Content-Type: application/json" -d "{\"messages_per_second\": 0}" "${RATEURL}"
}
# Reset the state compressor if requested, by dropping the three
# state_compressor_* tables from the database.
# Globals: reads SQLUSER, SQLPASSWD, SQLDB
reset_state_compressor () {
  echo "--Reset rust-synapse-compress-state"
  # "if exists": all statements of one -c string are executed together, so a
  # single missing table (e.g. compressor never ran) would otherwise abort
  # the whole reset
  psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "drop table if exists state_compressor_state; drop table if exists state_compressor_progress; drop table if exists state_compressor_total_progress;"
}
# Run the synapse auto compressor against the database at lowered priority.
# see https://github.com/matrix-org/rust-synapse-compress-state
compress_state () {
  printf '%s\n' "--Compress states..."
  nice -n 10 ${SYNAPSE_COMPRESSOR_PATH} \
    -p postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB} \
    -c 500 \
    -n 100
}
# Trim cache_invalidation_stream_by_instance and run a full vacuum.
# Globals: reads SQLUSER, SQLPASSWD, SQLDB
clean_db () {
  echo "--Clean up cache_invalidation_stream_by_instance"
  # see https://github.com/matrix-org/synapse/issues/8269
  # delete entries OLDER than one month; the comparison used to be '>', which
  # removed the most recent month and kept the old rows forever
  psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "delete from cache_invalidation_stream_by_instance where to_timestamp(invalidation_ts/1000) < now() - interval '1 months';"
  echo "--Vaccum..."
  time psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "VACUUM FULL"
}
# Print the total size of the database and its 20 largest relations.
# Globals: reads SQLUSER, SQLPASSWD, SQLDB
print_stats () {
  echo "--DONE. Current database size is..."
  # size of the configured database (was hard-coded to 'synapse')
  psql -t "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "SELECT pg_size_pretty( pg_database_size( '${SQLDB}' ) )"
  # the query below contains '!'; switch off history expansion when run by bash
  [ -n "${BASH_VERSION}" ] && set +H
  psql "postgresql://${SQLUSER}:${SQLPASSWD}@localhost/${SQLDB}" -c "SELECT nspname || '.' || relname AS \"relation\", pg_size_pretty(pg_total_relation_size(c.oid)) AS \"total_size\" FROM pg_class c LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace) WHERE nspname NOT IN ('pg_catalog', 'information_schema') AND c.relkind <> 'i' AND nspname !~ '^pg_toast' ORDER BY pg_total_relation_size(c.oid) DESC LIMIT 20;"
}
# ---- main ----
# Arguments: (none) | reset | clean_states [ROOM] | delete ROOM

# Reject unknown invocations first, before the pid file is written or the
# database is touched.
case "${1}" in
  ""|reset|clean_states)
    ;;
  delete)
    # "delete" needs a room id
    [ "${2}" != "" ] || {
      echo "Unrecognized argument: ${1}" 1>&2
      exit 1
    }
    ;;
  *)
    echo "Unrecognized argument: ${1}" 1>&2
    exit 1
    ;;
esac

# refuse to run twice: the pid file exists and its process is still alive
if [ -f "${PIDFILE}" ] && ps -p "$(cat "${PIDFILE}")" > /dev/null 2>&1; then
  echo "matrixpurge.sh already running, exiting."
  echo "If you think this is a mistake, delete this file: ${PIDFILE}"
  exit 1
fi
echo $$ > "${PIDFILE}"
# From here on the pid file is ours: remove it on every exit path, including
# the early exits below, exits from within functions and INT/TERM.
trap 'rm -f "${PIDFILE}"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

get_admin_token

case "${1}" in
  clean_states)
    if [ "${2}" ]; then
      clear_state_groups_of_room "${2}"
    else
      clear_state_groups
    fi
    exit 0
    ;;
  delete)
    purge_room "${2}"
    exit 0
    ;;
esac

# regular clean-up run
check_rooms
disable_ratelimit
check_api_rooms
[ "${1}" = "reset" ] && reset_state_compressor
compress_state
clean_db
print_stats
exit 0