from uuid import UUID import psycopg2 def create_ltree(cursor): # Activate the ltree extension cursor.execute("CREATE EXTENSION IF NOT EXISTS ltree;") # Create table with ltree field cursor.execute(""" CREATE TABLE IF NOT EXISTS ltree_corruption ( id SERIAL PRIMARY KEY, directory ltree ); """) # Create index on the ltree field cursor.execute( """CREATE INDEX IF NOT EXISTS idx_directory ON ltree_corruption USING GIST (directory);""" ) cursor.execute("TRUNCATE TABLE ltree_corruption;") def insert_directory(cursor, directory: list[UUID]): _directory = ".".join(str(d) for d in directory).replace("-", "_") cursor.execute( f""" INSERT INTO ltree_corruption (directory) VALUES ('{_directory}'); """ ) def insert_corrupt_ltree(cursor): # These UUID's are somewhat arbitrary. The error does not repro with all UUIDs but it does with these. dir1 = UUID(int=4000) dir2 = UUID(int=50001) dir3 = UUID(int=6000002) # # Insert an intial path. Without this the incomplete split does not happen. insert_directory(cursor, [dir1, dir2]) bad_directory = ( [dir1] + list(map(lambda x: UUID(int=3000000 + x), range(50))) + [dir3] ) try: # Insert until the index becomes corrupted. The exact number of insertions required doesn't seem to be # deterministic but is always less than 50. for i in range(0, 50): insert_directory(cursor, bad_directory + [UUID(int=10000000 + i)]) except psycopg2.Error: # traceback.print_exc() pass try: # Run again until corruption happens again. The logs will show: # `fixing incomplete split in index "idx_directory"` for i in range(0, 50): insert_directory(cursor, bad_directory + [UUID(int=10000000 + 100 + i)]) except psycopg2.Error: # traceback.print_exc() cursor.execute(""" SELECT line FROM regexp_split_to_table( pg_read_file( (SELECT setting FROM pg_settings WHERE name = 'data_directory') || '/' || (SELECT setting FROM pg_settings WHERE name = 'log_directory') || '/' || (SELECT name FROM pg_ls_logdir() ORDER BY modification DESC LIMIT 1), 0, 10000000 -- adjust bytes if needed ), E'\n' ) AS line WHERE line LIKE '%fixing incomplete split in index%'; """) print(cursor.fetchone()) def exec(): # Define connection parameters connection = psycopg2.connect( dbname="postgres", user="user", password="password", host="localhost", port="5432", ) connection.autocommit = True # Create a cursor object cursor = connection.cursor() cursor.execute("ALTER SYSTEM SET log_min_messages = 'INFO';") create_ltree(cursor) insert_corrupt_ltree(cursor) # Close the cursor and connection cursor.close() connection.close() if __name__ == "__main__": exec()