#!/usr/bin/perl -w
#----------------------------------------------------------------------
#
# remove_pg_type_oid_symbols.pl
#
# Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# /src/include/catalog/remove_pg_type_oid_symbols.pl
#
#----------------------------------------------------------------------

use Catalog;

use strict;
use warnings;

# Command line state:
#   @input_files    - the .dat files named on the command line
#   $output_path    - directory prefix for the rewritten files ('' if no -o)
#   $expand_tuples  - true if --expand was given (keep default values)
my @input_files;
my $output_path = '';
my $expand_tuples = 0;

# Process command line switches.  Anything that does not start with a
# dash is taken to be an input data file.
while (@ARGV)
{
	my $arg = shift @ARGV;
	if ($arg !~ /^-/)
	{
		push @input_files, $arg;
	}
	elsif ($arg =~ /^-o/)
	{
		# Accept both "-oPATH" and "-o PATH".
		$output_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV;

		# A trailing "-o" with nothing after it yields undef; report the
		# mistake instead of carrying an undefined path forward.
		usage() if !defined $output_path;
	}
	elsif ($arg eq '--expand')
	{
		$expand_tuples = 1;
	}
	else
	{
		usage();
	}
}

# Sanity check arguments.
die "No input files.\n"
  if !@input_files;

# Make sure output_path ends in a slash, so that a file name can simply
# be appended to it.
if ($output_path ne '' && substr($output_path, -1) ne '/')
{
	$output_path .= '/';
}

# Metadata of a catalog entry
my @METADATA = ('oid', 'oid_symbol', 'descr', 'shdescr');

# Read all the input files into internal data structures.
# We pass data file names as arguments and then look for matching
# headers to parse the schema from.
foreach my $datfile (@input_files)
{
	$datfile =~ /(.+)\.dat$/
	  or die "Input files need to be data (.dat) files.\n";

	my $header = "$1.h";
	die "There is no header file corresponding to $datfile"
	  if ! -e $header;

	my $catalog = Catalog::ParseHeader($header);
	my $catname = $catalog->{catname};
	my $schema  = $catalog->{columns};

	# Column names in schema order; this is the order in which the
	# attributes of each entry are written out.
	my @attnames = map { $_->{name} } @$schema;

	# NOTE(review): the third argument presumably asks ParseData to keep
	# comments and blank lines so they can be copied through below --
	# confirm against Catalog.pm.
	my $catalog_data = Catalog::ParseData($datfile, $schema, 1);
	next if !defined $catalog_data;

	# Write to <catname>.dat under the output path.  A distinct variable
	# is used so the name of the input file is not shadowed.
	my $outfile = "$output_path$catname.dat";
	open my $dat, '>', $outfile
	  or die "can't open $outfile: $!";

	# Write the data.
	foreach my $data (@$catalog_data)
	{
		# Either a newline, comment, or bracket - just write it out.
		if (! ref $data)
		{
			print $dat "$data\n";
		}
		# Hash ref representing a data entry.
		elsif (ref $data eq 'HASH')
		{
			# Work on a copy so the parsed data is left untouched.
			my %values = %$data;
			print $dat "{ ";

			# Write out tuples in a compact representation.
			# Note: This is also a convenient place to do one-off
			# bulk-editing.
			if (!$expand_tuples)
			{
				strip_default_values(\%values, $schema, $catname);
			}

			# Remove pg_type OID symbols if they can match the rule
			# we use to generate them.
			if ($catname eq 'pg_type' and exists $values{oid_symbol})
			{
				my $symbol = form_pg_type_symbol($values{typname});
				delete $values{oid_symbol}
				  if defined $symbol
					and $values{oid_symbol} eq $symbol;
			}

			# Separate out metadata fields for readability.
			my $metadata_line = format_line(\%values, @METADATA);
			if ($metadata_line)
			{
				print $dat $metadata_line;
				print $dat ",\n";
			}
			my $data_line = format_line(\%values, @attnames);

			# Line up with metadata line, if there is one.
			if ($metadata_line)
			{
				print $dat '  ';
			}
			print $dat $data_line;
			print $dat " },\n";
		}
		else
		{
			die "Unexpected data type";
		}
	}

	# Buffered write errors only surface when the handle is closed, so
	# close explicitly and check the result.
	close $dat
	  or die "can't close $outfile: $!";
}

# Determine canonical pg_type OID #define symbol from the type name.
#
# The name is upper-cased and suffixed with 'OID'; a leading underscore
# is dropped and turns the suffix into 'ARRAYOID':
#    foo_bar  ->  FOO_BAROID
#   _foo_bar  ->  FOO_BARARRAYOID
#
# Returns nothing (undef in scalar context) for the rowtypes of the
# bootstrap catalogs listed below, and for an undefined or empty name,
# from which no symbol can be formed.
sub form_pg_type_symbol
{
	my $typename = shift;

	# Nothing to derive a symbol from.
	return if !defined $typename;

	# Skip for rowtypes of bootstrap tables.
	return
	  if $typename eq 'pg_type'
	    or $typename eq 'pg_proc'
	    or $typename eq 'pg_attribute'
	    or $typename eq 'pg_class';

	# Take the captures from the match's return list rather than from
	# $1/$2: after a failed match those variables still hold whatever an
	# earlier successful match (possibly in the caller) left behind.
	my ($array_marker, $basename) = $typename =~ /(_)?(.+)/;
	return if !defined $basename;

	my $arraystr = $array_marker ? 'ARRAY' : '';
	return uc($basename) . $arraystr . 'OID';
}

# Prune from a row every column whose value equals the column's default.
#
#   $row     - hash ref mapping attribute name to value; modified in place
#   $schema  - array ref of column hash refs, each with a 'name' key and
#              optionally a 'default' key
#   $catname - catalog name, used only in the error message
#
# Dies if the row has no defined value for one of the schema's columns.
sub strip_default_values
{
	my ($row, $schema, $catname) = @_;

	for my $col (@{$schema})
	{
		my $name = $col->{name};

		unless (defined $row->{$name})
		{
			die "strip_default_values: $catname.$name undefined\n";
		}

		# Columns without a default are always kept.
		my $default = $col->{default};
		next if !defined $default;

		# Keep values that differ from the default.
		next if $row->{$name} ne $default;

		delete $row->{$name};
	}
}

# Render selected entries of a hash as "name => 'value'" pairs joined by
# ", ".  This is done by hand, rather than with native Perl facilities,
# so that the exact formatting of the data files stays under our control.
#
# The first argument is the hash ref; the remaining arguments are the
# attribute names to emit, in output order.  Attributes with no defined
# value are skipped.  Returns the formatted string, which is empty if
# nothing was emitted.
sub format_line
{
	my ($data, @attnames) = @_;

	my @pairs;
	for my $key (@attnames)
	{
		my $raw = $data->{$key};
		next unless defined $raw;

		# Re-escape single quotes, on a copy so the hash is untouched.
		(my $quoted = $raw) =~ s/'/\\'/g;

		push @pairs, "$key => '$quoted'";
	}

	return join(', ', @pairs);
}

# Abort with the command-line help text.  The text is raised via die and
# ends in a newline, so no file/line suffix is appended to it.
sub usage
{
	my $help = <<EOM;
Usage: remove_pg_type_oid_symbols.pl [options] datafile...

Options:
    -o               output path
    --expand         write out full tuples

Expects a list of .dat files as arguments.

Make sure location of Catalog.pm is passed to the perl interpreter:
perl -I /path/to/Catalog.pm/ ...

EOM

	die $help;
}