diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index fdd3331..a0fe766 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -1141,6 +1141,9 @@ number of transactions actually processed: 10000/10000
 tps = 618.764555 (including connections establishing)
 tps = 622.977698 (excluding connections establishing)
 SQL script 1: &lt;builtin: TPC-B (sort of)&gt;
+ - 10000 transactions (100.0% of total, tps = 618.764555)
+ - latency average = 15.844 ms
+ - latency stddev = 2.715 ms
  - per command latencies in ms:
         0.004386        \set nbranches 1 * :scale
         0.001343        \set ntellers 10 * :scale
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index f7b84da..b20d00c 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -164,6 +164,7 @@ bool		use_log;			/* log transaction latencies to a file */
 bool		use_quiet;			/* quiet logging onto stderr */
 int			agg_interval;		/* log aggregates instead of individual
 								 * transactions */
+bool        per_script_stats = false; /* whether to collect stats per script */
 int			progress = 0;		/* thread progress report every this seconds */
 bool		progress_timestamp = false; /* progress report with Unix time */
 int			nclients = 1;		/* number of clients */
@@ -301,6 +302,7 @@ typedef struct
 {
 	const char *name;
 	Command **commands;
+	StatsData stats;
 } SQLScript;
 
 static SQLScript sql_script[MAX_SCRIPTS];
@@ -1307,7 +1309,7 @@ top:
 		/* transaction finished: calculate latency and log the transaction */
 		if (commands[st->state + 1] == NULL)
 		{
-			if (progress || throttle_delay || latency_limit || logfile)
+			if (progress || throttle_delay || latency_limit || per_script_stats || logfile)
 				doTxStats(thread, st, &now, false, logfile, agg);
 			else
 				thread->stats.cnt ++;
@@ -1398,7 +1400,7 @@ top:
 	}
 
 	/* Record transaction start time under logging, progress or throttling */
-	if ((logfile || progress || throttle_delay || latency_limit) && st->state == 0)
+	if ((logfile || progress || throttle_delay || latency_limit || per_script_stats) && st->state == 0)
 	{
 		INSTR_TIME_SET_CURRENT(st->txn_begin);
 
@@ -1883,6 +1885,9 @@ doTxStats(TState *thread, CState *st, instr_time *now,
 
 	if (use_log)
 		doLog(thread, st, logfile, now, agg, skipped, latency, lag);
+
+	if (per_script_stats) /* XXX no mutex is taken here: these are only statistics */
+		doStats(& sql_script[st->use_file].stats, skipped, latency, lag);
 }
 
 
@@ -2637,6 +2642,7 @@ addScript(const char *name, Command ** commands)
 
 	sql_script[num_scripts].name = name;
 	sql_script[num_scripts].commands = commands;
+	initStats(& sql_script[num_scripts].stats, 0.0);
 	num_scripts++;
 }
 
@@ -2717,21 +2723,40 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
 	printf("tps = %f (including connections establishing)\n", tps_include);
 	printf("tps = %f (excluding connections establishing)\n", tps_exclude);
 
-	/* Report per-command latencies */
-	if (is_latencies)
+	/* Report per-script stats */
+	if (per_script_stats)
 	{
 		int			i;
 
 		for (i = 0; i < num_scripts; i++)
 		{
-			Command ** com;
-
-			printf("SQL script %d: %s\n", i+1, sql_script[i].name);
-			printf(" - per command latencies in ms:\n");
-			for (com = sql_script[i].commands; *com != NULL; com++)
-				printf("   %11.3f  %s\n",
-					   1000.0 * (*com)->stats.sum / (*com)->stats.count,
-					   (*com)->line);
+			printf("SQL script %d: %s\n"
+				   " - "INT64_FORMAT" transactions (%.1f%% of total, tps = %f)\n",
+				   i+1, sql_script[i].name,
+				   sql_script[i].stats.cnt,
+				   100.0 * sql_script[i].stats.cnt / total->cnt,
+				   sql_script[i].stats.cnt / time_include);
+
+			if (latency_limit)
+				printf(" - number of transactions skipped: "INT64_FORMAT" (%.3f%%)\n",
+					   sql_script[i].stats.skipped,
+					   100.0 * sql_script[i].stats.skipped /
+					   (sql_script[i].stats.skipped + sql_script[i].stats.cnt));
+
+			printSimpleStats(" - latency", & sql_script[i].stats.latency);
+
+			/* Report per-command latencies */
+			if (is_latencies)
+			{
+				Command ** com;
+
+				printf(" - per command latencies in ms:\n");
+
+				for (com = sql_script[i].commands; *com != NULL; com++)
+					printf("   %11.3f  %s\n",
+						   1000.0 * (*com)->stats.sum / (*com)->stats.count,
+						   (*com)->line);
+			}
 		}
 	}
 }
@@ -2917,6 +2942,7 @@ main(int argc, char **argv)
 				break;
 			case 'r':
 				benchmarking_option_set = true;
+				per_script_stats = true;
 				is_latencies = true;
 				break;
 			case 's':
@@ -3143,6 +3169,10 @@ main(int argc, char **argv)
 		internal_script_used = true;
 	}
 
+	/* force per-script stats reporting when more than one script is in use */
+	if (num_scripts > 1)
+		per_script_stats = true;
+
 	/*
 	 * Don't need more threads than there are clients.  (This is not merely an
 	 * optimization; throttle_delay is calculated incorrectly below if some
