#!/usr/bin/env python
# Script is maintained at https://github.com/keithf4/pg_bloat_check
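#
# Example usage (illustrative only; adjust the connection string and thresholds to your environment):
#   ./pg_bloat_check.py -c "host=localhost dbname=postgres" --create_stats_table
#   ./pg_bloat_check.py -c "host=localhost dbname=postgres" -m tables -p 20 -z 1048576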
import argparse, csv, json, psycopg2, re, sys
from psycopg2 import extras
from random import randint
version = "2.6.0"
parser = argparse.ArgumentParser(description="Provide a bloat report for PostgreSQL tables and/or indexes. This script uses the pgstattuple contrib module which must be installed first. Note that the query to check for bloat can be extremely expensive on very large databases or those with many tables. The script stores the bloat stats in a table so they can be queried again as needed without having to re-run the entire scan. The table contains a timestamp column to show when the stats were obtained.")
args_general = parser.add_argument_group(title="General options")
args_general.add_argument('-c','--connection', default="host=", help="""Connection string for use by psycopg. Defaults to "host=" (local socket).""")
args_general.add_argument('-e', '--exclude_object_file', help="""Full path to file containing a list of objects to exclude from the report (tables and/or indexes). Each line is a CSV entry in the format: objectname,bytes_wasted,percent_wasted. All objects must be schema qualified. bytes_wasted & percent_wasted are additional filter values on top of -s, -p, and -z to exclude the given object unless these values are also exceeded. Set either of these values to zero (or leave them off entirely) to exclude the object no matter what its bloat level. Comments are allowed if the line is prepended with "#". See the README.md for clearer examples of how to use this for more fine grained filtering.""")
args_general.add_argument('-f', '--format', default="simple", choices=["simple", "json", "jsonpretty", "dict"], help="Output formats. Simple is a plaintext version suitable for any output (ex: console, pipe to email). Object type is in parentheses (t=table, i=index, p=primary key). Json provides standardized json output which may be useful if taking input into something that needs a more structured format. Json also provides more details about dead tuples, empty space & free space. jsonpretty outputs in a more human readable format. Dict is the same as json but in the form of a python dictionary. Default is simple.")
args_general.add_argument('-m', '--mode', choices=["tables", "indexes", "both"], default="both", help="""Provide bloat reports for tables, indexes or both. Index bloat is always distinct from table bloat and reported as separate entries in the report. Default is "both". NOTE: GIN indexes are not supported at this time and will be skipped.""")
args_general.add_argument('-n', '--schema', help="Comma separated list of schemas to include in the report. All other schemas will be ignored.")
args_general.add_argument('-N', '--exclude_schema', help="Comma separated list of schemas to exclude.")
args_general.add_argument('--noanalyze', action="store_true", help="To ensure accurate fillfactor statistics, an ANALYZE of each object being scanned is done before the check for bloat. Set this to skip the analyze step and reduce overall runtime; however, your bloat statistics may not be as accurate.")
args_general.add_argument('--noscan', action="store_true", help="Set this option to have the script just read from the bloat statistics table without doing a scan of any tables again.")
args_general.add_argument('-p', '--min_wasted_percentage', type=float, default=0.1, help="Minimum percentage of wasted space an object must have to be included in the report. Default and minimum value is 0.1 (DO NOT include percent sign in given value).")
args_general.add_argument('-q', '--quick', action="store_true", help="Use the pgstattuple_approx() function instead of pgstattuple() for a quicker, but possibly less accurate bloat report. Only works for tables. Sets the 'approximate' column in the bloat statistics table to True. Note this only works in PostgreSQL 9.5+.")
args_general.add_argument('-u', '--quiet', action="count", help="Suppress console output but still insert data into the bloat statistics table. This option can be set several times. Setting it once will suppress all non-error console output if no bloat is found, but still output when it is found for the given parameter settings. Setting it twice will suppress all console output, even if bloat is found.")
args_general.add_argument('-r', '--commit_rate', type=int, default=5, help="Sets how many tables are scanned before committing inserts into the bloat statistics table. Helps avoid long-running transactions when scanning large tables. Default is 5. Set to 0 to avoid committing until all tables are scanned. NOTE: The bloat table is truncated on every run unless --noscan is set.")
args_general.add_argument('--rebuild_index', action="store_true", help="Output a series of SQL commands for each index that will rebuild it with minimal impact on database locks. This does NOT run the given sql, it only provides the commands to do so manually. This does not run a new scan and will use the indexes contained in the statistics table from the last run. If a unique index was previously defined as a constraint, it will be recreated as a unique index. All other filters used during a standard bloat check scan can be used with this option so you only get commands to run for objects relevant to your desired bloat thresholds.")
args_general.add_argument('--recovery_mode_norun', action="store_true", help="Setting this option causes the script to check whether the database it is running against is a replica (in recovery mode) and, if so, to skip running. If it is not in recovery, it runs as normal. This is useful when you want to ensure the bloat check only ever runs on the primary after a failover without having to edit crontabs or similar process managers.")
args_general.add_argument('-s', '--min_size', type=int, default=1, help="Minimum size in bytes of object to scan (table or index). Default and minimum value is 1.")
args_general.add_argument('-t', '--tablename', help="Scan for bloat only on the given table. Must be schema qualified. This always gets both table and index bloat and overrides all other filter options so you always get the bloat statistics for the table no matter what they are.")
args_general.add_argument('--version', action="store_true", help="Print version of this script.")
args_general.add_argument('-z', '--min_wasted_size', type=int, default=1, help="Minimum size of wasted space in bytes. Default and minimum is 1.")
args_general.add_argument('--debug', action="store_true", help="Output additional debugging information. Overrides quiet option.")
args_setup = parser.add_argument_group(title="Setup")
args_setup.add_argument('--pgstattuple_schema', help="If pgstattuple is not installed in the default search path, use this option to designate the schema where it is installed.")
args_setup.add_argument('--bloat_schema', help="Set the schema that the bloat report table is in if it's not in the default search path. Note this option can also be set when running --create_stats_table to set which schema you want the table created.")
args_setup.add_argument('--create_stats_table', action="store_true", help="Create the required tables that the bloat report uses (bloat_stats + two child tables). Places table in default search path unless --bloat_schema is set.")
args = parser.parse_args()
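# Verify the pgstattuple extension is installed (and, if --pgstattuple_schema was given, that it lives in that schema). Returns the extension version or exits with an error.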
def check_pgstattuple(conn):
sql = "SELECT e.extversion, n.nspname FROM pg_catalog.pg_extension e JOIN pg_catalog.pg_namespace n ON e.extnamespace = n.oid WHERE extname = 'pgstattuple'"
cur = conn.cursor()
cur.execute(sql)
pgstattuple_info = cur.fetchone()
if pgstattuple_info == None:
print("pgstattuple extension not found. Please ensure it is installed in the database this script is connecting to.")
close_conn(conn)
sys.exit(2)
if args.pgstattuple_schema != None:
if args.pgstattuple_schema != pgstattuple_info[1]:
print("pgstattuple not found in the schema given by --pgstattuple_schema option: " + args.pgstattuple_schema + ". Found instead in: " + pgstattuple_info[1]+".")
close_conn(conn)
sys.exit(2)
return pgstattuple_info[0]
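# Return True if the connected instance is in recovery (i.e. a replica), False otherwise.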
def check_recovery_status(conn):
sql = "SELECT pg_is_in_recovery FROM pg_catalog.pg_is_in_recovery()"
cur = conn.cursor()
cur.execute(sql)
is_in_recovery = cur.fetchone()[0]
cur.close()
return is_in_recovery
def create_conn():
conn = psycopg2.connect(args.connection)
return conn
def close_conn(conn):
conn.close()
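# Build a filter list either from a comma-separated string ('csv') or from an exclude-object file ('file'), where each file entry becomes a dict with objectname, max_wasted and max_perc keys.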
def create_list(list_type, list_items):
split_list = []
if list_type == "csv":
split_list = list_items.split(',')
elif list_type == "file":
with open(list_items, 'r') as csvfile:
objectreader = csv.DictReader(csvfile, fieldnames=['objectname', 'max_wasted', 'max_perc'])
for o in objectreader:
if not o['objectname'].startswith('#'):
o['objectname'] = o['objectname'].strip()
if o['max_wasted'] != None:
o['max_wasted'] = int(o['max_wasted'])
else:
o['max_wasted'] = 0
if o['max_perc'] != None:
o['max_perc'] = float(o['max_perc'])
else:
o['max_perc'] = 0
split_list.append(o)
return split_list
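# Drop and recreate the statistics tables: the bloat_stats parent plus the bloat_tables and bloat_indexes children that inherit from it.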
def create_stats_table(conn):
if args.bloat_schema != None:
parent_sql = args.bloat_schema + "." + "bloat_stats"
tables_sql = args.bloat_schema + "." + "bloat_tables"
indexes_sql = args.bloat_schema + "." + "bloat_indexes"
else:
parent_sql = "bloat_stats"
tables_sql = "bloat_tables"
indexes_sql = "bloat_indexes"
drop_sql = "DROP TABLE IF EXISTS " + parent_sql + " CASCADE"
sql = "CREATE TABLE " + parent_sql + """ (
oid oid NOT NULL
, schemaname text NOT NULL
, objectname text NOT NULL
, objecttype text NOT NULL
, size_bytes bigint
, live_tuple_count bigint
, live_tuple_percent float8
, dead_tuple_count bigint
, dead_tuple_size_bytes bigint
, dead_tuple_percent float8
, free_space_bytes bigint
, free_percent float8
, stats_timestamp timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP
, approximate boolean NOT NULL DEFAULT false
, relpages bigint NOT NULL DEFAULT 1
, fillfactor float8 NOT NULL DEFAULT 100)"""
cur = conn.cursor()
if args.debug:
print(cur.mogrify("drop_sql: " + drop_sql))
cur.execute(drop_sql)
if args.debug:
print(cur.mogrify("sql: " + sql))
cur.execute(sql)
sql = "CREATE TABLE " + tables_sql + " (LIKE " + parent_sql + " INCLUDING ALL) INHERITS (" + parent_sql + ")"
if args.debug:
print(cur.mogrify("sql: " + sql))
cur.execute(sql)
sql = "CREATE TABLE " + indexes_sql + " (LIKE " + parent_sql + " INCLUDING ALL) INHERITS (" + parent_sql + ")"
if args.debug:
print(cur.mogrify("sql: " + sql))
cur.execute(sql)
sql = "COMMENT ON TABLE " + parent_sql + " IS 'Table providing raw data for table & index bloat'"
if args.debug:
print(cur.mogrify("sql: " + sql))
cur.execute(sql)
sql = "COMMENT ON TABLE " + tables_sql + " IS 'Table providing raw data for table bloat'"
if args.debug:
print(cur.mogrify("sql: " + sql))
cur.execute(sql)
sql = "COMMENT ON TABLE " + indexes_sql + " IS 'Table providing raw data for index bloat'"
if args.debug:
print(cur.mogrify("sql: " + sql))
cur.execute(sql)
conn.commit()
cur.close()
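# Core scan: build the list of candidate tables/indexes, truncate the statistics tables, run pgstattuple()/pgstattuple_approx() against each object and insert the results, committing every --commit_rate objects.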
def get_bloat(conn, exclude_schema_list, include_schema_list, exclude_object_list):
sql = ""
commit_counter = 0
analyzed_tables = []
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
sql = "SELECT current_setting('block_size')"
cur.execute(sql)
block_size = int(cur.fetchone()[0])
sql_tables = """ SELECT c.oid, c.relkind, c.relname, n.nspname, 'false' as indisprimary, c.reloptions
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
WHERE relkind IN ('r', 'm')
AND c.relpersistence <> 't' """
sql_indexes = """ SELECT c.oid, c.relkind, c.relname, n.nspname, i.indisprimary, c.reloptions
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
JOIN pg_catalog.pg_index i ON c.oid = i.indexrelid
JOIN pg_catalog.pg_am a ON c.relam = a.oid
WHERE c.relkind = 'i'
AND a.amname <> 'gin' AND a.amname <> 'brin' """
cur.execute("SELECT current_setting('server_version_num')::int >= 90300")
if cur.fetchone()[0] == True:
sql_indexes += " AND indislive = 'true' "
if args.tablename != None:
sql_tables += " AND n.nspname||'.'||c.relname = %s "
sql_indexes += " AND i.indrelid::regclass = %s::regclass "
sql_class = sql_tables + """
UNION
""" + sql_indexes
if args.debug:
print("sql_class: " + cur.mogrify(sql_class, [args.tablename, args.tablename] ) )
cur.execute(sql_class, [args.tablename, args.tablename] )
else:
# IN clauses work with python tuples. The lists were converted to tuples in the get_bloat() call
if include_schema_list:
sql_tables += " AND n.nspname IN %s"
sql_indexes += " AND n.nspname IN %s"
filter_list = include_schema_list
elif exclude_schema_list:
sql_tables += " AND n.nspname NOT IN %s"
sql_indexes += " AND n.nspname NOT IN %s"
filter_list = exclude_schema_list
if args.mode == 'tables':
sql_class = sql_tables
elif args.mode == 'indexes':
sql_class = sql_indexes
elif args.mode == "both":
sql_class = sql_tables + """
UNION
""" + sql_indexes
if args.mode == "both":
if args.debug:
print("sql_class: " + str(cur.mogrify(sql_class, (filter_list,filter_list) )) )
cur.execute(sql_class, (filter_list,filter_list))
elif args.mode == "tables" or args.mode == "indexes":
if args.debug:
print("sql_class: " + cur.mogrify(sql_class, (filter_list,) ))
cur.execute(sql_class, (filter_list,) )
else:
cur.execute(sql)
object_list = cur.fetchall()
sql = "TRUNCATE "
if args.bloat_schema:
sql += args.bloat_schema + "."
if args.mode == "tables" or args.mode == "both":
sql_table = sql + "bloat_tables"
cur.execute(sql_table)
if args.mode == "indexes" or args.mode == "both":
sql_index = sql + "bloat_indexes"
cur.execute(sql_index)
conn.commit()
if args.quick:
approximate = True
else:
approximate = False
for o in object_list:
if args.debug:
print(o)
if exclude_object_list and args.tablename == None:
# completely skip object being scanned if it's in the excluded file list with max values equal to zero
match_found = False
for e in exclude_object_list:
if (e['objectname'] == o['nspname'] + "." + o['relname']) and (e['max_wasted'] == 0) and (e['max_perc'] == 0):
match_found = True
if match_found:
continue
if o['relkind'] == "i":
fillfactor = 90.0
else:
fillfactor = 100.0
if o['reloptions'] != None:
reloptions_dict = dict(opt.split('=') for opt in o['reloptions'])
if 'fillfactor' in reloptions_dict:
fillfactor = float(reloptions_dict['fillfactor'])
sql = """ SELECT count(*) FROM pg_catalog.pg_class WHERE oid = %s """
cur.execute(sql, [ o['oid'] ])
exists = cur.fetchone()[0]
if args.debug:
print("Checking for table existance before scanning: " + str(exists))
if exists == 0:
continue # just skip over it. object was dropped since initial list was made
if args.noanalyze != True:
if o['relkind'] == "r" or o['relkind'] == "m":
quoted_table = "\"" + o['nspname'] + "\".\"" + o['relname'] + "\""
else:
# get table that index is a part of
sql = """SELECT n.nspname, c.relname
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_index i ON c.oid = i.indrelid
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
WHERE indexrelid = %s"""
cur.execute(sql, [ o['oid'] ] )
result = cur.fetchone()
quoted_table = "\"" + result[0] + "\".\"" + result[1] + "\""
# maintain a list of analyzed tables so that a table that was already analyzed is not analyzed again (ex. multiple indexes on the same table)
if quoted_table in analyzed_tables:
if args.debug:
print("Table already analyzed. Skipping...")
pass
else:
sql = "ANALYZE " + quoted_table
if args.debug:
print(cur.mogrify(sql))
cur.execute(sql)
analyzed_tables.append(quoted_table)
# end noanalyze check
sql = """ SELECT c.relpages FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
WHERE n.nspname = %s
AND c.relname = %s """
cur.execute(sql, [o['nspname'], o['relname']])
relpages = int(cur.fetchone()[0])
if args.quick:
sql = "SELECT table_len, approx_tuple_count AS tuple_count, approx_tuple_len AS tuple_len, approx_tuple_percent AS tuple_percent, dead_tuple_count, "
sql += "dead_tuple_len, dead_tuple_percent, approx_free_space AS free_space, approx_free_percent AS free_percent FROM "
else:
sql = "SELECT table_len, tuple_count, tuple_len, tuple_percent, dead_tuple_count, dead_tuple_len, dead_tuple_percent, free_space, free_percent FROM "
if args.pgstattuple_schema != None:
sql += " \"" + args.pgstattuple_schema + "\"."
if args.quick:
sql += "pgstattuple_approx(%s::regclass) "
if args.tablename == None:
sql += " WHERE table_len > %s"
sql += " AND ( (dead_tuple_len + approx_free_space) > %s OR (dead_tuple_percent + approx_free_percent) > %s )"
else:
sql += "pgstattuple(%s::regclass) "
if args.tablename == None:
sql += " WHERE table_len > %s"
sql += " AND ( (dead_tuple_len + free_space) > %s OR (dead_tuple_percent + free_percent) > %s )"
if args.tablename == None:
if args.debug:
print("sql: " + str(cur.mogrify(sql, [ o['oid']
, args.min_size
, args.min_wasted_size
, args.min_wasted_percentage])) )
cur.execute(sql, [ o['oid']
, args.min_size
, args.min_wasted_size
, args.min_wasted_percentage ])
else:
if args.debug:
print("sql: " + cur.mogrify(sql, [ o['oid'] ]))
cur.execute(sql, [ o['oid'] ])
stats = cur.fetchall()
if args.debug:
print(stats)
if stats: # completely empty objects will be zero for all stats, so this would be an empty set
# determine byte size of fillfactor pages
ff_relpages_size = (relpages - ( fillfactor/100 * relpages ) ) * block_size
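# this reserved space is subtracted from pgstattuple's reported free space when calculating wasted space, so room intentionally held open by fillfactor is not counted as bloat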
if exclude_object_list and args.tablename == None:
# If object in the exclude list has max values, compare them to see if it should be left out of report
wasted_space = stats[0]['dead_tuple_len'] + (stats[0]['free_space'] - ff_relpages_size)
wasted_perc = stats[0]['dead_tuple_percent'] + (stats[0]['free_percent'] - (100-fillfactor))
for e in exclude_object_list:
if (e['objectname'] == o['nspname'] + "." + o['relname']):
if ( (e['max_wasted'] < wasted_space ) or (e['max_perc'] < wasted_perc ) ):
match_found = False
else:
match_found = True
if match_found:
continue
sql = "INSERT INTO "
if args.bloat_schema != None:
sql += args.bloat_schema + "."
if o['relkind'] == "r" or o['relkind'] == "m":
sql+= "bloat_tables"
if o['relkind'] == "r":
objecttype = "table"
else:
objecttype = "materialized_view"
elif o['relkind'] == "i":
sql+= "bloat_indexes"
if o['indisprimary'] == True:
objecttype = "index_pk"
else:
objecttype = "index"
sql += """ (oid
, schemaname
, objectname
, objecttype
, size_bytes
, live_tuple_count
, live_tuple_percent
, dead_tuple_count
, dead_tuple_size_bytes
, dead_tuple_percent
, free_space_bytes
, free_percent
, approximate
, relpages
, fillfactor)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """
if args.debug:
print("insert sql: " + str(cur.mogrify(sql, [ o['oid']
, o['nspname']
, o['relname']
, objecttype
, stats[0]['table_len']
, stats[0]['tuple_count']
, stats[0]['tuple_percent']
, stats[0]['dead_tuple_count']
, stats[0]['dead_tuple_len']
, stats[0]['dead_tuple_percent']
, stats[0]['free_space']
, stats[0]['free_percent']
, approximate
, relpages
, fillfactor
])) )
cur.execute(sql, [ o['oid']
, o['nspname']
, o['relname']
, objecttype
, stats[0]['table_len']
, stats[0]['tuple_count']
, stats[0]['tuple_percent']
, stats[0]['dead_tuple_count']
, stats[0]['dead_tuple_len']
, stats[0]['dead_tuple_percent']
, stats[0]['free_space']
, stats[0]['free_percent']
, approximate
, relpages
, fillfactor
])
commit_counter += 1
if args.commit_rate > 0 and (commit_counter % args.commit_rate == 0):
if args.debug:
print("Batch committed. Object scanned count: " + str(commit_counter))
conn.commit()
conn.commit()
cur.close()
## end get_bloat()
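# Print the report: one line per entry for the simple format, otherwise dump the whole structure (dict or json string) as-is.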
def print_report(result_list):
if args.format == "simple":
for r in result_list:
print(r)
else:
print(result_list)
def print_version():
print("Version: " + version)
def rebuild_index(conn, index_list):
if index_list == []:
print("Bloat statistics table contains no indexes for conditions given.")
close_conn(conn)
sys.exit(0)
for i in index_list:
temp_index_name = "pgbloatcheck_rebuild_" + str(randint(1000,9999))
quoted_index = "\"" + i['schemaname'] + "\".\"" + i['objectname'] + "\""
# get table index is in
sql = """SELECT n.nspname, c.relname, t.spcname
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_index i ON c.oid = i.indrelid
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
LEFT OUTER JOIN pg_catalog.pg_tablespace t ON c.reltablespace = t.oid
WHERE indexrelid = %s"""
cur.execute(sql, [ i['oid'] ] )
result = cur.fetchone()
quoted_table = "\"" + result[0] + "\".\"" + result[1] + "\""
if result[2] != None:
quoted_tablespace = "\"" + result[2] + "\""
else:
quoted_tablespace = None
# create temp index definition
sql = "SELECT pg_get_indexdef(%s::regclass)"
cur.execute(sql, [ "\"" + i['schemaname'] +"\".\""+ i['objectname'] + "\"" ])
index_def = cur.fetchone()[0]
index_def = re.sub(r' INDEX', ' INDEX CONCURRENTLY', index_def, 1)
index_def = index_def.replace(i['objectname'], temp_index_name, 1)
if quoted_tablespace != None:
index_def += " TABLESPACE " + quoted_tablespace
index_def += ";"
# check if index is clustered
sql = "SELECT indisclustered FROM pg_catalog.pg_index WHERE indexrelid = %s"
cur.execute(sql, [ i['oid'] ])
indisclustered = cur.fetchone()[0]
# start output
print("")
print(index_def)
if indisclustered == True:
print("ALTER TABLE " + quoted_table + " CLUSTER ON " + temp_index_name) + ";"
# analyze table
print("ANALYZE " + quoted_table + ";")
if i['objecttype'] == "index":
# drop old index or unique constraint
sql = "SELECT count(*) FROM pg_catalog.pg_constraint WHERE conindid = %s"
cur.execute(sql, [ i['oid'] ])
isconstraint = int(cur.fetchone()[0])
if isconstraint == 1:
print("ALTER TABLE " + quoted_table + " DROP CONSTRAINT " + "\"" + i['objectname'] + "\";")
else:
print("DROP INDEX CONCURRENTLY " + quoted_index + ";")
# analyze again
print("ANALYZE " + quoted_table + ";")
# rename temp index to original name
print("ALTER INDEX \"" + i['schemaname'] + "\"." + temp_index_name + " RENAME TO \"" + i['objectname'] + "\";")
elif i['objecttype'] == "index_pk":
print("ALTER TABLE " + quoted_table + " DROP CONSTRAINT " + "\"" + i['objectname'] + "\";")
# analyze again
print("ANALYZE " + quoted_table + ";")
print("ALTER TABLE " + quoted_table + " ADD CONSTRAINT " + i['objectname'] + " PRIMARY KEY USING INDEX " + temp_index_name + ";")
# analyze again
print("ANALYZE " + quoted_table + ";")
if indisclustered == True:
print("")
print("-- WARNING: The following statement will exclusively lock the table for the duration of its runtime.")
print("-- Uncomment it or manually run it to recluster the table on the newly created index.")
print("-- CLUSTER " + quoted_table + ";")
print("")
# end rebuild_index
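# Main flow: validate options, optionally create the stats tables, scan for bloat unless --noscan or --rebuild_index is set, then query the stats table and either print the report or emit index rebuild commands.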
if __name__ == "__main__":
if args.version:
print_version()
sys.exit(0)
if args.schema != None and args.exclude_schema != None:
print("--schema and --exclude_schema are exclusive options and cannot be set together")
sys.exit(2)
if args.debug:
print("quiet level: " + str(args.quiet))
conn = create_conn()
if args.recovery_mode_norun == True:
is_in_recovery = check_recovery_status(conn)
if is_in_recovery == True:
if args.debug:
print("Recovery mode check found instance in recovery. Skipping run.")
close_conn(conn)
sys.exit(0)
else:
if args.debug:
print("Recovery mode check found primary instance. Running as normal.")
pgstattuple_version = float(check_pgstattuple(conn))
if args.quick:
if pgstattuple_version < 1.3:
print("--quick option requires pgstattuple version 1.3 or greater (PostgreSQL 9.5)")
close_conn(conn)
sys.exit(2)
if (args.mode == "indexes" or args.mode == "both"):
print("--quick option can only be used with --mode=tables")
close_conn(conn)
sys.exit(2)
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
if args.create_stats_table:
create_stats_table(conn)
close_conn(conn)
sys.exit(0)
sql = "SELECT tablename FROM pg_catalog.pg_tables WHERE tablename = %s"
if args.bloat_schema != None:
sql += " AND schemaname = %s"
cur.execute(sql, ['bloat_stats', args.bloat_schema])
else:
cur.execute(sql, ['bloat_stats'])
table_exists = cur.fetchone()
if table_exists == None:
print("Required statistics table does not exist. Please run --create_stats_table first before running a bloat scan.")
close_conn(conn)
sys.exit(2)
if args.exclude_schema != None:
exclude_schema_list = create_list('csv', args.exclude_schema)
else:
exclude_schema_list = []
exclude_schema_list.append('pg_toast')
if args.schema != None:
include_schema_list = create_list('csv', args.schema)
else:
include_schema_list = []
if args.exclude_object_file != None:
exclude_object_list = create_list('file', args.exclude_object_file)
else:
exclude_object_list = []
if args.noscan == False and args.rebuild_index == False:
get_bloat(conn, tuple(exclude_schema_list), tuple(include_schema_list), exclude_object_list)
# Final commit to ensure transaction that inserted stats data closes
conn.commit()
counter = 1
result_list = []
if args.quiet == None or args.quiet <= 1 or args.debug == True:  # check for None first; --quiet defaults to None with action="count"
simple_cols = """schemaname
, objectname
, objecttype
, CASE
WHEN (dead_tuple_percent + (free_percent - (100-fillfactor))) < 0 THEN 0
ELSE (dead_tuple_percent + (free_percent - (100-fillfactor)))
END AS total_waste_percent
, CASE
WHEN (dead_tuple_size_bytes + (free_space_bytes - (relpages - (fillfactor/100) * relpages ) * current_setting('block_size')::int ) ) < 0 THEN '0 bytes'
ELSE pg_size_pretty((dead_tuple_size_bytes + (free_space_bytes - ((relpages - (fillfactor/100) * relpages ) * current_setting('block_size')::int ) ) )::bigint)
END AS total_wasted_size"""
dict_cols = "oid, schemaname, objectname, objecttype, size_bytes, live_tuple_count, live_tuple_percent, dead_tuple_count, dead_tuple_size_bytes, dead_tuple_percent, free_space_bytes, free_percent, approximate, relpages, fillfactor"
if args.format == "dict" or args.format=="json" or args.format=="jsonpretty" or args.rebuild_index:
# Since "simple" is the default, this check needs to be first so that if args.rebuild_index is set, the proper columns are chosen
sql = "SELECT " + dict_cols + " FROM "
elif args.format == "simple":
sql = "SELECT " + simple_cols + " FROM "
else:
print("Unsupported --format given. Use 'simple', 'dict' 'json', or 'jsonpretty'.")
close_conn(conn)
sys.exit(2)
if args.bloat_schema != None:
sql += args.bloat_schema + "."
if args.mode == "tables":
sql += "bloat_tables"
elif args.mode == "indexes" or args.rebuild_index:
sql += "bloat_indexes"
else:
sql += "bloat_stats"
sql += " WHERE (dead_tuple_size_bytes + (free_space_bytes - (relpages - (fillfactor/100) * relpages ) * current_setting('block_size')::int ) ) > %s "
sql += " AND (dead_tuple_percent + (free_percent - (100-fillfactor))) > %s "
sql += " ORDER BY (dead_tuple_size_bytes + (free_space_bytes - ((relpages - (fillfactor/100) * relpages ) * current_setting('block_size')::int ) )) DESC"
cur.execute(sql, [args.min_wasted_size, args.min_wasted_percentage])
result = cur.fetchall()
# Output rebuild commands instead of the status report
if args.rebuild_index:
rebuild_index(conn, result)
close_conn(conn)
sys.exit(0)
for r in result:
if args.format == "simple":
if r['objecttype'] == 'table':
type_label = 't'
elif r['objecttype'] == 'index':
type_label = 'i'
elif r['objecttype'] == 'index_pk':
type_label = 'p'
else:
print("Unexpected object type encountered in stats table. Please report this bug to author with value found: " + str(r['objecttype']))
sys.exit(2)
justify_space = 100 - len(str(counter) + ". " + r['schemaname'] + "." + r['objectname'] + " (" + type_label + ") " + "(" + str(r['total_waste_percent']) + "%)" + r['total_wasted_size'] + " wasted")
result_list.append(str(counter) + ". " + r['schemaname'] + "." + r['objectname'] + " (" + type_label + ") " + "."*justify_space + "(" + str(r['total_waste_percent']) + "%) " + r['total_wasted_size'] + " wasted")
counter += 1
elif args.format == "dict" or args.format == "json" or args.format == "jsonpretty":
result_dict = dict([ ('oid', r['oid'])
, ('schemaname', r['schemaname'])
, ('objectname', r['objectname'])
, ('objecttype', r['objecttype'])
, ('size_bytes', int(r['size_bytes']))
, ('live_tuple_count', int(r['live_tuple_count']))
, ('live_tuple_percent', str(r['live_tuple_percent'])+"%" )
, ('dead_tuple_count', int(r['dead_tuple_count']))
, ('dead_tuple_size_bytes', int(r['dead_tuple_size_bytes']))
, ('dead_tuple_percent', str(r['dead_tuple_percent'])+"%" )
, ('free_space_bytes', int(r['free_space_bytes']))
, ('free_percent', str(r['free_percent'])+"%" )
, ('approximate', r['approximate'])
])
result_list.append(result_dict)
if args.format == "json":
result_list = json.dumps(result_list)
elif args.format == "jsonpretty":
result_list = json.dumps(result_list, indent=4, separators=(',',': '))
if len(result_list) >= 1:
print_report(result_list)
else:
if args.quiet == 0 or args.quiet == None:
print("No bloat found for given parameters")
close_conn(conn)