import csv
import os
import psycopg2
import psycopg2.extras
import time

from multiprocessing import Process, Queue

from utils.logging import log


class PostgresCollector(object):
    """
    collects basic PostgreSQL-level statistics (bgwriter, databases, tables,
    indexes)
    """

    def __init__(self, dbname):
        self._dbname = dbname

    def start(self):
        self._in_queue = Queue()
        self._out_queue = Queue()
        self._worker = Process(target=run_collector,
                               args=(self._in_queue, self._out_queue,
                                     self._dbname))
        self._worker.start()

    def stop(self):
        # signal the worker process to stop by writing a value into the queue
        self._in_queue.put(True)

        log("stopping the PostgreSQL statistics collector")

        # Wait for the collector to place the result into the output queue.
        # This needs to happen before calling join(), otherwise it causes a
        # deadlock (a process that has put items into a queue will not
        # terminate until everything it buffered has been consumed).
        log("waiting for collector result in a queue")
        self._result = self._out_queue.get()

        # And wait for the worker to terminate. This should be pretty fast,
        # as the collector places the result into the queue right before
        # terminating.
        log("waiting for collector process to terminate")
        self._worker.join()

        self._worker = None
        self._in_queue = None
        self._out_queue = None

    def result(self):
        return self._result


def run_collector(in_queue, out_queue, dbname, interval=1.0):
    """
    collector code for a separate process, communicating through a pair of
    queues
    """

    bgwriter_log = None
    tables_log = None
    indexes_log = None
    database_log = None

    # file handles backing the CSV writers, kept so they can be flushed
    # and closed once the collector terminates
    bgwriter_file = None
    tables_file = None
    indexes_file = None
    database_file = None

    # get current timestamp
    ts = time.time()

    while True:

        # wait until the next tick
        ts += interval

        # if we're behind, skip forward
        if ts < time.time():
            continue

        # sleep, but only for the remaining time (to prevent drift), and
        # never for a negative amount (the deadline may have passed since
        # the check above)
        time.sleep(max(0.0, ts - time.time()))

        # if we've received a message in the input queue (not empty), terminate
        if not in_queue.empty():
            log("PostgreSQL collector received request to terminate")
            break

        # open a connection to the benchmark database (if that fails, retry
        # on the next tick) - notice this is intentionally after the wait,
        # so there's a pause before the next connection attempt
        try:
            conn = psycopg2.connect('host=localhost dbname=%s' % (dbname,))
            cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        except Exception:
            continue

        # background writer stats
        cur.execute('SELECT EXTRACT(EPOCH FROM now()) AS ts, * '
                    'FROM pg_stat_bgwriter')

        # on the first iteration, construct the CSV writer
        if bgwriter_log is None:
            fields = [desc[0] for desc in cur.description]
            bgwriter_file = open('bgwriter.csv', 'w')
            bgwriter_log = csv.DictWriter(bgwriter_file, fields)
            bgwriter_log.writeheader()

        bgwriter_log.writerows(cur.fetchall())

        # TODO we can assume statistics for most objects (tables, indexes)
        # won't change every second, so we could reduce the amount of data
        # by detecting changes and keeping only the rows around each change

        # table statistics
        cur.execute('SELECT EXTRACT(EPOCH FROM now()) AS ts, * '
                    'FROM pg_stat_all_tables JOIN pg_statio_all_tables '
                    'USING (relid, schemaname, relname)')

        # on the first iteration, construct the CSV writer
        if tables_log is None:
            fields = [desc[0] for desc in cur.description]
            tables_file = open('tables.csv', 'w')
            tables_log = csv.DictWriter(tables_file, fields)
            tables_log.writeheader()

        tables_log.writerows(cur.fetchall())

        # index statistics
        cur.execute('SELECT EXTRACT(EPOCH FROM now()) AS ts, * '
                    'FROM pg_stat_all_indexes JOIN pg_statio_all_indexes '
                    'USING (relid, indexrelid, schemaname, relname, '
                    'indexrelname)')

        # on the first iteration, construct the CSV writer
        if indexes_log is None:
            fields = [desc[0] for desc in cur.description]
            indexes_file = open('indexes.csv', 'w')
            indexes_log = csv.DictWriter(indexes_file, fields)
            indexes_log.writeheader()

        indexes_log.writerows(cur.fetchall())

        # database statistics
        cur.execute('SELECT EXTRACT(EPOCH FROM now()) AS ts, * '
                    'FROM pg_stat_database')

        # on the first iteration, construct the CSV writer
        if database_log is None:
            fields = [desc[0] for desc in cur.description]
            database_file = open('database.csv', 'w')
            database_log = csv.DictWriter(database_file, fields)
            database_log.writeheader()

        database_log.writerows(cur.fetchall())

        conn.close()
log("PostgreSQL collector generates CSV results")
# close the CSV writers
bgwriter_log = None
tables_log = None
indexes_log = None
database_log = None
result = {}
for file in ['bgwriter', 'tables', 'indexes', 'database']:
if os.path.isfile(''.join([file, '.csv'])):
with open(''.join([file, '.csv']), 'r') as f:
result.update({file : f.read()})
# remove the files
os.remove(''.join([file, '.csv']))
out_queue.put(result)
log("PostgreSQL collector put results into output queue and terminates")
|
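

# A minimal usage sketch (an illustration, not part of the collector module):
# drive the collector around a workload. The 'postgres' database name and the
# 5-second wait are placeholder assumptions.
if __name__ == '__main__':

    collector = PostgresCollector(dbname='postgres')
    collector.start()

    # the actual benchmark would run here - sleeping merely gives the
    # collector a few one-second samples to record
    time.sleep(5)

    # stop the collector; this drains the output queue and joins the worker
    collector.stop()

    # result() returns a dict mapping 'bgwriter', 'tables', 'indexes' and
    # 'database' to the raw CSV contents collected
    stats = collector.result()
    for name in stats:
        print('%s: %d bytes of CSV' % (name, len(stats[name])))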