@@ -77,6 +77,283 @@ int BgWriterDelay = 200;
static TimestampTz last_snapshot_ts;
static XLogRecPtr last_snapshot_lsn = InvalidXLogRecPtr;

+ /*
+  * Collected buffer usage information.
+  */
+ typedef struct BackendBufferStats
+ {
+     int         backend_id;
+     uint64      usage_sum;
+     double      usage_ratio;
+ } BackendBufferStats;
+
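+ /*
+  * qsort comparator: orders BackendBufferStats by ascending usage_ratio.
+  */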
+ static int
+ compare_backend_usage(const void *a, const void *b)
+ {
+     const BackendBufferStats *stat_a = (const BackendBufferStats *) a;
+     const BackendBufferStats *stat_b = (const BackendBufferStats *) b;
+
+     if (stat_a->usage_ratio < stat_b->usage_ratio)
+         return -1;
+     if (stat_a->usage_ratio > stat_b->usage_ratio)
+         return 1;
+     return 0;
+ }
+
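+ /*
+  * Sum buffer usage counters across all active backends, returning the
+  * total and a palloc'd per-backend stats array (sorted by usage ratio)
+  * through *backend_stats.  The bufferUsageSum counter read here is a
+  * per-backend atomic assumed to be added to PGPROC elsewhere in this
+  * patch.
+  */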
+ static uint64
+ CalculateSystemBufferPressure(BackendBufferStats **backend_stats, int *num_backends)
+ {
+     uint64      total_usage = 0;
+     int         active_backends = 0;
+     int         j;
+     BackendBufferStats *stats;
+
+     /* Count active backends first */
+     for (int i = 0; i < ProcGlobal->allProcCount; i++)
+     {
+         PGPROC     *proc = &ProcGlobal->allProcs[i];
+
+         if (proc->pid != 0 && proc->databaseId != InvalidOid)
+             active_backends++;
+     }
+
+     if (active_backends == 0)
+     {
+         *backend_stats = NULL;
+         *num_backends = 0;
+         return 0;
+     }
+
+     /* Allocate stats array */
+     stats = palloc(sizeof(BackendBufferStats) * active_backends);
+     *backend_stats = stats;
+
+     /*
+      * Collect stats from all active backends.  The set of backends can
+      * change between the two passes, so bound the writes by the allocated
+      * size and report the number of entries actually collected.
+      */
+     j = 0;
+     for (int i = 0; i < ProcGlobal->allProcCount && j < active_backends; i++)
+     {
+         PGPROC     *proc = &ProcGlobal->allProcs[i];
+
+         if (proc->pid != 0 && proc->databaseId != InvalidOid)
+         {
+             uint64      usage_sum = pg_atomic_read_u32(&proc->bufferUsageSum);
+
+             stats[j].backend_id = i;
+             stats[j].usage_sum = usage_sum;
+             stats[j].usage_ratio = (double) usage_sum / NBuffers;
+             total_usage += usage_sum;
+             j++;
+         }
+     }
+     *num_backends = j;
+
+     /* Sort by usage ratio for percentile calculation */
+     qsort(stats, j, sizeof(BackendBufferStats),
+           compare_backend_usage);
+
+     return total_usage;
+ }
+
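+ /*
+  * Extract the backend IDs at or above the 90th percentile of usage
+  * ratio into a palloc'd array.  Assumes stats is sorted ascending.
+  */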
+ static void
+ GetHighUsageBackends(BackendBufferStats *stats, int num_backends,
+                      int **high_usage_backends, int *num_high_usage)
+ {
+     int         percentile_90_idx = (int) (num_backends * 0.9);
+
+     *num_high_usage = num_backends - percentile_90_idx;
+
+     if (*num_high_usage > 0)
+     {
+         *high_usage_backends = palloc(sizeof(int) * (*num_high_usage));
+         for (int i = 0; i < *num_high_usage; i++)
+             (*high_usage_backends)[i] = stats[percentile_90_idx + i].backend_id;
+     }
+     else
+     {
+         *high_usage_backends = NULL;
+         *num_high_usage = 0;
+     }
+ }
+
+ /*
+  * Shared buffer-sync helper used by both the main loop and the
+  * aggressive-writing path.
+  */
+ static int
+ SyncTargetedBuffers(WritebackContext *wb_context, int *target_backends,
+                     int num_targets, int max_buffers)
+ {
+     int         buffers_written = 0;
+     int         buffer_id;
+     BufferDesc *bufHdr;
+     uint32      buf_state;
+
+     /*
+      * If no specific targets, fall back to a normal sync cycle.
+      * BgBufferSync() returns a hibernate hint rather than a buffer count,
+      * so report the target as met to keep callers from re-running it.
+      */
+     if (target_backends == NULL || num_targets == 0)
+     {
+         (void) BgBufferSync(wb_context);
+         return max_buffers;
+     }
+
+     /* Scan through all buffers looking for dirty ones from target backends */
+     for (buffer_id = 0; buffer_id < NBuffers && buffers_written < max_buffers; buffer_id++)
+     {
+         uint32      dirty_backend;
+         bool        is_target;
+
+         bufHdr = GetBufferDescriptor(buffer_id);
+
+         /* Quick check if buffer is dirty */
+         buf_state = pg_atomic_read_u32(&bufHdr->state);
+         if (!(buf_state & BM_DIRTY))
+             continue;
+
+         /* Check if this buffer is from one of our target backends */
+         dirty_backend = pg_atomic_read_u32(&bufHdr->dirty_backend_id);
+         is_target = false;
+
+         for (int i = 0; i < num_targets; i++)
+         {
+             if (dirty_backend == (uint32) target_backends[i])
+             {
+                 is_target = true;
+                 break;
+             }
+         }
+
+         if (!is_target)
+             continue;
+
+         /* Skip if buffer is pinned */
+         if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
+             continue;
+
+         /* Queue this buffer for OS writeback via the writeback context */
+         ScheduleBufferTagForWriteback(wb_context,
+                                       IOContextForStrategy(NULL),
+                                       &bufHdr->tag);
+         buffers_written++;
+     }
+
+     /* Flush the accumulated writeback requests */
+     if (buffers_written > 0)
+         IssuePendingWritebacks(wb_context, IOContextForStrategy(NULL));
+
+     return buffers_written;
+ }
+
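+ /*
+  * Under buffer pressure, push out dirty buffers more aggressively,
+  * prioritizing buffers dirtied by high-usage backends.
+  */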
+ static void
+ AggressiveBufferWrite(WritebackContext *wb_context, int *high_usage_backends,
+                       int num_high_usage, bool critical)
+ {
+     int         write_target = critical ? bgwriter_lru_maxpages * 3 : bgwriter_lru_maxpages * 2;
+     int         buffers_written = 0;
+
+     /* Focus on buffers from high-usage backends first */
+     buffers_written = SyncTargetedBuffers(wb_context, high_usage_backends,
+                                           num_high_usage, write_target);
+
+     /* If still under target, write additional dirty buffers */
+     if (buffers_written < write_target)
+         (void) BgBufferSync(wb_context);
+ }
+
+ /*
+  * Update each backend's personalized decay rate from system-wide
+  * pressure, its usage relative to the average, and its clock-sweep
+  * search performance.
+  */
+ static void
+ UpdateBackendDecayRates(BackendBufferStats *backend_stats, int num_backends,
+                         double pressure_ratio, int *high_usage_backends,
+                         int num_high_usage)
+ {
+     uint32      base_decay_rate;
+     uint64      total_usage = 0;
+     uint64      avg_usage;
+     int         i,
+                 j;
+
+     /* Calculate base decay rate from system pressure */
+     if (pressure_ratio > 0.90)
+         base_decay_rate = 3;    /* critical pressure - aggressive decay */
+     else if (pressure_ratio > 0.75)
+         base_decay_rate = 2;    /* high pressure */
+     else
+         base_decay_rate = 1;    /* normal decay rate */
+
+     /* Calculate total usage for relative comparisons */
+     for (i = 0; i < num_backends; i++)
+         total_usage += backend_stats[i].usage_sum;
+     avg_usage = num_backends > 0 ? total_usage / num_backends : 0;
+
+     if (base_decay_rate > 1)
+         elog(DEBUG2, "Buffer pressure: %.2f%%, base decay rate: %u, avg usage: " UINT64_FORMAT,
+              pressure_ratio * 100, base_decay_rate, avg_usage);
+
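+     /*
+      * Decay rates are clamped to the range [1, 4]; each clock-sweep
+      * heuristic below can bump a backend one step toward the aggressive
+      * end of that range.
+      */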
+     /* Update each backend's personalized decay rate */
+     for (i = 0; i < ProcGlobal->allProcCount; i++)
+     {
+         PGPROC     *proc = &ProcGlobal->allProcs[i];
+
+         /* Only update active user backends */
+         if (proc->pid != 0 && proc->databaseId != InvalidOid)
+         {
+             uint32      backend_usage = pg_atomic_read_u32(&proc->bufferUsageSum);
+             uint32      personalized_rate = base_decay_rate;
+             BackendBufferStats *backend_stat = NULL;
+
+             /* Find this backend in the stats array */
+             for (j = 0; j < num_backends; j++)
+             {
+                 if (backend_stats[j].backend_id == i)
+                 {
+                     backend_stat = &backend_stats[j];
+                     break;
+                 }
+             }
+
+             /*
+              * Calculate personalized decay rate based on usage and
+              * clock-sweep performance.
+              */
+             if (backend_stat != NULL && avg_usage > 0)
+             {
+                 double      usage_ratio = (double) backend_usage / avg_usage;
+
+                 /* Get clock-sweep performance metrics */
+                 uint32      search_count = pg_atomic_read_u32(&proc->bufferSearchCount);
+                 uint64      total_distance = pg_atomic_read_u64(&proc->clockSweepDistance);
+                 uint32      total_passes = pg_atomic_read_u32(&proc->clockSweepPasses);
+                 uint64      total_time = pg_atomic_read_u64(&proc->clockSweepTimeMicros);
+
+                 /* Calculate average search metrics */
+                 double      avg_distance = search_count > 0 ? (double) total_distance / search_count : 0;
+                 double      avg_passes = search_count > 0 ? (double) total_passes / search_count : 0;
+                 double      avg_time = search_count > 0 ? (double) total_time / search_count : 0;
+
+                 /* Adjust decay rate based on usage relative to average */
+                 if (usage_ratio > 2.0)
+                     personalized_rate = Min(4, base_decay_rate + 2);    /* high usage: more aggressive */
+                 else if (usage_ratio > 1.5)
+                     personalized_rate = Min(4, base_decay_rate + 1);
+                 else if (usage_ratio < 0.5)
+                     personalized_rate = base_decay_rate > 1 ? base_decay_rate - 1 : 1;  /* low usage: gentler */
+
+                 /* Further adjust based on clock-sweep performance */
+                 if (avg_distance > NBuffers * 0.5)
+                     personalized_rate = Min(4, personalized_rate + 1);  /* searching over half the pool */
+                 if (avg_passes > 1.0)
+                     personalized_rate = Min(4, personalized_rate + 1);  /* multiple complete passes */
+                 if (avg_time > 1000.0)
+                     personalized_rate = Min(4, personalized_rate + 1);  /* more than 1ms per search */
+
+                 elog(DEBUG2, "Backend %d: usage_ratio=%.2f, avg_distance=%.1f, avg_passes=%.2f, "
+                      "avg_time=%.1fus, decay_rate=%u",
+                      i, usage_ratio, avg_distance, avg_passes, avg_time, personalized_rate);
+             }
+
+             /* Update the backend's decay rate */
+             pg_atomic_write_u32(&proc->bufferDecayRate, personalized_rate);
+         }
+     }
+ }

/*
 * Main entry point for bgwriter process
@@ -222,6 +499,15 @@ BackgroundWriterMain(const void *startup_data, size_t startup_data_len)
     */
    for (;;)
    {
+         BackendBufferStats *backend_stats = NULL;
+         int         num_backends = 0;
+         int        *high_usage_backends = NULL;
+         int         num_high_usage = 0;
+         uint64      max_possible;
+         uint64      total_usage;
+         double      pressure_ratio;
+         bool        high_pressure;
+         bool        critical_pressure;
        bool        can_hibernate;
        int         rc;

@@ -230,6 +516,35 @@ BackgroundWriterMain(const void *startup_data, size_t startup_data_len)

        ProcessMainLoopInterrupts();

+         /* Calculate current buffer pressure */
+         total_usage = CalculateSystemBufferPressure(&backend_stats, &num_backends);
+         max_possible = (uint64) NBuffers * BM_MAX_USAGE_COUNT;
+         total_usage = Min(total_usage, max_possible);
+         pressure_ratio = (double) total_usage / max_possible;
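+         /*
+          * pressure_ratio is the fraction of the maximum possible usage-count
+          * mass (every buffer at BM_MAX_USAGE_COUNT); 1.0 means the whole
+          * pool is maximally hot.
+          */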
+
+         /* Get high-usage backends (90th percentile) */
+         if (backend_stats != NULL)
+             GetHighUsageBackends(backend_stats, num_backends,
+                                  &high_usage_backends, &num_high_usage);
+
+         /* Update per-backend decay rates based on current pressure */
+         UpdateBackendDecayRates(backend_stats, num_backends, pressure_ratio,
+                                 high_usage_backends, num_high_usage);
+
+         /* Determine if proactive action is needed */
+         high_pressure = pressure_ratio > 0.75;  /* 75% threshold */
+         critical_pressure = pressure_ratio > 0.90;  /* 90% threshold */
+
+         if (high_pressure)
+         {
+             elog(LOG, "%s buffer pressure detected: %.2f%% (%d high-usage backends)",
+                  critical_pressure ? "Critical" : "High",
+                  pressure_ratio * 100, num_high_usage);
+
+             /* Aggressive writing of dirty buffers */
+             AggressiveBufferWrite(&wb_context, high_usage_backends,
+                                   num_high_usage, critical_pressure);
+         }
+

        /*
         * Do one cycle of dirty-buffer writing.
         */
@@ -294,6 +609,11 @@ BackgroundWriterMain(const void *startup_data, size_t startup_data_len)
            }
        }

+         if (backend_stats != NULL)
+             pfree(backend_stats);
+         if (high_usage_backends != NULL)
+             pfree(high_usage_backends);
+
        /*
         * Sleep until we are signaled or BgWriterDelay has elapsed.
         *