@@ -131,13 +131,21 @@ typedef struct ReorderBufferTupleCidEnt
131
131
CommandId combocid ; /* just for debugging */
132
132
} ReorderBufferTupleCidEnt ;
133
133
134
+ /* Virtual file descriptor paired with the file offset to use for the next access */
135
+ typedef struct TXNEntryFile
136
+ {
137
+ File vfd ; /* virtual file descriptor; -1 when the file is closed */
138
+ off_t curOffset ; /* offset for the next write or read; reset to 0
139
+ * whenever vfd is opened */
140
+ } TXNEntryFile ;
141
+
134
142
/* k-way in-order change iteration support structures */
135
143
/* One element of the iterator state's entries[] array */
typedef struct ReorderBufferIterTXNEntry
136
144
{
137
145
XLogRecPtr lsn ;
138
146
ReorderBufferChange * change ;
139
147
ReorderBufferTXN * txn ; /* transaction passed to ReorderBufferRestoreChanges for this entry */
140
- int fd ;
148
+ TXNEntryFile file ; /* file and read offset used while restoring changes from disk; vfd starts at -1 */
141
149
XLogSegNo segno ; /* segment number of the on-disk file being read; starts at 0, advanced at EOF */
142
150
} ReorderBufferIterTXNEntry ;
143
151
@@ -207,7 +215,8 @@ static void AssertTXNLsnOrder(ReorderBuffer *rb);
207
215
* subtransactions
208
216
* ---------------------------------------
209
217
*/
210
- static ReorderBufferIterTXNState * ReorderBufferIterTXNInit (ReorderBuffer * rb , ReorderBufferTXN * txn );
218
+ static void ReorderBufferIterTXNInit (ReorderBuffer * rb , ReorderBufferTXN * txn ,
219
+ ReorderBufferIterTXNState * volatile * iter_state );
211
220
static ReorderBufferChange * ReorderBufferIterTXNNext (ReorderBuffer * rb , ReorderBufferIterTXNState * state );
212
221
static void ReorderBufferIterTXNFinish (ReorderBuffer * rb ,
213
222
ReorderBufferIterTXNState * state );
@@ -223,7 +232,7 @@ static void ReorderBufferSerializeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn);
223
232
static void ReorderBufferSerializeChange (ReorderBuffer * rb , ReorderBufferTXN * txn ,
224
233
int fd , ReorderBufferChange * change );
225
234
static Size ReorderBufferRestoreChanges (ReorderBuffer * rb , ReorderBufferTXN * txn ,
226
- int * fd , XLogSegNo * segno );
235
+ TXNEntryFile * file , XLogSegNo * segno );
227
236
static void ReorderBufferRestoreChange (ReorderBuffer * rb , ReorderBufferTXN * txn ,
228
237
char * change );
229
238
static void ReorderBufferRestoreCleanup (ReorderBuffer * rb , ReorderBufferTXN * txn );
@@ -996,15 +1005,23 @@ ReorderBufferIterCompare(Datum a, Datum b, void *arg)
996
1005
/*
997
1006
* Allocate & initialize an iterator which iterates in lsn order over a
998
1007
* transaction and all its subtransactions.
1008
+ *
1009
+ * Note: The iterator state is returned through the iter_state parameter rather
1010
+ * than the function's return value. This is because the state gets cleaned up
1011
+ * in a PG_CATCH block in the caller, so we want to make sure the caller gets
1012
+ * back the state even if this function throws an exception.
999
1013
*/
1000
- static ReorderBufferIterTXNState *
1001
- ReorderBufferIterTXNInit (ReorderBuffer * rb , ReorderBufferTXN * txn )
1014
+ static void
1015
+ ReorderBufferIterTXNInit (ReorderBuffer * rb , ReorderBufferTXN * txn ,
1016
+ ReorderBufferIterTXNState * volatile * iter_state )
1002
1017
{
1003
1018
Size nr_txns = 0 ;
1004
1019
ReorderBufferIterTXNState * state ;
1005
1020
dlist_iter cur_txn_i ;
1006
1021
int32 off ;
1007
1022
1023
+ * iter_state = NULL ;
1024
+
1008
1025
/*
1009
1026
* Calculate the size of our heap: one element for every transaction that
1010
1027
* contains changes. (Besides the transactions already in the reorder
@@ -1039,7 +1056,7 @@ ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn)
1039
1056
1040
1057
for (off = 0 ; off < state -> nr_txns ; off ++ )
1041
1058
{
1042
- state -> entries [off ].fd = -1 ;
1059
+ state -> entries [off ].file . vfd = -1 ;
1043
1060
state -> entries [off ].segno = 0 ;
1044
1061
}
1045
1062
@@ -1048,6 +1065,9 @@ ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn)
1048
1065
ReorderBufferIterCompare ,
1049
1066
state );
1050
1067
1068
+ /* Now that the state fields are initialized, it is safe to return it. */
1069
+ * iter_state = state ;
1070
+
1051
1071
/*
1052
1072
* Now insert items into the binary heap, in an unordered fashion. (We
1053
1073
* will run a heap assembly step at the end; this is more efficient.)
@@ -1064,7 +1084,7 @@ ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn)
1064
1084
{
1065
1085
/* serialize remaining changes */
1066
1086
ReorderBufferSerializeTXN (rb , txn );
1067
- ReorderBufferRestoreChanges (rb , txn , & state -> entries [off ].fd ,
1087
+ ReorderBufferRestoreChanges (rb , txn , & state -> entries [off ].file ,
1068
1088
& state -> entries [off ].segno );
1069
1089
}
1070
1090
@@ -1094,7 +1114,7 @@ ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn)
1094
1114
/* serialize remaining changes */
1095
1115
ReorderBufferSerializeTXN (rb , cur_txn );
1096
1116
ReorderBufferRestoreChanges (rb , cur_txn ,
1097
- & state -> entries [off ].fd ,
1117
+ & state -> entries [off ].file ,
1098
1118
& state -> entries [off ].segno );
1099
1119
}
1100
1120
cur_change = dlist_head_element (ReorderBufferChange , node ,
@@ -1110,8 +1130,6 @@ ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn)
1110
1130
1111
1131
/* assemble a valid binary heap */
1112
1132
binaryheap_build (state -> heap );
1113
-
1114
- return state ;
1115
1133
}
1116
1134
1117
1135
/*
@@ -1175,7 +1193,7 @@ ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
1175
1193
dlist_delete (& change -> node );
1176
1194
dlist_push_tail (& state -> old_change , & change -> node );
1177
1195
1178
- if (ReorderBufferRestoreChanges (rb , entry -> txn , & entry -> fd ,
1196
+ if (ReorderBufferRestoreChanges (rb , entry -> txn , & entry -> file ,
1179
1197
& state -> entries [off ].segno ))
1180
1198
{
1181
1199
/* successfully restored changes from disk */
@@ -1214,8 +1232,8 @@ ReorderBufferIterTXNFinish(ReorderBuffer *rb,
1214
1232
1215
1233
for (off = 0 ; off < state -> nr_txns ; off ++ )
1216
1234
{
1217
- if (state -> entries [off ].fd != -1 )
1218
- CloseTransientFile (state -> entries [off ].fd );
1235
+ if (state -> entries [off ].file . vfd != -1 )
1236
+ FileClose (state -> entries [off ].file . vfd );
1219
1237
}
1220
1238
1221
1239
/* free memory we might have "leaked" in the last *Next call */
@@ -1558,7 +1576,7 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
1558
1576
1559
1577
rb -> begin (rb , txn );
1560
1578
1561
- iterstate = ReorderBufferIterTXNInit (rb , txn );
1579
+ ReorderBufferIterTXNInit (rb , txn , & iterstate );
1562
1580
while ((change = ReorderBufferIterTXNNext (rb , iterstate )) != NULL )
1563
1581
{
1564
1582
Relation relation = NULL ;
@@ -2765,11 +2783,12 @@ ReorderBufferChangeSize(ReorderBufferChange *change)
2765
2783
*/
2766
2784
static Size
2767
2785
ReorderBufferRestoreChanges (ReorderBuffer * rb , ReorderBufferTXN * txn ,
2768
- int * fd , XLogSegNo * segno )
2786
+ TXNEntryFile * file , XLogSegNo * segno )
2769
2787
{
2770
2788
Size restored = 0 ;
2771
2789
XLogSegNo last_segno ;
2772
2790
dlist_mutable_iter cleanup_iter ;
2791
+ File * fd = & file -> vfd ;
2773
2792
2774
2793
Assert (txn -> first_lsn != InvalidXLogRecPtr );
2775
2794
Assert (txn -> final_lsn != InvalidXLogRecPtr );
@@ -2810,7 +2829,11 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
2810
2829
ReorderBufferSerializedPath (path , MyReplicationSlot , txn -> xid ,
2811
2830
* segno );
2812
2831
2813
- * fd = OpenTransientFile (path , O_RDONLY | PG_BINARY );
2832
+ * fd = PathNameOpenFile (path , O_RDONLY | PG_BINARY );
2833
+
2834
+ /* No harm in resetting the offset even in case of failure */
2835
+ file -> curOffset = 0 ;
2836
+
2814
2837
if (* fd < 0 && errno == ENOENT )
2815
2838
{
2816
2839
* fd = -1 ;
@@ -2830,14 +2853,14 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
2830
2853
* end of this file.
2831
2854
*/
2832
2855
ReorderBufferSerializeReserve (rb , sizeof (ReorderBufferDiskChange ));
2833
- pgstat_report_wait_start ( WAIT_EVENT_REORDER_BUFFER_READ );
2834
- readBytes = read ( * fd , rb -> outbuf , sizeof (ReorderBufferDiskChange ));
2835
- pgstat_report_wait_end ( );
2856
+ readBytes = FileRead ( file -> vfd , rb -> outbuf ,
2857
+ sizeof (ReorderBufferDiskChange ),
2858
+ file -> curOffset , WAIT_EVENT_REORDER_BUFFER_READ );
2836
2859
2837
2860
/* eof */
2838
2861
if (readBytes == 0 )
2839
2862
{
2840
- CloseTransientFile (* fd );
2863
+ FileClose (* fd );
2841
2864
* fd = -1 ;
2842
2865
(* segno )++ ;
2843
2866
continue ;
@@ -2853,16 +2876,19 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
2853
2876
readBytes ,
2854
2877
(uint32 ) sizeof (ReorderBufferDiskChange ))));
2855
2878
2879
+ file -> curOffset += readBytes ;
2880
+
2856
2881
ondisk = (ReorderBufferDiskChange * ) rb -> outbuf ;
2857
2882
2858
2883
ReorderBufferSerializeReserve (rb ,
2859
2884
sizeof (ReorderBufferDiskChange ) + ondisk -> size );
2860
2885
ondisk = (ReorderBufferDiskChange * ) rb -> outbuf ;
2861
2886
2862
- pgstat_report_wait_start (WAIT_EVENT_REORDER_BUFFER_READ );
2863
- readBytes = read (* fd , rb -> outbuf + sizeof (ReorderBufferDiskChange ),
2864
- ondisk -> size - sizeof (ReorderBufferDiskChange ));
2865
- pgstat_report_wait_end ();
2887
+ readBytes = FileRead (file -> vfd ,
2888
+ rb -> outbuf + sizeof (ReorderBufferDiskChange ),
2889
+ ondisk -> size - sizeof (ReorderBufferDiskChange ),
2890
+ file -> curOffset ,
2891
+ WAIT_EVENT_REORDER_BUFFER_READ );
2866
2892
2867
2893
if (readBytes < 0 )
2868
2894
ereport (ERROR ,
@@ -2875,6 +2901,8 @@ ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn,
2875
2901
readBytes ,
2876
2902
(uint32 ) (ondisk -> size - sizeof (ReorderBufferDiskChange )))));
2877
2903
2904
+ file -> curOffset += readBytes ;
2905
+
2878
2906
/*
2879
2907
* ok, read a full change from disk, now restore it into proper
2880
2908
* in-memory format
0 commit comments