1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
|
/*-------------------------------------------------------------------------
*
* execRemote.h
*
* Functions to execute commands on multiple Datanodes
*
*
* Portions Copyright (c) 2012-2014, TransLattice, Inc.
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
* Portions Copyright (c) 2010-2012 Postgres-XC Development Group
*
* src/include/pgxc/execRemote.h
*
*-------------------------------------------------------------------------
*/
#ifndef EXECREMOTE_H
#define EXECREMOTE_H
#include "locator.h"
#include "nodes/nodes.h"
#include "pgxcnode.h"
#include "planner.h"
#ifdef XCP
#include "squeue.h"
#include "remotecopy.h"
#endif
#include "access/tupdesc.h"
#include "executor/tuptable.h"
#include "nodes/execnodes.h"
#include "nodes/pg_list.h"
#include "tcop/dest.h"
#include "tcop/pquery.h"
#include "utils/snapshot.h"
/* Outputs of handle_response() */
#define RESPONSE_EOF EOF
#define RESPONSE_COMPLETE 0
#define RESPONSE_SUSPENDED 1
#define RESPONSE_TUPDESC 2
#define RESPONSE_DATAROW 3
#define RESPONSE_COPY 4
#define RESPONSE_BARRIER_OK 5
#ifdef XCP
#define RESPONSE_ERROR 6
#define RESPONSE_READY 10
#define RESPONSE_WAITXIDS 11
#define RESPONSE_ASSIGN_GXID 12
#endif
typedef enum
{
REQUEST_TYPE_NOT_DEFINED, /* not determined yet */
REQUEST_TYPE_COMMAND, /* OK or row count response */
REQUEST_TYPE_QUERY, /* Row description response */
REQUEST_TYPE_COPY_IN, /* Copy In response */
REQUEST_TYPE_COPY_OUT, /* Copy Out response */
REQUEST_TYPE_ERROR /* Error, ignore responses */
} RequestType;
/*
* Type of requests associated to a remote COPY OUT
*/
typedef enum
{
REMOTE_COPY_NONE, /* Not defined yet */
REMOTE_COPY_STDOUT, /* Send back to client */
REMOTE_COPY_FILE, /* Write in file */
REMOTE_COPY_TUPLESTORE /* Store data in tuplestore */
} RemoteCopyType;
/* Combines results of INSERT statements using multiple values */
typedef struct CombineTag
{
CmdType cmdType; /* DML command type */
char data[COMPLETION_TAG_BUFSIZE]; /* execution result combination data */
} CombineTag;
/*
* Common part for all plan state nodes needed to access remote datanodes
* ResponseCombiner must be the first field of the plan state node so we can
* typecast
*/
typedef struct ResponseCombiner
{
ScanState ss; /* its first field is NodeTag */
int node_count; /* total count of participating nodes */
PGXCNodeHandle **connections; /* Datanode connections being combined */
int conn_count; /* count of active connections */
int current_conn; /* used to balance load when reading from connections */
long current_conn_rows_consumed;
CombineType combine_type; /* see CombineType enum */
int command_complete_count; /* count of received CommandComplete messages */
RequestType request_type; /* see RequestType enum */
TupleDesc tuple_desc; /* tuple descriptor to be referenced by emitted tuples */
int description_count; /* count of received RowDescription messages */
int copy_in_count; /* count of received CopyIn messages */
int copy_out_count; /* count of received CopyOut messages */
FILE *copy_file; /* used if copy_dest == COPY_FILE */
uint64 processed; /* count of data rows handled */
char errorCode[5]; /* error code to send back to client */
char *errorMessage; /* error message to send back to client */
char *errorDetail; /* error detail to send back to client */
char *errorHint; /* error hint to send back to client */
Oid returning_node; /* returning replicated node */
RemoteDataRow currentRow; /* next data ro to be wrapped into a tuple */
/* TODO use a tuplestore as a rowbuffer */
List *rowBuffer; /* buffer where rows are stored when connection
* should be cleaned for reuse by other RemoteQuery */
/*
* To handle special case - if there is a simple sort and sort connection
* is buffered. If EOF is reached on a connection it should be removed from
* the array, but we need to know node number of the connection to find
* messages in the buffer. So we store nodenum to that array if reach EOF
* when buffering
*/
Oid *tapenodes;
/*
* If some tape (connection) is buffered, contains a reference on the cell
* right before first row buffered from this tape, needed to speed up
* access to the data
*/
ListCell **tapemarks;
bool merge_sort; /* perform mergesort of node tuples */
bool extended_query; /* running extended query protocol */
bool probing_primary; /* trying replicated on primary node */
void *tuplesortstate; /* for merge sort */
/* COPY support */
RemoteCopyType remoteCopyType;
Tuplestorestate *tuplestorestate;
/* cursor support */
char *cursor; /* cursor name */
char *update_cursor; /* throw this cursor current tuple can be updated */
int cursor_count; /* total count of participating nodes */
PGXCNodeHandle **cursor_connections;/* data node connections being combined */
} ResponseCombiner;
typedef struct RemoteQueryState
{
ResponseCombiner combiner; /* see ResponseCombiner struct */
bool query_Done; /* query has been sent down to Datanodes */
/*
* While we are not supporting grouping use this flag to indicate we need
* to initialize collecting of aggregates from the DNs
*/
bool initAggregates;
/* Simple DISTINCT support */
FmgrInfo *eqfunctions; /* functions to compare tuples */
MemoryContext tmp_ctx; /* separate context is needed to compare tuples */
/* Support for parameters */
char *paramval_data; /* parameter data, format is like in BIND */
int paramval_len; /* length of parameter values data */
Oid *rqs_param_types; /* Types of the remote params */
int rqs_num_params;
int eflags; /* capability flags to pass to tuplestore */
bool eof_underlying; /* reached end of underlying plan? */
} RemoteQueryState;
typedef struct RemoteParam
{
ParamKind paramkind; /* kind of parameter */
int paramid; /* numeric ID for parameter */
Oid paramtype; /* pg_type OID of parameter's datatype */
int paramused; /* is param used */
} RemoteParam;
/*
* Execution state of a RemoteSubplan node
*/
typedef struct RemoteSubplanState
{
ResponseCombiner combiner; /* see ResponseCombiner struct */
char *subplanstr; /* subplan encoded as a string */
bool bound; /* subplan is sent down to the nodes */
bool local_exec; /* execute subplan on this datanode */
Locator *locator; /* determine destination of tuples of
* locally executed plan */
int *dest_nodes; /* allocate once */
List *execNodes; /* where to execute subplan */
/* should query be executed on all (true) or any (false) node specified
* in the execNodes list */
bool execOnAll;
int nParamRemote; /* number of params sent from the master node */
RemoteParam *remoteparams; /* parameter descriptors */
} RemoteSubplanState;
/*
* Data needed to set up a PreparedStatement on the remote node and other data
* for the remote executor
*/
typedef struct RemoteStmt
{
NodeTag type;
CmdType commandType; /* select|insert|update|delete */
bool hasReturning; /* is it insert|update|delete RETURNING? */
struct Plan *planTree; /* tree of Plan nodes */
List *rtable; /* list of RangeTblEntry nodes */
/* rtable indexes of target relations for INSERT/UPDATE/DELETE */
List *resultRelations; /* integer list of RT indexes, or NIL */
List *subplans; /* Plan trees for SubPlan expressions */
int nParamExec; /* number of PARAM_EXEC Params used */
int nParamRemote; /* number of params sent from the master node */
RemoteParam *remoteparams; /* parameter descriptors */
List *rowMarks;
char distributionType;
AttrNumber distributionKey;
List *distributionNodes;
List *distributionRestrict;
} RemoteStmt;
extern int PGXLRemoteFetchSize;
typedef void (*xact_callback) (bool isCommit, void *args);
/* Copy command just involves Datanodes */
extern void DataNodeCopyBegin(RemoteCopyData *rcstate);
extern int DataNodeCopyIn(char *data_row, int len, int conn_count,
PGXCNodeHandle** copy_connections,
bool binary);
extern uint64 DataNodeCopyOut(PGXCNodeHandle** copy_connections,
int conn_count, FILE* copy_file);
extern uint64 DataNodeCopyStore(PGXCNodeHandle** copy_connections,
int conn_count, Tuplestorestate* store);
extern void DataNodeCopyFinish(int conn_count, PGXCNodeHandle** connections);
extern int DataNodeCopyInBinaryForAll(char *msg_buf, int len, int conn_count,
PGXCNodeHandle** connections);
extern bool DataNodeCopyEnd(PGXCNodeHandle *handle, bool is_error);
extern RemoteQueryState *ExecInitRemoteQuery(RemoteQuery *node, EState *estate, int eflags);
extern TupleTableSlot* ExecRemoteQuery(PlanState *pstate);
extern void ExecReScanRemoteQuery(RemoteQueryState *node);
extern void ExecEndRemoteQuery(RemoteQueryState *step);
extern void RemoteSubplanMakeUnique(Node *plan, int unique);
extern RemoteSubplanState *ExecInitRemoteSubplan(RemoteSubplan *node, EState *estate, int eflags);
extern void ExecFinishInitRemoteSubplan(RemoteSubplanState *node);
extern TupleTableSlot* ExecRemoteSubplan(PlanState *pstate);
extern void ExecEndRemoteSubplan(RemoteSubplanState *node);
extern void ExecReScanRemoteSubplan(RemoteSubplanState *node);
extern void ExecRemoteUtility(RemoteQuery *node);
extern bool is_data_node_ready(PGXCNodeHandle * conn);
extern int handle_response(PGXCNodeHandle *conn, ResponseCombiner *combiner);
extern void HandleCmdComplete(CmdType commandType, CombineTag *combine, const char *msg_body,
size_t len);
#define CHECK_OWNERSHIP(conn, node) \
do { \
if ((conn)->state == DN_CONNECTION_STATE_QUERY && \
(conn)->combiner && \
(conn)->combiner != (ResponseCombiner *) (node)) \
BufferConnection(conn); \
(conn)->combiner = (ResponseCombiner *) (node); \
} while(0)
extern TupleTableSlot *FetchTuple(ResponseCombiner *combiner);
extern void InitResponseCombiner(ResponseCombiner *combiner, int node_count,
CombineType combine_type);
extern void CloseCombiner(ResponseCombiner *combiner);
extern void BufferConnection(PGXCNodeHandle *conn);
extern void ExecRemoteQueryReScan(RemoteQueryState *node, ExprContext *exprCtxt);
extern void SetDataRowForExtParams(ParamListInfo params, RemoteQueryState *rq_state);
extern void ExecCloseRemoteStatement(const char *stmt_name, List *nodelist);
extern char *PrePrepare_Remote(char *prepareGID, bool localNode, bool implicit);
extern void PostPrepare_Remote(char *prepareGID, bool implicit);
extern void PreCommit_Remote(char *prepareGID, char *nodestring, bool preparedLocalNode);
extern bool PreAbort_Remote(void);
extern void AtEOXact_Remote(void);
extern bool IsTwoPhaseCommitRequired(bool localWrite);
extern bool FinishRemotePreparedTransaction(char *prepareGID, bool commit);
extern char *GetImplicit2PCGID(const char *implicit2PC_head, bool localWrite);
extern void pgxc_all_success_nodes(ExecNodes **d_nodes, ExecNodes **c_nodes, char **failednodes_msg);
extern void AtEOXact_DBCleanup(bool isCommit);
extern void set_dbcleanup_callback(xact_callback function, void *paraminfo, int paraminfo_size);
#endif
|