You can subscribe to this list here.
2010 |
Jan
|
Feb
|
Mar
|
Apr
(4) |
May
(28) |
Jun
(12) |
Jul
(11) |
Aug
(12) |
Sep
(5) |
Oct
(19) |
Nov
(14) |
Dec
(12) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2011 |
Jan
(18) |
Feb
(30) |
Mar
(115) |
Apr
(89) |
May
(50) |
Jun
(44) |
Jul
(22) |
Aug
(13) |
Sep
(11) |
Oct
(30) |
Nov
(28) |
Dec
(39) |
2012 |
Jan
(38) |
Feb
(18) |
Mar
(43) |
Apr
(91) |
May
(108) |
Jun
(46) |
Jul
(37) |
Aug
(44) |
Sep
(33) |
Oct
(29) |
Nov
(36) |
Dec
(15) |
2013 |
Jan
(35) |
Feb
(611) |
Mar
(5) |
Apr
(55) |
May
(30) |
Jun
(28) |
Jul
(458) |
Aug
(34) |
Sep
(9) |
Oct
(39) |
Nov
(22) |
Dec
(32) |
2014 |
Jan
(16) |
Feb
(16) |
Mar
(42) |
Apr
(179) |
May
(7) |
Jun
(6) |
Jul
(9) |
Aug
|
Sep
(4) |
Oct
|
Nov
(3) |
Dec
|
2015 |
Jan
|
Feb
|
Mar
|
Apr
(2) |
May
(4) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
S | M | T | W | T | F | S |
---|---|---|---|---|---|---|
|
|
|
1
|
2
|
3
|
4
|
5
|
6
(1) |
7
|
8
|
9
|
10
|
11
|
12
|
13
|
14
(1) |
15
|
16
|
17
|
18
|
19
|
20
|
21
|
22
(1) |
23
|
24
|
25
|
26
(1) |
27
(1) |
28
|
29
|
30
|
|
|
From: mason_s <ma...@us...> - 2010-09-06 23:58:58
|
Project "Postgres-XC". The branch, master has been updated via 19a8fa536779653524a1feb862c18277efa317f4 (commit) from 06c882f78694a31749746aad0cb76347a3f7bcef (commit) - Log ----------------------------------------------------------------- commit 19a8fa536779653524a1feb862c18277efa317f4 Author: Mason Sharp <ma...@us...> Date: Mon Sep 6 19:54:53 2010 -0400 Improved error handling. The primary focus is to better handle the case of a stopped or crashed data node on the coordinator. Also, before a rollback make sure connections are clean. If there was an error, tell the pooler to destroy the connections instead of returning them to the pools, even the data node connections that did not have an error but are involved in the statement. This is because there may be some remaining messages buffered or in transit, which could affect subsequent requests. diff --git a/src/backend/pgxc/pool/datanode.c b/src/backend/pgxc/pool/datanode.c index ba56ca1..31b5bc0 100644 --- a/src/backend/pgxc/pool/datanode.c +++ b/src/backend/pgxc/pool/datanode.c @@ -37,7 +37,6 @@ #include "utils/snapmgr.h" #include "../interfaces/libpq/libpq-fe.h" -#define NO_SOCKET -1 static int node_count = 0; static DataNodeHandle *handles = NULL; @@ -280,7 +279,8 @@ retry: { add_error_message(conn, "unexpected EOF on datanode connection"); elog(WARNING, "unexpected EOF on datanode connection"); - return EOF; + /* Should we read from the other connections before returning? */ + return EOF; } else { @@ -429,6 +429,18 @@ retry: } +/* + * Clear out socket data and buffer. + * Throw away any data. 
+ */ +void +clear_socket_data (DataNodeHandle *conn) +{ + do { + conn->inStart = conn->inCursor = conn->inEnd = 0; + } while (data_node_read_data(conn) > 0); +} + /* * Get one character from the connection buffer and advance cursor */ @@ -529,14 +541,20 @@ get_message(DataNodeHandle *conn, int *len, char **msg) } -/* Release all data node connections back to pool and release occupied memory */ +/* + * Release all data node connections back to pool and release occupied memory + * + * If force_drop is true, we force dropping all of the connections, such as after + * a rollback, which was likely issued due to an error. + */ void -release_handles(void) +release_handles(bool force_drop) { int i; int discard[NumDataNodes]; int ndisc = 0; + if (node_count == 0) return; @@ -546,7 +564,9 @@ release_handles(void) if (handle->sock != NO_SOCKET) { - if (handle->state != DN_CONNECTION_STATE_IDLE) + if (force_drop) + discard[ndisc++] = handle->nodenum; + else if (handle->state != DN_CONNECTION_STATE_IDLE) { elog(WARNING, "Connection to data node %d has unexpected state %d and will be dropped", handle->nodenum, handle->state); discard[ndisc++] = handle->nodenum; @@ -1070,6 +1090,12 @@ get_transaction_nodes(DataNodeHandle **connections) { for (i = 0; i < NumDataNodes; i++) { + /* + * We may want to consider also not returning connections with a + * state of DN_CONNECTION_STATE_ERROR_NOT_READY or + * DN_CONNECTION_STATE_ERROR_FATAL. + * ERROR_NOT_READY can happen if the data node abruptly disconnects. + */ if (handles[i].sock != NO_SOCKET && handles[i].transaction_status != 'I') connections[tran_count++] = &handles[i]; } @@ -1077,3 +1103,29 @@ get_transaction_nodes(DataNodeHandle **connections) return tran_count; } + +/* + * Return those node connections that appear to be active and + * have data to consume on them. 
+ */ +int +get_active_nodes (DataNodeHandle **connections) +{ + int active_count = 0; + int i; + + if (node_count) + { + for (i = 0; i < NumDataNodes; i++) + { + if (handles[i].sock != NO_SOCKET && + handles[i].state != DN_CONNECTION_STATE_IDLE && + handles[i].state != DN_CONNECTION_STATE_ERROR_NOT_READY && + handles[i].state != DN_CONNECTION_STATE_ERROR_FATAL) + connections[active_count++] = &handles[i]; + } + } + + return active_count; +} + diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c index f065289..05dbe2e 100644 --- a/src/backend/pgxc/pool/execRemote.c +++ b/src/backend/pgxc/pool/execRemote.c @@ -15,6 +15,7 @@ *------------------------------------------------------------------------- */ +#include <time.h> #include "postgres.h" #include "access/gtm.h" #include "access/xact.h" @@ -30,6 +31,10 @@ #include "utils/tuplesort.h" #include "utils/snapmgr.h" +#define END_QUERY_TIMEOUT 20 +#define CLEAR_TIMEOUT 5 + + extern char *deparseSql(RemoteQueryState *scanstate); /* @@ -50,6 +55,9 @@ static int data_node_rollback(int conn_count, DataNodeHandle ** connections); static void clear_write_node_list(); +static int handle_response_clear(DataNodeHandle * conn); + + #define MAX_STATEMENTS_PER_TRAN 10 /* Variables to collect statistics */ @@ -761,7 +769,8 @@ HandleError(RemoteQueryState *combiner, char *msg_body, size_t len) { combiner->errorMessage = pstrdup(message); /* Error Code is exactly 5 significant bytes */ - memcpy(combiner->errorCode, code, 5); + if (code) + memcpy(combiner->errorCode, code, 5); } /* @@ -916,7 +925,7 @@ data_node_receive_responses(const int conn_count, DataNodeHandle ** connections, * Read results. * Note we try and read from data node connections even if there is an error on one, * so as to avoid reading incorrect results on the next statement. - * It might be better to just destroy these connections and tell the pool manager. + * Other safegaurds exist to avoid this, however. 
*/ while (count > 0) { @@ -971,6 +980,7 @@ handle_response(DataNodeHandle * conn, RemoteQueryState *combiner) { char *msg; int msg_len; + char msg_type; for (;;) { @@ -991,7 +1001,8 @@ handle_response(DataNodeHandle * conn, RemoteQueryState *combiner) } /* TODO handle other possible responses */ - switch (get_message(conn, &msg_len, &msg)) + msg_type = get_message(conn, &msg_len, &msg); + switch (msg_type) { case '\0': /* Not enough data in the buffer */ conn->state = DN_CONNECTION_STATE_QUERY; @@ -1056,15 +1067,85 @@ handle_response(DataNodeHandle * conn, RemoteQueryState *combiner) case 'I': /* EmptyQuery */ default: /* sync lost? */ + elog(WARNING, "Received unsupported message type: %c", msg_type); conn->state = DN_CONNECTION_STATE_ERROR_FATAL; return RESPONSE_EOF; } } - /* Keep compiler quiet */ + return RESPONSE_EOF; } /* + * Like handle_response, but for consuming the messages, + * in case we of an error to clean the data node connection. + * Return values: + * RESPONSE_EOF - need to receive more data for the connection + * RESPONSE_COMPLETE - done with the connection, or done trying (error) + */ +static int +handle_response_clear(DataNodeHandle * conn) +{ + char *msg; + int msg_len; + char msg_type; + + for (;;) + { + /* No data available, exit */ + if (conn->state == DN_CONNECTION_STATE_QUERY) + return RESPONSE_EOF; + + /* + * If we are in the process of shutting down, we + * may be rolling back, and the buffer may contain other messages. + * We want to avoid a procarray exception + * as well as an error stack overflow. 
+ */ + if (proc_exit_inprogress) + { + conn->state = DN_CONNECTION_STATE_ERROR_FATAL; + return RESPONSE_COMPLETE; + } + + msg_type = get_message(conn, &msg_len, &msg); + switch (msg_type) + { + case '\0': /* Not enough data in the buffer */ + case 'c': /* CopyToCommandComplete */ + case 'C': /* CommandComplete */ + case 'T': /* RowDescription */ + case 'D': /* DataRow */ + case 'H': /* CopyOutResponse */ + case 'd': /* CopyOutDataRow */ + case 'A': /* NotificationResponse */ + case 'N': /* NoticeResponse */ + break; + case 'E': /* ErrorResponse */ + conn->state = DN_CONNECTION_STATE_ERROR_NOT_READY; + /* + * Do not return with an error, we still need to consume Z, + * ready-for-query + */ + break; + case 'Z': /* ReadyForQuery */ + conn->transaction_status = msg[0]; + conn->state = DN_CONNECTION_STATE_IDLE; + return RESPONSE_COMPLETE; + case 'I': /* EmptyQuery */ + default: + /* sync lost? */ + elog(WARNING, "Received unsupported message type: %c", msg_type); + conn->state = DN_CONNECTION_STATE_ERROR_FATAL; + return RESPONSE_COMPLETE; + } + } + + return RESPONSE_EOF; +} + + +/* * Send BEGIN command to the Data nodes and receive responses */ static int @@ -1150,13 +1231,13 @@ finish: if (!autocommit) stat_transaction(tran_count); if (!PersistentConnections) - release_handles(); + release_handles(false); autocommit = true; clear_write_node_list(); if (res != 0) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Could not commit connection on data nodes"))); + errmsg("Could not commit (or autocommit) data node connection"))); } @@ -1271,6 +1352,7 @@ finish: /* * Rollback current transaction + * This will happen */ int DataNodeRollback(void) @@ -1279,6 +1361,10 @@ DataNodeRollback(void) int tran_count; DataNodeHandle *connections[NumDataNodes]; + + /* Consume any messages on the data nodes first if necessary */ + DataNodeConsumeMessages(); + /* gather connections to rollback */ tran_count = get_transaction_nodes(connections); @@ -1296,7 +1382,7 @@ finish: if 
(!autocommit) stat_transaction(tran_count); if (!PersistentConnections) - release_handles(); + release_handles(true); autocommit = true; clear_write_node_list(); return res; @@ -1313,11 +1399,19 @@ data_node_rollback(int conn_count, DataNodeHandle ** connections) struct timeval *timeout = NULL; RemoteQueryState *combiner; + + /* + * Rollback is a special case, being issued because of an error. + * We try to read and throw away any extra data on the connection before + * issuing our rollbacks so that we did not read the results of the + * previous command. + */ + for (i = 0; i < conn_count; i++) + clear_socket_data(connections[i]); + /* Send ROLLBACK - */ for (i = 0; i < conn_count; i++) - { data_node_send_query(connections[i], "ROLLBACK"); - } combiner = CreateResponseCombiner(conn_count, COMBINE_TYPE_NONE); /* Receive responses */ @@ -1487,7 +1581,7 @@ DataNodeCopyBegin(const char *query, List *nodelist, Snapshot snapshot, bool is_ if (need_tran) DataNodeCopyFinish(connections, 0, COMBINE_TYPE_NONE); else if (!PersistentConnections) - release_handles(); + release_handles(false); } pfree(connections); @@ -1711,7 +1805,7 @@ DataNodeCopyOut(Exec_Nodes *exec_nodes, DataNodeHandle** copy_connections, FILE* if (!ValidateAndCloseCombiner(combiner)) { if (autocommit && !PersistentConnections) - release_handles(); + release_handles(false); pfree(copy_connections); ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), @@ -2136,8 +2230,10 @@ ExecRemoteQuery(RemoteQueryState *node) if (connections[i]->transaction_status != 'T') new_connections[new_count++] = connections[i]; - if (new_count) - data_node_begin(new_count, new_connections, gxid); + if (new_count && data_node_begin(new_count, new_connections, gxid)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Could not begin transaction on data nodes."))); } /* Get the SQL string */ @@ -2292,7 +2388,7 @@ ExecRemoteQuery(RemoteQueryState *node) { ExecSetSlotDescriptor(scanslot, node->tuple_desc); /* - * Now tuple 
table slot is responcible for freeing the + * Now tuple table slot is responsible for freeing the * descriptor */ node->tuple_desc = NULL; @@ -2492,9 +2588,88 @@ ExecRemoteQuery(RemoteQueryState *node) return resultslot; } +/* + * End the remote query + */ void ExecEndRemoteQuery(RemoteQueryState *node) { + + /* + * If processing was interrupted, (ex: client did not consume all the data, + * or a subquery with LIMIT) we may still have data on the nodes. Try and consume. + * We do not simply call DataNodeConsumeMessages, because the same + * connection could be used for multiple RemoteQuery steps. + * + * It seems most stable checking command_complete_count + * and only then working with conn_count + * + * PGXCTODO: Change in the future when we remove materialization nodes. + */ + if (node->command_complete_count < node->node_count) + { + elog(WARNING, "Extra data node messages when ending remote query step"); + + while (node->conn_count > 0) + { + int i = 0; + int res; + + /* + * Just consume the rest of the messages + */ + if ((i = node->current_conn + 1) == node->conn_count) + i = 0; + + for (;;) + { + /* throw away message */ + if (node->msg) + { + pfree(node->msg); + node->msg = NULL; + } + + res = handle_response(node->connections[i], node); + + if (res == RESPONSE_COMPLETE || + node->connections[i]->state == DN_CONNECTION_STATE_ERROR_FATAL || + node->connections[i]->state == DN_CONNECTION_STATE_ERROR_NOT_READY) + { + if (--node->conn_count == 0) + break; + if (i == node->conn_count) + i = 0; + else + node->connections[i] = node->connections[node->conn_count]; + if (node->current_conn == node->conn_count) + node->current_conn = i; + } + else if (res == RESPONSE_EOF) + { + /* go to next connection */ + if (++i == node->conn_count) + i = 0; + + /* if we cycled over all connections we need to receive more */ + if (i == node->current_conn) + { + struct timeval timeout; + timeout.tv_sec = END_QUERY_TIMEOUT; + timeout.tv_usec = 0; + + if 
(data_node_receive(node->conn_count, node->connections, &timeout)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Failed to read response from data nodes when ending query"))); + } + } + } + } + elog(WARNING, "Data node connection buffers cleaned"); + } + + /* * Release tuplesort resources */ @@ -2517,6 +2692,64 @@ ExecEndRemoteQuery(RemoteQueryState *node) CloseCombiner(node); } +/* + * Consume any remaining messages on the connections. + * This is useful for calling after ereport() + */ +void +DataNodeConsumeMessages(void) +{ + int i; + int active_count = 0; + int res; + struct timeval timeout; + DataNodeHandle *connection = NULL; + DataNodeHandle **connections = NULL; + DataNodeHandle *active_connections[NumDataNodes]; + + + active_count = get_active_nodes(active_connections); + + /* Iterate through handles in use and try and clean */ + for (i = 0; i < active_count; i++) + { + elog(WARNING, "Consuming data node messages after error."); + + connection = active_connections[i]; + + res = RESPONSE_EOF; + + while (res != RESPONSE_COMPLETE) + { + int res = handle_response_clear(connection); + + if (res == RESPONSE_EOF) + { + if (!connections) + connections = (DataNodeHandle **) palloc(sizeof(DataNodeHandle*)); + + connections[0] = connection; + + /* Use a timeout so we do not wait forever */ + timeout.tv_sec = CLEAR_TIMEOUT; + timeout.tv_usec = 0; + if (data_node_receive(1, connections, &timeout)) + { + /* Mark this as bad, move on to next one */ + connection->state = DN_CONNECTION_STATE_ERROR_FATAL; + break; + } + } + if (connection->state == DN_CONNECTION_STATE_ERROR_FATAL + || connection->state == DN_CONNECTION_STATE_IDLE) + break; + } + } + + if (connections) + pfree(connections); +} + /* ---------------------------------------------------------------- * ExecRemoteQueryReScan @@ -2609,8 +2842,11 @@ ExecRemoteUtility(RemoteQuery *node) if (connections[i]->transaction_status != 'T') new_connections[new_count++] = connections[i]; - if (new_count) - 
data_node_begin(new_count, new_connections, gxid); + if (new_count && data_node_begin(new_count, new_connections, gxid)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Could not begin transaction on data nodes"))); + } /* See if we have a primary nodes, execute on it first before the others */ @@ -2760,10 +2996,11 @@ DataNodeCleanAndRelease(int code, Datum arg) /* Rollback on GTM if transaction id opened. */ RollbackTranGTM((GlobalTransactionId) GetCurrentTransactionIdIfAny()); - } - /* Release data node connections */ - release_handles(); + release_handles(true); + } else + /* Release data node connections */ + release_handles(false); /* Close connection with GTM */ CloseGTM(); diff --git a/src/include/pgxc/datanode.h b/src/include/pgxc/datanode.h index 4202e2e..4039c45 100644 --- a/src/include/pgxc/datanode.h +++ b/src/include/pgxc/datanode.h @@ -23,6 +23,9 @@ #include "utils/snapshot.h" #include <unistd.h> +#define NO_SOCKET -1 + + /* Connection to data node maintained by Pool Manager */ typedef struct PGconn NODE_CONNECTION; @@ -80,8 +83,9 @@ extern int DataNodeConnClean(NODE_CONNECTION * conn); extern void DataNodeCleanAndRelease(int code, Datum arg); extern DataNodeHandle **get_handles(List *nodelist); -extern void release_handles(void); +extern void release_handles(bool force_drop); extern int get_transaction_nodes(DataNodeHandle ** connections); +extern int get_active_nodes(DataNodeHandle ** connections); extern int ensure_in_buffer_capacity(size_t bytes_needed, DataNodeHandle * handle); extern int ensure_out_buffer_capacity(size_t bytes_needed, DataNodeHandle * handle); @@ -100,5 +104,6 @@ extern int data_node_flush(DataNodeHandle *handle); extern char get_message(DataNodeHandle *conn, int *len, char **msg); extern void add_error_message(DataNodeHandle * handle, const char *message); +extern void clear_socket_data (DataNodeHandle *conn); #endif diff --git a/src/include/pgxc/execRemote.h b/src/include/pgxc/execRemote.h index 143c8fa..fbc4db0 
100644 --- a/src/include/pgxc/execRemote.h +++ b/src/include/pgxc/execRemote.h @@ -96,6 +96,7 @@ extern int handle_response(DataNodeHandle * conn, RemoteQueryState *combiner); extern bool FetchTuple(RemoteQueryState *combiner, TupleTableSlot *slot); extern void ExecRemoteQueryReScan(RemoteQueryState *node, ExprContext *exprCtxt); +extern void DataNodeConsumeMessages(void); extern int primary_data_node; #endif ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/pool/datanode.c | 62 ++++++++- src/backend/pgxc/pool/execRemote.c | 275 +++++++++++++++++++++++++++++++++--- src/include/pgxc/datanode.h | 7 +- src/include/pgxc/execRemote.h | 1 + 4 files changed, 320 insertions(+), 25 deletions(-) hooks/post-receive -- Postgres-XC |