
Some more parallel copy fixes #5924

Merged 12 commits on Apr 3, 2025
2 changes: 1 addition & 1 deletion graph/src/env/store.rs
@@ -280,7 +280,7 @@ pub struct InnerStore {
last_rollup_from_poi: bool,
#[envconfig(from = "GRAPH_STORE_INSERT_EXTRA_COLS", default = "0")]
insert_extra_cols: usize,
#[envconfig(from = "GRAPH_STORE_FDW_FETCH_SIZE", default = "10000")]
#[envconfig(from = "GRAPH_STORE_FDW_FETCH_SIZE", default = "1000")]
fdw_fetch_size: usize,
}
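
As an aside, the default above is read through the `envconfig` derive. The following is a minimal, self-contained sketch of how such a default interacts with the environment; the struct and binary are hypothetical, only the variable name and the new default mirror the diff, and `init_from_env` assumes a recent envconfig release:

```rust
use envconfig::Envconfig;

// Hypothetical standalone config struct; only the env var name and the new
// default value mirror the change above.
#[derive(Envconfig)]
struct FdwSettings {
    #[envconfig(from = "GRAPH_STORE_FDW_FETCH_SIZE", default = "1000")]
    fdw_fetch_size: usize,
}

fn main() {
    // Prints 1000 unless GRAPH_STORE_FDW_FETCH_SIZE is set in the
    // environment, in which case that value overrides the default.
    let settings = FdwSettings::init_from_env().unwrap();
    println!("fdw_fetch_size = {}", settings.fdw_fetch_size);
}
```

Exporting GRAPH_STORE_FDW_FETCH_SIZE at process start overrides the compiled-in default.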

40 changes: 33 additions & 7 deletions store/postgres/src/connection_pool.rs
@@ -347,12 +347,14 @@ impl PoolName {
#[derive(Clone)]
struct PoolStateTracker {
available: Arc<AtomicBool>,
ignore_timeout: Arc<AtomicBool>,
}

impl PoolStateTracker {
fn new() -> Self {
Self {
available: Arc::new(AtomicBool::new(true)),
ignore_timeout: Arc::new(AtomicBool::new(false)),
}
}

@@ -367,6 +369,20 @@ impl PoolStateTracker {
fn is_available(&self) -> bool {
self.available.load(Ordering::Relaxed)
}

fn timeout_is_ignored(&self) -> bool {
self.ignore_timeout.load(Ordering::Relaxed)
}

fn ignore_timeout<F, R>(&self, f: F) -> R
where
F: FnOnce() -> R,
{
self.ignore_timeout.store(true, Ordering::Relaxed);
let res = f();
self.ignore_timeout.store(false, Ordering::Relaxed);
res
}
}

impl ConnectionPool {
@@ -530,8 +546,12 @@ impl ConnectionPool {
&self,
logger: &Logger,
timeout: Duration,
) -> Result<Option<PooledConnection<ConnectionManager<PgConnection>>>, StoreError> {
self.get_ready()?.try_get_fdw(logger, timeout)
) -> Option<PooledConnection<ConnectionManager<PgConnection>>> {
let Ok(inner) = self.get_ready() else {
return None;
};
self.state_tracker
.ignore_timeout(|| inner.try_get_fdw(logger, timeout))
}

pub fn connection_detail(&self) -> Result<ForeignServer, StoreError> {
@@ -740,6 +760,9 @@ impl HandleEvent for EventHandler {
}

fn handle_timeout(&self, event: e::TimeoutEvent) {
if self.state_tracker.timeout_is_ignored() {
return;
}
self.add_conn_wait_time(event.timeout());
if self.state_tracker.is_available() {
error!(self.logger, "Connection checkout timed out";
@@ -1042,15 +1065,18 @@ impl PoolInner {
&self,
logger: &Logger,
timeout: Duration,
) -> Result<Option<PooledConnection<ConnectionManager<PgConnection>>>, StoreError> {
) -> Option<PooledConnection<ConnectionManager<PgConnection>>> {
// Any error trying to get a connection is treated as "couldn't get
// a connection in time". If there is a serious error with the
// database, e.g., because it's not available, the next database
// operation will run into it and report it.
self.fdw_pool(logger)?
.get_timeout(timeout)
.map(|conn| Some(conn))
.or_else(|_| Ok(None))
let Ok(fdw_pool) = self.fdw_pool(logger) else {
return None;
};
let Ok(conn) = fdw_pool.get_timeout(timeout) else {
return None;
};
Some(conn)
}

pub fn connection_detail(&self) -> Result<ForeignServer, StoreError> {
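
Condensed, the timeout-suppression pattern introduced in this file looks roughly like the self-contained sketch below: a shared flag is raised around a checkout that is expected to time out, and the pool's timeout event handler checks it before logging. Names are simplified and the r2d2 handler itself is not shown.

```rust
use std::sync::{
    atomic::{AtomicBool, Ordering},
    Arc,
};

// Condensed sketch of the pattern above: a shared flag is set around a
// blocking checkout that is allowed to time out, and the pool's timeout
// event handler stays quiet while the flag is raised.
#[derive(Clone)]
struct Tracker {
    ignore_timeout: Arc<AtomicBool>,
}

impl Tracker {
    fn new() -> Self {
        Self {
            ignore_timeout: Arc::new(AtomicBool::new(false)),
        }
    }

    // Run `f` with timeout reporting suppressed, then restore it.
    fn ignore_timeout<F, R>(&self, f: F) -> R
    where
        F: FnOnce() -> R,
    {
        self.ignore_timeout.store(true, Ordering::Relaxed);
        let res = f();
        self.ignore_timeout.store(false, Ordering::Relaxed);
        res
    }

    // What the event handler consults before logging a checkout timeout.
    fn timeout_is_ignored(&self) -> bool {
        self.ignore_timeout.load(Ordering::Relaxed)
    }
}

fn main() {
    let tracker = Tracker::new();
    tracker.ignore_timeout(|| assert!(tracker.timeout_is_ignored()));
    assert!(!tracker.timeout_is_ignored());
}
```

As in the diff itself, the flag is not restored if `f` panics; a scope guard would be needed to make the suppression panic-safe.
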
190 changes: 147 additions & 43 deletions store/postgres/src/copy.rs
@@ -37,6 +37,7 @@ use graph::{
info, lazy_static, o, warn, BlockNumber, BlockPtr, CheapClone, Logger, StoreError, ENV_VARS,
},
schema::EntityType,
slog::error,
};
use itertools::Itertools;

@@ -125,7 +126,7 @@ pub fn is_source(conn: &mut PgConnection, site: &Site) -> Result<bool, StoreErro
.map_err(StoreError::from)
}

#[derive(Copy, Clone, PartialEq, Eq)]
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Status {
Finished,
Cancelled,
@@ -687,9 +688,18 @@ impl CopyTableWorker {
}
}

async fn run(mut self, logger: Logger, progress: Arc<CopyProgress>) -> Self {
self.result = self.run_inner(logger, &progress);
self
async fn run(
mut self,
logger: Logger,
progress: Arc<CopyProgress>,
) -> Result<Self, StoreError> {
let object = self.table.dst.object.cheap_clone();
graph::spawn_blocking_allow_panic(move || {
self.result = self.run_inner(logger, &progress);
self
})
.await
.map_err(|e| constraint_violation!("copy worker for {} panicked: {}", object, e))
}

fn run_inner(&mut self, logger: Logger, progress: &CopyProgress) -> Result<Status, StoreError> {
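
The new `run` above moves the blocking copy onto a blocking thread and converts a worker panic into a `StoreError` instead of unwinding the caller. Below is a rough, generic equivalent of that pattern, under the assumption that `graph::spawn_blocking_allow_panic` is a thin wrapper around `tokio::task::spawn_blocking`:

```rust
use tokio::task::JoinError;

// Generic sketch only: run a blocking step off the async executor and turn a
// panic (surfaced as a JoinError) into the caller's own error type.
async fn run_blocking_step<T, E>(
    step: impl FnOnce() -> T + Send + 'static,
    on_panic: impl FnOnce(JoinError) -> E,
) -> Result<T, E>
where
    T: Send + 'static,
{
    tokio::task::spawn_blocking(step).await.map_err(on_panic)
}

#[tokio::main]
async fn main() {
    let res: Result<u32, String> =
        run_blocking_step(|| 40 + 2, |e| format!("copy worker panicked: {e}")).await;
    assert_eq!(res, Ok(42));
}
```
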
@@ -739,10 +749,19 @@ impl CopyTableWorker {
break status;
}
Err(StoreError::StatementTimeout) => {
let timeout = ENV_VARS
.store
.batch_timeout
.map(|t| t.as_secs().to_string())
.unwrap_or_else(|| "unlimted".to_string());
warn!(
logger,
"Current batch took longer than GRAPH_STORE_BATCH_TIMEOUT seconds. Retrying with a smaller batch size."
);
logger,
"Current batch timed out. Retrying with a smaller batch size.";
"timeout_s" => timeout,
"table" => self.table.dst.qualified_name.as_str(),
"current_vid" => self.table.batcher.next_vid(),
"current_batch_size" => self.table.batcher.batch_size(),
);
}
Err(e) => {
return Err(e);
@@ -857,7 +876,7 @@ impl Connection {
{
let Some(conn) = self.conn.as_mut() else {
return Err(constraint_violation!(
"copy connection has been handed to background task but not returned yet"
"copy connection has been handed to background task but not returned yet (transaction)"
));
};
conn.transaction(|conn| f(conn))
@@ -890,20 +909,18 @@ impl Connection {
&mut self,
state: &mut CopyState,
progress: &Arc<CopyProgress>,
) -> Result<Option<Pin<Box<dyn Future<Output = CopyTableWorker>>>>, StoreError> {
let conn = self.conn.take().ok_or_else(|| {
constraint_violation!(
"copy connection has been handed to background task but not returned yet"
)
})?;
) -> Option<Pin<Box<dyn Future<Output = Result<CopyTableWorker, StoreError>>>>> {
let Some(conn) = self.conn.take() else {
return None;
};
let Some(table) = state.unfinished.pop() else {
return Ok(None);
return None;
};

let worker = CopyTableWorker::new(conn, table);
Ok(Some(Box::pin(
Some(Box::pin(
worker.run(self.logger.cheap_clone(), progress.cheap_clone()),
)))
))
}

/// Opportunistically create an extra worker if we have more tables to
Expand All @@ -913,29 +930,74 @@ impl Connection {
&mut self,
state: &mut CopyState,
progress: &Arc<CopyProgress>,
) -> Result<Option<Pin<Box<dyn Future<Output = CopyTableWorker>>>>, StoreError> {
) -> Option<Pin<Box<dyn Future<Output = Result<CopyTableWorker, StoreError>>>>> {
// It's important that we get the connection before the table since
// we remove the table from the state and could drop it otherwise
let Some(conn) = self
.pool
.try_get_fdw(&self.logger, ENV_VARS.store.batch_worker_wait)?
.try_get_fdw(&self.logger, ENV_VARS.store.batch_worker_wait)
else {
return Ok(None);
return None;
};
let Some(table) = state.unfinished.pop() else {
return Ok(None);
return None;
};

let worker = CopyTableWorker::new(conn, table);
Ok(Some(Box::pin(
Some(Box::pin(
worker.run(self.logger.cheap_clone(), progress.cheap_clone()),
)))
))
}

/// Check that we can make progress, i.e., that we have at least one
/// worker that copies as long as there are unfinished tables. This is a
/// safety check to guard against `copy_data_internal` looping forever
/// because of some internal inconsistency
fn assert_progress(&self, num_workers: usize, state: &CopyState) -> Result<(), StoreError> {
if num_workers == 0 && !state.unfinished.is_empty() {
// Something bad happened. We should have at least one
// worker if there are still tables to copy
if self.conn.is_none() {
return Err(constraint_violation!(
"copy connection has been handed to background task but not returned yet (copy_data_internal)"
));
} else {
return Err(constraint_violation!(
"no workers left but still tables to copy"
));
}
}
Ok(())
}

pub async fn copy_data_internal(
/// Wait for all workers to finish. This is called when a worker has
/// failed with an error that forces us to abort copying
async fn cancel_workers(
&mut self,
index_list: IndexList,
) -> Result<Status, StoreError> {
progress: Arc<CopyProgress>,
mut workers: Vec<Pin<Box<dyn Future<Output = Result<CopyTableWorker, StoreError>>>>>,
) {
progress.cancel();
error!(
self.logger,
"copying encountered an error; waiting for all workers to finish"
);
while !workers.is_empty() {
let (result, _, remaining) = select_all(workers).await;
workers = remaining;
match result {
Ok(worker) => {
self.conn = Some(worker.conn);
}
Err(e) => {
/* Ignore; we had an error previously */
error!(self.logger, "copy worker panicked: {}", e);
}
}
}
}

async fn copy_data_internal(&mut self, index_list: IndexList) -> Result<Status, StoreError> {
let src = self.src.clone();
let dst = self.dst.clone();
let target_block = self.target_block.clone();
@@ -949,40 +1011,69 @@ impl Connection {
// connection in `self.conn`. If the fdw pool has idle connections
// and there are more tables to be copied, we can start more
// workers, up to `self.workers` many
//
// The loop has to be very careful about terminating early so that
// we do not ever leave the loop with `self.conn == None`
let mut workers = Vec::new();
while !state.unfinished.is_empty() || !workers.is_empty() {
// We usually add at least one job here, except if we are out of
// tables to copy. In that case, we go through the `while` loop
// every time one of the tables we are currently copying
// finishes
if let Some(worker) = self.default_worker(&mut state, &progress)? {
if let Some(worker) = self.default_worker(&mut state, &progress) {
workers.push(worker);
}
loop {
if workers.len() >= self.workers {
break;
}
let Some(worker) = self.extra_worker(&mut state, &progress)? else {
let Some(worker) = self.extra_worker(&mut state, &progress) else {
break;
};
workers.push(worker);
}
let (worker, _idx, remaining) = select_all(workers).await;
workers = remaining;

// Put the connection back into self.conn so that we can use it
// in the next iteration.
self.conn = Some(worker.conn);
state.finished.push(worker.table);

if worker.result.is_err() {
progress.cancel();
return worker.result;
}
self.assert_progress(workers.len(), &state)?;
let (result, _idx, remaining) = select_all(workers).await;
workers = remaining;

if progress.is_cancelled() {
return Ok(Status::Cancelled);
}
// Analyze `result` and take another trip through the loop if
// everything is ok; wait for pending workers and return if
// there was an error or if copying was cancelled.
match result {
Err(e) => {
// This is a panic in the background task. We need to
// cancel all other tasks and return the error
self.cancel_workers(progress, workers).await;
return Err(e);
}
Ok(worker) => {
// Put the connection back into self.conn so that we can use it
// in the next iteration.
self.conn = Some(worker.conn);

match (worker.result, progress.is_cancelled()) {
(Ok(Status::Finished), false) => {
// The worker finished successfully, and nothing was
// cancelled; take another trip through the loop
state.finished.push(worker.table);
}
(Ok(Status::Finished), true) => {
state.finished.push(worker.table);
self.cancel_workers(progress, workers).await;
return Ok(Status::Cancelled);
}
(Ok(Status::Cancelled), _) => {
self.cancel_workers(progress, workers).await;
return Ok(Status::Cancelled);
}
(Err(e), _) => {
self.cancel_workers(progress, workers).await;
return Err(e);
}
}
}
};
}
debug_assert!(self.conn.is_some());
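
Both this loop and `cancel_workers` drive the workers with `futures::future::select_all`, which resolves to the first completed future, its index, and the futures that are still pending. The following is a self-contained toy version of that drain pattern, illustrative only and not graph-node code:

```rust
use std::future::Future;
use std::pin::Pin;

use futures::future::select_all;

// Illustrative only: drain a set of boxed futures the way the copy loop and
// `cancel_workers` drain their workers, handling one completion at a time.
async fn drain(mut tasks: Vec<Pin<Box<dyn Future<Output = u32>>>>) -> Vec<u32> {
    let mut results = Vec::new();
    while !tasks.is_empty() {
        // `select_all` yields the first finished future's output, its index,
        // and the futures that are still pending.
        let (value, _index, remaining) = select_all(tasks).await;
        results.push(value);
        tasks = remaining;
    }
    results
}

#[tokio::main(flavor = "current_thread")]
async fn main() {
    let mut tasks: Vec<Pin<Box<dyn Future<Output = u32>>>> = Vec::new();
    for n in 1..=3u32 {
        tasks.push(Box::pin(async move { n * 10 }));
    }
    let mut results = drain(tasks).await;
    results.sort();
    assert_eq!(results, vec![10, 20, 30]);
}
```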

@@ -1048,7 +1139,7 @@ impl Connection {
/// lower(v1.block_range) => v2.vid > v1.vid` and we can therefore stop
/// the copying of each table as soon as we hit `max_vid = max { v.vid |
/// lower(v.block_range) <= target_block.number }`.
pub async fn copy_data(&mut self, index_list: IndexList) -> Result<Status, StoreError> {
pub async fn copy_data(mut self, index_list: IndexList) -> Result<Status, StoreError> {
// We require sole access to the destination site, and that we get a
// consistent view of what has been copied so far. In general, that
// is always true. It can happen though that this function runs when
@@ -1061,10 +1152,23 @@ impl Connection {
&self.logger,
"Obtaining copy lock (this might take a long time if another process is still copying)"
);

let dst_site = self.dst.site.cheap_clone();
self.transaction(|conn| advisory_lock::lock_copying(conn, &dst_site))?;

let res = self.copy_data_internal(index_list).await;

if self.conn.is_none() {
// A background worker panicked and left us without our
// dedicated connection, but we still need to release the copy
// lock; for that, get a normal connection rather than one from the
// fdw pool, since the normal pool is much less contended. We won't be
// holding on to the connection for long, as `res` will be an error and
// we will abort starting this subgraph
self.conn = Some(self.pool.get()?);
}
self.transaction(|conn| advisory_lock::unlock_copying(conn, &dst_site))?;

if matches!(res, Ok(Status::Cancelled)) {
warn!(&self.logger, "Copying was cancelled and is incomplete");
}
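
The copy lock taken and released above sits on top of Postgres advisory locks. Below is a generic diesel sketch of that underlying mechanism; it is not graph-node's `advisory_lock` module, and the numeric key is hypothetical (in practice it would be derived from the destination site):

```rust
use diesel::prelude::*;
use diesel::sql_query;
use diesel::sql_types::BigInt;

// Generic sketch of a session-level advisory lock; `key` stands in for
// whatever identifier the application derives for the resource being locked.
fn lock_copy(conn: &mut PgConnection, key: i64) -> QueryResult<()> {
    sql_query("SELECT pg_advisory_lock($1)")
        .bind::<BigInt, _>(key)
        .execute(conn)
        .map(|_| ())
}

fn unlock_copy(conn: &mut PgConnection, key: i64) -> QueryResult<()> {
    sql_query("SELECT pg_advisory_unlock($1)")
        .bind::<BigInt, _>(key)
        .execute(conn)
        .map(|_| ())
}
```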