From 541d169873b4cce9fd63169cbf6670f740228f35 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 17 Mar 2025 12:26:48 +0100 Subject: [PATCH 001/160] store: Allow copying of failed deployments Generally, it makes no sense to copy failed deployments, but when deployments need to be moved to different shards, it does. --- store/postgres/src/subgraph_store.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/store/postgres/src/subgraph_store.rs b/store/postgres/src/subgraph_store.rs index e9f5f2cce34..0beeadf345d 100644 --- a/store/postgres/src/subgraph_store.rs +++ b/store/postgres/src/subgraph_store.rs @@ -699,12 +699,6 @@ impl SubgraphStoreInner { ))); } let deployment = src_store.load_deployment(src.clone())?; - if deployment.failed { - return Err(StoreError::Unknown(anyhow!( - "can not copy deployment {} because it has failed", - src_loc - ))); - } let index_def = src_store.load_indexes(src.clone())?; // Transmogrify the deployment into a new one From 8a8b428650d514377c551fb8ad224dd9b5804342 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 17 Mar 2025 13:49:09 +0100 Subject: [PATCH 002/160] store: Do not prune deployments that are being copied Pruning during copying can cause the copy to fail because data it needs disappears during copying. --- store/postgres/src/copy.rs | 14 ++++++++++++++ store/postgres/src/deployment_store.rs | 10 +++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index f2f7e9f1d66..5a31acfb959 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -94,6 +94,20 @@ table! { } } +/// Return `true` if the site is the source of a copy operation. The copy +/// operation might be just queued or in progress already +pub fn is_source(conn: &mut PgConnection, site: &Site) -> Result { + use active_copies as ac; + + select(diesel::dsl::exists( + ac::table + .filter(ac::src.eq(site.id)) + .filter(ac::cancelled_at.is_null()), + )) + .get_result::(conn) + .map_err(StoreError::from) +} + #[derive(Copy, Clone, PartialEq, Eq)] pub enum Status { Finished, diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index b148129d924..df2295c6d54 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -55,7 +55,7 @@ use crate::primary::DeploymentId; use crate::relational::index::{CreateIndex, IndexList, Method}; use crate::relational::{Layout, LayoutCache, SqlName, Table}; use crate::relational_queries::FromEntityData; -use crate::{advisory_lock, catalog, retry}; +use crate::{advisory_lock, catalog, copy, retry}; use crate::{connection_pool::ConnectionPool, detail}; use crate::{dynds, primary::Site}; @@ -1234,6 +1234,14 @@ impl DeploymentStore { site: Arc, req: PruneRequest, ) -> Result<(), StoreError> { + let mut conn = store.get_conn()?; + if copy::is_source(&mut conn, &site)? 
{ + debug!( + logger, + "Skipping pruning since this deployment is being copied" + ); + return Ok(()); + } let logger2 = logger.cheap_clone(); retry::forever_async(&logger2, "prune", move || { let store = store.cheap_clone(); From 0cc798630024e1550159562f3d0eabd58c6601cd Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 17 Mar 2025 18:50:35 +0100 Subject: [PATCH 003/160] store: Do not hold a connection unnecessarily --- store/postgres/src/deployment_store.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index df2295c6d54..b196cd3c539 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -1234,13 +1234,15 @@ impl DeploymentStore { site: Arc, req: PruneRequest, ) -> Result<(), StoreError> { - let mut conn = store.get_conn()?; - if copy::is_source(&mut conn, &site)? { - debug!( - logger, - "Skipping pruning since this deployment is being copied" - ); - return Ok(()); + { + let mut conn = store.get_conn()?; + if copy::is_source(&mut conn, &site)? { + debug!( + logger, + "Skipping pruning since this deployment is being copied" + ); + return Ok(()); + } } let logger2 = logger.cheap_clone(); retry::forever_async(&logger2, "prune", move || { From 900f10a8483ecb6a0efb480ebc95c843973b4d79 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 19 Mar 2025 12:20:31 +0100 Subject: [PATCH 004/160] store: Move filtering of histogram_bounds into VidBatcher::new --- store/postgres/src/vid_batcher.rs | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/store/postgres/src/vid_batcher.rs b/store/postgres/src/vid_batcher.rs index 81da5382e3d..5e640ca586f 100644 --- a/store/postgres/src/vid_batcher.rs +++ b/store/postgres/src/vid_batcher.rs @@ -112,20 +112,6 @@ pub(crate) struct VidBatcher { } impl VidBatcher { - fn histogram_bounds( - conn: &mut PgConnection, - nsp: &Namespace, - table: &Table, - range: VidRange, - ) -> Result, StoreError> { - let bounds = catalog::histogram_bounds(conn, nsp, &table.name, VID_COLUMN)? - .into_iter() - .filter(|bound| range.min < *bound && range.max > *bound) - .chain(vec![range.min, range.max].into_iter()) - .collect::>(); - Ok(bounds) - } - /// Initialize a batcher for batching through entries in `table` with /// `vid` in the given `vid_range` /// @@ -138,7 +124,7 @@ impl VidBatcher { table: &Table, vid_range: VidRange, ) -> Result { - let bounds = Self::histogram_bounds(conn, nsp, table, vid_range)?; + let bounds = catalog::histogram_bounds(conn, nsp, &table.name, VID_COLUMN)?; let batch_size = AdaptiveBatchSize::new(table); Self::new(bounds, vid_range, batch_size) } @@ -150,6 +136,12 @@ impl VidBatcher { ) -> Result { let start = range.min; + let bounds = bounds + .into_iter() + .filter(|bound| range.min < *bound && range.max > *bound) + .chain(vec![range.min, range.max].into_iter()) + .collect::>(); + let mut ogive = if range.is_empty() { None } else { From 2038f1c2ee830813106e2259d8090477222f2adf Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 19 Mar 2025 12:28:44 +0100 Subject: [PATCH 005/160] store: Fix mistake in handling of histogram bounds in VidBatcher When setting upa VidBatcher we have both accurate values for the range of vids as well as Postgres' estimate of bounds for a histogram with roughly the same number of entries in each bucket. As an example, say we have min and max of 1 and 100, and histogram bounds [5, 50, 96]. 
We used to then add min and max to these bounds resulting in an ogive over [1, 5, 50, 96, 100]. With that, it seems that there is a bucket [1, 5] with just as many entries as the bucket [5, 50], which is not what the Posgres staistics indicate. Using this ogive will cause e.g. pruning to increase batch size quickly as it tries to get out of the [1, 5] bucket resulting in a batch size that is way too big for the next bucket and a batch that can take a very long time. The first and last entry of the bounds are Postgres' estimate of the min and max. We now simply replace the first and last bound with our known min and max, resulting in an ogive over [1, 50, 100], which reflects the statistics much more accurately and avoids impossibly short buckets. --- store/postgres/src/vid_batcher.rs | 56 +++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/store/postgres/src/vid_batcher.rs b/store/postgres/src/vid_batcher.rs index 5e640ca586f..2a1c30e7889 100644 --- a/store/postgres/src/vid_batcher.rs +++ b/store/postgres/src/vid_batcher.rs @@ -136,12 +136,26 @@ impl VidBatcher { ) -> Result { let start = range.min; - let bounds = bounds - .into_iter() - .filter(|bound| range.min < *bound && range.max > *bound) - .chain(vec![range.min, range.max].into_iter()) - .collect::>(); - + let bounds = { + // Keep only histogram bounds that are relevent for the range + let mut bounds = bounds + .into_iter() + .filter(|bound| range.min <= *bound && range.max >= *bound) + .collect::>(); + // The first and last entry in `bounds` are Postgres' estimates + // of the min and max `vid` values in the table. We use the + // actual min and max `vid` values from the `vid_range` instead + let len = bounds.len(); + if len > 1 { + bounds[0] = range.min; + bounds[len - 1] = range.max; + } else { + // If Postgres doesn't have a histogram, just use one bucket + // from min to max + bounds = vec![range.min, range.max]; + } + bounds + }; let mut ogive = if range.is_empty() { None } else { @@ -363,6 +377,17 @@ mod tests { } } + impl std::fmt::Debug for Batcher { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Batcher") + .field("start", &self.vid.start) + .field("end", &self.vid.end) + .field("size", &self.vid.batch_size.size) + .field("duration", &self.vid.batch_size.target.as_secs()) + .finish() + } + } + #[test] fn simple() { let bounds = vec![10, 20, 30, 40, 49]; @@ -414,4 +439,23 @@ mod tests { batcher.at(360, 359, 80); batcher.step(360, 359, S010); } + + #[test] + fn vid_batcher_adjusts_bounds() { + // The first and last entry in `bounds` are estimats of the min and + // max that are slightly off compared to the actual min and max we + // put in `vid_range`. Check that `VidBatcher` uses the actual min + // and max from `vid_range`. 
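    // A worked illustration, restating the example from the commit message
    // (drawn from the commit message, not from the original test): with a vid
    // range of 1..=100 and Postgres histogram bounds [5, 50, 96], the old code
    // appended min and max and built an ogive over [1, 5, 50, 96, 100], so the
    // tiny [1, 5] bucket looked as full as [5, 50]; the new code replaces the
    // estimated endpoints with the known min and max and builds the ogive
    // over [1, 50, 100].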
+ let bounds = vec![639, 20_000, 40_000, 60_000, 80_000, 90_000]; + let vid_range = VidRange::new(1, 100_000); + let batch_size = AdaptiveBatchSize { + size: 1000, + target: S100, + }; + + let vid_batcher = VidBatcher::new(bounds, vid_range, batch_size).unwrap(); + let ogive = vid_batcher.ogive.as_ref().unwrap(); + assert_eq!(1, ogive.start()); + assert_eq!(100_000, ogive.end()); + } } From ee52ac7104a1972862c654026fd2b2f35e23c972 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 7 Mar 2025 17:07:19 +0000 Subject: [PATCH 006/160] build(deps): bump ring from 0.17.8 to 0.17.13 Bumps [ring](https://github.com/briansmith/ring) from 0.17.8 to 0.17.13. - [Changelog](https://github.com/briansmith/ring/blob/main/RELEASES.md) - [Commits](https://github.com/briansmith/ring/commits) --- updated-dependencies: - dependency-name: ring dependency-type: indirect ... Signed-off-by: dependabot[bot] --- Cargo.lock | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c511d00601f..0ce5eeef843 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4113,15 +4113,14 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.8" +version = "0.17.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee" dependencies = [ "cc", "cfg-if 1.0.0", "getrandom 0.2.15", "libc", - "spin", "untrusted", "windows-sys 0.52.0", ] From 61500fbc64e6377838b6c51f049c0927d7bb8e1e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Mar 2025 21:08:39 +0000 Subject: [PATCH 007/160] build(deps): bump url from 2.5.2 to 2.5.4 Bumps [url](https://github.com/servo/rust-url) from 2.5.2 to 2.5.4. - [Release notes](https://github.com/servo/rust-url/releases) - [Commits](https://github.com/servo/rust-url/compare/v2.5.2...v2.5.4) --- updated-dependencies: - dependency-name: url dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 272 +++++++++++++++++++++++++++++++++++++++++++++-- graph/Cargo.toml | 2 +- node/Cargo.toml | 2 +- 3 files changed, 267 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0ce5eeef843..a7ea378f9e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1351,6 +1351,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "dsl_auto_type" version = "0.1.1" @@ -2643,6 +2654,124 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + 
"tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "id-arena" version = "2.2.1" @@ -2668,12 +2797,23 @@ dependencies = [ [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", ] [[package]] @@ -2974,6 +3114,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856" + [[package]] name = "lock_api" version = "0.4.12" @@ -4912,6 +5058,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "system-configuration" version = "0.5.1" @@ -5109,6 +5266,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinyvec" version = "1.7.0" @@ -5689,12 +5856,12 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", - "idna 0.5.0", + "idna 1.0.3", "percent-encoding", ] @@ -5704,6 +5871,18 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -6543,6 +6722,18 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "wyz" version = "0.5.1" @@ -6564,6 +6755,30 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -6604,12 +6819,55 @@ dependencies = [ "syn 2.0.87", ] +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "zstd" version = "0.11.2+zstd.1.5.2" diff --git a/graph/Cargo.toml b/graph/Cargo.toml index 3ea0c0bf349..163838f5d00 100644 --- a/graph/Cargo.toml +++ b/graph/Cargo.toml @@ -75,7 +75,7 @@ tokio = { version = "1.38.0", features = [ tokio-stream = { version = "0.1.15", features = ["sync"] } tokio-retry = "0.3.0" toml = "0.8.8" -url = "2.5.2" +url = "2.5.4" prometheus = "0.13.4" priority-queue = "2.0.3" tonic = { workspace = true } diff --git a/node/Cargo.toml b/node/Cargo.toml index 
ee6411fc87c..444b18784fc 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -19,7 +19,7 @@ clap.workspace = true git-testament = "0.2" itertools = { workspace = true } lazy_static = "1.5.0" -url = "2.5.2" +url = "2.5.4" graph = { path = "../graph" } graph-core = { path = "../core" } graph-chain-arweave = { path = "../chain/arweave" } From c1011eeaaadb17e64687c69a759c6b0f4ee1a407 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 21 Mar 2025 08:30:14 -0700 Subject: [PATCH 008/160] store: Fix syntax error in 'alter server' --- store/postgres/src/connection_pool.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index 374a1adc5ab..5ad9a60c5e1 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -185,7 +185,7 @@ impl ForeignServer { alter server \"{name}\" options (set host '{remote_host}', \ {set_port} port '{remote_port}', \ - set dbname '{remote_db}, \ + set dbname '{remote_db}', \ {set_fetch_size} fetch_size '{fetch_size}'); alter user mapping for current_user server \"{name}\" From 3bc203201f7aee2f48a05aed44623dd7fc884b87 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sat, 22 Mar 2025 09:48:53 -0700 Subject: [PATCH 009/160] store: Analyze tables earlier during copying Analyzing earlier makes it so that Postgres has statistics when rewinding the subgraph --- store/postgres/src/deployment_store.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index b196cd3c539..01f705158d3 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -1553,6 +1553,12 @@ impl DeploymentStore { catalog::copy_account_like(conn, &src.site, &dst.site)?; + // Analyze all tables for this deployment + info!(logger, "Analyzing all {} tables", dst.tables.len()); + for entity_name in dst.tables.keys() { + self.analyze_with_conn(site.cheap_clone(), entity_name.as_str(), conn)?; + } + // Rewind the subgraph so that entity versions that are // clamped in the future (beyond `block`) become valid for // all blocks after `block`. 
`revert_block` gets rid of @@ -1563,6 +1569,7 @@ impl DeploymentStore { .number .checked_add(1) .expect("block numbers fit into an i32"); + info!(logger, "Rewinding to block {}", block.number); let count = dst.revert_block(conn, block_to_revert)?; deployment::update_entity_count(conn, &dst.site, count)?; @@ -1575,11 +1582,6 @@ impl DeploymentStore { src_deployment.manifest.history_blocks, )?; - // Analyze all tables for this deployment - for entity_name in dst.tables.keys() { - self.analyze_with_conn(site.cheap_clone(), entity_name.as_str(), conn)?; - } - // The `earliest_block` for `src` might have changed while // we did the copy if `src` was pruned while we copied; // adjusting it very late in the copy process ensures that From c6f02fc7ef2d022746c70bc75493593b1e58c9c0 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 21 Mar 2025 09:22:50 -0700 Subject: [PATCH 010/160] store: Make sure we always map the right set of foreign tables --- store/postgres/src/catalog.rs | 10 ++++ store/postgres/src/connection_pool.rs | 80 +++++++++++++++++++++------ 2 files changed, 72 insertions(+), 18 deletions(-) diff --git a/store/postgres/src/catalog.rs b/store/postgres/src/catalog.rs index 1524a768acc..ba532dd53ff 100644 --- a/store/postgres/src/catalog.rs +++ b/store/postgres/src/catalog.rs @@ -398,6 +398,16 @@ pub fn drop_foreign_schema(conn: &mut PgConnection, src: &Site) -> Result<(), St Ok(()) } +pub fn foreign_tables(conn: &mut PgConnection, nsp: &str) -> Result, StoreError> { + use foreign_tables as ft; + + ft::table + .filter(ft::foreign_table_schema.eq(nsp)) + .select(ft::foreign_table_name) + .get_results::(conn) + .map_err(StoreError::from) +} + /// Drop the schema `nsp` and all its contents if it exists, and create it /// again so that `nsp` is an empty schema pub fn recreate_schema(conn: &mut PgConnection, nsp: &str) -> Result<(), StoreError> { diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index 5ad9a60c5e1..c4ba365cfd7 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -37,6 +37,11 @@ use crate::primary::{self, NAMESPACE_PUBLIC}; use crate::{advisory_lock, catalog}; use crate::{Shard, PRIMARY_SHARD}; +/// Tables that we map from the primary into `primary_public` in each shard +const PRIMARY_TABLES: [&str; 3] = ["deployment_schemas", "chains", "active_copies"]; + +/// Tables that we map from each shard into each other shard into the +/// `shard__subgraphs` namespace const SHARDED_TABLES: [(&str, &[&str]); 2] = [ ("public", &["ethereum_networks"]), ( @@ -209,7 +214,7 @@ impl ForeignServer { catalog::recreate_schema(conn, Self::PRIMARY_PUBLIC)?; let mut query = String::new(); - for table_name in ["deployment_schemas", "chains", "active_copies"] { + for table_name in PRIMARY_TABLES { let create_stmt = if shard == &*PRIMARY_SHARD { format!( "create view {nsp}.{table_name} as select * from public.{table_name};", @@ -246,6 +251,33 @@ impl ForeignServer { } Ok(conn.batch_execute(&query)?) 
} + + fn needs_remap(&self, conn: &mut PgConnection) -> Result { + fn different(mut existing: Vec, mut needed: Vec) -> bool { + existing.sort(); + needed.sort(); + existing != needed + } + + if &self.shard == &*PRIMARY_SHARD { + let existing = catalog::foreign_tables(conn, Self::PRIMARY_PUBLIC)?; + let needed = PRIMARY_TABLES + .into_iter() + .map(String::from) + .collect::>(); + if different(existing, needed) { + return Ok(true); + } + } + + let existing = catalog::foreign_tables(conn, &Self::metadata_schema(&self.shard))?; + let needed = SHARDED_TABLES + .iter() + .flat_map(|(_, tables)| *tables) + .map(|table| table.to_string()) + .collect::>(); + Ok(different(existing, needed)) + } } /// How long to keep connections in the `fdw_pool` around before closing @@ -1037,16 +1069,14 @@ impl PoolInner { let result = pool .configure_fdw(coord.servers.as_ref()) .and_then(|()| pool.drop_cross_shard_views()) - .and_then(|()| migrate_schema(&pool.logger, &mut conn)) - .and_then(|count| { - pool.create_cross_shard_views(coord.servers.as_ref()) - .map(|()| count) - }); + .and_then(|()| migrate_schema(&pool.logger, &mut conn)); debug!(&pool.logger, "Release migration lock"); advisory_lock::unlock_migration(&mut conn).unwrap_or_else(|err| { die(&pool.logger, "failed to release migration lock", &err); }); - let result = result.and_then(|count| coord.propagate(&pool, count)); + let result = result + .and_then(|count| coord.propagate(&pool, count)) + .and_then(|()| pool.create_cross_shard_views(coord.servers.as_ref())); result.unwrap_or_else(|err| die(&pool.logger, "migrations failed", &err)); // Locale check @@ -1178,9 +1208,9 @@ impl PoolInner { .await } - // The foreign server `server` had schema changes, and we therefore need - // to remap anything that we are importing via fdw to make sure we are - // using this updated schema + /// The foreign server `server` had schema changes, and we therefore + /// need to remap anything that we are importing via fdw to make sure we + /// are using this updated schema pub fn remap(&self, server: &ForeignServer) -> Result<(), StoreError> { if &server.shard == &*PRIMARY_SHARD { info!(&self.logger, "Mapping primary"); @@ -1198,6 +1228,15 @@ impl PoolInner { } Ok(()) } + + pub fn needs_remap(&self, server: &ForeignServer) -> Result { + if &server.shard == &self.shard { + return Ok(false); + } + + let mut conn = self.get()?; + server.needs_remap(&mut conn) + } } pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!("./migrations"); @@ -1211,10 +1250,6 @@ impl MigrationCount { fn had_migrations(&self) -> bool { self.old != self.new } - - fn is_new(&self) -> bool { - self.old == 0 - } } /// Run all schema migrations. @@ -1334,13 +1369,22 @@ impl PoolCoordinator { /// code that does _not_ hold the migration lock as it will otherwise /// deadlock fn propagate(&self, pool: &PoolInner, count: MigrationCount) -> Result<(), StoreError> { - // pool is a new shard, map all other shards into it - if count.is_new() { - for server in self.servers.iter() { + // We need to remap all these servers into `pool` if the list of + // tables that are mapped have changed from the code of the previous + // version. Since dropping and recreating the foreign table + // definitions can slow the startup of other nodes down because of + // locking, we try to only do this when it is actually needed + for server in self.servers.iter() { + if pool.needs_remap(server)? 
{ pool.remap(server)?; } } - // pool had schema changes, refresh the import from pool into all other shards + + // pool had schema changes, refresh the import from pool into all + // other shards. This makes sure that schema changes to + // already-mapped tables are propagated to all other shards. Since + // we run `propagate` after migrations have been applied to `pool`, + // we can be sure that these mappings use the correct schema if count.had_migrations() { let server = self.server(&pool.shard)?; for pool in self.pools.lock().unwrap().values() { From 5f2ecb72352038ddf60ebe63c8536e7199fad5ad Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sat, 22 Mar 2025 10:44:25 -0700 Subject: [PATCH 011/160] store: Do not map the subgraph_features table It's only maintained in the primary, and there's no point in mapping it across shards --- store/postgres/src/connection_pool.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index c4ba365cfd7..6267a41628a 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -52,7 +52,6 @@ const SHARDED_TABLES: [(&str, &[&str]); 2] = [ "dynamic_ethereum_contract_data_source", "subgraph_deployment", "subgraph_error", - "subgraph_features", "subgraph_manifest", "table_stats", ], From 701f77d2d39decfef2ec2d91aa5df0cf5abb7c69 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sun, 23 Mar 2025 10:46:42 -0700 Subject: [PATCH 012/160] graphman: Annotate failures in 'copy create' with source When creating many copies with a shell script, it is useful to have the deployment we are trying to copy in the error message --- node/src/manager/commands/copy.rs | 34 ++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/node/src/manager/commands/copy.rs b/node/src/manager/commands/copy.rs index ab007ea319d..d3280823e76 100644 --- a/node/src/manager/commands/copy.rs +++ b/node/src/manager/commands/copy.rs @@ -2,7 +2,7 @@ use diesel::{ExpressionMethods, JoinOnDsl, OptionalExtension, QueryDsl, RunQuery use std::{collections::HashMap, sync::Arc, time::SystemTime}; use graph::{ - components::store::{BlockStore as _, DeploymentId}, + components::store::{BlockStore as _, DeploymentId, DeploymentLocator}, data::query::QueryTarget, prelude::{ anyhow::{anyhow, bail, Error}, @@ -84,10 +84,9 @@ impl CopyState { } } -pub async fn create( +async fn create_inner( store: Arc, - primary: ConnectionPool, - src: DeploymentSearch, + src: &DeploymentLocator, shard: String, shards: Vec, node: String, @@ -104,7 +103,6 @@ pub async fn create( }; let subgraph_store = store.subgraph_store(); - let src = src.locate_unique(&primary)?; let query_store = store .query_store(QueryTarget::Deployment( src.hash.clone(), @@ -154,6 +152,32 @@ pub async fn create( Ok(()) } +pub async fn create( + store: Arc, + primary: ConnectionPool, + src: DeploymentSearch, + shard: String, + shards: Vec, + node: String, + block_offset: u32, + activate: bool, + replace: bool, +) -> Result<(), Error> { + let src = src.locate_unique(&primary)?; + create_inner( + store, + &src, + shard, + shards, + node, + block_offset, + activate, + replace, + ) + .await + .map_err(|e| anyhow!("cannot copy {src}: {e}")) +} + pub fn activate(store: Arc, deployment: String, shard: String) -> Result<(), Error> { let shard = Shard::new(shard)?; let deployment = From ccd65e777ca322816172862eaa198fc478b792b1 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sun, 23 Mar 2025 13:14:54 
-0700 Subject: [PATCH 013/160] graphman: Fix table status indicator for 'copy status' With recent changes, the status shown was '>' (in progress) for all tables that hadn't finished yet, not just the one being worked on --- node/src/manager/commands/copy.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/node/src/manager/commands/copy.rs b/node/src/manager/commands/copy.rs index d3280823e76..9ca80bc9b20 100644 --- a/node/src/manager/commands/copy.rs +++ b/node/src/manager/commands/copy.rs @@ -255,13 +255,11 @@ pub fn list(pools: HashMap) -> Result<(), Error> { } pub fn status(pools: HashMap, dst: &DeploymentSearch) -> Result<(), Error> { + const CHECK: &str = "✓"; + use catalog::active_copies as ac; use catalog::deployment_schemas as ds; - fn done(ts: &Option) -> String { - ts.map(|_| "✓").unwrap_or(".").to_string() - } - fn duration(start: &UtcDateTime, end: &Option) -> String { let start = *start; let end = *end; @@ -314,7 +312,7 @@ pub fn status(pools: HashMap, dst: &DeploymentSearch) -> }; let progress = match &state.finished_at { - Some(_) => done(&state.finished_at), + Some(_) => CHECK.to_string(), None => { let target: i64 = tables.iter().map(|table| table.target_vid).sum(); let next: i64 = tables.iter().map(|table| table.next_vid).sum(); @@ -363,13 +361,15 @@ pub fn status(pools: HashMap, dst: &DeploymentSearch) -> ); println!("{:-<74}", "-"); for table in tables { - let status = if table.next_vid > 0 && table.next_vid < table.target_vid { - ">".to_string() - } else if table.target_vid < 0 { + let status = match &table.finished_at { + // table finished + Some(_) => CHECK, // empty source table - "✓".to_string() - } else { - done(&table.finished_at) + None if table.target_vid < 0 => CHECK, + // copying in progress + None if table.duration_ms > 0 => ">", + // not started + None => ".", }; println!( "{} {:<28} | {:>8} | {:>8} | {:>8} | {:>8}", From dde111cf6cd61d44b61716e583d029b6295fb451 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sun, 23 Mar 2025 17:10:34 -0700 Subject: [PATCH 014/160] store: Create postponed indexes non-concurrently for copy/graft At the point where we create the postponed indexes during copying, nothing else is writing to the subgraph and we can't be blocking a writer with a normal 'create index'. Since concurrent index creation has to wait for all previous transactions in the database to finish, the concurrent creation can significantly slow down index creation and therefore how long the copy takes. 
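To make the effect concrete, here is a rough sketch of the two kinds of DDL involved; the schema, table and column names are made up for illustration and are not taken from this patch:

    -- concurrent creation: does not block writers, but has to wait for every
    -- transaction that was already running when it started
    create index concurrently if not exists attr_1_0_thing_name
        on sgd42.thing using btree(name);

    -- plain creation, as now used at the end of a copy/graft, where nothing
    -- else is writing to the destination deployment anyway
    create index if not exists attr_1_0_thing_name
        on sgd42.thing using btree(name);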
--- store/postgres/src/copy.rs | 8 ++++++-- store/postgres/src/relational/ddl.rs | 9 +++++++-- store/postgres/src/relational/ddl_tests.rs | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 5a31acfb959..d82bc33e4a8 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -730,7 +730,7 @@ impl Connection { &table.src.name.to_string(), &table.dst, true, - true, + false, )?; for (_, sql) in arr { @@ -748,7 +748,11 @@ impl Connection { .iter() .map(|c| c.name.to_string()) .collect_vec(); - for sql in table.dst.create_postponed_indexes(orig_colums).into_iter() { + for sql in table + .dst + .create_postponed_indexes(orig_colums, false) + .into_iter() + { let query = sql_query(sql); query.execute(conn)?; } diff --git a/store/postgres/src/relational/ddl.rs b/store/postgres/src/relational/ddl.rs index 55e116272d1..980bca2b9fd 100644 --- a/store/postgres/src/relational/ddl.rs +++ b/store/postgres/src/relational/ddl.rs @@ -269,7 +269,11 @@ impl Table { (method, index_expr) } - pub(crate) fn create_postponed_indexes(&self, skip_colums: Vec) -> Vec { + pub(crate) fn create_postponed_indexes( + &self, + skip_colums: Vec, + concurrently: bool, + ) -> Vec { let mut indexing_queries = vec![]; let columns = self.columns_to_index(); @@ -281,8 +285,9 @@ impl Table { && column.name.as_str() != "id" && !skip_colums.contains(&column.name.to_string()) { + let conc = if concurrently { "concurrently " } else { "" }; let sql = format!( - "create index concurrently if not exists attr_{table_index}_{column_index}_{table_name}_{column_name}\n on {qname} using {method}({index_expr});\n", + "create index {conc}if not exists attr_{table_index}_{column_index}_{table_name}_{column_name}\n on {qname} using {method}({index_expr});\n", table_index = self.position, table_name = self.name, column_name = column.name, diff --git a/store/postgres/src/relational/ddl_tests.rs b/store/postgres/src/relational/ddl_tests.rs index 86e9f232d49..bb1dcc67f46 100644 --- a/store/postgres/src/relational/ddl_tests.rs +++ b/store/postgres/src/relational/ddl_tests.rs @@ -158,7 +158,7 @@ fn generate_postponed_indexes() { let layout = test_layout(THING_GQL); let table = layout.table(&SqlName::from("Scalar")).unwrap(); let skip_colums = vec!["id".to_string()]; - let query_vec = table.create_postponed_indexes(skip_colums); + let query_vec = table.create_postponed_indexes(skip_colums, true); assert!(query_vec.len() == 7); let queries = query_vec.join(" "); check_eqv(THING_POSTPONED_INDEXES, &queries) From f898defe28ee8f2a509dc1f4e51effb1ab44bb12 Mon Sep 17 00:00:00 2001 From: encalypto Date: Mon, 24 Mar 2025 16:49:13 +0000 Subject: [PATCH 015/160] Redact URLs in RPC error log messages (#5902) --- chain/ethereum/src/ethereum_adapter.rs | 20 ++++++++++++++ graph/src/util/futures.rs | 36 +++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/chain/ethereum/src/ethereum_adapter.rs b/chain/ethereum/src/ethereum_adapter.rs index 7173c069c65..e0714c24f02 100644 --- a/chain/ethereum/src/ethereum_adapter.rs +++ b/chain/ethereum/src/ethereum_adapter.rs @@ -147,6 +147,7 @@ impl EthereumAdapter { let retry_log_message = format!("trace_filter RPC call for block range: [{}..{}]", from, to); retry(retry_log_message, &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -295,6 +296,7 @@ impl EthereumAdapter { let eth_adapter = self.clone(); let 
retry_log_message = format!("eth_getLogs RPC call for block range: [{}..{}]", from, to); retry(retry_log_message, &logger) + .redact_log_urls(true) .when(move |res: &Result<_, web3::error::Error>| match res { Ok(_) => false, Err(e) => !too_many_logs_fingerprints @@ -511,6 +513,7 @@ impl EthereumAdapter { let retry_log_message = format!("eth_getCode RPC call for block {}", block_ptr); retry(retry_log_message, &logger) + .redact_log_urls(true) .when(|result| match result { Ok(_) => false, Err(_) => true, @@ -546,6 +549,7 @@ impl EthereumAdapter { let retry_log_message = format!("eth_getBalance RPC call for block {}", block_ptr); retry(retry_log_message, &logger) + .redact_log_urls(true) .when(|result| match result { Ok(_) => false, Err(_) => true, @@ -586,6 +590,7 @@ impl EthereumAdapter { let block_id = self.block_ptr_to_id(&block_ptr); let retry_log_message = format!("eth_call RPC call for block {}", block_ptr); retry(retry_log_message, &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -765,6 +770,7 @@ impl EthereumAdapter { stream::iter_ok::<_, Error>(ids.into_iter().map(move |hash| { let web3 = web3.clone(); retry(format!("load block {}", hash), &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -799,6 +805,7 @@ impl EthereumAdapter { async move { retry(format!("load block {}", number), &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -856,6 +863,7 @@ impl EthereumAdapter { stream::iter_ok::<_, Error>(block_nums.into_iter().map(move |block_num| { let web3 = web3.clone(); retry(format!("load block ptr {}", block_num), &logger) + .redact_log_urls(true) .when(|res| !res.is_ok() && !detect_null_block(res)) .no_limit() .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) @@ -1140,6 +1148,7 @@ impl EthereumAdapter { let web3 = self.web3.clone(); u64::try_from( retry("chain_id RPC call", &logger) + .redact_log_urls(true) .no_limit() .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -1175,6 +1184,7 @@ impl EthereumAdapterTrait for EthereumAdapter { let metrics = self.metrics.clone(); let provider = self.provider().to_string(); let net_version_future = retry("net_version RPC call", &logger) + .redact_log_urls(true) .no_limit() .timeout_secs(20) .run(move || { @@ -1203,6 +1213,7 @@ impl EthereumAdapterTrait for EthereumAdapter { ENV_VARS.genesis_block_number ); let gen_block_hash_future = retry(retry_log_message, &logger) + .redact_log_urls(true) .no_limit() .timeout_secs(30) .run(move || { @@ -1254,6 +1265,7 @@ impl EthereumAdapterTrait for EthereumAdapter { let web3 = self.web3.clone(); Box::new( retry("eth_getBlockByNumber(latest) no txs RPC call", logger) + .redact_log_urls(true) .no_limit() .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -1288,6 +1300,7 @@ impl EthereumAdapterTrait for EthereumAdapter { let web3 = self.web3.clone(); Box::new( retry("eth_getBlockByNumber(latest) with txs RPC call", logger) + .redact_log_urls(true) .no_limit() .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -1345,6 +1358,7 @@ impl EthereumAdapterTrait for EthereumAdapter { ); Box::new( retry(retry_log_message, &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -1376,6 +1390,7 @@ impl EthereumAdapterTrait for 
EthereumAdapter { ); Box::new( retry(retry_log_message, &logger) + .redact_log_urls(true) .no_limit() .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -1458,6 +1473,7 @@ impl EthereumAdapterTrait for EthereumAdapter { ); Box::new( retry(retry_log_message, logger) + .redact_log_urls(true) .no_limit() .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -1525,6 +1541,7 @@ impl EthereumAdapterTrait for EthereumAdapter { let web3 = self.web3.clone(); let logger = logger.clone(); let res = retry(retry_log_message, &logger) + .redact_log_urls(true) .when(|res| !res.is_ok() && !detect_null_block(res)) .no_limit() .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) @@ -2279,6 +2296,7 @@ async fn fetch_transaction_receipts_in_batch_with_retry( block_hash ); retry(retry_log_message, &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .no_logging() .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) @@ -2406,6 +2424,7 @@ async fn fetch_block_receipts_with_retry( // Perform the retry operation let receipts_option = retry(retry_log_message, &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || web3.eth().block_receipts(BlockId::Hash(block_hash)).boxed()) @@ -2450,6 +2469,7 @@ async fn fetch_transaction_receipt_with_retry( transaction_hash ); retry(retry_log_message, &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || web3.eth().transaction_receipt(transaction_hash).boxed()) diff --git a/graph/src/util/futures.rs b/graph/src/util/futures.rs index d742457dcd1..7c49806c53a 100644 --- a/graph/src/util/futures.rs +++ b/graph/src/util/futures.rs @@ -1,5 +1,7 @@ use crate::ext::futures::FutureExtension; use futures03::{Future, FutureExt, TryFutureExt}; +use lazy_static::lazy_static; +use regex::Regex; use slog::{debug, trace, warn, Logger}; use std::fmt::Debug; use std::marker::PhantomData; @@ -61,6 +63,7 @@ pub fn retry(operation_name: impl ToString, logger: &Logger) -> RetryConfi log_after: 1, warn_after: 10, limit: RetryConfigProperty::Unknown, + redact_log_urls: false, phantom_item: PhantomData, phantom_error: PhantomData, } @@ -75,6 +78,7 @@ pub struct RetryConfig { limit: RetryConfigProperty, phantom_item: PhantomData, phantom_error: PhantomData, + redact_log_urls: bool, } impl RetryConfig @@ -125,6 +129,12 @@ where self } + /// Redact alphanumeric URLs from log messages. + pub fn redact_log_urls(mut self, redact_log_urls: bool) -> Self { + self.redact_log_urls = redact_log_urls; + self + } + /// Set how long (in seconds) to wait for an attempt to complete before giving up on that /// attempt. 
pub fn timeout_secs(self, timeout_secs: u64) -> RetryConfigWithTimeout { @@ -173,6 +183,7 @@ where let log_after = self.inner.log_after; let warn_after = self.inner.warn_after; let limit_opt = self.inner.limit.unwrap(&operation_name, "limit"); + let redact_log_urls = self.inner.redact_log_urls; let timeout = self.timeout; trace!(logger, "Run with retry: {}", operation_name); @@ -184,6 +195,7 @@ where log_after, warn_after, limit_opt, + redact_log_urls, move || { try_it() .timeout(timeout) @@ -214,6 +226,7 @@ impl RetryConfigNoTimeout { let log_after = self.inner.log_after; let warn_after = self.inner.warn_after; let limit_opt = self.inner.limit.unwrap(&operation_name, "limit"); + let redact_log_urls = self.inner.redact_log_urls; trace!(logger, "Run with retry: {}", operation_name); @@ -224,6 +237,7 @@ impl RetryConfigNoTimeout { log_after, warn_after, limit_opt, + redact_log_urls, // No timeout, so all errors are inner errors move || try_it().map_err(TimeoutError::Inner), ) @@ -265,6 +279,7 @@ fn run_retry( log_after: u64, warn_after: u64, limit_opt: Option, + redact_log_urls: bool, mut try_it_with_timeout: F, ) -> impl Future>> + Send where @@ -311,25 +326,38 @@ where // If needs retry if condition.check(&result) { + let result_str = || { + if redact_log_urls { + lazy_static! { + static ref RE: Regex = + Regex::new(r#"https?://[a-zA-Z0-9\-\._:/\?#&=]+"#).unwrap(); + } + let e = format!("{result:?}"); + RE.replace_all(&e, "[REDACTED]").into_owned() + } else { + format!("{result:?}") + } + }; + if attempt_count >= warn_after { // This looks like it would be nice to de-duplicate, but if we try // to use log! slog complains about requiring a const for the log level // See also b05e1594-e408-4047-aefb-71fc60d70e8f warn!( logger, - "Trying again after {} failed (attempt #{}) with result {:?}", + "Trying again after {} failed (attempt #{}) with result {}", &operation_name, attempt_count, - result + result_str(), ); } else if attempt_count >= log_after { // See also b05e1594-e408-4047-aefb-71fc60d70e8f debug!( logger, - "Trying again after {} failed (attempt #{}) with result {:?}", + "Trying again after {} failed (attempt #{}) with result {}", &operation_name, attempt_count, - result + result_str(), ); } From 09579fbbd3b85185ce0f96cfd1c3cd5e39ec939b Mon Sep 17 00:00:00 2001 From: Zoran Cvetkov <36600146+zorancv@users.noreply.github.com> Date: Wed, 26 Mar 2025 19:22:04 +0200 Subject: [PATCH 016/160] store: fix grafting bug --- store/postgres/src/relational_queries.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/store/postgres/src/relational_queries.rs b/store/postgres/src/relational_queries.rs index 19f9400c470..c6567c5d4f7 100644 --- a/store/postgres/src/relational_queries.rs +++ b/store/postgres/src/relational_queries.rs @@ -4809,7 +4809,7 @@ impl<'a> QueryFragment for CopyEntityBatchQuery<'a> { fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> QueryResult<()> { out.unsafe_to_cache_prepared(); - let has_vid_seq = self.src.object.has_vid_seq(); + let has_vid_seq = self.dst.object.has_vid_seq(); // Construct a query // insert into {dst}({columns}) From eba48445745cd05ac959538e03b0a8de64cea6c9 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 25 Mar 2025 13:12:39 -0700 Subject: [PATCH 017/160] store: Do not emit an index in block$ twice If an immutable entity type has a `block` column, we would emit the index on the block$ twice, making deployment fail --- store/postgres/src/relational/ddl_tests.rs | 80 ++++++++++++++++++++++ 
store/postgres/src/relational/index.rs | 30 ++++++-- 2 files changed, 103 insertions(+), 7 deletions(-) diff --git a/store/postgres/src/relational/ddl_tests.rs b/store/postgres/src/relational/ddl_tests.rs index bb1dcc67f46..53106be2b1a 100644 --- a/store/postgres/src/relational/ddl_tests.rs +++ b/store/postgres/src/relational/ddl_tests.rs @@ -352,6 +352,74 @@ fn can_copy_from() { ); } +/// Check that we do not create the index on `block$` twice. There was a bug +/// that if an immutable entity type had a `block` field and index creation +/// was postponed, we would emit the index on `block$` twice, once from +/// `Table.create_time_travel_indexes` and once through +/// `IndexList.indexes_for_table` +#[test] +fn postponed_indexes_with_block_column() { + fn index_list() -> IndexList { + // To generate this list, print the output of `layout.as_ddl(None)`, run + // that in Postgres and do `select indexdef from pg_indexes where + // schemaname = 'sgd0815'` + const INDEX_DEFS: &[&str] = &[ + "CREATE UNIQUE INDEX data_pkey ON sgd0815.data USING btree (vid)", + "CREATE UNIQUE INDEX data_id_key ON sgd0815.data USING btree (id)", + "CREATE INDEX data_block ON sgd0815.data USING btree (block$)", + "CREATE INDEX attr_1_0_data_block ON sgd0815.data USING btree (block, \"block$\")", + ]; + + let mut indexes: HashMap> = HashMap::new(); + indexes.insert( + "data".to_string(), + INDEX_DEFS + .iter() + .map(|def| CreateIndex::parse(def.to_string())) + .collect(), + ); + IndexList { indexes } + } + // Names of the two indexes we are interested in. Not the leading space + // to guard a little against overlapping names + const BLOCK_IDX: &str = " data_block"; + const ATTR_IDX: &str = " attr_1_0_data_block"; + + let layout = test_layout(BLOCK_GQL); + + // Create everything + let sql = layout.as_ddl(None).unwrap(); + assert!(sql.contains(BLOCK_IDX)); + assert!(sql.contains(ATTR_IDX)); + + // Defer attribute indexes + let sql = layout.as_ddl(Some(index_list())).unwrap(); + assert!(sql.contains(BLOCK_IDX)); + assert!(!sql.contains(ATTR_IDX)); + // This used to be duplicated + let count = sql.matches(BLOCK_IDX).count(); + assert_eq!(1, count); + + let table = layout.table(&SqlName::from("Data")).unwrap(); + let sql = table.create_postponed_indexes(vec![], false); + assert_eq!(1, sql.len()); + assert!(!sql[0].contains(BLOCK_IDX)); + assert!(sql[0].contains(ATTR_IDX)); + + let dst_nsp = Namespace::new("sgd2".to_string()).unwrap(); + let arr = index_list() + .indexes_for_table(&dst_nsp, &table.name.to_string(), &table, true, false) + .unwrap(); + assert_eq!(1, arr.len()); + assert!(!arr[0].1.contains(BLOCK_IDX)); + assert!(arr[0].1.contains(ATTR_IDX)); + + let arr = index_list() + .indexes_for_table(&dst_nsp, &table.name.to_string(), &table, false, false) + .unwrap(); + assert_eq!(0, arr.len()); +} + const THING_GQL: &str = r#" type Thing @entity { id: ID! @@ -1109,3 +1177,15 @@ on "sgd0815"."stats_3_day" using btree("volume"); create index stats_3_day_dims on "sgd0815"."stats_3_day"(group_2, group_1, timestamp); "#; + +const BLOCK_GQL: &str = r#" +type Block @entity(immutable: true) { + id: ID! + number: Int! +} + +type Data @entity(immutable: true) { + id: ID! + block: Block! 
+} +"#; diff --git a/store/postgres/src/relational/index.rs b/store/postgres/src/relational/index.rs index 4f72e773ee6..77e7c2c400d 100644 --- a/store/postgres/src/relational/index.rs +++ b/store/postgres/src/relational/index.rs @@ -123,7 +123,7 @@ impl Display for Expr { Expr::Column(s) => write!(f, "{s}")?, Expr::Prefix(s, _) => write!(f, "{s}")?, Expr::Vid => write!(f, "vid")?, - Expr::Block => write!(f, "block")?, + Expr::Block => write!(f, "{BLOCK_COLUMN}")?, Expr::BlockRange => write!(f, "block_range")?, Expr::BlockRangeLower => write!(f, "lower(block_range)")?, Expr::BlockRangeUpper => write!(f, "upper(block_range)")?, @@ -488,12 +488,29 @@ impl CreateIndex { && columns[1] == Expr::BlockRange } Method::Brin => false, - Method::BTree | Method::Gin => { + Method::Gin => { + // 'using gin()' columns.len() == 1 && columns[0].is_attribute() && cond.is_none() && with.is_none() } + Method::BTree => { + match columns.len() { + 1 => { + // 'using btree()' + columns[0].is_attribute() && cond.is_none() && with.is_none() + } + 2 => { + // 'using btree(, block$)' + columns[0].is_attribute() + && columns[1] == Expr::Block + && cond.is_none() + && with.is_none() + } + _ => false, + } + } Method::Unknown(_) => false, } } @@ -537,6 +554,7 @@ impl CreateIndex { None, ), dummy(false, BTree, &[Expr::BlockRangeUpper], Some(Cond::Closed)), + dummy(false, BTree, &[Expr::Block], None), ] }; } @@ -630,7 +648,7 @@ impl CreateIndex { } pub fn fields_exist_in_dest<'a>(&self, dest_table: &'a Table) -> bool { - fn column_exists<'a>(it: &mut impl Iterator, column_name: &String) -> bool { + fn column_exists<'a>(it: &mut impl Iterator, column_name: &str) -> bool { it.any(|c| *c == *column_name) } @@ -667,9 +685,7 @@ impl CreateIndex { } Expr::Vid => (), Expr::Block => { - if !column_exists(cols, &"block".to_string()) { - return false; - } + return dest_table.immutable; } Expr::Unknown(expression) => { if some_column_contained( @@ -776,7 +792,7 @@ impl IndexList { // First we check if the fields do exist in the destination subgraph. // In case of grafting that is not given. if ci.fields_exist_in_dest(dest_table) - // Then we check if the index is one of the default indexes not based on + // Then we check if the index is one of the default indexes not based on // the attributes. Those will be created anyway and we should skip them. 
&& !ci.is_default_non_attr_index() // Then ID based indexes in the immutable tables are also created initially From b27cc722e5ac0c0bcf5ef4e0bae7ade35e27203f Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 25 Mar 2025 17:10:06 -0700 Subject: [PATCH 018/160] store: IndexList.indexes_for_table: split concurrent and if_not_exists At the end of copying, we do not want to create indexes concurrently, but we do want the creation to be idempotent, i.e., have a 'if not exists' clause --- store/postgres/src/copy.rs | 1 + store/postgres/src/relational/ddl.rs | 9 ++++++++- store/postgres/src/relational/ddl_tests.rs | 18 ++++++++++++++++-- store/postgres/src/relational/index.rs | 7 ++++--- 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index d82bc33e4a8..d92064c7f5c 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -731,6 +731,7 @@ impl Connection { &table.dst, true, false, + true, )?; for (_, sql) in arr { diff --git a/store/postgres/src/relational/ddl.rs b/store/postgres/src/relational/ddl.rs index 980bca2b9fd..e85281a5899 100644 --- a/store/postgres/src/relational/ddl.rs +++ b/store/postgres/src/relational/ddl.rs @@ -408,7 +408,14 @@ impl Table { if index_def.is_some() && ENV_VARS.postpone_attribute_index_creation { let arr = index_def .unwrap() - .indexes_for_table(&self.nsp, &self.name.to_string(), &self, false, false) + .indexes_for_table( + &self.nsp, + &self.name.to_string(), + &self, + false, + false, + false, + ) .map_err(|_| fmt::Error)?; for (_, sql) in arr { writeln!(out, "{};", sql).expect("properly formated index statements") diff --git a/store/postgres/src/relational/ddl_tests.rs b/store/postgres/src/relational/ddl_tests.rs index 53106be2b1a..c9e44854e8f 100644 --- a/store/postgres/src/relational/ddl_tests.rs +++ b/store/postgres/src/relational/ddl_tests.rs @@ -408,14 +408,28 @@ fn postponed_indexes_with_block_column() { let dst_nsp = Namespace::new("sgd2".to_string()).unwrap(); let arr = index_list() - .indexes_for_table(&dst_nsp, &table.name.to_string(), &table, true, false) + .indexes_for_table( + &dst_nsp, + &table.name.to_string(), + &table, + true, + false, + false, + ) .unwrap(); assert_eq!(1, arr.len()); assert!(!arr[0].1.contains(BLOCK_IDX)); assert!(arr[0].1.contains(ATTR_IDX)); let arr = index_list() - .indexes_for_table(&dst_nsp, &table.name.to_string(), &table, false, false) + .indexes_for_table( + &dst_nsp, + &table.name.to_string(), + &table, + false, + false, + false, + ) .unwrap(); assert_eq!(0, arr.len()); } diff --git a/store/postgres/src/relational/index.rs b/store/postgres/src/relational/index.rs index 77e7c2c400d..5776bf8f01f 100644 --- a/store/postgres/src/relational/index.rs +++ b/store/postgres/src/relational/index.rs @@ -784,7 +784,8 @@ impl IndexList { table_name: &String, dest_table: &Table, postponed: bool, - concurrent_if_not_exist: bool, + concurrent: bool, + if_not_exists: bool, ) -> Result, String)>, Error> { let mut arr = vec![]; if let Some(vec) = self.indexes.get(table_name) { @@ -805,7 +806,7 @@ impl IndexList { { if let Ok(sql) = ci .with_nsp(namespace.to_string())? - .to_sql(concurrent_if_not_exist, concurrent_if_not_exist) + .to_sql(concurrent, if_not_exists) { arr.push((ci.name(), sql)) } @@ -829,7 +830,7 @@ impl IndexList { let namespace = &layout.catalog.site.namespace; for table in layout.tables.values() { for (ind_name, create_query) in - self.indexes_for_table(namespace, &table.name.to_string(), table, true, true)? 
+ self.indexes_for_table(namespace, &table.name.to_string(), table, true, true, true)? { if let Some(index_name) = ind_name { let table_name = table.name.clone(); From 6aa489ccb7f5d78e9ba35dd670844390fd32626f Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 26 Mar 2025 12:14:00 -0700 Subject: [PATCH 019/160] store: Make test postponed_indexes_with_block_column more specific Test that the create index statements do/do not contain 'if not exists' --- store/postgres/src/relational/ddl_tests.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/store/postgres/src/relational/ddl_tests.rs b/store/postgres/src/relational/ddl_tests.rs index c9e44854e8f..b15a40cecfb 100644 --- a/store/postgres/src/relational/ddl_tests.rs +++ b/store/postgres/src/relational/ddl_tests.rs @@ -380,6 +380,15 @@ fn postponed_indexes_with_block_column() { ); IndexList { indexes } } + + fn cr(index: &str) -> String { + format!("create index{}", index) + } + + fn cre(index: &str) -> String { + format!("create index if not exists{}", index) + } + // Names of the two indexes we are interested in. Not the leading space // to guard a little against overlapping names const BLOCK_IDX: &str = " data_block"; @@ -389,12 +398,12 @@ fn postponed_indexes_with_block_column() { // Create everything let sql = layout.as_ddl(None).unwrap(); - assert!(sql.contains(BLOCK_IDX)); - assert!(sql.contains(ATTR_IDX)); + assert!(sql.contains(&cr(BLOCK_IDX))); + assert!(sql.contains(&cr(ATTR_IDX))); // Defer attribute indexes let sql = layout.as_ddl(Some(index_list())).unwrap(); - assert!(sql.contains(BLOCK_IDX)); + assert!(sql.contains(&cr(BLOCK_IDX))); assert!(!sql.contains(ATTR_IDX)); // This used to be duplicated let count = sql.matches(BLOCK_IDX).count(); @@ -404,7 +413,7 @@ fn postponed_indexes_with_block_column() { let sql = table.create_postponed_indexes(vec![], false); assert_eq!(1, sql.len()); assert!(!sql[0].contains(BLOCK_IDX)); - assert!(sql[0].contains(ATTR_IDX)); + assert!(sql[0].contains(&cre(ATTR_IDX))); let dst_nsp = Namespace::new("sgd2".to_string()).unwrap(); let arr = index_list() @@ -419,7 +428,7 @@ fn postponed_indexes_with_block_column() { .unwrap(); assert_eq!(1, arr.len()); assert!(!arr[0].1.contains(BLOCK_IDX)); - assert!(arr[0].1.contains(ATTR_IDX)); + assert!(arr[0].1.contains(&cr(ATTR_IDX))); let arr = index_list() .indexes_for_table( From 0e2ee0ae0b3f011c918187fd409c2c309671516f Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 26 Mar 2025 12:14:53 -0700 Subject: [PATCH 020/160] store: Address logic error in CreateIndex.fields_exist_in_dest Do not short-circuit checking other columns in the check for Block --- store/postgres/src/relational/index.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/store/postgres/src/relational/index.rs b/store/postgres/src/relational/index.rs index 5776bf8f01f..efa82e901f0 100644 --- a/store/postgres/src/relational/index.rs +++ b/store/postgres/src/relational/index.rs @@ -685,7 +685,9 @@ impl CreateIndex { } Expr::Vid => (), Expr::Block => { - return dest_table.immutable; + if !dest_table.immutable { + return false; + } } Expr::Unknown(expression) => { if some_column_contained( From f2c8a261d3b9733badd50c07a8b59aba2ddc6f25 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 24 Mar 2025 16:21:34 -0700 Subject: [PATCH 021/160] store: Refactor handling of manifest indexes for ds copying --- store/postgres/src/dynds/private.rs | 73 ++++++++++++++++++----------- 1 file changed, 46 insertions(+), 27 deletions(-) 
diff --git a/store/postgres/src/dynds/private.rs b/store/postgres/src/dynds/private.rs index e8e7f4ce992..f48192b45d4 100644 --- a/store/postgres/src/dynds/private.rs +++ b/store/postgres/src/dynds/private.rs @@ -1,4 +1,4 @@ -use std::ops::Bound; +use std::{collections::HashMap, ops::Bound}; use diesel::{ pg::sql_types, @@ -252,32 +252,11 @@ impl DataSourcesTable { .order_by(&self.vid) .load::(conn)?; + let manifest_map = + ManifestIdxMap::new(src_manifest_idx_and_name, dst_manifest_idx_and_name); let mut count = 0; - for (block_range, src_manifest_idx, param, context, causality_region, done_at) in src_tuples - { - let name = &src_manifest_idx_and_name - .iter() - .find(|(idx, _)| idx == &src_manifest_idx) - .with_context(|| { - anyhow!( - "the source {} does not have a template with index {}", - self.namespace, - src_manifest_idx - ) - })? - .1; - let dst_manifest_idx = dst_manifest_idx_and_name - .iter() - .find(|(_, n)| n == name) - .with_context(|| { - anyhow!( - "the destination {} is missing a template with name {}. The source {} created one at block {:?}", - dst.namespace, - name, self.namespace, block_range.0 - ) - })? - .0; - + for (block_range, src_idx, param, context, causality_region, done_at) in src_tuples { + let dst_idx = manifest_map.dst_idx(src_idx)?; let query = format!( "\ insert into {dst}(block_range, manifest_idx, param, context, causality_region, done_at) @@ -293,7 +272,7 @@ impl DataSourcesTable { count += sql_query(query) .bind::(target_block) .bind::, _>(block_range) - .bind::(dst_manifest_idx) + .bind::(dst_idx) .bind::, _>(param) .bind::, _>(context) .bind::(causality_region) @@ -361,3 +340,43 @@ impl DataSourcesTable { .optional()?) } } + +/// Map src manifest indexes to dst manifest indexes. If the +/// destination is missing an entry, put `None` as the value for the +/// source index +struct ManifestIdxMap<'a> { + map: HashMap, &'a String)>, +} + +impl<'a> ManifestIdxMap<'a> { + fn new(src: &'a [(i32, String)], dst: &'a [(i32, String)]) -> Self { + let map = src + .iter() + .map(|(src_idx, src_name)| { + ( + *src_idx, + ( + dst.iter() + .find(|(_, dst_name)| src_name == dst_name) + .map(|(dst_idx, _)| *dst_idx), + src_name, + ), + ) + }) + .collect(); + ManifestIdxMap { map } + } + + fn dst_idx(&self, src_idx: i32) -> Result { + let (dst_idx, name) = self.map.get(&src_idx).with_context(|| { + anyhow!("the source does not have a template with index {}", src_idx) + })?; + let dst_idx = dst_idx.with_context(|| { + anyhow!( + "the destination does not have a template with name {}", + name + ) + })?; + Ok(dst_idx) + } +} From a20cc8ad67e763b7798c0cd680fc0bd75a19e524 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 24 Mar 2025 18:05:59 -0700 Subject: [PATCH 022/160] store: Copy private data sources in batches For large numbers of data sources, the existing RBAR behavior can be very slow --- store/postgres/src/dynds/private.rs | 147 ++++++++++++++++------- store/postgres/src/relational_queries.rs | 2 +- 2 files changed, 106 insertions(+), 43 deletions(-) diff --git a/store/postgres/src/dynds/private.rs b/store/postgres/src/dynds/private.rs index f48192b45d4..50a433df006 100644 --- a/store/postgres/src/dynds/private.rs +++ b/store/postgres/src/dynds/private.rs @@ -1,8 +1,9 @@ use std::{collections::HashMap, ops::Bound}; use diesel::{ - pg::sql_types, + pg::{sql_types, Pg}, prelude::*, + query_builder::{AstPass, QueryFragment, QueryId}, sql_query, sql_types::{Binary, Bool, Integer, Jsonb, Nullable}, PgConnection, QueryDsl, RunQueryDsl, @@ -16,7 +17,7 @@ use 
graph::{ prelude::{serde_json, BlockNumber, StoreError}, }; -use crate::primary::Namespace; +use crate::{primary::Namespace, relational_queries::POSTGRES_MAX_PARAMETERS}; type DynTable = diesel_dynamic_schema::Table; type DynColumn = diesel_dynamic_schema::Column; @@ -226,16 +227,12 @@ impl DataSourcesTable { return Ok(count as usize); } - type Tuple = ( - (Bound, Bound), - i32, - Option>, - Option, - i32, - Option, - ); + let manifest_map = + ManifestIdxMap::new(src_manifest_idx_and_name, dst_manifest_idx_and_name); - let src_tuples = self + // Load all data sources that were created up to and including + // `target_block` and transform them ready for insertion + let dss: Vec<_> = self .table .clone() .filter( @@ -250,34 +247,18 @@ impl DataSourcesTable { &self.done_at, )) .order_by(&self.vid) - .load::(conn)?; + .load::(conn)? + .into_iter() + .map(|ds| ds.src_to_dst(target_block, &manifest_map)) + .collect::>()?; - let manifest_map = - ManifestIdxMap::new(src_manifest_idx_and_name, dst_manifest_idx_and_name); + // Split all dss into chunks so that we never use more than + // `POSTGRES_MAX_PARAMETERS` bind variables per chunk + let chunk_size = POSTGRES_MAX_PARAMETERS / CopyDsQuery::BIND_PARAMS; let mut count = 0; - for (block_range, src_idx, param, context, causality_region, done_at) in src_tuples { - let dst_idx = manifest_map.dst_idx(src_idx)?; - let query = format!( - "\ - insert into {dst}(block_range, manifest_idx, param, context, causality_region, done_at) - values(case - when upper($2) <= $1 then $2 - else int4range(lower($2), null) - end, - $3, $4, $5, $6, $7) - ", - dst = dst.qname - ); - - count += sql_query(query) - .bind::(target_block) - .bind::, _>(block_range) - .bind::(dst_idx) - .bind::, _>(param) - .bind::, _>(context) - .bind::(causality_region) - .bind::, _>(done_at) - .execute(conn)?; + for chunk in dss.chunks(chunk_size) { + let query = CopyDsQuery::new(dst, chunk)?; + count += query.execute(conn)?; } // If the manifest idxes remained constant, we can test that both tables have the same @@ -344,12 +325,12 @@ impl DataSourcesTable { /// Map src manifest indexes to dst manifest indexes. 
If the /// destination is missing an entry, put `None` as the value for the /// source index -struct ManifestIdxMap<'a> { - map: HashMap, &'a String)>, +struct ManifestIdxMap { + map: HashMap, String)>, } -impl<'a> ManifestIdxMap<'a> { - fn new(src: &'a [(i32, String)], dst: &'a [(i32, String)]) -> Self { +impl ManifestIdxMap { + fn new(src: &[(i32, String)], dst: &[(i32, String)]) -> Self { let map = src .iter() .map(|(src_idx, src_name)| { @@ -359,7 +340,7 @@ impl<'a> ManifestIdxMap<'a> { dst.iter() .find(|(_, dst_name)| src_name == dst_name) .map(|(dst_idx, _)| *dst_idx), - src_name, + src_name.to_string(), ), ) }) @@ -380,3 +361,85 @@ impl<'a> ManifestIdxMap<'a> { Ok(dst_idx) } } + +#[derive(Queryable)] +struct DsForCopy { + block_range: (Bound, Bound), + idx: i32, + param: Option>, + context: Option, + causality_region: i32, + done_at: Option, +} + +impl DsForCopy { + fn src_to_dst( + mut self, + target_block: BlockNumber, + map: &ManifestIdxMap, + ) -> Result { + // unclamp block range if it ends beyond target block + match self.block_range.1 { + Bound::Included(block) if block > target_block => self.block_range.1 = Bound::Unbounded, + _ => { /* use block range as is */ } + } + // Translate manifest index + self.idx = map.dst_idx(self.idx)?; + Ok(self) + } +} + +struct CopyDsQuery<'a> { + dst: &'a DataSourcesTable, + dss: &'a [DsForCopy], +} + +impl<'a> CopyDsQuery<'a> { + const BIND_PARAMS: usize = 6; + + fn new(dst: &'a DataSourcesTable, dss: &'a [DsForCopy]) -> Result { + Ok(CopyDsQuery { dst, dss }) + } +} + +impl<'a> QueryFragment for CopyDsQuery<'a> { + fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> QueryResult<()> { + out.unsafe_to_cache_prepared(); + out.push_sql("insert into "); + out.push_sql(&self.dst.qname); + out.push_sql( + "(block_range, manifest_idx, param, context, causality_region, done_at) values ", + ); + let mut first = true; + for ds in self.dss.iter() { + if first { + first = false; + } else { + out.push_sql(", "); + } + out.push_sql("("); + out.push_bind_param::, _>(&ds.block_range)?; + out.push_sql(", "); + out.push_bind_param::(&ds.idx)?; + out.push_sql(", "); + out.push_bind_param::, _>(&ds.param)?; + out.push_sql(", "); + out.push_bind_param::, _>(&ds.context)?; + out.push_sql(", "); + out.push_bind_param::(&ds.causality_region)?; + out.push_sql(", "); + out.push_bind_param::, _>(&ds.done_at)?; + out.push_sql(")"); + } + + Ok(()) + } +} + +impl<'a> QueryId for CopyDsQuery<'a> { + type QueryId = (); + + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl<'a, Conn> RunQueryDsl for CopyDsQuery<'a> {} diff --git a/store/postgres/src/relational_queries.rs b/store/postgres/src/relational_queries.rs index c6567c5d4f7..028f6044c34 100644 --- a/store/postgres/src/relational_queries.rs +++ b/store/postgres/src/relational_queries.rs @@ -53,7 +53,7 @@ use crate::{ const BASE_SQL_COLUMNS: [&str; 2] = ["id", "vid"]; /// The maximum number of bind variables that can be used in a query -const POSTGRES_MAX_PARAMETERS: usize = u16::MAX as usize; // 65535 +pub(crate) const POSTGRES_MAX_PARAMETERS: usize = u16::MAX as usize; // 65535 const SORT_KEY_COLUMN: &str = "sort_key$"; From 6e6ea3b0e370cdaa865e48d90ac819cecab3c6a4 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 24 Mar 2025 20:48:19 -0700 Subject: [PATCH 023/160] store: Provide more detail in errors from private data source copy --- store/postgres/src/dynds/private.rs | 30 ++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git 
a/store/postgres/src/dynds/private.rs b/store/postgres/src/dynds/private.rs index 50a433df006..ebfd109b206 100644 --- a/store/postgres/src/dynds/private.rs +++ b/store/postgres/src/dynds/private.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, ops::Bound}; +use std::{collections::HashMap, i32, ops::Bound}; use diesel::{ pg::{sql_types, Pg}, @@ -249,7 +249,7 @@ impl DataSourcesTable { .order_by(&self.vid) .load::(conn)? .into_iter() - .map(|ds| ds.src_to_dst(target_block, &manifest_map)) + .map(|ds| ds.src_to_dst(target_block, &manifest_map, &self.namespace, &dst.namespace)) .collect::>()?; // Split all dss into chunks so that we never use more than @@ -348,14 +348,23 @@ impl ManifestIdxMap { ManifestIdxMap { map } } - fn dst_idx(&self, src_idx: i32) -> Result { + fn dst_idx( + &self, + src_idx: i32, + src_nsp: &Namespace, + src_created: BlockNumber, + dst_nsp: &Namespace, + ) -> Result { let (dst_idx, name) = self.map.get(&src_idx).with_context(|| { - anyhow!("the source does not have a template with index {}", src_idx) + anyhow!( + "the source {src_nsp} does not have a template with \ + index {src_idx} but created one at block {src_created}" + ) })?; let dst_idx = dst_idx.with_context(|| { anyhow!( - "the destination does not have a template with name {}", - name + "the destination {dst_nsp} is missing a template with \ + name {name}. The source {src_nsp} created one at block {src_created}" ) })?; Ok(dst_idx) @@ -377,6 +386,8 @@ impl DsForCopy { mut self, target_block: BlockNumber, map: &ManifestIdxMap, + src_nsp: &Namespace, + dst_nsp: &Namespace, ) -> Result { // unclamp block range if it ends beyond target block match self.block_range.1 { @@ -384,7 +395,12 @@ impl DsForCopy { _ => { /* use block range as is */ } } // Translate manifest index - self.idx = map.dst_idx(self.idx)?; + let src_created = match self.block_range.0 { + Bound::Included(block) => block, + Bound::Excluded(block) => block + 1, + Bound::Unbounded => i32::MAX, + }; + self.idx = map.dst_idx(self.idx, src_nsp, src_created, dst_nsp)?; Ok(self) } } From aa43630155ccf46c1c994183132209145fc2ac15 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 27 Mar 2025 16:57:13 -0700 Subject: [PATCH 024/160] store: Make ManifestIdxMap::new a little more efficient --- store/postgres/src/dynds/private.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/store/postgres/src/dynds/private.rs b/store/postgres/src/dynds/private.rs index ebfd109b206..e22d58f4fae 100644 --- a/store/postgres/src/dynds/private.rs +++ b/store/postgres/src/dynds/private.rs @@ -331,17 +331,14 @@ struct ManifestIdxMap { impl ManifestIdxMap { fn new(src: &[(i32, String)], dst: &[(i32, String)]) -> Self { + let dst_idx_map: HashMap<&String, i32> = + HashMap::from_iter(dst.iter().map(|(idx, name)| (name, *idx))); let map = src .iter() .map(|(src_idx, src_name)| { ( *src_idx, - ( - dst.iter() - .find(|(_, dst_name)| src_name == dst_name) - .map(|(dst_idx, _)| *dst_idx), - src_name.to_string(), - ), + (dst_idx_map.get(src_name).copied(), src_name.to_string()), ) }) .collect(); From 776afa14c324614abc84a72587f199cd7681153a Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 27 Mar 2025 17:05:31 -0700 Subject: [PATCH 025/160] store: Fix handling of bounds in DsForCopy::src_to_dst --- store/postgres/src/dynds/private.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/store/postgres/src/dynds/private.rs b/store/postgres/src/dynds/private.rs index e22d58f4fae..243a7dc5a57 100644 --- 
a/store/postgres/src/dynds/private.rs +++ b/store/postgres/src/dynds/private.rs @@ -389,13 +389,16 @@ impl DsForCopy { // unclamp block range if it ends beyond target block match self.block_range.1 { Bound::Included(block) if block > target_block => self.block_range.1 = Bound::Unbounded, + Bound::Excluded(block) if block - 1 > target_block => { + self.block_range.1 = Bound::Unbounded + } _ => { /* use block range as is */ } } // Translate manifest index let src_created = match self.block_range.0 { Bound::Included(block) => block, Bound::Excluded(block) => block + 1, - Bound::Unbounded => i32::MAX, + Bound::Unbounded => 0, }; self.idx = map.dst_idx(self.idx, src_nsp, src_created, dst_nsp)?; Ok(self) From 5ff19746bfe08497c316a9ce59dc93c501eae131 Mon Sep 17 00:00:00 2001 From: Krishnanand V P <44740264+incrypto32@users.noreply.github.com> Date: Mon, 31 Mar 2025 12:42:20 +0400 Subject: [PATCH 026/160] Subgraph Composition: Option to force rpc to fetch block ptrs (#5876) * chain/ethereum: Add parallel block fetching with configurable batch size when using firehose with composable subgraphs * ethereum: Add option to force RPC for block pointer lookups This adds GRAPH_ETHEREUM_FORCE_RPC_FOR_BLOCK_PTRS env var which when enabled forces the use of RPC instead of Firehose for loading block pointers by numbers, with Firehose fallback. Useful for composable subgraphs. * graph: change log level for get_block_by_number * graph: Add get_block_number_with_retry method for firehose endpoint * Address review comments --- chain/ethereum/src/chain.rs | 91 ++++++++++++++++----- chain/ethereum/src/env.rs | 7 ++ graph/src/env/mod.rs | 6 ++ graph/src/firehose/endpoints.rs | 140 +++++++++++++++++++++----------- node/src/chain.rs | 1 + 5 files changed, 175 insertions(+), 70 deletions(-) diff --git a/chain/ethereum/src/chain.rs b/chain/ethereum/src/chain.rs index f632ee36d93..117e3033b18 100644 --- a/chain/ethereum/src/chain.rs +++ b/chain/ethereum/src/chain.rs @@ -17,7 +17,7 @@ use graph::prelude::{ EthereumCallCache, LightEthereumBlock, LightEthereumBlockExt, MetricsRegistry, }; use graph::schema::InputSchema; -use graph::slog::{debug, error, trace}; +use graph::slog::{debug, error, trace, warn}; use graph::substreams::Clock; use graph::{ blockchain::{ @@ -257,6 +257,7 @@ pub struct EthereumAdapterSelector { client: Arc>, registry: Arc, chain_store: Arc, + eth_adapters: Arc, } impl EthereumAdapterSelector { @@ -265,12 +266,14 @@ impl EthereumAdapterSelector { client: Arc>, registry: Arc, chain_store: Arc, + eth_adapters: Arc, ) -> Self { Self { logger_factory, client, registry, chain_store, + eth_adapters, } } } @@ -296,6 +299,7 @@ impl TriggersAdapterSelector for EthereumAdapterSelector { chain_store: self.chain_store.cheap_clone(), unified_api_version, capabilities: *capabilities, + eth_adapters: self.eth_adapters.cheap_clone(), }; Ok(Arc::new(adapter)) } @@ -739,6 +743,7 @@ pub struct TriggersAdapter { chain_client: Arc>, capabilities: NodeCapabilities, unified_api_version: UnifiedMappingApiVersion, + eth_adapters: Arc, } /// Fetches blocks from the cache based on block numbers, excluding duplicates @@ -784,12 +789,34 @@ async fn fetch_unique_blocks_from_cache( "Loading {} block(s) not in the block cache", missing_blocks.len() ); - debug!(logger, "Missing blocks {:?}", missing_blocks); + trace!(logger, "Missing blocks {:?}", missing_blocks.len()); } (blocks, missing_blocks) } +// This is used to load blocks from the RPC. 
+async fn load_blocks_with_rpc( + logger: &Logger, + adapter: Arc, + chain_store: Arc, + block_numbers: BTreeSet, +) -> Result> { + let logger_clone = logger.clone(); + load_blocks( + logger, + chain_store, + block_numbers, + |missing_numbers| async move { + adapter + .load_block_ptrs_by_numbers_rpc(logger_clone, missing_numbers) + .try_collect() + .await + }, + ) + .await +} + /// Fetches blocks by their numbers, first attempting to load from cache. /// Missing blocks are retrieved from an external source, with all blocks sorted and converted to `BlockFinality` format. async fn load_blocks( @@ -847,6 +874,37 @@ impl TriggersAdapterTrait for TriggersAdapter { ) -> Result> { match &*self.chain_client { ChainClient::Firehose(endpoints) => { + // If the force_rpc_for_block_ptrs flag is set, we will use the RPC to load the blocks + // even if the firehose is available. If no adapter is available, we will log an error. + // And then fallback to the firehose. + if ENV_VARS.force_rpc_for_block_ptrs { + trace!( + logger, + "Loading blocks from RPC (force_rpc_for_block_ptrs is set)"; + "block_numbers" => format!("{:?}", block_numbers) + ); + match self.eth_adapters.cheapest_with(&self.capabilities).await { + Ok(adapter) => { + match load_blocks_with_rpc( + &logger, + adapter, + self.chain_store.clone(), + block_numbers.clone(), + ) + .await + { + Ok(blocks) => return Ok(blocks), + Err(e) => { + warn!(logger, "Error loading blocks from RPC: {}", e); + } + } + } + Err(e) => { + warn!(logger, "Error getting cheapest adapter: {}", e); + } + } + } + trace!( logger, "Loading blocks from firehose"; @@ -884,29 +942,16 @@ impl TriggersAdapterTrait for TriggersAdapter { .await } - ChainClient::Rpc(client) => { + ChainClient::Rpc(eth_adapters) => { trace!( logger, "Loading blocks from RPC"; "block_numbers" => format!("{:?}", block_numbers) ); - let adapter = client.cheapest_with(&self.capabilities).await?; - let chain_store = self.chain_store.clone(); - let logger_clone = logger.clone(); - - load_blocks( - &logger, - chain_store, - block_numbers, - |missing_numbers| async move { - adapter - .load_block_ptrs_by_numbers_rpc(logger_clone, missing_numbers) - .try_collect() - .await - }, - ) - .await + let adapter = eth_adapters.cheapest_with(&self.capabilities).await?; + load_blocks_with_rpc(&logger, adapter, self.chain_store.clone(), block_numbers) + .await } } } @@ -973,10 +1018,12 @@ impl TriggersAdapterTrait for TriggersAdapter { ChainClient::Firehose(endpoints) => { let endpoint = endpoints.endpoint().await?; let block = endpoint - .get_block_by_number::(ptr.number as u64, &self.logger) + .get_block_by_number_with_retry::(ptr.number as u64, &self.logger) .await - .map_err(|e| anyhow!("Failed to fetch block from firehose: {}", e))?; - + .context(format!( + "Failed to fetch block {} from firehose", + ptr.number + ))?; Ok(block.hash() == ptr.hash) } ChainClient::Rpc(adapter) => { diff --git a/chain/ethereum/src/env.rs b/chain/ethereum/src/env.rs index bc7223dbc07..027a26b623f 100644 --- a/chain/ethereum/src/env.rs +++ b/chain/ethereum/src/env.rs @@ -91,6 +91,10 @@ pub struct EnvVars { /// This is a comma separated list of chain ids for which the gas field will not be set /// when calling `eth_call`. pub eth_call_no_gas: Vec, + /// Set by the flag `GRAPH_ETHEREUM_FORCE_RPC_FOR_BLOCK_PTRS`. On by default. + /// When enabled, forces the use of RPC instead of Firehose for loading block pointers by numbers. + /// This is used in composable subgraphs. Firehose can be slow for loading block pointers by numbers. 
+ pub force_rpc_for_block_ptrs: bool, } // This does not print any values avoid accidentally leaking any sensitive env vars @@ -141,6 +145,7 @@ impl From for EnvVars { .filter(|s| !s.is_empty()) .map(str::to_string) .collect(), + force_rpc_for_block_ptrs: x.force_rpc_for_block_ptrs.0, } } } @@ -192,4 +197,6 @@ struct Inner { genesis_block_number: u64, #[envconfig(from = "GRAPH_ETH_CALL_NO_GAS", default = "421613,421614")] eth_call_no_gas: String, + #[envconfig(from = "GRAPH_ETHEREUM_FORCE_RPC_FOR_BLOCK_PTRS", default = "true")] + force_rpc_for_block_ptrs: EnvVarBoolean, } diff --git a/graph/src/env/mod.rs b/graph/src/env/mod.rs index 4383ce17b5c..01d09365c3b 100644 --- a/graph/src/env/mod.rs +++ b/graph/src/env/mod.rs @@ -247,6 +247,9 @@ pub struct EnvVars { /// Set by the environment variable `GRAPH_FIREHOSE_FETCH_BLOCK_TIMEOUT_SECS`. /// The default value is 60 seconds. pub firehose_block_fetch_timeout: u64, + /// Set by the environment variable `GRAPH_FIREHOSE_BLOCK_BATCH_SIZE`. + /// The default value is 10. + pub firehose_block_batch_size: usize, } impl EnvVars { @@ -339,6 +342,7 @@ impl EnvVars { block_write_capacity: inner.block_write_capacity.0, firehose_block_fetch_retry_limit: inner.firehose_block_fetch_retry_limit, firehose_block_fetch_timeout: inner.firehose_block_fetch_timeout, + firehose_block_batch_size: inner.firehose_block_fetch_batch_size, }) } @@ -506,6 +510,8 @@ struct Inner { firehose_block_fetch_retry_limit: usize, #[envconfig(from = "GRAPH_FIREHOSE_FETCH_BLOCK_TIMEOUT_SECS", default = "60")] firehose_block_fetch_timeout: u64, + #[envconfig(from = "GRAPH_FIREHOSE_FETCH_BLOCK_BATCH_SIZE", default = "10")] + firehose_block_fetch_batch_size: usize, } #[derive(Clone, Debug)] diff --git a/graph/src/firehose/endpoints.rs b/graph/src/firehose/endpoints.rs index 825f3ddbd20..0ec95c3e2c5 100644 --- a/graph/src/firehose/endpoints.rs +++ b/graph/src/firehose/endpoints.rs @@ -13,8 +13,9 @@ use crate::{ prelude::{anyhow, debug, DeploymentHash}, substreams_rpc, }; +use anyhow::Context; use async_trait::async_trait; -use futures03::StreamExt; +use futures03::{StreamExt, TryStreamExt}; use http::uri::{Scheme, Uri}; use itertools::Itertools; use slog::{error, info, trace, Logger}; @@ -443,15 +444,47 @@ impl FirehoseEndpoint { } } - pub async fn get_block_by_number( - &self, - number: u64, + pub async fn get_block_by_ptr_with_retry( + self: Arc, + ptr: &BlockPtr, logger: &Logger, ) -> Result where M: prost::Message + BlockchainBlock + Default + 'static, { - debug!( + let retry_log_message = format!("get_block_by_ptr for block {}", ptr); + let endpoint = self.cheap_clone(); + let logger = logger.cheap_clone(); + let ptr_for_retry = ptr.clone(); + + retry(retry_log_message, &logger) + .limit(ENV_VARS.firehose_block_fetch_retry_limit) + .timeout_secs(ENV_VARS.firehose_block_fetch_timeout) + .run(move || { + let endpoint = endpoint.cheap_clone(); + let logger = logger.cheap_clone(); + let ptr = ptr_for_retry.clone(); + async move { + endpoint + .get_block_by_ptr::(&ptr, &logger) + .await + .context(format!( + "Failed to fetch block by ptr {} from firehose", + ptr + )) + } + }) + .await + .map_err(move |e| { + anyhow::anyhow!("Failed to fetch block by ptr {} from firehose: {}", ptr, e) + }) + } + + async fn get_block_by_number(&self, number: u64, logger: &Logger) -> Result + where + M: prost::Message + BlockchainBlock + Default + 'static, + { + trace!( logger, "Connecting to firehose to retrieve block for number {}", number; "provider" => self.provider.as_str(), @@ -473,6 +506,44 @@ impl 
FirehoseEndpoint { } } + pub async fn get_block_by_number_with_retry( + self: Arc, + number: u64, + logger: &Logger, + ) -> Result + where + M: prost::Message + BlockchainBlock + Default + 'static, + { + let retry_log_message = format!("get_block_by_number for block {}", number); + let endpoint = self.cheap_clone(); + let logger = logger.cheap_clone(); + + retry(retry_log_message, &logger) + .limit(ENV_VARS.firehose_block_fetch_retry_limit) + .timeout_secs(ENV_VARS.firehose_block_fetch_timeout) + .run(move || { + let endpoint = endpoint.cheap_clone(); + let logger = logger.cheap_clone(); + async move { + endpoint + .get_block_by_number::(number, &logger) + .await + .context(format!( + "Failed to fetch block by number {} from firehose", + number + )) + } + }) + .await + .map_err(|e| { + anyhow::anyhow!( + "Failed to fetch block by number {} from firehose: {}", + number, + e + ) + }) + } + pub async fn load_blocks_by_numbers( self: Arc, numbers: Vec, @@ -481,51 +552,24 @@ impl FirehoseEndpoint { where M: prost::Message + BlockchainBlock + Default + 'static, { - let mut blocks = Vec::with_capacity(numbers.len()); - - for number in numbers { - let provider_name = self.provider.as_str(); + let logger = logger.clone(); + let logger_for_error = logger.clone(); + + let blocks_stream = futures03::stream::iter(numbers) + .map(move |number| { + let e = self.cheap_clone(); + let l = logger.clone(); + async move { e.get_block_by_number_with_retry::(number, &l).await } + }) + .buffered(ENV_VARS.firehose_block_batch_size); - trace!( - logger, - "Loading block for block number {}", number; - "provider" => provider_name, + let blocks = blocks_stream.try_collect::>().await.map_err(|e| { + error!( + logger_for_error, + "Failed to load blocks from firehose: {}", e; ); - - let retry_log_message = format!("get_block_by_number for block {}", number); - let endpoint_for_retry = self.cheap_clone(); - - let logger_for_retry = logger.clone(); - let logger_for_error = logger.clone(); - - let block = retry(retry_log_message, &logger_for_retry) - .limit(ENV_VARS.firehose_block_fetch_retry_limit) - .timeout_secs(ENV_VARS.firehose_block_fetch_timeout) - .run(move || { - let e = endpoint_for_retry.cheap_clone(); - let l = logger_for_retry.clone(); - async move { e.get_block_by_number::(number, &l).await } - }) - .await; - - match block { - Ok(block) => { - blocks.push(block); - } - Err(e) => { - error!( - logger_for_error, - "Failed to load block number {}: {}", number, e; - "provider" => provider_name, - ); - return Err(anyhow::format_err!( - "failed to load block number {}: {}", - number, - e - )); - } - } - } + anyhow::format_err!("failed to load blocks from firehose: {}", e) + })?; Ok(blocks) } diff --git a/node/src/chain.rs b/node/src/chain.rs index 00785d11876..239db116e55 100644 --- a/node/src/chain.rs +++ b/node/src/chain.rs @@ -441,6 +441,7 @@ pub async fn networks_as_chains( client.clone(), metrics_registry.clone(), chain_store.clone(), + eth_adapters.clone(), ); let call_cache = chain_store.cheap_clone(); From 127d15c12560e4974d4c41a4778ba4b7f221410a Mon Sep 17 00:00:00 2001 From: Krishnanand V P <44740264+incrypto32@users.noreply.github.com> Date: Mon, 31 Mar 2025 13:39:56 +0400 Subject: [PATCH 027/160] Validate if the source subgraph graft chain contains any incompatible spec version (#5911) * graph: Validate if the graft chain contains any incompatible spec version for composed subgraphs * graph: allow subgraphs with prune set to never to be sourcable * graph: remove the pruning check completely for source 
subgraphs * Address review comments --- graph/src/data_source/subgraph.rs | 81 ++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 17 deletions(-) diff --git a/graph/src/data_source/subgraph.rs b/graph/src/data_source/subgraph.rs index 9e120a4c82c..2dd3a35c571 100644 --- a/graph/src/data_source/subgraph.rs +++ b/graph/src/data_source/subgraph.rs @@ -270,6 +270,55 @@ impl UnresolvedDataSource { .map(Arc::new) } + /// Recursively verifies that all grafts in the chain meet the minimum spec version requirement for a subgraph source + async fn verify_graft_chain_sourcable( + manifest: Arc>, + resolver: &Arc, + logger: &Logger, + graft_chain: &mut Vec, + ) -> Result<(), Error> { + // Add current manifest to graft chain + graft_chain.push(manifest.id.to_string()); + + // Check if current manifest meets spec version requirement + if manifest.spec_version < SPEC_VERSION_1_3_0 { + return Err(anyhow!( + "Subgraph with a spec version {} is not supported for a subgraph source, minimum supported version is {}. Graft chain: {}", + manifest.spec_version, + SPEC_VERSION_1_3_0, + graft_chain.join(" -> ") + )); + } + + // If there's a graft, recursively verify it + if let Some(graft) = &manifest.graft { + let graft_raw = resolver + .cat(logger, &graft.base.to_ipfs_link()) + .await + .context("Failed to resolve graft base manifest")?; + + let graft_raw: serde_yaml::Mapping = serde_yaml::from_slice(&graft_raw) + .context("Failed to parse graft base manifest as YAML")?; + + let graft_manifest = + UnresolvedSubgraphManifest::::parse(graft.base.clone(), graft_raw) + .context("Failed to parse graft base manifest")? + .resolve(resolver, logger, LATEST_VERSION.clone()) + .await + .context("Failed to resolve graft base manifest")?; + + Box::pin(Self::verify_graft_chain_sourcable( + Arc::new(graft_manifest), + resolver, + logger, + graft_chain, + )) + .await?; + } + + Ok(()) + } + #[allow(dead_code)] pub(super) async fn resolve( self, @@ -286,15 +335,6 @@ impl UnresolvedDataSource { let kind = self.kind.clone(); let source_manifest = self.resolve_source_manifest::(resolver, logger).await?; let source_spec_version = &source_manifest.spec_version; - - if source_manifest - .data_sources - .iter() - .any(|ds| matches!(ds, crate::data_source::DataSource::Subgraph(_))) - { - return Err(anyhow!("Nested subgraph data sources are not supported.")); - } - if source_spec_version < &SPEC_VERSION_1_3_0 { return Err(anyhow!( "Source subgraph manifest spec version {} is not supported, minimum supported version is {}", @@ -303,15 +343,22 @@ impl UnresolvedDataSource { )); } - let pruning_enabled = match source_manifest.indexer_hints.as_ref() { - None => false, - Some(hints) => hints.prune.is_some(), - }; + // Verify the entire graft chain meets spec version requirements + let mut graft_chain = Vec::new(); + Self::verify_graft_chain_sourcable( + source_manifest.clone(), + resolver, + logger, + &mut graft_chain, + ) + .await?; - if pruning_enabled { - return Err(anyhow!( - "Pruning is enabled for source subgraph, which is not supported" - )); + if source_manifest + .data_sources + .iter() + .any(|ds| matches!(ds, crate::data_source::DataSource::Subgraph(_))) + { + return Err(anyhow!("Nested subgraph data sources are not supported.")); } let mapping_entities: Vec = self From 38c94bb60e64bef5d531bb27d62fbaa9442ee560 Mon Sep 17 00:00:00 2001 From: Krishnanand V P <44740264+incrypto32@users.noreply.github.com> Date: Mon, 31 Mar 2025 13:54:21 +0400 Subject: [PATCH 028/160] Do not allow mutable entities in entity handlers 
(#5909) * graph: do not allow mutable entities in entity handlers of composed subgraphs * runtime: Remove ToAscPtr implementation for entity trigger * tests: Update subgraph composition integration tests to work with immutable entities * store: Update composition tests to work with immutable entity check --- graph/src/data_source/subgraph.rs | 11 ++- runtime/wasm/src/module/mod.rs | 12 +-- runtime/wasm/src/to_from/external.rs | 37 +-------- .../tests/chain/ethereum/manifest.rs | 77 ++++++++++++++++++- tests/docker-compose.yml | 4 +- .../src/mapping.ts | 30 ++++---- .../subgraph.yaml | 4 +- .../source-subgraph-a/schema.graphql | 2 +- .../source-subgraph-b/schema.graphql | 2 +- .../source-subgraph/schema.graphql | 5 +- .../source-subgraph/src/mapping.ts | 34 +------- .../subgraph-data-sources/src/mapping.ts | 36 ++++----- .../subgraph-data-sources/subgraph.yaml | 4 +- tests/tests/integration_tests.rs | 73 ++++-------------- 14 files changed, 143 insertions(+), 188 deletions(-) diff --git a/graph/src/data_source/subgraph.rs b/graph/src/data_source/subgraph.rs index 2dd3a35c571..d8ef847aee4 100644 --- a/graph/src/data_source/subgraph.rs +++ b/graph/src/data_source/subgraph.rs @@ -239,7 +239,16 @@ impl UnresolvedDataSource { None => { return Err(anyhow!("Entity {} not found in source manifest", entity)); } - Some(TypeKind::Object) => {} + Some(TypeKind::Object) => { + // Check if the entity is immutable + let entity_type = source_manifest.schema.entity_type(entity)?; + if !entity_type.is_immutable() { + return Err(anyhow!( + "Entity {} is not immutable and cannot be used as a mapping entity", + entity + )); + } + } } } Ok(()) diff --git a/runtime/wasm/src/module/mod.rs b/runtime/wasm/src/module/mod.rs index 4b01b3a5fd8..b911542ffe5 100644 --- a/runtime/wasm/src/module/mod.rs +++ b/runtime/wasm/src/module/mod.rs @@ -70,23 +70,13 @@ impl ToAscPtr for offchain::TriggerData { } } -impl ToAscPtr for subgraph::TriggerData { - fn to_asc_ptr( - self, - heap: &mut H, - gas: &GasCounter, - ) -> Result, HostExportError> { - asc_new(heap, &self.entity, gas).map(|ptr| ptr.erase()) - } -} - impl ToAscPtr for subgraph::MappingEntityTrigger { fn to_asc_ptr( self, heap: &mut H, gas: &GasCounter, ) -> Result, HostExportError> { - asc_new(heap, &self.data.entity, gas).map(|ptr| ptr.erase()) + asc_new(heap, &self.data.entity.entity.sorted_ref(), gas).map(|ptr| ptr.erase()) } } diff --git a/runtime/wasm/src/to_from/external.rs b/runtime/wasm/src/to_from/external.rs index 9bbe0298abc..6bb7122613f 100644 --- a/runtime/wasm/src/to_from/external.rs +++ b/runtime/wasm/src/to_from/external.rs @@ -1,13 +1,11 @@ use ethabi; -use graph::blockchain::block_stream::{EntityOperationKind, EntitySourceOperation}; use graph::data::store::scalar::Timestamp; use graph::data::value::Word; use graph::prelude::{BigDecimal, BigInt}; use graph::runtime::gas::GasCounter; use graph::runtime::{ - asc_get, asc_new, AscIndexId, AscPtr, AscType, AscValue, HostExportError, IndexForAscTypeId, - ToAscObj, + asc_get, asc_new, AscIndexId, AscPtr, AscType, AscValue, HostExportError, ToAscObj, }; use graph::{data::store, runtime::DeterministicHostError}; use graph::{prelude::serde_json, runtime::FromAscObj}; @@ -474,39 +472,6 @@ pub enum AscSubgraphEntityOp { Delete, } -#[derive(AscType)] -pub struct AscEntityTrigger { - pub entity_op: AscSubgraphEntityOp, - pub entity_type: AscPtr, - pub entity: AscPtr, - pub vid: i64, -} - -impl ToAscObj for EntitySourceOperation { - fn to_asc_obj( - &self, - heap: &mut H, - gas: &GasCounter, - ) -> Result { - let 
entity_op = match self.entity_op { - EntityOperationKind::Create => AscSubgraphEntityOp::Create, - EntityOperationKind::Modify => AscSubgraphEntityOp::Modify, - EntityOperationKind::Delete => AscSubgraphEntityOp::Delete, - }; - - Ok(AscEntityTrigger { - entity_op, - entity_type: asc_new(heap, &self.entity_type.as_str(), gas)?, - entity: asc_new(heap, &self.entity.sorted_ref(), gas)?, - vid: self.vid, - }) - } -} - -impl AscIndexId for AscEntityTrigger { - const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::AscEntityTrigger; -} - impl ToAscObj> for serde_yaml::Value { fn to_asc_obj( &self, diff --git a/store/test-store/tests/chain/ethereum/manifest.rs b/store/test-store/tests/chain/ethereum/manifest.rs index 9d094ae5817..02f4e1413f9 100644 --- a/store/test-store/tests/chain/ethereum/manifest.rs +++ b/store/test-store/tests/chain/ethereum/manifest.rs @@ -47,9 +47,10 @@ specVersion: 1.3.0 "; const SOURCE_SUBGRAPH_SCHEMA: &str = " -type TestEntity @entity { id: ID! } -type User @entity { id: ID! } -type Profile @entity { id: ID! } +type TestEntity @entity(immutable: true) { id: ID! } +type MutableEntity @entity { id: ID! } +type User @entity(immutable: true) { id: ID! } +type Profile @entity(immutable: true) { id: ID! } type TokenData @entity(timeseries: true) { id: Int8! @@ -1761,6 +1762,7 @@ specVersion: 1.3.0 let result = try_resolve_manifest(yaml, SPEC_VERSION_1_3_0).await; assert!(result.is_err()); let err = result.unwrap_err(); + println!("Error: {}", err); assert!(err .to_string() .contains("Subgraph datasources cannot be used alongside onchain datasources")); @@ -1857,3 +1859,72 @@ specVersion: 1.3.0 } }) } + +#[tokio::test] +async fn subgraph_ds_manifest_mutable_entities_should_fail() { + let yaml = " +schema: + file: + /: /ipfs/Qmschema +dataSources: + - name: SubgraphSource + kind: subgraph + entities: + - Gravatar + network: mainnet + source: + address: 'QmSource' + startBlock: 9562480 + mapping: + apiVersion: 0.0.6 + language: wasm/assemblyscript + entities: + - TestEntity + file: + /: /ipfs/Qmmapping + handlers: + - handler: handleEntity + entity: MutableEntity # This is a mutable entity and should fail +specVersion: 1.3.0 +"; + + let result = try_resolve_manifest(yaml, SPEC_VERSION_1_3_0).await; + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err + .to_string() + .contains("Entity MutableEntity is not immutable and cannot be used as a mapping entity")); +} + +#[tokio::test] +async fn subgraph_ds_manifest_immutable_entities_should_succeed() { + let yaml = " +schema: + file: + /: /ipfs/Qmschema +dataSources: + - name: SubgraphSource + kind: subgraph + entities: + - Gravatar + network: mainnet + source: + address: 'QmSource' + startBlock: 9562480 + mapping: + apiVersion: 0.0.6 + language: wasm/assemblyscript + entities: + - TestEntity + file: + /: /ipfs/Qmmapping + handlers: + - handler: handleEntity + entity: User # This is an immutable entity and should succeed +specVersion: 1.3.0 +"; + + let result = try_resolve_manifest(yaml, SPEC_VERSION_1_3_0).await; + + assert!(result.is_ok()); +} diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml index 9f05a680e7c..f45360fd367 100644 --- a/tests/docker-compose.yml +++ b/tests/docker-compose.yml @@ -1,7 +1,7 @@ version: '3' services: ipfs: - image: docker.io/ipfs/kubo:v0.17.0 + image: docker.io/ipfs/kubo:v0.34.1 ports: - '127.0.0.1:3001:5001' postgres: @@ -20,7 +20,7 @@ services: POSTGRES_DB: graph-node POSTGRES_INITDB_ARGS: "-E UTF8 --locale=C" anvil: - image: 
ghcr.io/foundry-rs/foundry:latest + image: ghcr.io/foundry-rs/foundry:stable ports: - '3021:8545' command: "'anvil --host 0.0.0.0 --gas-limit 100000000000 --base-fee 1 --block-time 5 --mnemonic \"test test test test test test test test test test test junk\"'" diff --git a/tests/integration-tests/multiple-subgraph-datasources/src/mapping.ts b/tests/integration-tests/multiple-subgraph-datasources/src/mapping.ts index 649d92d3f5f..373ddd7e99e 100644 --- a/tests/integration-tests/multiple-subgraph-datasources/src/mapping.ts +++ b/tests/integration-tests/multiple-subgraph-datasources/src/mapping.ts @@ -1,28 +1,26 @@ import { dataSource, EntityTrigger, log } from '@graphprotocol/graph-ts' import { AggregatedData } from '../generated/schema' -import { SourceAData } from '../generated/subgraph-QmPWnNsD4m8T9EEF1ec5d8wetFxrMebggLj1efFHzdnZhx' -import { SourceBData } from '../generated/subgraph-Qma4Rk2D1w6mFiP15ZtHHx7eWkqFR426RWswreLiDanxej' +import { SourceAData } from '../generated/subgraph-QmYHp1bPEf7EoYBpEtJUpZv1uQHYQfWE4AhvR6frjB1Huj' +import { SourceBData } from '../generated/subgraph-QmYBEzastJi7bsa722ac78tnZa6xNnV9vvweerY4kVyJtq' -export function handleSourceAData(data: EntityTrigger): void { - let aggregated = AggregatedData.load(data.data.id) - if (!aggregated) { - aggregated = new AggregatedData(data.data.id) - aggregated.sourceA = data.data.data - aggregated.first = 'sourceA' - } else { - aggregated.sourceA = data.data.data - } + +// We know this handler will run first since its defined first in the manifest +// So we dont need to check if the Aggregated data exists +export function handleSourceAData(data: SourceAData): void { + let aggregated = new AggregatedData(data.id) + aggregated.sourceA = data.data + aggregated.first = 'sourceA' aggregated.save() } -export function handleSourceBData(data: EntityTrigger): void { - let aggregated = AggregatedData.load(data.data.id) +export function handleSourceBData(data: SourceBData): void { + let aggregated = AggregatedData.load(data.id) if (!aggregated) { - aggregated = new AggregatedData(data.data.id) - aggregated.sourceB = data.data.data + aggregated = new AggregatedData(data.id) + aggregated.sourceB = data.data aggregated.first = 'sourceB' } else { - aggregated.sourceB = data.data.data + aggregated.sourceB = data.data } aggregated.save() } diff --git a/tests/integration-tests/multiple-subgraph-datasources/subgraph.yaml b/tests/integration-tests/multiple-subgraph-datasources/subgraph.yaml index 296777c578c..4dc4fc7a9b6 100644 --- a/tests/integration-tests/multiple-subgraph-datasources/subgraph.yaml +++ b/tests/integration-tests/multiple-subgraph-datasources/subgraph.yaml @@ -6,7 +6,7 @@ dataSources: name: SourceA network: test source: - address: 'QmPWnNsD4m8T9EEF1ec5d8wetFxrMebggLj1efFHzdnZhx' + address: 'QmYHp1bPEf7EoYBpEtJUpZv1uQHYQfWE4AhvR6frjB1Huj' startBlock: 0 mapping: apiVersion: 0.0.7 @@ -22,7 +22,7 @@ dataSources: name: SourceB network: test source: - address: 'Qma4Rk2D1w6mFiP15ZtHHx7eWkqFR426RWswreLiDanxej' + address: 'QmYBEzastJi7bsa722ac78tnZa6xNnV9vvweerY4kVyJtq' startBlock: 0 mapping: apiVersion: 0.0.7 diff --git a/tests/integration-tests/source-subgraph-a/schema.graphql b/tests/integration-tests/source-subgraph-a/schema.graphql index 10be822d900..2348c9b5c57 100644 --- a/tests/integration-tests/source-subgraph-a/schema.graphql +++ b/tests/integration-tests/source-subgraph-a/schema.graphql @@ -1,4 +1,4 @@ -type SourceAData @entity { +type SourceAData @entity(immutable: true) { id: ID! data: String! blockNumber: BigInt! 
diff --git a/tests/integration-tests/source-subgraph-b/schema.graphql b/tests/integration-tests/source-subgraph-b/schema.graphql index 9a84bdcbba3..0b012273112 100644 --- a/tests/integration-tests/source-subgraph-b/schema.graphql +++ b/tests/integration-tests/source-subgraph-b/schema.graphql @@ -1,4 +1,4 @@ -type SourceBData @entity { +type SourceBData @entity(immutable: true) { id: ID! data: String! blockNumber: BigInt! diff --git a/tests/integration-tests/source-subgraph/schema.graphql b/tests/integration-tests/source-subgraph/schema.graphql index 15bb2a33921..4fab5be71b9 100644 --- a/tests/integration-tests/source-subgraph/schema.graphql +++ b/tests/integration-tests/source-subgraph/schema.graphql @@ -1,11 +1,10 @@ -type Block @entity { +type Block @entity(immutable: true) { id: ID! number: BigInt! hash: Bytes! - testMessage: String } -type Block2 @entity { +type Block2 @entity(immutable: true) { id: ID! number: BigInt! hash: Bytes! diff --git a/tests/integration-tests/source-subgraph/src/mapping.ts b/tests/integration-tests/source-subgraph/src/mapping.ts index ad27c43c2a3..119fb9b912b 100644 --- a/tests/integration-tests/source-subgraph/src/mapping.ts +++ b/tests/integration-tests/source-subgraph/src/mapping.ts @@ -1,6 +1,5 @@ import { ethereum, log, store } from '@graphprotocol/graph-ts'; import { Block, Block2 } from '../generated/schema'; -import { BigInt } from '@graphprotocol/graph-ts'; export function handleBlock(block: ethereum.Block): void { log.info('handleBlock {}', [block.number.toString()]); @@ -21,37 +20,6 @@ export function handleBlock(block: ethereum.Block): void { let blockEntity3 = new Block2(id3); blockEntity3.number = block.number; blockEntity3.hash = block.hash; + blockEntity3.testMessage = block.number.toString().concat('-message'); blockEntity3.save(); - - if (block.number.equals(BigInt.fromI32(1))) { - let id = 'TEST'; - let entity = new Block(id); - entity.number = block.number; - entity.hash = block.hash; - entity.testMessage = 'Created at block 1'; - log.info('Created entity at block 1', []); - entity.save(); - } - - if (block.number.equals(BigInt.fromI32(2))) { - let id = 'TEST'; - let blockEntity1 = Block.load(id); - if (blockEntity1) { - // Update the block entity - blockEntity1.testMessage = 'Updated at block 2'; - log.info('Updated entity at block 2', []); - blockEntity1.save(); - } - } - - if (block.number.equals(BigInt.fromI32(3))) { - let id = 'TEST'; - let blockEntity1 = Block.load(id); - if (blockEntity1) { - blockEntity1.testMessage = 'Deleted at block 3'; - log.info('Deleted entity at block 3', []); - blockEntity1.save(); - store.remove('Block', id); - } - } } diff --git a/tests/integration-tests/subgraph-data-sources/src/mapping.ts b/tests/integration-tests/subgraph-data-sources/src/mapping.ts index 45ecbd41076..9062970361a 100644 --- a/tests/integration-tests/subgraph-data-sources/src/mapping.ts +++ b/tests/integration-tests/subgraph-data-sources/src/mapping.ts @@ -1,26 +1,26 @@ -import { Entity, log, store, BigInt, EntityTrigger, EntityOp } from '@graphprotocol/graph-ts'; -import { Block } from '../generated/subgraph-QmVz1Pt7NhgCkz4gfavmNrMhojnMT9hW81QDqVjy56ZMUP'; +import { log, store } from '@graphprotocol/graph-ts'; +import { Block, Block2 } from '../generated/subgraph-QmWi3H11QFE2PiWx6WcQkZYZdA5UasaBptUJqGn54MFux5'; import { MirrorBlock } from '../generated/schema'; -export function handleEntity(trigger: EntityTrigger): void { - let blockEntity = trigger.data; - let id = blockEntity.id; +export function handleEntity(block: Block): void { + 
let id = block.id; - if (trigger.operation === EntityOp.Remove) { - log.info('Removing block entity with id: {}', [id]); - store.remove('MirrorBlock', id); - return; - } + let blockEntity = loadOrCreateMirrorBlock(id); + blockEntity.number = block.number; + blockEntity.hash = block.hash; - let block = loadOrCreateMirrorBlock(id); - block.number = blockEntity.number; - block.hash = blockEntity.hash; - - if (blockEntity.testMessage) { - block.testMessage = blockEntity.testMessage; - } + blockEntity.save(); +} + +export function handleEntity2(block: Block2): void { + let id = block.id; + + let blockEntity = loadOrCreateMirrorBlock(id); + blockEntity.number = block.number; + blockEntity.hash = block.hash; + blockEntity.testMessage = block.testMessage; - block.save(); + blockEntity.save(); } export function loadOrCreateMirrorBlock(id: string): MirrorBlock { diff --git a/tests/integration-tests/subgraph-data-sources/subgraph.yaml b/tests/integration-tests/subgraph-data-sources/subgraph.yaml index 3fdc76ac089..92dc7140514 100644 --- a/tests/integration-tests/subgraph-data-sources/subgraph.yaml +++ b/tests/integration-tests/subgraph-data-sources/subgraph.yaml @@ -6,7 +6,7 @@ dataSources: name: Contract network: test source: - address: 'QmVz1Pt7NhgCkz4gfavmNrMhojnMT9hW81QDqVjy56ZMUP' + address: 'QmWi3H11QFE2PiWx6WcQkZYZdA5UasaBptUJqGn54MFux5' startBlock: 0 mapping: apiVersion: 0.0.7 @@ -16,6 +16,6 @@ dataSources: handlers: - handler: handleEntity entity: Block - - handler: handleEntity + - handler: handleEntity2 entity: Block2 file: ./src/mapping.ts diff --git a/tests/tests/integration_tests.rs b/tests/tests/integration_tests.rs index d10df25698b..5c6ab96968d 100644 --- a/tests/tests/integration_tests.rs +++ b/tests/tests/integration_tests.rs @@ -523,79 +523,34 @@ async fn subgraph_data_sources(ctx: TestContext) -> anyhow::Result<()> { assert!(subgraph.healthy); let expected_response = json!({ "mirrorBlocks": [ - { "id": "1-v1", "number": "1" }, - { "id": "1-v2", "number": "1" }, - { "id": "1-v3", "number": "1" }, - { "id": "2-v1", "number": "2" }, - { "id": "2-v2", "number": "2" }, - { "id": "2-v3", "number": "2" }, - { "id": "3-v1", "number": "3" }, - { "id": "3-v2", "number": "3" }, - { "id": "3-v3", "number": "3" }, - { "id": "4-v1", "number": "4" }, - { "id": "4-v2", "number": "4" }, - { "id": "4-v3", "number": "4" }, - { "id": "5-v1", "number": "5" }, - { "id": "5-v2", "number": "5" }, - { "id": "5-v3", "number": "5" }, - { "id": "6-v1", "number": "6" }, - { "id": "6-v2", "number": "6" }, - { "id": "6-v3", "number": "6" }, - { "id": "7-v1", "number": "7" }, - { "id": "7-v2", "number": "7" }, - { "id": "7-v3", "number": "7" }, - { "id": "8-v1", "number": "8" }, - { "id": "8-v2", "number": "8" }, - { "id": "8-v3", "number": "8" }, - { "id": "9-v1", "number": "9" }, - { "id": "9-v2", "number": "9" }, - { "id": "9-v3", "number": "9" }, - { "id": "10-v1", "number": "10" }, - { "id": "10-v2", "number": "10" }, - { "id": "10-v3", "number": "10" }, + { "id": "1-v1", "number": "1", "testMessage": null }, + { "id": "1-v2", "number": "1", "testMessage": null }, + { "id": "1-v3", "number": "1", "testMessage": "1-message" }, + { "id": "2-v1", "number": "2", "testMessage": null }, + { "id": "2-v2", "number": "2", "testMessage": null }, + { "id": "2-v3", "number": "2", "testMessage": "2-message" }, + { "id": "3-v1", "number": "3", "testMessage": null }, + { "id": "3-v2", "number": "3", "testMessage": null }, + { "id": "3-v3", "number": "3", "testMessage": "3-message" }, ] }); query_succeeds( - "Blocks 
should be right", + "Query all blocks with testMessage", &subgraph, - "{ mirrorBlocks(where: {number_lte: 10}, orderBy: number) { id, number } }", + "{ mirrorBlocks(where: {number_lte: 3}, orderBy: number) { id, number, testMessage } }", expected_response, ) .await?; let expected_response = json!({ - "mirrorBlock": { "id": "TEST", "number": "1", "testMessage": "Created at block 1" }, + "mirrorBlock": { "id": "1-v3", "number": "1", "testMessage": "1-message" }, }); query_succeeds( - "Blocks should be right", + "Query specific block with testMessage", &subgraph, - "{ mirrorBlock(id: \"TEST\", block: {number: 1}) { id, number, testMessage } }", - expected_response, - ) - .await?; - - let expected_response = json!({ - "mirrorBlock": { "id": "TEST", "number": "1", "testMessage": "Updated at block 2" }, - }); - - query_succeeds( - "Blocks should be right", - &subgraph, - "{ mirrorBlock(id: \"TEST\", block: {number: 2}) { id, number, testMessage } }", - expected_response, - ) - .await?; - - let expected_response = json!({ - "mirrorBlock": null, - }); - - query_succeeds( - "Blocks should be right", - &subgraph, - "{ mirrorBlock(id: \"TEST\", block: {number: 3}) { id, number, testMessage } }", + "{ mirrorBlock(id: \"1-v3\") { id, number, testMessage } }", expected_response, ) .await?; From 7f494ce544a8bde536888b9e031abc4d50d4f1b0 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 26 Mar 2025 12:37:47 -0700 Subject: [PATCH 029/160] graph: Introduce StoreError::StatementTimeout --- graph/src/components/store/err.rs | 44 ++++++++++++++++--------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/graph/src/components/store/err.rs b/graph/src/components/store/err.rs index 3aa65c3ecb2..6af676f8e52 100644 --- a/graph/src/components/store/err.rs +++ b/graph/src/components/store/err.rs @@ -74,6 +74,8 @@ pub enum StoreError { UnsupportedFilter(String, String), #[error("writing {0} entities at block {1} failed: {2} Query: {3}")] WriteFailure(String, BlockNumber, String, String), + #[error("database query timed out")] + StatementTimeout, } // Convenience to report a constraint violation @@ -133,25 +135,29 @@ impl Clone for StoreError { Self::WriteFailure(arg0, arg1, arg2, arg3) => { Self::WriteFailure(arg0.clone(), arg1.clone(), arg2.clone(), arg3.clone()) } + Self::StatementTimeout => Self::StatementTimeout, } } } impl StoreError { - fn database_unavailable(e: &DieselError) -> Option { - // When the error is caused by a closed connection, treat the error - // as 'database unavailable'. When this happens during indexing, the - // indexing machinery will retry in that case rather than fail the - // subgraph - if let DieselError::DatabaseError(_, info) = e { - if info - .message() - .contains("server closed the connection unexpectedly") - { - return Some(Self::DatabaseUnavailable); - } + fn from_diesel_error(e: &DieselError) -> Option { + const CONN_CLOSE: &str = "server closed the connection unexpectedly"; + const STMT_TIMEOUT: &str = "canceling statement due to statement timeout"; + let DieselError::DatabaseError(_, info) = e else { + return None; + }; + if info.message().contains(CONN_CLOSE) { + // When the error is caused by a closed connection, treat the error + // as 'database unavailable'. 
When this happens during indexing, the + // indexing machinery will retry in that case rather than fail the + // subgraph + Some(StoreError::DatabaseUnavailable) + } else if info.message().contains(STMT_TIMEOUT) { + Some(StoreError::StatementTimeout) + } else { + None } - None } pub fn write_failure( @@ -160,19 +166,15 @@ impl StoreError { block: BlockNumber, query: String, ) -> Self { - match Self::database_unavailable(&error) { - Some(e) => return e, - None => StoreError::WriteFailure(entity.to_string(), block, error.to_string(), query), - } + Self::from_diesel_error(&error).unwrap_or_else(|| { + StoreError::WriteFailure(entity.to_string(), block, error.to_string(), query) + }) } } impl From for StoreError { fn from(e: DieselError) -> Self { - match Self::database_unavailable(&e) { - Some(e) => return e, - None => StoreError::Unknown(e.into()), - } + Self::from_diesel_error(&e).unwrap_or_else(|| StoreError::Unknown(e.into())) } } From 3c18373109d16dadab2d9c52fdd5f81cf3de6dbe Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 26 Mar 2025 13:09:46 -0700 Subject: [PATCH 030/160] graph, store: Add a timeout for each batch of data copying When the data distribution in a table is bad (e.g., if it contains huge arrays after a certain point) it can causes batches to run for many hours. We now set a timeout and reset the batch size to 1 when that happens so that we can slowly inch to a more reasonable batch size. --- docs/environment-variables.md | 8 ++++ graph/src/env/store.rs | 7 ++++ store/postgres/src/copy.rs | 65 ++++++++++++++++++++++++++++++- store/postgres/src/vid_batcher.rs | 4 ++ 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 8b395680e6a..f174e0e2e54 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -223,6 +223,14 @@ those. copying or grafting should take. This limits how long transactions for such long running operations will be, and therefore helps control bloat in other tables. Value is in seconds and defaults to 180s. +- `GRAPH_STORE_BATCH_TIMEOUT`: How long a batch operation during copying or + grafting is allowed to take at most. This is meant to guard against + batches that are catastrophically big and should be set to a small + multiple of `GRAPH_STORE_BATCH_TARGET_DURATION`, like 10 times that + value, and needs to be at least 2 times that value when set. If this + timeout is hit, the batch size is reset to 1 so we can be sure that + batches stay below `GRAPH_STORE_BATCH_TARGET_DURATION` and the smaller + batch is retried. Value is in seconds and defaults to unlimited. - `GRAPH_START_BLOCK`: block hash:block number where the forked subgraph will start indexing at. - `GRAPH_FORK_BASE`: api url for where the graph node will fork from, use `https://api.thegraph.com/subgraphs/id/` for the hosted service. diff --git a/graph/src/env/store.rs b/graph/src/env/store.rs index 3b4e50ec87d..6d2f383133e 100644 --- a/graph/src/env/store.rs +++ b/graph/src/env/store.rs @@ -81,6 +81,10 @@ pub struct EnvVarsStore { /// The default is 180s. pub batch_target_duration: Duration, + /// Cancel and reset a batch copy operation if it takes longer than + /// this. Set by `GRAPH_STORE_BATCH_TIMEOUT`. Unlimited by default + pub batch_timeout: Option, + /// Prune tables where we will remove at least this fraction of entity /// versions by rebuilding the table. Set by /// `GRAPH_STORE_HISTORY_REBUILD_THRESHOLD`. 
@@ -168,6 +172,7 @@ impl From<InnerStore> for EnvVarsStore {
             connection_idle_timeout: Duration::from_secs(x.connection_idle_timeout_in_secs),
             write_queue_size: x.write_queue_size,
             batch_target_duration: Duration::from_secs(x.batch_target_duration_in_secs),
+            batch_timeout: x.batch_timeout_in_secs.map(Duration::from_secs),
             rebuild_threshold: x.rebuild_threshold.0,
             delete_threshold: x.delete_threshold.0,
             history_slack_factor: x.history_slack_factor.0,
@@ -222,6 +227,8 @@ pub struct InnerStore {
     write_queue_size: usize,
     #[envconfig(from = "GRAPH_STORE_BATCH_TARGET_DURATION", default = "180")]
     batch_target_duration_in_secs: u64,
+    #[envconfig(from = "GRAPH_STORE_BATCH_TIMEOUT")]
+    batch_timeout_in_secs: Option<u64>,
     #[envconfig(from = "GRAPH_STORE_HISTORY_REBUILD_THRESHOLD", default = "0.5")]
     rebuild_threshold: ZeroToOneF64,
     #[envconfig(from = "GRAPH_STORE_HISTORY_DELETE_THRESHOLD", default = "0.05")]
diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs
index d92064c7f5c..80a596b7f2c 100644
--- a/store/postgres/src/copy.rs
+++ b/store/postgres/src/copy.rs
@@ -19,6 +19,7 @@ use std::{
 };
 
 use diesel::{
+    connection::SimpleConnection as _,
     dsl::sql,
     insert_into,
     r2d2::{ConnectionManager, PooledConnection},
@@ -27,7 +28,7 @@ use diesel::{
 };
 use graph::{
     constraint_violation,
-    prelude::{info, o, warn, BlockNumber, BlockPtr, Logger, StoreError},
+    prelude::{info, lazy_static, o, warn, BlockNumber, BlockPtr, Logger, StoreError, ENV_VARS},
     schema::EntityType,
 };
 use itertools::Itertools;
@@ -54,6 +55,13 @@ const ACCEPTABLE_REPLICATION_LAG: Duration = Duration::from_secs(30);
 /// the lag again
 const REPLICATION_SLEEP: Duration = Duration::from_secs(10);
 
+lazy_static! {
+    static ref STATEMENT_TIMEOUT: Option<String> = ENV_VARS
+        .store
+        .batch_timeout
+        .map(|duration| format!("set local statement_timeout={}", duration.as_millis()));
+}
+
 table! {
     subgraphs.copy_state(dst) {
         // deployment_schemas.id
@@ -509,6 +517,22 @@ impl TableState {
 
         Ok(Status::Finished)
     }
+
+    fn set_batch_size(&mut self, conn: &mut PgConnection, size: usize) -> Result<(), StoreError> {
+        use copy_table_state as cts;
+
+        self.batcher.set_batch_size(size);
+
+        update(
+            cts::table
+                .filter(cts::dst.eq(self.dst_site.id))
+                .filter(cts::entity_type.eq(self.dst.object.as_str())),
+        )
+        .set(cts::batch_size.eq(self.batcher.batch_size() as i64))
+        .execute(conn)?;
+
+        Ok(())
+    }
 }
 
 // A helper for logging progress while data is being copied
@@ -711,7 +735,44 @@ impl Connection {
             }
         }
 
-        let status = self.transaction(|conn| table.copy_batch(conn))?;
+        let status = {
+            loop {
+                match self.transaction(|conn| {
+                    if let Some(timeout) = STATEMENT_TIMEOUT.as_ref() {
+                        conn.batch_execute(timeout)?;
+                    }
+                    table.copy_batch(conn)
+                }) {
+                    Ok(status) => {
+                        break status;
+                    }
+                    Err(StoreError::StatementTimeout) => {
+                        warn!(
+                            logger,
+                            "Current batch took longer than GRAPH_STORE_BATCH_TIMEOUT seconds. Retrying with a smaller batch size."
+                        );
+                    }
+                    Err(e) => {
+                        return Err(e);
+                    }
+                }
+                // We hit a timeout. Reset the batch size to 1.
+                // That's small enough that we will make _some_
+                // progress, assuming the timeout is set to a
+                // reasonable value (several minutes)
+                //
+                // Our estimation of batch sizes is generally good
+                // and stays within the prescribed bounds, but there
+                // are cases where proper estimation of the batch
+                // size is nearly impossible since the size of the
+                // rows in the table jumps sharply at some point
+                // that is hard to predict.
This mechanism ensures + // that if our estimation is wrong, the consequences + // aren't too severe. + self.transaction(|conn| table.set_batch_size(conn, 1))?; + } + }; + if status == Status::Cancelled { return Ok(status); } diff --git a/store/postgres/src/vid_batcher.rs b/store/postgres/src/vid_batcher.rs index 2a1c30e7889..ef5948efd06 100644 --- a/store/postgres/src/vid_batcher.rs +++ b/store/postgres/src/vid_batcher.rs @@ -233,6 +233,10 @@ impl VidBatcher { } } } + + pub(crate) fn set_batch_size(&mut self, size: usize) { + self.batch_size.size = size as i64; + } } #[derive(Copy, Clone, QueryableByName)] From cfc4d8d7913dae64e3b24613b5a22e84773551f8 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 09:48:37 -0700 Subject: [PATCH 031/160] graph: Check that BATCH_TIMEOUT is big enough A value that's too small will just needlessly cause timeouts and slow down copies. --- graph/src/env/mod.rs | 4 ++-- graph/src/env/store.rs | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/graph/src/env/mod.rs b/graph/src/env/mod.rs index 01d09365c3b..48fa0ba4688 100644 --- a/graph/src/env/mod.rs +++ b/graph/src/env/mod.rs @@ -253,11 +253,11 @@ pub struct EnvVars { } impl EnvVars { - pub fn from_env() -> Result { + pub fn from_env() -> Result { let inner = Inner::init_from_env()?; let graphql = InnerGraphQl::init_from_env()?.into(); let mapping_handlers = InnerMappingHandlers::init_from_env()?.into(); - let store = InnerStore::init_from_env()?.into(); + let store = InnerStore::init_from_env()?.try_into()?; // The default reorganization (reorg) threshold is set to 250. // For testing purposes, we need to set this threshold to 0 because: diff --git a/graph/src/env/store.rs b/graph/src/env/store.rs index 6d2f383133e..4fb30f58079 100644 --- a/graph/src/env/store.rs +++ b/graph/src/env/store.rs @@ -142,9 +142,11 @@ impl fmt::Debug for EnvVarsStore { } } -impl From for EnvVarsStore { - fn from(x: InnerStore) -> Self { - Self { +impl TryFrom for EnvVarsStore { + type Error = anyhow::Error; + + fn try_from(x: InnerStore) -> Result { + let vars = Self { chain_head_watcher_timeout: Duration::from_secs(x.chain_head_watcher_timeout_in_secs), query_stats_refresh_interval: Duration::from_secs( x.query_stats_refresh_interval_in_secs, @@ -184,7 +186,15 @@ impl From for EnvVarsStore { last_rollup_from_poi: x.last_rollup_from_poi, insert_extra_cols: x.insert_extra_cols, fdw_fetch_size: x.fdw_fetch_size, + }; + if let Some(timeout) = vars.batch_timeout { + if timeout < 2 * vars.batch_target_duration { + bail!( + "GRAPH_STORE_BATCH_TIMEOUT must be greater than 2*GRAPH_STORE_BATCH_TARGET_DURATION" + ); + } } + Ok(vars) } } From a5ac766655e0eb9d6823b3e0ffd7c35d6c6cd999 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 10:04:52 -0700 Subject: [PATCH 032/160] store: Factor the loop for copying a table into a method --- store/postgres/src/copy.rs | 149 +++++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 66 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 80a596b7f2c..6e909906c43 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -710,75 +710,12 @@ impl Connection { progress.start(); for table in state.tables.iter_mut().filter(|table| !table.finished()) { - while !table.finished() { - // It is important that this check happens outside the write - // transaction so that we do not hold on to locks acquired - // by the check - if table.is_cancelled(&mut self.conn)? 
{ + match self.copy_table(logger, &mut progress, table)? { + Status::Finished => { /* Move on to the next table */ } + Status::Cancelled => { return Ok(Status::Cancelled); } - - // Pause copying if replication is lagging behind to avoid - // overloading replicas - let mut lag = catalog::replication_lag(&mut self.conn)?; - if lag > MAX_REPLICATION_LAG { - loop { - info!(&self.logger, - "Replicas are lagging too much; pausing copying for {}s to allow them to catch up", - REPLICATION_SLEEP.as_secs(); - "lag_s" => lag.as_secs()); - std::thread::sleep(REPLICATION_SLEEP); - lag = catalog::replication_lag(&mut self.conn)?; - if lag <= ACCEPTABLE_REPLICATION_LAG { - break; - } - } - } - - let status = { - loop { - match self.transaction(|conn| { - if let Some(timeout) = STATEMENT_TIMEOUT.as_ref() { - conn.batch_execute(timeout)?; - } - table.copy_batch(conn) - }) { - Ok(status) => { - break status; - } - Err(StoreError::StatementTimeout) => { - warn!( - logger, - "Current batch took longer than GRAPH_STORE_BATCH_TIMEOUT seconds. Retrying with a smaller batch size." - ); - } - Err(e) => { - return Err(e); - } - } - // We hit a timeout. Reset the batch size to 1. - // That's small enough that we will make _some_ - // progress, assuming the timeout is set to a - // reasonable value (several minutes) - // - // Our estimation of batch sizes is generally good - // and stays within the prescribed bounds, but there - // are cases where proper estimation of the batch - // size is nearly impossible since the size of the - // rows in the table jumps sharply at some point - // that is hard to predict. This mechanism ensures - // that if our estimation is wrong, the consequences - // aren't too severe. - self.transaction(|conn| table.set_batch_size(conn, 1))?; - } - }; - - if status == Status::Cancelled { - return Ok(status); - } - progress.update(&table.dst.object, &table.batcher); } - progress.table_finished(&table.batcher); } // Create indexes for all the attributes that were postponed at the start of @@ -828,6 +765,86 @@ impl Connection { Ok(Status::Finished) } + fn copy_table( + &mut self, + logger: &Logger, + progress: &mut CopyProgress<'_>, + table: &mut TableState, + ) -> Result { + use Status::*; + + while !table.finished() { + // It is important that this check happens outside the write + // transaction so that we do not hold on to locks acquired + // by the check + if table.is_cancelled(&mut self.conn)? { + return Ok(Cancelled); + } + + // Pause copying if replication is lagging behind to avoid + // overloading replicas + let mut lag = catalog::replication_lag(&mut self.conn)?; + if lag > MAX_REPLICATION_LAG { + loop { + info!(&self.logger, + "Replicas are lagging too much; pausing copying for {}s to allow them to catch up", + REPLICATION_SLEEP.as_secs(); + "lag_s" => lag.as_secs()); + std::thread::sleep(REPLICATION_SLEEP); + lag = catalog::replication_lag(&mut self.conn)?; + if lag <= ACCEPTABLE_REPLICATION_LAG { + break; + } + } + } + + let status = { + loop { + match self.transaction(|conn| { + if let Some(timeout) = STATEMENT_TIMEOUT.as_ref() { + conn.batch_execute(timeout)?; + } + table.copy_batch(conn) + }) { + Ok(status) => { + break status; + } + Err(StoreError::StatementTimeout) => { + warn!( + logger, + "Current batch took longer than GRAPH_STORE_BATCH_TIMEOUT seconds. Retrying with a smaller batch size." + ); + } + Err(e) => { + return Err(e); + } + } + // We hit a timeout. Reset the batch size to 1. 
+ // That's small enough that we will make _some_ + // progress, assuming the timeout is set to a + // reasonable value (several minutes) + // + // Our estimation of batch sizes is generally good + // and stays within the prescribed bounds, but there + // are cases where proper estimation of the batch + // size is nearly impossible since the size of the + // rows in the table jumps sharply at some point + // that is hard to predict. This mechanism ensures + // that if our estimation is wrong, the consequences + // aren't too severe. + self.transaction(|conn| table.set_batch_size(conn, 1))?; + } + }; + + if status == Cancelled { + return Ok(Cancelled); + } + progress.update(&table.dst.object, &table.batcher); + } + progress.table_finished(&table.batcher); + Ok(Finished) + } + /// Copy the data for the subgraph `src` to the subgraph `dst`. The /// schema for both subgraphs must have already been set up. The /// `target_block` must be far enough behind the chain head so that the From 2efe3a47bc72abd5a25147cd3b70a48f80ef022a Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 10:40:06 -0700 Subject: [PATCH 033/160] store: Make copy_table a free-standing function --- store/postgres/src/copy.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 6e909906c43..758ec98cedf 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -122,7 +122,6 @@ pub enum Status { Cancelled, } -#[allow(dead_code)] struct CopyState { src: Arc, dst: Arc, @@ -710,7 +709,7 @@ impl Connection { progress.start(); for table in state.tables.iter_mut().filter(|table| !table.finished()) { - match self.copy_table(logger, &mut progress, table)? { + match Self::copy_table(&mut self.conn, logger, &mut progress, table)? { Status::Finished => { /* Move on to the next table */ } Status::Cancelled => { return Ok(Status::Cancelled); @@ -766,7 +765,7 @@ impl Connection { } fn copy_table( - &mut self, + conn: &mut PgConnection, logger: &Logger, progress: &mut CopyProgress<'_>, table: &mut TableState, @@ -777,21 +776,21 @@ impl Connection { // It is important that this check happens outside the write // transaction so that we do not hold on to locks acquired // by the check - if table.is_cancelled(&mut self.conn)? { + if table.is_cancelled(conn)? { return Ok(Cancelled); } // Pause copying if replication is lagging behind to avoid // overloading replicas - let mut lag = catalog::replication_lag(&mut self.conn)?; + let mut lag = catalog::replication_lag(conn)?; if lag > MAX_REPLICATION_LAG { loop { - info!(&self.logger, + info!(logger, "Replicas are lagging too much; pausing copying for {}s to allow them to catch up", REPLICATION_SLEEP.as_secs(); "lag_s" => lag.as_secs()); std::thread::sleep(REPLICATION_SLEEP); - lag = catalog::replication_lag(&mut self.conn)?; + lag = catalog::replication_lag(conn)?; if lag <= ACCEPTABLE_REPLICATION_LAG { break; } @@ -800,7 +799,7 @@ impl Connection { let status = { loop { - match self.transaction(|conn| { + match conn.transaction(|conn| { if let Some(timeout) = STATEMENT_TIMEOUT.as_ref() { conn.batch_execute(timeout)?; } @@ -832,7 +831,7 @@ impl Connection { // that is hard to predict. This mechanism ensures // that if our estimation is wrong, the consequences // aren't too severe. 
- self.transaction(|conn| table.set_batch_size(conn, 1))?; + conn.transaction(|conn| table.set_batch_size(conn, 1))?; } }; From 9cfafa31bfbf2b83061dab67d1192dd5d4d7f83f Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 12:29:01 -0700 Subject: [PATCH 034/160] store: Asyncify subgraph start, and thereby copying --- store/postgres/src/copy.rs | 9 ++++++--- store/postgres/src/deployment_store.rs | 4 ++-- store/postgres/src/writable.rs | 6 ++++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 758ec98cedf..2582237a827 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -698,7 +698,10 @@ impl Connection { Ok(()) } - pub fn copy_data_internal(&mut self, index_list: IndexList) -> Result { + pub async fn copy_data_internal( + &mut self, + index_list: IndexList, + ) -> Result { let src = self.src.clone(); let dst = self.dst.clone(); let target_block = self.target_block.clone(); @@ -860,7 +863,7 @@ impl Connection { /// lower(v1.block_range) => v2.vid > v1.vid` and we can therefore stop /// the copying of each table as soon as we hit `max_vid = max { v.vid | /// lower(v.block_range) <= target_block.number }`. - pub fn copy_data(&mut self, index_list: IndexList) -> Result { + pub async fn copy_data(&mut self, index_list: IndexList) -> Result { // We require sole access to the destination site, and that we get a // consistent view of what has been copied so far. In general, that // is always true. It can happen though that this function runs when @@ -874,7 +877,7 @@ impl Connection { "Obtaining copy lock (this might take a long time if another process is still copying)" ); advisory_lock::lock_copying(&mut self.conn, self.dst.site.as_ref())?; - let res = self.copy_data_internal(index_list); + let res = self.copy_data_internal(index_list).await; advisory_lock::unlock_copying(&mut self.conn, self.dst.site.as_ref())?; if matches!(res, Ok(Status::Cancelled)) { warn!(&self.logger, "Copying was cancelled and is incomplete"); diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index 01f705158d3..248ba5a5473 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -1491,7 +1491,7 @@ impl DeploymentStore { /// to the graph point, so that calling this needlessly with `Some(..)` /// will remove any progress that might have been made since the last /// time the deployment was started. 
- pub(crate) fn start_subgraph( + pub(crate) async fn start_subgraph( &self, logger: &Logger, site: Arc, @@ -1528,7 +1528,7 @@ impl DeploymentStore { src_manifest_idx_and_name, dst_manifest_idx_and_name, )?; - let status = copy_conn.copy_data(index_list)?; + let status = copy_conn.copy_data(index_list).await?; if status == crate::copy::Status::Cancelled { return Err(StoreError::Canceled); } diff --git a/store/postgres/src/writable.rs b/store/postgres/src/writable.rs index 07d116790c0..26e559bcbc9 100644 --- a/store/postgres/src/writable.rs +++ b/store/postgres/src/writable.rs @@ -220,8 +220,10 @@ impl SyncStore { } None => None, }; - self.writable - .start_subgraph(logger, self.site.clone(), graft_base)?; + graph::block_on( + self.writable + .start_subgraph(logger, self.site.clone(), graft_base), + )?; self.store.primary_conn()?.copy_finished(self.site.as_ref()) }) } From d6e337c2ddeb954c57624ac0e84d353b698d24ac Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 12:52:01 -0700 Subject: [PATCH 035/160] store: Do not access conn in copy::Connection directly We leave one place where it is accessed, but in all other places we go through copy::Connection::transaction; the reason will become apparent in the next commit(s) --- store/postgres/src/copy.rs | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 2582237a827..3bcfd6ecb03 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -13,7 +13,6 @@ //! `graph-node` was restarted while the copy was running. use std::{ convert::TryFrom, - ops::DerefMut, sync::Arc, time::{Duration, Instant}, }; @@ -28,7 +27,9 @@ use diesel::{ }; use graph::{ constraint_violation, - prelude::{info, lazy_static, o, warn, BlockNumber, BlockPtr, Logger, StoreError, ENV_VARS}, + prelude::{ + info, lazy_static, o, warn, BlockNumber, BlockPtr, CheapClone, Logger, StoreError, ENV_VARS, + }, schema::EntityType, }; use itertools::Itertools; @@ -623,8 +624,8 @@ pub struct Connection { src: Arc, dst: Arc, target_block: BlockPtr, - src_manifest_idx_and_name: Vec<(i32, String)>, - dst_manifest_idx_and_name: Vec<(i32, String)>, + src_manifest_idx_and_name: Arc>, + dst_manifest_idx_and_name: Arc>, } impl Connection { @@ -661,6 +662,8 @@ impl Connection { } false })?; + let src_manifest_idx_and_name = Arc::new(src_manifest_idx_and_name); + let dst_manifest_idx_and_name = Arc::new(dst_manifest_idx_and_name); Ok(Self { logger, conn, @@ -683,15 +686,16 @@ impl Connection { /// has a private data sources table. The copying is done in its own /// transaction. fn copy_private_data_sources(&mut self, state: &CopyState) -> Result<(), StoreError> { + let src_manifest_idx_and_name = self.src_manifest_idx_and_name.cheap_clone(); + let dst_manifest_idx_and_name = self.dst_manifest_idx_and_name.cheap_clone(); if state.src.site.schema_version.private_data_sources() { - let conn = &mut self.conn; - conn.transaction(|conn| { + self.transaction(|conn| { DataSourcesTable::new(state.src.site.namespace.clone()).copy_to( conn, &DataSourcesTable::new(state.dst.site.namespace.clone()), state.target_block.number, - &self.src_manifest_idx_and_name, - &self.dst_manifest_idx_and_name, + &src_manifest_idx_and_name, + &dst_manifest_idx_and_name, ) })?; } @@ -723,7 +727,6 @@ impl Connection { // Create indexes for all the attributes that were postponed at the start of // the copy/graft operations. // First recreate the indexes that existed in the original subgraph. 
- let conn = self.conn.deref_mut(); for table in state.tables.iter() { let arr = index_list.indexes_for_table( &self.dst.site.namespace, @@ -736,7 +739,7 @@ impl Connection { for (_, sql) in arr { let query = sql_query(format!("{};", sql)); - query.execute(conn)?; + self.transaction(|conn| query.execute(conn).map_err(StoreError::from))?; } } @@ -755,7 +758,7 @@ impl Connection { .into_iter() { let query = sql_query(sql); - query.execute(conn)?; + self.transaction(|conn| query.execute(conn).map_err(StoreError::from))?; } } @@ -876,9 +879,10 @@ impl Connection { &self.logger, "Obtaining copy lock (this might take a long time if another process is still copying)" ); - advisory_lock::lock_copying(&mut self.conn, self.dst.site.as_ref())?; + let dst_site = self.dst.site.cheap_clone(); + self.transaction(|conn| advisory_lock::lock_copying(conn, &dst_site))?; let res = self.copy_data_internal(index_list).await; - advisory_lock::unlock_copying(&mut self.conn, self.dst.site.as_ref())?; + self.transaction(|conn| advisory_lock::unlock_copying(conn, &dst_site))?; if matches!(res, Ok(Status::Cancelled)) { warn!(&self.logger, "Copying was cancelled and is incomplete"); } From 6c8016aeafac4ef3f081b8b9696c4f8d1cda7771 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 13:02:42 -0700 Subject: [PATCH 036/160] store: Allow running copy_table without holding a reference to self --- store/postgres/src/copy.rs | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 3bcfd6ecb03..ea954af2cf8 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -56,6 +56,8 @@ const ACCEPTABLE_REPLICATION_LAG: Duration = Duration::from_secs(30); /// the lag again const REPLICATION_SLEEP: Duration = Duration::from_secs(10); +type PooledPgConnection = PooledConnection>; + lazy_static! { static ref STATEMENT_TIMEOUT: Option = ENV_VARS .store @@ -620,7 +622,17 @@ pub struct Connection { /// The connection pool for the shard that will contain the destination /// of the copy logger: Logger, - conn: PooledConnection>, + /// We always have one database connection to make sure that copy jobs, + /// once started, can eventually finished so that we don't have + /// different copy jobs that are all half done and have to wait for + /// other jobs to finish + /// + /// This is an `Option` because we need to take this connection out of + /// `self` at some point to spawn a background task to copy an + /// individual table. Except for that case, this will always be + /// `Some(..)`. 
Most code shouldn't access `self.conn` directly, but use + /// `self.transaction` + conn: Option, src: Arc, dst: Arc, target_block: BlockPtr, @@ -662,6 +674,7 @@ impl Connection { } false })?; + let conn = Some(conn); let src_manifest_idx_and_name = Arc::new(src_manifest_idx_and_name); let dst_manifest_idx_and_name = Arc::new(dst_manifest_idx_and_name); Ok(Self { @@ -679,7 +692,12 @@ impl Connection { where F: FnOnce(&mut PgConnection) -> Result, { - self.conn.transaction(|conn| f(conn)) + let Some(conn) = self.conn.as_mut() else { + return Err(constraint_violation!( + "copy connection has been handed to background task but not returned yet" + )); + }; + conn.transaction(|conn| f(conn)) } /// Copy private data sources if the source uses a schema version that @@ -716,7 +734,15 @@ impl Connection { progress.start(); for table in state.tables.iter_mut().filter(|table| !table.finished()) { - match Self::copy_table(&mut self.conn, logger, &mut progress, table)? { + // Take self.conn to decouple it from self, copy the table and + // put the connection back + let mut conn = self.conn.take().ok_or_else(|| { + constraint_violation!("copy connection is not where it is supposed to be") + })?; + let res = Self::copy_table(&mut conn, logger, &mut progress, table); + self.conn = Some(conn); + + match res? { Status::Finished => { /* Move on to the next table */ } Status::Cancelled => { return Ok(Status::Cancelled); From 2665dca412d7856310b6ff633a4bd57b7ec19520 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 13:12:28 -0700 Subject: [PATCH 037/160] store: Remove lifetime from CopyProgress --- store/postgres/src/copy.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index ea954af2cf8..8ff63bdf42f 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -538,8 +538,8 @@ impl TableState { } // A helper for logging progress while data is being copied -struct CopyProgress<'a> { - logger: &'a Logger, +struct CopyProgress { + logger: Logger, last_log: Instant, src: Arc, dst: Arc, @@ -547,8 +547,8 @@ struct CopyProgress<'a> { target_vid: i64, } -impl<'a> CopyProgress<'a> { - fn new(logger: &'a Logger, state: &CopyState) -> Self { +impl CopyProgress { + fn new(logger: Logger, state: &CopyState) -> Self { let target_vid: i64 = state .tables .iter() @@ -730,7 +730,7 @@ impl Connection { let mut state = self.transaction(|conn| CopyState::new(conn, src, dst, target_block))?; let logger = &self.logger.clone(); - let mut progress = CopyProgress::new(logger, &state); + let mut progress = CopyProgress::new(self.logger.cheap_clone(), &state); progress.start(); for table in state.tables.iter_mut().filter(|table| !table.finished()) { @@ -799,7 +799,7 @@ impl Connection { fn copy_table( conn: &mut PgConnection, logger: &Logger, - progress: &mut CopyProgress<'_>, + progress: &mut CopyProgress, table: &mut TableState, ) -> Result { use Status::*; From e09f6bb98768b36626dfe55df6d69da7b8507700 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 13:33:37 -0700 Subject: [PATCH 038/160] store: Change CopyProgress to interior mutability --- store/postgres/src/copy.rs | 45 +++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 8ff63bdf42f..88b79e162b7 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -13,7 +13,10 @@ //! 
`graph-node` was restarted while the copy was running. use std::{ convert::TryFrom, - sync::Arc, + sync::{ + atomic::{AtomicI64, Ordering}, + Arc, Mutex, + }, time::{Duration, Instant}, }; @@ -540,10 +543,11 @@ impl TableState { // A helper for logging progress while data is being copied struct CopyProgress { logger: Logger, - last_log: Instant, + last_log: Arc>, src: Arc, dst: Arc, - current_vid: i64, + /// The sum of all `target_vid` of tables that have finished + current_vid: AtomicI64, target_vid: i64, } @@ -559,9 +563,10 @@ impl CopyProgress { .iter() .map(|table| table.batcher.next_vid()) .sum(); + let current_vid = AtomicI64::new(current_vid); Self { logger, - last_log: Instant::now(), + last_log: Arc::new(Mutex::new(Instant::now())), src: state.src.site.clone(), dst: state.dst.site.clone(), current_vid, @@ -590,8 +595,21 @@ impl CopyProgress { } } - fn update(&mut self, entity_type: &EntityType, batcher: &VidBatcher) { - if self.last_log.elapsed() > LOG_INTERVAL { + fn update(&self, entity_type: &EntityType, batcher: &VidBatcher) { + let mut last_log = self.last_log.lock().unwrap_or_else(|err| { + // Better to clear the poison error and skip a log message than + // crash for no important reason + warn!( + self.logger, + "Lock for progress locking was poisoned, skipping a log message" + ); + let mut last_log = err.into_inner(); + *last_log = Instant::now(); + self.last_log.clear_poison(); + last_log + }); + if last_log.elapsed() > LOG_INTERVAL { + let total_current_vid = self.current_vid.load(Ordering::SeqCst) + batcher.next_vid(); info!( self.logger, "Copied {:.2}% of `{}` entities ({}/{} entity versions), {:.2}% of overall data", @@ -599,14 +617,15 @@ impl CopyProgress { entity_type, batcher.next_vid(), batcher.target_vid(), - Self::progress_pct(self.current_vid + batcher.next_vid(), self.target_vid) + Self::progress_pct(total_current_vid, self.target_vid) ); - self.last_log = Instant::now(); + *last_log = Instant::now(); } } - fn table_finished(&mut self, batcher: &VidBatcher) { - self.current_vid += batcher.next_vid(); + fn table_finished(&self, batcher: &VidBatcher) { + self.current_vid + .fetch_add(batcher.next_vid(), Ordering::SeqCst); } fn finished(&self) { @@ -730,7 +749,7 @@ impl Connection { let mut state = self.transaction(|conn| CopyState::new(conn, src, dst, target_block))?; let logger = &self.logger.clone(); - let mut progress = CopyProgress::new(self.logger.cheap_clone(), &state); + let progress = Arc::new(CopyProgress::new(self.logger.cheap_clone(), &state)); progress.start(); for table in state.tables.iter_mut().filter(|table| !table.finished()) { @@ -739,7 +758,7 @@ impl Connection { let mut conn = self.conn.take().ok_or_else(|| { constraint_violation!("copy connection is not where it is supposed to be") })?; - let res = Self::copy_table(&mut conn, logger, &mut progress, table); + let res = Self::copy_table(&mut conn, logger, progress.cheap_clone(), table); self.conn = Some(conn); match res? 
{ @@ -799,7 +818,7 @@ impl Connection { fn copy_table( conn: &mut PgConnection, logger: &Logger, - progress: &mut CopyProgress, + progress: Arc, table: &mut TableState, ) -> Result { use Status::*; From 8d2697f6fd9e748ad138af5b31ba95d4a4828a75 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 15:00:51 -0700 Subject: [PATCH 039/160] store: Change how we iterate over all unfinished tables --- store/postgres/src/copy.rs | 39 +++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 88b79e162b7..a7b683b57a1 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -132,7 +132,8 @@ struct CopyState { src: Arc, dst: Arc, target_block: BlockPtr, - tables: Vec, + finished: Vec, + unfinished: Vec, } impl CopyState { @@ -191,11 +192,13 @@ impl CopyState { target_block: BlockPtr, ) -> Result { let tables = TableState::load(conn, src.as_ref(), dst.as_ref())?; + let (finished, unfinished) = tables.into_iter().partition(|table| table.finished()); Ok(CopyState { src, dst, target_block, - tables, + finished, + unfinished, }) } @@ -217,7 +220,7 @@ impl CopyState { )) .execute(conn)?; - let mut tables: Vec<_> = dst + let mut unfinished: Vec<_> = dst .tables .values() .filter_map(|dst_table| { @@ -235,9 +238,9 @@ impl CopyState { }) }) .collect::>()?; - tables.sort_by_key(|table| table.dst.object.to_string()); + unfinished.sort_by_key(|table| table.dst.object.to_string()); - let values = tables + let values = unfinished .iter() .map(|table| { ( @@ -255,7 +258,8 @@ impl CopyState { src, dst, target_block, - tables, + finished: Vec::new(), + unfinished, }) } @@ -299,6 +303,10 @@ impl CopyState { } Ok(()) } + + fn all_tables(&self) -> impl Iterator { + self.finished.iter().chain(self.unfinished.iter()) + } } pub(crate) fn source( @@ -554,13 +562,12 @@ struct CopyProgress { impl CopyProgress { fn new(logger: Logger, state: &CopyState) -> Self { let target_vid: i64 = state - .tables - .iter() + .all_tables() .map(|table| table.batcher.target_vid()) .sum(); let current_vid = state - .tables - .iter() + .all_tables() + .filter(|table| table.finished()) .map(|table| table.batcher.next_vid()) .sum(); let current_vid = AtomicI64::new(current_vid); @@ -752,17 +759,19 @@ impl Connection { let progress = Arc::new(CopyProgress::new(self.logger.cheap_clone(), &state)); progress.start(); - for table in state.tables.iter_mut().filter(|table| !table.finished()) { + while let Some(mut table) = state.unfinished.pop() { // Take self.conn to decouple it from self, copy the table and // put the connection back let mut conn = self.conn.take().ok_or_else(|| { constraint_violation!("copy connection is not where it is supposed to be") })?; - let res = Self::copy_table(&mut conn, logger, progress.cheap_clone(), table); + let res = Self::copy_table(&mut conn, logger, progress.cheap_clone(), &mut table); self.conn = Some(conn); match res? { - Status::Finished => { /* Move on to the next table */ } + Status::Finished => { + state.finished.push(table); + } Status::Cancelled => { return Ok(Status::Cancelled); } @@ -772,7 +781,7 @@ impl Connection { // Create indexes for all the attributes that were postponed at the start of // the copy/graft operations. // First recreate the indexes that existed in the original subgraph. 
- for table in state.tables.iter() { + for table in state.all_tables() { let arr = index_list.indexes_for_table( &self.dst.site.namespace, &table.src.name.to_string(), @@ -790,7 +799,7 @@ impl Connection { // Second create the indexes for the new fields. // Here we need to skip those created in the first step for the old fields. - for table in state.tables.iter() { + for table in state.all_tables() { let orig_colums = table .src .columns From 01a7eccb88399b0e8841b4e825d57e31e82e9896 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 15:35:56 -0700 Subject: [PATCH 040/160] store: Rewrite copy loop so that we could run parallel copies For now, we still only copy one table at a time, but the code is closer to what we need to copy multiple tables concurrently --- store/postgres/src/copy.rs | 235 ++++++++++++++++++++++--------------- 1 file changed, 142 insertions(+), 93 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index a7b683b57a1..74f80252c13 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -14,7 +14,7 @@ use std::{ convert::TryFrom, sync::{ - atomic::{AtomicI64, Ordering}, + atomic::{AtomicBool, AtomicI64, Ordering}, Arc, Mutex, }, time::{Duration, Instant}, @@ -30,6 +30,7 @@ use diesel::{ }; use graph::{ constraint_violation, + futures03::future::select_all, prelude::{ info, lazy_static, o, warn, BlockNumber, BlockPtr, CheapClone, Logger, StoreError, ENV_VARS, }, @@ -548,7 +549,8 @@ impl TableState { } } -// A helper for logging progress while data is being copied +// A helper for logging progress while data is being copied and +// communicating across all copy workers struct CopyProgress { logger: Logger, last_log: Arc>, @@ -557,6 +559,7 @@ struct CopyProgress { /// The sum of all `target_vid` of tables that have finished current_vid: AtomicI64, target_vid: i64, + cancelled: AtomicBool, } impl CopyProgress { @@ -578,6 +581,7 @@ impl CopyProgress { dst: state.dst.site.clone(), current_vid, target_vid, + cancelled: AtomicBool::new(false), } } @@ -641,6 +645,120 @@ impl CopyProgress { "Finished copying data into {}[{}]", self.dst.deployment, self.dst.namespace ); } + + fn cancel(&self) { + self.cancelled.store(true, Ordering::SeqCst); + } + + fn is_cancelled(&self) -> bool { + self.cancelled.load(Ordering::SeqCst) + } +} + +/// A helper to run copying of one table. We need to thread `conn` and +/// `table` from the control loop to the background worker and back again to +/// the control loop. This worker facilitates that +struct CopyTableWorker { + conn: PooledPgConnection, + table: TableState, + result: Result, +} + +impl CopyTableWorker { + fn new(conn: PooledPgConnection, table: TableState) -> Self { + Self { + conn, + table, + result: Ok(Status::Cancelled), + } + } + + async fn run(mut self, logger: Logger, progress: Arc) -> Self { + self.result = self.run_inner(logger, &progress); + self + } + + fn run_inner(&mut self, logger: Logger, progress: &CopyProgress) -> Result { + use Status::*; + + let conn = &mut self.conn; + while !self.table.finished() { + // It is important that this check happens outside the write + // transaction so that we do not hold on to locks acquired + // by the check + if self.table.is_cancelled(conn)? 
|| progress.is_cancelled() { + progress.cancel(); + return Ok(Cancelled); + } + + // Pause copying if replication is lagging behind to avoid + // overloading replicas + let mut lag = catalog::replication_lag(conn)?; + if lag > MAX_REPLICATION_LAG { + loop { + info!(logger, + "Replicas are lagging too much; pausing copying for {}s to allow them to catch up", + REPLICATION_SLEEP.as_secs(); + "lag_s" => lag.as_secs()); + std::thread::sleep(REPLICATION_SLEEP); + lag = catalog::replication_lag(conn)?; + if lag <= ACCEPTABLE_REPLICATION_LAG { + break; + } + } + } + + let status = { + loop { + if progress.is_cancelled() { + break Cancelled; + } + + match conn.transaction(|conn| { + if let Some(timeout) = STATEMENT_TIMEOUT.as_ref() { + conn.batch_execute(timeout)?; + } + self.table.copy_batch(conn) + }) { + Ok(status) => { + break status; + } + Err(StoreError::StatementTimeout) => { + warn!( + logger, + "Current batch took longer than GRAPH_STORE_BATCH_TIMEOUT seconds. Retrying with a smaller batch size." + ); + } + Err(e) => { + return Err(e); + } + } + // We hit a timeout. Reset the batch size to 1. + // That's small enough that we will make _some_ + // progress, assuming the timeout is set to a + // reasonable value (several minutes) + // + // Our estimation of batch sizes is generally good + // and stays within the prescribed bounds, but there + // are cases where proper estimation of the batch + // size is nearly impossible since the size of the + // rows in the table jumps sharply at some point + // that is hard to predict. This mechanism ensures + // that if our estimation is wrong, the consequences + // aren't too severe. + conn.transaction(|conn| self.table.set_batch_size(conn, 1))?; + } + }; + + if status == Cancelled { + progress.cancel(); + return Ok(Cancelled); + } + progress.update(&self.table.dst.object, &self.table.batcher); + } + progress.table_finished(&self.table.batcher); + Ok(Finished) + } } /// A helper for copying subgraphs @@ -759,22 +877,33 @@ impl Connection { let progress = Arc::new(CopyProgress::new(self.logger.cheap_clone(), &state)); progress.start(); - while let Some(mut table) = state.unfinished.pop() { + let mut workers = Vec::new(); + while let Some(table) = state.unfinished.pop() { // Take self.conn to decouple it from self, copy the table and // put the connection back - let mut conn = self.conn.take().ok_or_else(|| { + let conn = self.conn.take().ok_or_else(|| { constraint_violation!("copy connection is not where it is supposed to be") })?; - let res = Self::copy_table(&mut conn, logger, progress.cheap_clone(), &mut table); - self.conn = Some(conn); - match res? 
{ - Status::Finished => { - state.finished.push(table); - } - Status::Cancelled => { - return Ok(Status::Cancelled); - } + let worker = CopyTableWorker::new(conn, table); + let fut = Box::pin(worker.run(logger.cheap_clone(), progress.cheap_clone())); + + workers.push(fut); + let (worker, _idx, remaining) = select_all(workers).await; + workers = remaining; + + // Put the connection back into self.conn so that we can use it + // in the next iteration + self.conn = Some(worker.conn); + state.finished.push(worker.table); + + if worker.result.is_err() { + progress.cancel(); + return worker.result; + } + + if progress.is_cancelled() { + return Ok(Status::Cancelled); } } @@ -824,86 +953,6 @@ impl Connection { Ok(Status::Finished) } - fn copy_table( - conn: &mut PgConnection, - logger: &Logger, - progress: Arc, - table: &mut TableState, - ) -> Result { - use Status::*; - - while !table.finished() { - // It is important that this check happens outside the write - // transaction so that we do not hold on to locks acquired - // by the check - if table.is_cancelled(conn)? { - return Ok(Cancelled); - } - - // Pause copying if replication is lagging behind to avoid - // overloading replicas - let mut lag = catalog::replication_lag(conn)?; - if lag > MAX_REPLICATION_LAG { - loop { - info!(logger, - "Replicas are lagging too much; pausing copying for {}s to allow them to catch up", - REPLICATION_SLEEP.as_secs(); - "lag_s" => lag.as_secs()); - std::thread::sleep(REPLICATION_SLEEP); - lag = catalog::replication_lag(conn)?; - if lag <= ACCEPTABLE_REPLICATION_LAG { - break; - } - } - } - - let status = { - loop { - match conn.transaction(|conn| { - if let Some(timeout) = STATEMENT_TIMEOUT.as_ref() { - conn.batch_execute(timeout)?; - } - table.copy_batch(conn) - }) { - Ok(status) => { - break status; - } - Err(StoreError::StatementTimeout) => { - warn!( - logger, - "Current batch took longer than GRAPH_STORE_BATCH_TIMEOUT seconds. Retrying with a smaller batch size." - ); - } - Err(e) => { - return Err(e); - } - } - // We hit a timeout. Reset the batch size to 1. - // That's small enough that we will make _some_ - // progress, assuming the timeout is set to a - // reasonable value (several minutes) - // - // Our estimation of batch sizes is generally good - // and stays within the prescribed bounds, but there - // are cases where proper estimation of the batch - // size is nearly impossible since the size of the - // rows in the table jumps sharply at some point - // that is hard to predict. This mechanism ensures - // that if our estimation is wrong, the consequences - // aren't too severe. - conn.transaction(|conn| table.set_batch_size(conn, 1))?; - } - }; - - if status == Cancelled { - return Ok(Cancelled); - } - progress.update(&table.dst.object, &table.batcher); - } - progress.table_finished(&table.batcher); - Ok(Finished) - } - /// Copy the data for the subgraph `src` to the subgraph `dst`. The /// schema for both subgraphs must have already been set up. 
The /// `target_block` must be far enough behind the chain head so that the From 3828ed710c42cbaadf4b7add3ff268acaa6a368f Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 16:13:17 -0700 Subject: [PATCH 041/160] store: Factor creating a worker into a helper --- store/postgres/src/copy.rs | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 74f80252c13..e022366a091 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -13,6 +13,8 @@ //! `graph-node` was restarted while the copy was running. use std::{ convert::TryFrom, + future::Future, + pin::Pin, sync::{ atomic::{AtomicBool, AtomicI64, Ordering}, Arc, Mutex, @@ -864,6 +866,20 @@ impl Connection { Ok(()) } + fn default_worker( + &mut self, + state: &mut CopyState, + progress: Arc, + ) -> Option>>> { + let conn = self.conn.take()?; + let table = state.unfinished.pop()?; + + let worker = CopyTableWorker::new(conn, table); + Some(Box::pin( + worker.run(self.logger.cheap_clone(), progress.cheap_clone()), + )) + } + pub async fn copy_data_internal( &mut self, index_list: IndexList, @@ -873,22 +889,15 @@ impl Connection { let target_block = self.target_block.clone(); let mut state = self.transaction(|conn| CopyState::new(conn, src, dst, target_block))?; - let logger = &self.logger.clone(); let progress = Arc::new(CopyProgress::new(self.logger.cheap_clone(), &state)); progress.start(); let mut workers = Vec::new(); - while let Some(table) = state.unfinished.pop() { - // Take self.conn to decouple it from self, copy the table and - // put the connection back - let conn = self.conn.take().ok_or_else(|| { - constraint_violation!("copy connection is not where it is supposed to be") - })?; - - let worker = CopyTableWorker::new(conn, table); - let fut = Box::pin(worker.run(logger.cheap_clone(), progress.cheap_clone())); + while !state.unfinished.is_empty() && !workers.is_empty() { + if let Some(worker) = self.default_worker(&mut state, progress.cheap_clone()) { + workers.push(worker); + } - workers.push(fut); let (worker, _idx, remaining) = select_all(workers).await; workers = remaining; From 716138ac2ccd2194bb1ba04cd9c700655c913beb Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 16:43:06 -0700 Subject: [PATCH 042/160] store: Copy multiple tables in parallel if there are idle connections --- store/postgres/src/connection_pool.rs | 44 +++++++++++--- store/postgres/src/copy.rs | 83 ++++++++++++++++++++++++--- 2 files changed, 109 insertions(+), 18 deletions(-) diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index 6267a41628a..f710fd2316d 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -524,6 +524,15 @@ impl ConnectionPool { self.get_ready()?.get_fdw(logger, timeout) } + /// Get a connection from the pool for foreign data wrapper access if + /// one is available + pub fn try_get_fdw( + &self, + logger: &Logger, + ) -> Result>>, StoreError> { + self.get_ready()?.try_get_fdw(logger) + } + pub fn connection_detail(&self) -> Result { let pool = self.get_ready()?; ForeignServer::new(pool.shard.clone(), &pool.postgres_url).map_err(|e| e.into()) @@ -980,6 +989,23 @@ impl PoolInner { } } + /// Get the pool for fdw connections. 
It is an error if none is configured + fn fdw_pool( + &self, + logger: &Logger, + ) -> Result<&Pool>, StoreError> { + let pool = match &self.fdw_pool { + Some(pool) => pool, + None => { + const MSG: &str = + "internal error: trying to get fdw connection on a pool that doesn't have any"; + error!(logger, "{}", MSG); + return Err(constraint_violation!(MSG)); + } + }; + Ok(pool) + } + /// Get a connection from the pool for foreign data wrapper access; /// since that pool can be very contended, periodically log that we are /// still waiting for a connection @@ -995,15 +1021,7 @@ impl PoolInner { where F: FnMut() -> bool, { - let pool = match &self.fdw_pool { - Some(pool) => pool, - None => { - const MSG: &str = - "internal error: trying to get fdw connection on a pool that doesn't have any"; - error!(logger, "{}", MSG); - return Err(constraint_violation!(MSG)); - } - }; + let pool = self.fdw_pool(logger)?; loop { match pool.get() { Ok(conn) => return Ok(conn), @@ -1016,6 +1034,14 @@ impl PoolInner { } } + /// Get a connection from the fdw pool if one is available + pub fn try_get_fdw( + &self, + logger: &Logger, + ) -> Result>>, StoreError> { + Ok(self.fdw_pool(logger)?.try_get()) + } + pub fn connection_detail(&self) -> Result { ForeignServer::new(self.shard.clone(), &self.postgres_url).map_err(|e| e.into()) } diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index e022366a091..c8079adc52e 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -598,6 +598,16 @@ impl CopyProgress { ); } + fn start_table(&self, table: &TableState) { + info!( + self.logger, + "Starting to copy `{}` entities from {} to {}", + table.dst.object, + table.src.qualified_name, + table.dst.qualified_name + ); + } + fn progress_pct(current_vid: i64, target_vid: i64) -> f64 { // When a step is done, current_vid == target_vid + 1; don't report // more than 100% completion @@ -710,6 +720,7 @@ impl CopyTableWorker { } } + progress.start_table(&self.table); let status = { loop { if progress.is_cancelled() { @@ -779,6 +790,8 @@ pub struct Connection { /// `Some(..)`. Most code shouldn't access `self.conn` directly, but use /// `self.transaction` conn: Option, + pool: ConnectionPool, + workers: usize, src: Arc, dst: Arc, target_block: BlockPtr, @@ -826,6 +839,8 @@ impl Connection { Ok(Self { logger, conn, + pool, + workers: 5, src, dst, target_block, @@ -866,18 +881,50 @@ impl Connection { Ok(()) } + /// Create a worker using the connection in `self.conn`. This may return + /// `None` if there are no more tables that need to be copied. It is an + /// error to call this if `self.conn` is `None` fn default_worker( &mut self, state: &mut CopyState, - progress: Arc, - ) -> Option>>> { - let conn = self.conn.take()?; - let table = state.unfinished.pop()?; + progress: &Arc, + ) -> Result>>>, StoreError> { + let conn = self.conn.take().ok_or_else(|| { + constraint_violation!( + "copy connection has been handed to background task but not returned yet" + ) + })?; + let Some(table) = state.unfinished.pop() else { + return Ok(None); + }; + + let worker = CopyTableWorker::new(conn, table); + Ok(Some(Box::pin( + worker.run(self.logger.cheap_clone(), progress.cheap_clone()), + ))) + } + + /// Opportunistically create an extra worker if we have more tables to + /// copy and there are idle fdw connections. If there are no more tables + /// or no idle connections, this will return `None`. 
+ fn extra_worker( + &mut self, + state: &mut CopyState, + progress: &Arc, + ) -> Result>>>, StoreError> { + // It's important that we get the connection before the table since + // we remove the table from the state and could drop it otherwise + let Some(conn) = self.pool.try_get_fdw(&self.logger)? else { + return Ok(None); + }; + let Some(table) = state.unfinished.pop() else { + return Ok(None); + }; let worker = CopyTableWorker::new(conn, table); - Some(Box::pin( + Ok(Some(Box::pin( worker.run(self.logger.cheap_clone(), progress.cheap_clone()), - )) + ))) } pub async fn copy_data_internal( @@ -892,17 +939,34 @@ impl Connection { let progress = Arc::new(CopyProgress::new(self.logger.cheap_clone(), &state)); progress.start(); + // Run as many copy jobs as we can in parallel, up to `self.workers` + // many. We can always start at least one worker because of the + // connection in `self.conn`. If the fdw pool has idle connections + // and there are more tables to be copied, we can start more + // workers, up to `self.workers` many let mut workers = Vec::new(); while !state.unfinished.is_empty() && !workers.is_empty() { - if let Some(worker) = self.default_worker(&mut state, progress.cheap_clone()) { + // We usually add at least one job here, except if we are out of + // tables to copy. In that case, we go through the `while` loop + // every time one of the tables we are currently copying + // finishes + if let Some(worker) = self.default_worker(&mut state, &progress)? { + workers.push(worker); + } + loop { + if workers.len() >= self.workers { + break; + } + let Some(worker) = self.extra_worker(&mut state, &progress)? else { + break; + }; workers.push(worker); } - let (worker, _idx, remaining) = select_all(workers).await; workers = remaining; // Put the connection back into self.conn so that we can use it - // in the next iteration + // in the next iteration. self.conn = Some(worker.conn); state.finished.push(worker.table); @@ -915,6 +979,7 @@ impl Connection { return Ok(Status::Cancelled); } } + debug_assert!(self.conn.is_some()); // Create indexes for all the attributes that were postponed at the start of // the copy/graft operations. From 9d260fb8bfe5ca6715376c1f03bb6a5c08484fe0 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 28 Mar 2025 16:48:52 -0700 Subject: [PATCH 043/160] all: Make number of copy workers configurable --- docs/environment-variables.md | 4 ++++ graph/src/env/store.rs | 12 ++++++++++++ store/postgres/src/copy.rs | 4 ++-- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/docs/environment-variables.md b/docs/environment-variables.md index f174e0e2e54..46903185ccf 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -231,6 +231,10 @@ those. timeout is hit, the batch size is reset to 1 so we can be sure that batches stay below `GRAPH_STORE_BATCH_TARGET_DURATION` and the smaller batch is retried. Value is in seconds and defaults to unlimited. +- `GRAPH_STORE_BATCH_WORKERS`: The number of workers to use for batch + operations. If there are idle connectiosn, each subgraph copy operation + will use up to this many workers to copy tables in parallel. Defaults + to 1 and must be at least 1 - `GRAPH_START_BLOCK`: block hash:block number where the forked subgraph will start indexing at. - `GRAPH_FORK_BASE`: api url for where the graph node will fork from, use `https://api.thegraph.com/subgraphs/id/` for the hosted service. 
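As a rough illustration of how these settings combine (the values below are only an example; the documented defaults are 180s for the target duration, an unlimited timeout, and 1 worker), a node that regularly copies deployments between shards might be configured along these lines:

    GRAPH_STORE_BATCH_TARGET_DURATION=180   # aim for roughly 3-minute batches (the default)
    GRAPH_STORE_BATCH_TIMEOUT=1800          # cut a runaway batch off after 30 minutes (must be at least 2x the target)
    GRAPH_STORE_BATCH_WORKERS=4             # copy up to 4 tables in parallel when idle fdw connections are available

With settings like these, a batch that hits the timeout is retried with the batch size reset to 1, and independent tables of a deployment are copied concurrently as long as the fdw pool has spare connections.
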
diff --git a/graph/src/env/store.rs b/graph/src/env/store.rs index 4fb30f58079..1bdb3c4d902 100644 --- a/graph/src/env/store.rs +++ b/graph/src/env/store.rs @@ -85,6 +85,12 @@ pub struct EnvVarsStore { /// this. Set by `GRAPH_STORE_BATCH_TIMEOUT`. Unlimited by default pub batch_timeout: Option, + /// The number of workers to use for batch operations. If there are idle + /// connectiosn, each subgraph copy operation will use up to this many + /// workers to copy tables in parallel. Defaults to 1 and must be at + /// least 1 + pub batch_workers: usize, + /// Prune tables where we will remove at least this fraction of entity /// versions by rebuilding the table. Set by /// `GRAPH_STORE_HISTORY_REBUILD_THRESHOLD`. The default is 0.5 @@ -175,6 +181,7 @@ impl TryFrom for EnvVarsStore { write_queue_size: x.write_queue_size, batch_target_duration: Duration::from_secs(x.batch_target_duration_in_secs), batch_timeout: x.batch_timeout_in_secs.map(Duration::from_secs), + batch_workers: x.batch_workers, rebuild_threshold: x.rebuild_threshold.0, delete_threshold: x.delete_threshold.0, history_slack_factor: x.history_slack_factor.0, @@ -194,6 +201,9 @@ impl TryFrom for EnvVarsStore { ); } } + if vars.batch_workers < 1 { + bail!("GRAPH_STORE_BATCH_WORKERS must be at least 1"); + } Ok(vars) } } @@ -239,6 +249,8 @@ pub struct InnerStore { batch_target_duration_in_secs: u64, #[envconfig(from = "GRAPH_STORE_BATCH_TIMEOUT")] batch_timeout_in_secs: Option, + #[envconfig(from = "GRAPH_STORE_BATCH_WORKERS", default = "1")] + batch_workers: usize, #[envconfig(from = "GRAPH_STORE_HISTORY_REBUILD_THRESHOLD", default = "0.5")] rebuild_threshold: ZeroToOneF64, #[envconfig(from = "GRAPH_STORE_HISTORY_DELETE_THRESHOLD", default = "0.05")] diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index c8079adc52e..4dbc312bc9d 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -840,7 +840,7 @@ impl Connection { logger, conn, pool, - workers: 5, + workers: ENV_VARS.store.batch_workers, src, dst, target_block, @@ -945,7 +945,7 @@ impl Connection { // and there are more tables to be copied, we can start more // workers, up to `self.workers` many let mut workers = Vec::new(); - while !state.unfinished.is_empty() && !workers.is_empty() { + while !state.unfinished.is_empty() || !workers.is_empty() { // We usually add at least one job here, except if we are out of // tables to copy. In that case, we go through the `while` loop // every time one of the tables we are currently copying From b3543bbb3d0f65aadb6f8b4d13ed7458ee8bbf0b Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 31 Mar 2025 07:59:23 -0700 Subject: [PATCH 044/160] graph: Fix typo in comment --- graph/src/env/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graph/src/env/store.rs b/graph/src/env/store.rs index 1bdb3c4d902..ded0be9f144 100644 --- a/graph/src/env/store.rs +++ b/graph/src/env/store.rs @@ -86,7 +86,7 @@ pub struct EnvVarsStore { pub batch_timeout: Option, /// The number of workers to use for batch operations. If there are idle - /// connectiosn, each subgraph copy operation will use up to this many + /// connections, each subgraph copy operation will use up to this many /// workers to copy tables in parallel. 
Defaults to 1 and must be at /// least 1 pub batch_workers: usize, From 5d75dc6bdca05bb1fc823d7dcbef316a1da549fb Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 31 Mar 2025 18:58:29 -0700 Subject: [PATCH 045/160] store: Log 'starting to copy table' only once --- store/postgres/src/copy.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 4dbc312bc9d..55fed5cde2f 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -694,6 +694,7 @@ impl CopyTableWorker { use Status::*; let conn = &mut self.conn; + progress.start_table(&self.table); while !self.table.finished() { // It is important that this check happens outside the write // transaction so that we do not hold on to locks acquired @@ -720,7 +721,6 @@ impl CopyTableWorker { } } - progress.start_table(&self.table); let status = { loop { if progress.is_cancelled() { From 3369381b47de210270cb4041575fbd64222efacb Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 31 Mar 2025 19:29:12 -0700 Subject: [PATCH 046/160] graph, store: Wait for a little bit for an addl fdw connection If we don't wait at all, we can use fewer connections than are available since the pool might be below its capacity but has no idle connections open currently --- graph/src/env/store.rs | 9 +++++++++ store/postgres/src/connection_pool.rs | 17 ++++++++++++++--- store/postgres/src/copy.rs | 5 ++++- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/graph/src/env/store.rs b/graph/src/env/store.rs index ded0be9f144..661d0356446 100644 --- a/graph/src/env/store.rs +++ b/graph/src/env/store.rs @@ -91,6 +91,12 @@ pub struct EnvVarsStore { /// least 1 pub batch_workers: usize, + /// How long to wait to get an additional connection for a batch worker. + /// This should just be big enough to allow the connection pool to + /// establish a connection. Set by `GRAPH_STORE_BATCH_WORKER_WAIT`. + /// Value is in ms and defaults to 2000ms + pub batch_worker_wait: Duration, + /// Prune tables where we will remove at least this fraction of entity /// versions by rebuilding the table. Set by /// `GRAPH_STORE_HISTORY_REBUILD_THRESHOLD`. 
The default is 0.5 @@ -182,6 +188,7 @@ impl TryFrom for EnvVarsStore { batch_target_duration: Duration::from_secs(x.batch_target_duration_in_secs), batch_timeout: x.batch_timeout_in_secs.map(Duration::from_secs), batch_workers: x.batch_workers, + batch_worker_wait: Duration::from_millis(x.batch_worker_wait), rebuild_threshold: x.rebuild_threshold.0, delete_threshold: x.delete_threshold.0, history_slack_factor: x.history_slack_factor.0, @@ -251,6 +258,8 @@ pub struct InnerStore { batch_timeout_in_secs: Option, #[envconfig(from = "GRAPH_STORE_BATCH_WORKERS", default = "1")] batch_workers: usize, + #[envconfig(from = "GRAPH_STORE_BATCH_WORKER_WAIT", default = "2000")] + batch_worker_wait: u64, #[envconfig(from = "GRAPH_STORE_HISTORY_REBUILD_THRESHOLD", default = "0.5")] rebuild_threshold: ZeroToOneF64, #[envconfig(from = "GRAPH_STORE_HISTORY_DELETE_THRESHOLD", default = "0.05")] diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index f710fd2316d..782c2a57489 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -529,8 +529,9 @@ impl ConnectionPool { pub fn try_get_fdw( &self, logger: &Logger, + timeout: Duration, ) -> Result>>, StoreError> { - self.get_ready()?.try_get_fdw(logger) + self.get_ready()?.try_get_fdw(logger, timeout) } pub fn connection_detail(&self) -> Result { @@ -1034,12 +1035,22 @@ impl PoolInner { } } - /// Get a connection from the fdw pool if one is available + /// Get a connection from the fdw pool if one is available. We wait for + /// `timeout` for a connection which should be set just big enough to + /// allow establishing a connection pub fn try_get_fdw( &self, logger: &Logger, + timeout: Duration, ) -> Result>>, StoreError> { - Ok(self.fdw_pool(logger)?.try_get()) + // Any error trying to get a connection is treated as "couldn't get + // a connection in time". If there is a serious error with the + // database, e.g., because it's not available, the next database + // operation will run into it and report it. + self.fdw_pool(logger)? + .get_timeout(timeout) + .map(|conn| Some(conn)) + .or_else(|_| Ok(None)) } pub fn connection_detail(&self) -> Result { diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 55fed5cde2f..6e21b019f3d 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -914,7 +914,10 @@ impl Connection { ) -> Result>>>, StoreError> { // It's important that we get the connection before the table since // we remove the table from the state and could drop it otherwise - let Some(conn) = self.pool.try_get_fdw(&self.logger)? else { + let Some(conn) = self + .pool + .try_get_fdw(&self.logger, ENV_VARS.store.batch_worker_wait)? 
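The short wait is implemented with the connection pool's `get_timeout`; the caller only cares whether a connection showed up in time, not why it did not. A stripped-down sketch of that mapping with a plain diesel/r2d2 pool (not graph-node's own pool wrapper):

    use diesel::r2d2::{ConnectionManager, Pool, PooledConnection};
    use diesel::PgConnection;
    use std::time::Duration;

    type PgPool = Pool<ConnectionManager<PgConnection>>;
    type PgPooled = PooledConnection<ConnectionManager<PgConnection>>;

    // Sketch: "no fdw connection within the wait window" becomes None, so the
    // caller simply skips starting an extra copy worker instead of failing.
    fn try_checkout(pool: &PgPool, wait: Duration) -> Option<PgPooled> {
        pool.get_timeout(wait).ok()
    }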
+ else { return Ok(None); }; let Some(table) = state.unfinished.pop() else { From 5d7e4cebc46488e7b8faa3d92f33545d3c707e7b Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 31 Mar 2025 19:32:48 -0700 Subject: [PATCH 047/160] store: Sort unfinished tables in CopyState That makes the order in which tables are copied predictable --- store/postgres/src/copy.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 6e21b019f3d..25792bba04e 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -195,7 +195,9 @@ impl CopyState { target_block: BlockPtr, ) -> Result { let tables = TableState::load(conn, src.as_ref(), dst.as_ref())?; - let (finished, unfinished) = tables.into_iter().partition(|table| table.finished()); + let (finished, mut unfinished): (Vec<_>, Vec<_>) = + tables.into_iter().partition(|table| table.finished()); + unfinished.sort_by_key(|table| table.dst.object.to_string()); Ok(CopyState { src, dst, From 36ad6a24a2456fa5504f14b20dd59c0cffae2b40 Mon Sep 17 00:00:00 2001 From: Filipe Azevedo Date: Tue, 1 Apr 2025 20:13:47 +0100 Subject: [PATCH 048/160] fix firehose tls (#5923) --- Cargo.toml | 9 ++++++++- graph/src/firehose/endpoints.rs | 11 ++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a8193c3f0ed..b938992bc30 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,7 +47,14 @@ chrono = "0.4.38" bs58 = "0.5.1" clap = { version = "4.5.4", features = ["derive", "env"] } derivative = "2.2.0" -diesel = { version = "2.2.7", features = ["postgres", "serde_json", "numeric", "r2d2", "chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes"] } +diesel = { version = "2.2.7", features = [ + "postgres", + "serde_json", + "numeric", + "r2d2", + "chrono", + "i-implement-a-third-party-backend-and-opt-into-breaking-changes", +] } diesel-derive-enum = { version = "2.1.0", features = ["postgres"] } diesel-dynamic-schema = { version = "0.2.3", features = ["postgres"] } diesel_derives = "2.2.3" diff --git a/graph/src/firehose/endpoints.rs b/graph/src/firehose/endpoints.rs index 0ec95c3e2c5..448eb845496 100644 --- a/graph/src/firehose/endpoints.rs +++ b/graph/src/firehose/endpoints.rs @@ -194,9 +194,14 @@ impl FirehoseEndpoint { let endpoint_builder = match uri.scheme().unwrap_or(&Scheme::HTTP).as_str() { "http" => Channel::builder(uri), - "https" => Channel::builder(uri) - .tls_config(ClientTlsConfig::new()) - .expect("TLS config on this host is invalid"), + "https" => { + let mut tls = ClientTlsConfig::new(); + tls = tls.with_native_roots(); + + Channel::builder(uri) + .tls_config(tls) + .expect("TLS config on this host is invalid") + } _ => panic!("invalid uri scheme for firehose endpoint"), }; From 7478019bc49561267b0c567caea76e1213f1f1a0 Mon Sep 17 00:00:00 2001 From: Zoran Cvetkov <36600146+zorancv@users.noreply.github.com> Date: Wed, 2 Apr 2025 19:48:04 +0300 Subject: [PATCH 049/160] Hash legacy hash value when grafting from pre 0.0.6 spec_version --- Cargo.lock | 1 + graph/Cargo.toml | 1 + .../subgraph/proof_of_indexing/online.rs | 14 +++++++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a7ea378f9e3..33815c70807 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1872,6 +1872,7 @@ dependencies = [ "serde_plain", "serde_regex", "serde_yaml", + "sha2", "slog", "slog-async", "slog-envlogger", diff --git a/graph/Cargo.toml b/graph/Cargo.toml index 163838f5d00..dc4bd6e42e9 100644 --- 
a/graph/Cargo.toml +++ b/graph/Cargo.toml @@ -48,6 +48,7 @@ serde_derive = { workspace = true } serde_json = { workspace = true } serde_regex = { workspace = true } serde_yaml = { workspace = true } +sha2 = "0.10.8" slog = { version = "2.7.0", features = [ "release_max_level_trace", "max_level_trace", diff --git a/graph/src/components/subgraph/proof_of_indexing/online.rs b/graph/src/components/subgraph/proof_of_indexing/online.rs index d47f08b0a8f..ebf7a65e2f9 100644 --- a/graph/src/components/subgraph/proof_of_indexing/online.rs +++ b/graph/src/components/subgraph/proof_of_indexing/online.rs @@ -9,6 +9,7 @@ use crate::{ prelude::{debug, BlockNumber, DeploymentHash, Logger, ENV_VARS}, util::stable_hash_glue::AsBytes, }; +use sha2::{Digest, Sha256}; use stable_hash::{fast::FastStableHasher, FieldAddress, StableHash, StableHasher}; use stable_hash_legacy::crypto::{Blake3SeqNo, SetHasher}; use stable_hash_legacy::prelude::{ @@ -31,6 +32,8 @@ enum Hashers { Legacy(SetHasher), } +const STABLE_HASH_LEN: usize = 32; + impl Hashers { fn new(version: ProofOfIndexingVersion) -> Self { match version { @@ -132,9 +135,14 @@ impl BlockEventStream { } Hashers::Fast(mut digest) => { if let Some(prev) = prev { - let prev = prev - .try_into() - .expect("Expected valid fast stable hash representation"); + let prev = if prev.len() == STABLE_HASH_LEN { + prev.try_into() + .expect("Expected valid fast stable hash representation") + } else { + let mut hasher = Sha256::new(); + hasher.update(prev); + hasher.finalize().into() + }; let prev = FastStableHasher::from_bytes(prev); digest.mixin(&prev); } From e51dd2478699138fd9538b5d8e5a20c1be43b62d Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 1 Apr 2025 09:49:18 -0700 Subject: [PATCH 050/160] store: Address corner case in VidBatcher --- store/postgres/src/vid_batcher.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/store/postgres/src/vid_batcher.rs b/store/postgres/src/vid_batcher.rs index ef5948efd06..b8bbaa91113 100644 --- a/store/postgres/src/vid_batcher.rs +++ b/store/postgres/src/vid_batcher.rs @@ -258,7 +258,10 @@ impl VidRange { } pub fn is_empty(&self) -> bool { - self.max == -1 + // min > max can happen when we restart a copy job that has finished + // some tables. 
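The branch above only needs some deterministic 32-byte value when the stored digest predates the current format; hashing the old bytes down with SHA-256 gives exactly that. A self-contained sketch of the normalization using the same `sha2` crate:

    use sha2::{Digest, Sha256};

    // Sketch: return the digest unchanged if it is already 32 bytes, otherwise
    // hash it so that legacy (differently sized) digests still mix in cleanly.
    fn to_32_bytes(prev: &[u8]) -> [u8; 32] {
        match <[u8; 32]>::try_from(prev) {
            Ok(bytes) => bytes,
            Err(_) => Sha256::digest(prev).into(),
        }
    }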
For those, min (the next_vid) will be larger than + // max (the target_vid) + self.max == -1 || self.min > self.max } pub fn size(&self) -> usize { From 197d3d1150a44b2a40a2ab4073d3dd526d6a276b Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 1 Apr 2025 09:57:21 -0700 Subject: [PATCH 051/160] store: Never set start of VidBatcher beyond max_vid+1 --- store/postgres/src/vid_batcher.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/store/postgres/src/vid_batcher.rs b/store/postgres/src/vid_batcher.rs index b8bbaa91113..78abf84529e 100644 --- a/store/postgres/src/vid_batcher.rs +++ b/store/postgres/src/vid_batcher.rs @@ -226,7 +226,8 @@ impl VidBatcher { let duration = self.step_timer.elapsed(); let batch_size = self.batch_size.adapt(duration); - self.start = self.end + 1; + // We can't possibly copy farther than `max_vid` + self.start = (self.end + 1).min(self.max_vid + 1); self.end = ogive.next_point(self.start, batch_size as usize)?; Ok((duration, Some(res))) From a9844a6c5c5909fc293f07390ffe6d81a114d08c Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 1 Apr 2025 10:28:13 -0700 Subject: [PATCH 052/160] store: Do not mark database unavailable for failed try_get_fdw --- store/postgres/src/connection_pool.rs | 40 ++++++++++++++++++++++----- store/postgres/src/copy.rs | 8 +++--- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index 782c2a57489..31dfdfd8eb2 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -347,12 +347,14 @@ impl PoolName { #[derive(Clone)] struct PoolStateTracker { available: Arc, + ignore_timeout: Arc, } impl PoolStateTracker { fn new() -> Self { Self { available: Arc::new(AtomicBool::new(true)), + ignore_timeout: Arc::new(AtomicBool::new(false)), } } @@ -367,6 +369,20 @@ impl PoolStateTracker { fn is_available(&self) -> bool { self.available.load(Ordering::Relaxed) } + + fn timeout_is_ignored(&self) -> bool { + self.ignore_timeout.load(Ordering::Relaxed) + } + + fn ignore_timeout(&self, f: F) -> R + where + F: FnOnce() -> R, + { + self.ignore_timeout.store(true, Ordering::Relaxed); + let res = f(); + self.ignore_timeout.store(false, Ordering::Relaxed); + res + } } impl ConnectionPool { @@ -530,8 +546,12 @@ impl ConnectionPool { &self, logger: &Logger, timeout: Duration, - ) -> Result>>, StoreError> { - self.get_ready()?.try_get_fdw(logger, timeout) + ) -> Option>> { + let Ok(inner) = self.get_ready() else { + return None; + }; + self.state_tracker + .ignore_timeout(|| inner.try_get_fdw(logger, timeout)) } pub fn connection_detail(&self) -> Result { @@ -740,6 +760,9 @@ impl HandleEvent for EventHandler { } fn handle_timeout(&self, event: e::TimeoutEvent) { + if self.state_tracker.timeout_is_ignored() { + return; + } self.add_conn_wait_time(event.timeout()); if self.state_tracker.is_available() { error!(self.logger, "Connection checkout timed out"; @@ -1042,15 +1065,18 @@ impl PoolInner { &self, logger: &Logger, timeout: Duration, - ) -> Result>>, StoreError> { + ) -> Option>> { // Any error trying to get a connection is treated as "couldn't get // a connection in time". If there is a serious error with the // database, e.g., because it's not available, the next database // operation will run into it and report it. - self.fdw_pool(logger)? 
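`ignore_timeout` is a scoped-flag pattern: a shared boolean is raised around one call so the pool's timeout handler can tell an expected, short fdw wait apart from a real checkout problem. Outside graph-node's types the pattern is just:

    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Arc;

    // Sketch: suppress a side effect (here: timeout logging/accounting) for the
    // duration of a single closure by toggling a shared flag around it.
    fn with_flag<R>(flag: &Arc<AtomicBool>, f: impl FnOnce() -> R) -> R {
        flag.store(true, Ordering::Relaxed);
        let res = f();
        flag.store(false, Ordering::Relaxed);
        res
    }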
- .get_timeout(timeout) - .map(|conn| Some(conn)) - .or_else(|_| Ok(None)) + let Ok(fdw_pool) = self.fdw_pool(logger) else { + return None; + }; + let Ok(conn) = fdw_pool.get_timeout(timeout) else { + return None; + }; + Some(conn) } pub fn connection_detail(&self) -> Result { diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 25792bba04e..6438f641eaa 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -125,7 +125,7 @@ pub fn is_source(conn: &mut PgConnection, site: &Site) -> Result Result>>>, StoreError> { let conn = self.conn.take().ok_or_else(|| { constraint_violation!( - "copy connection has been handed to background task but not returned yet" + "copy connection has been handed to background task but not returned yet (default worker)" ) })?; let Some(table) = state.unfinished.pop() else { @@ -918,7 +918,7 @@ impl Connection { // we remove the table from the state and could drop it otherwise let Some(conn) = self .pool - .try_get_fdw(&self.logger, ENV_VARS.store.batch_worker_wait)? + .try_get_fdw(&self.logger, ENV_VARS.store.batch_worker_wait) else { return Ok(None); }; From d2ef8f408bc778c20ca0d43120ad6fbc36e213a2 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 1 Apr 2025 10:50:10 -0700 Subject: [PATCH 053/160] store: Be more careful about exiting copy_data_internal early We need to be absolutely sure that when `copy_data_internal` is done, we have a connection in `self.conn` and therefore want to make it clear when we might exit early with an error --- store/postgres/src/copy.rs | 55 +++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 6438f641eaa..0307e10b0f5 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -890,20 +890,18 @@ impl Connection { &mut self, state: &mut CopyState, progress: &Arc, - ) -> Result>>>, StoreError> { - let conn = self.conn.take().ok_or_else(|| { - constraint_violation!( - "copy connection has been handed to background task but not returned yet (default worker)" - ) - })?; + ) -> Option>>> { + let Some(conn) = self.conn.take() else { + return None; + }; let Some(table) = state.unfinished.pop() else { - return Ok(None); + return None; }; let worker = CopyTableWorker::new(conn, table); - Ok(Some(Box::pin( + Some(Box::pin( worker.run(self.logger.cheap_clone(), progress.cheap_clone()), - ))) + )) } /// Opportunistically create an extra worker if we have more tables to @@ -913,23 +911,44 @@ impl Connection { &mut self, state: &mut CopyState, progress: &Arc, - ) -> Result>>>, StoreError> { + ) -> Option>>> { // It's important that we get the connection before the table since // we remove the table from the state and could drop it otherwise let Some(conn) = self .pool .try_get_fdw(&self.logger, ENV_VARS.store.batch_worker_wait) else { - return Ok(None); + return None; }; let Some(table) = state.unfinished.pop() else { - return Ok(None); + return None; }; let worker = CopyTableWorker::new(conn, table); - Ok(Some(Box::pin( + Some(Box::pin( worker.run(self.logger.cheap_clone(), progress.cheap_clone()), - ))) + )) + } + + /// Check that we can make progress, i.e., that we have at least one + /// worker that copies as long as there are unfinished tables. 
This is a + /// safety check to guard against `copy_data_internal` looping forever + /// because of some internal inconsistency + fn assert_progress(&self, num_workers: usize, state: &CopyState) -> Result<(), StoreError> { + if num_workers == 0 && !state.unfinished.is_empty() { + // Something bad happened. We should have at least one + // worker if there are still tables to copy + if self.conn.is_none() { + return Err(constraint_violation!( + "copy connection has been handed to background task but not returned yet (copy_data_internal)" + )); + } else { + return Err(constraint_violation!( + "no workers left but still tables to copy" + )); + } + } + Ok(()) } pub async fn copy_data_internal( @@ -949,24 +968,28 @@ impl Connection { // connection in `self.conn`. If the fdw pool has idle connections // and there are more tables to be copied, we can start more // workers, up to `self.workers` many + // + // The loop has to be very careful about terminating early so that + // we do not ever leave the loop with `self.conn == None` let mut workers = Vec::new(); while !state.unfinished.is_empty() || !workers.is_empty() { // We usually add at least one job here, except if we are out of // tables to copy. In that case, we go through the `while` loop // every time one of the tables we are currently copying // finishes - if let Some(worker) = self.default_worker(&mut state, &progress)? { + if let Some(worker) = self.default_worker(&mut state, &progress) { workers.push(worker); } loop { if workers.len() >= self.workers { break; } - let Some(worker) = self.extra_worker(&mut state, &progress)? else { + let Some(worker) = self.extra_worker(&mut state, &progress) else { break; }; workers.push(worker); } + self.assert_progress(workers.len(), &state)?; let (worker, _idx, remaining) = select_all(workers).await; workers = remaining; From 9af403e0f808ca4d838163e96e2b52e8af4999fe Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 1 Apr 2025 11:27:46 -0700 Subject: [PATCH 054/160] store: Spawn copy workers on the blocking pool Otherwise, they don't really run in parallel --- store/postgres/src/copy.rs | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 0307e10b0f5..e2865fc6863 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -687,9 +687,18 @@ impl CopyTableWorker { } } - async fn run(mut self, logger: Logger, progress: Arc) -> Self { - self.result = self.run_inner(logger, &progress); - self + async fn run( + mut self, + logger: Logger, + progress: Arc, + ) -> Result { + let object = self.table.dst.object.cheap_clone(); + graph::spawn_blocking_allow_panic(move || { + self.result = self.run_inner(logger, &progress); + self + }) + .await + .map_err(|e| constraint_violation!("copy worker for {} panicked: {}", object, e)) } fn run_inner(&mut self, logger: Logger, progress: &CopyProgress) -> Result { @@ -890,7 +899,7 @@ impl Connection { &mut self, state: &mut CopyState, progress: &Arc, - ) -> Option>>> { + ) -> Option>>>> { let Some(conn) = self.conn.take() else { return None; }; @@ -911,7 +920,7 @@ impl Connection { &mut self, state: &mut CopyState, progress: &Arc, - ) -> Option>>> { + ) -> Option>>>> { // It's important that we get the connection before the table since // we remove the table from the state and could drop it otherwise let Some(conn) = self @@ -990,7 +999,18 @@ impl Connection { workers.push(worker); } self.assert_progress(workers.len(), &state)?; - let (worker, 
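Moving the per-table copy onto the blocking pool is what actually lets several workers run in parallel; the async wrapper just moves the worker into the blocking task and gets it back when the synchronous work is done. A generic sketch of that hand-off with plain tokio (simplified error handling; not the actual graph helper):

    use tokio::task::JoinError;

    // Sketch: run synchronous, potentially long-running work off the async
    // executor, then hand ownership of the worker (and its result) back.
    async fn run_off_executor<W, R>(
        worker: W,
        work: impl FnOnce(&W) -> R + Send + 'static,
    ) -> Result<(W, R), JoinError>
    where
        W: Send + 'static,
        R: Send + 'static,
    {
        tokio::task::spawn_blocking(move || {
            let res = work(&worker);
            (worker, res)
        })
        .await
    }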
_idx, remaining) = select_all(workers).await; + let (result, _idx, remaining) = select_all(workers).await; + + let worker = match result { + Ok(worker) => worker, + Err(e) => { + // This is a panic in the background task. We need to + // cancel all other tasks and return the error + progress.cancel(); + return Err(e); + } + }; + workers = remaining; // Put the connection back into self.conn so that we can use it From 5b8ba9dab37eadf4ae5d14ee8defde7c8c69c222 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 1 Apr 2025 13:52:46 -0700 Subject: [PATCH 055/160] store: Make sure we release the copy lock even if a worker panics --- store/postgres/src/copy.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index e2865fc6863..3288e6c9c56 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -1104,10 +1104,23 @@ impl Connection { &self.logger, "Obtaining copy lock (this might take a long time if another process is still copying)" ); + let dst_site = self.dst.site.cheap_clone(); self.transaction(|conn| advisory_lock::lock_copying(conn, &dst_site))?; + let res = self.copy_data_internal(index_list).await; + + if self.conn.is_none() { + // A background worker panicked and left us without our + // dedicated connection, but we still need to release the copy + // lock; get a normal connection, not from the fdw pool for that + // as that will be much less contended. We won't be holding on + // to the connection for long as `res` will be an error and we + // will abort starting this subgraph + self.conn = Some(self.pool.get()?); + } self.transaction(|conn| advisory_lock::unlock_copying(conn, &dst_site))?; + if matches!(res, Ok(Status::Cancelled)) { warn!(&self.logger, "Copying was cancelled and is incomplete"); } From 93d78620375c3299806a6814b29f102d79ae42af Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 1 Apr 2025 18:07:33 -0700 Subject: [PATCH 056/160] store: Better error when a copy batch times out --- store/postgres/src/copy.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 3288e6c9c56..fbf74d251cf 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -748,10 +748,19 @@ impl CopyTableWorker { break status; } Err(StoreError::StatementTimeout) => { + let timeout = ENV_VARS + .store + .batch_timeout + .map(|t| t.as_secs().to_string()) + .unwrap_or_else(|| "unlimted".to_string()); warn!( - logger, - "Current batch took longer than GRAPH_STORE_BATCH_TIMEOUT seconds. Retrying with a smaller batch size." - ); + logger, + "Current batch timed out. 
Retrying with a smaller batch size."; + "timeout_s" => timeout, + "table" => self.table.dst.qualified_name.as_str(), + "current_vid" => self.table.batcher.next_vid(), + "current_batch_size" => self.table.batcher.batch_size(), + ); } Err(e) => { return Err(e); From b04c3769a13ff9d2a4313d87f72b91d1c2ca0233 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 1 Apr 2025 18:10:49 -0700 Subject: [PATCH 057/160] store: Reset the end of VidBatcher when changing the size --- store/postgres/src/vid_batcher.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/store/postgres/src/vid_batcher.rs b/store/postgres/src/vid_batcher.rs index 78abf84529e..93197b5a85d 100644 --- a/store/postgres/src/vid_batcher.rs +++ b/store/postgres/src/vid_batcher.rs @@ -237,6 +237,10 @@ impl VidBatcher { pub(crate) fn set_batch_size(&mut self, size: usize) { self.batch_size.size = size as i64; + self.end = match &self.ogive { + Some(ogive) => ogive.next_point(self.start, size as usize).unwrap(), + None => self.start + size as i64, + }; } } From 02a96c2ee790bda9d3bb31ed7a2db777f82b7895 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 1 Apr 2025 20:54:22 -0700 Subject: [PATCH 058/160] graph: Reduce default fdw_fetch_size to 1000 10,000 seems too big and actually slows things down --- graph/src/env/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graph/src/env/store.rs b/graph/src/env/store.rs index 661d0356446..3ecf92e0388 100644 --- a/graph/src/env/store.rs +++ b/graph/src/env/store.rs @@ -280,7 +280,7 @@ pub struct InnerStore { last_rollup_from_poi: bool, #[envconfig(from = "GRAPH_STORE_INSERT_EXTRA_COLS", default = "0")] insert_extra_cols: usize, - #[envconfig(from = "GRAPH_STORE_FDW_FETCH_SIZE", default = "10000")] + #[envconfig(from = "GRAPH_STORE_FDW_FETCH_SIZE", default = "1000")] fdw_fetch_size: usize, } From bbb7478477277a6102483799596efc8cbcbdac81 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 2 Apr 2025 11:37:29 -0700 Subject: [PATCH 059/160] store: If copying encounters an error, wait for all workers to finish --- store/postgres/src/copy.rs | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index fbf74d251cf..0d9c27cf6b7 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -37,6 +37,7 @@ use graph::{ info, lazy_static, o, warn, BlockNumber, BlockPtr, CheapClone, Logger, StoreError, ENV_VARS, }, schema::EntityType, + slog::error, }; use itertools::Itertools; @@ -969,6 +970,33 @@ impl Connection { Ok(()) } + /// Wait for all workers to finish. 
This is called when we a worker has + /// failed with an error that forces us to abort copying + async fn cancel_workers( + &mut self, + progress: Arc, + mut workers: Vec>>>>, + ) { + progress.cancel(); + error!( + self.logger, + "copying encountered an error; waiting for all workers to finish" + ); + while !workers.is_empty() { + let (result, _, remaining) = select_all(workers).await; + workers = remaining; + match result { + Ok(worker) => { + self.conn = Some(worker.conn); + } + Err(e) => { + /* Ignore; we had an error previously */ + error!(self.logger, "copy worker panicked: {}", e); + } + } + } + } + pub async fn copy_data_internal( &mut self, index_list: IndexList, @@ -1009,30 +1037,30 @@ impl Connection { } self.assert_progress(workers.len(), &state)?; let (result, _idx, remaining) = select_all(workers).await; + workers = remaining; let worker = match result { Ok(worker) => worker, Err(e) => { // This is a panic in the background task. We need to // cancel all other tasks and return the error - progress.cancel(); + self.cancel_workers(progress, workers).await; return Err(e); } }; - workers = remaining; - // Put the connection back into self.conn so that we can use it // in the next iteration. self.conn = Some(worker.conn); state.finished.push(worker.table); if worker.result.is_err() { - progress.cancel(); + self.cancel_workers(progress, workers).await; return worker.result; } if progress.is_cancelled() { + self.cancel_workers(progress, workers).await; return Ok(Status::Cancelled); } } From f56900ed79253ea9cb97a992010112975c5829b1 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 2 Apr 2025 11:45:25 -0700 Subject: [PATCH 060/160] store: Clarify the logic of the control loop in copy_data_internal --- store/postgres/src/copy.rs | 48 ++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 0d9c27cf6b7..b5db91d40da 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -1035,34 +1035,48 @@ impl Connection { }; workers.push(worker); } + self.assert_progress(workers.len(), &state)?; let (result, _idx, remaining) = select_all(workers).await; workers = remaining; - let worker = match result { - Ok(worker) => worker, + // Analyze `result` and take another trip through the loop if + // everything is ok; wait for pending workers and return if + // there was an error or if copying was cancelled. + match result { Err(e) => { // This is a panic in the background task. We need to // cancel all other tasks and return the error self.cancel_workers(progress, workers).await; return Err(e); } - }; - - // Put the connection back into self.conn so that we can use it - // in the next iteration. - self.conn = Some(worker.conn); - state.finished.push(worker.table); - - if worker.result.is_err() { - self.cancel_workers(progress, workers).await; - return worker.result; - } + Ok(worker) => { + // Put the connection back into self.conn so that we can use it + // in the next iteration. 
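Waiting for the remaining futures instead of dropping them is what guarantees every borrowed connection makes it back before the original error is reported. Reduced to its core, the drain loop is:

    use futures::future::select_all;
    use std::future::Future;

    // Sketch: await every outstanding worker, ignoring individual results, so
    // that resources owned by the tasks are returned before we bail out.
    async fn drain_all<F: Future + Unpin>(mut pending: Vec<F>) {
        while !pending.is_empty() {
            let (_res, _idx, rest) = select_all(pending).await;
            pending = rest;
        }
    }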
+ self.conn = Some(worker.conn); - if progress.is_cancelled() { - self.cancel_workers(progress, workers).await; - return Ok(Status::Cancelled); - } + match (worker.result, progress.is_cancelled()) { + (Ok(Status::Finished), false) => { + // The worker finished successfully, and nothing was + // cancelled; take another trip through the loop + state.finished.push(worker.table); + } + (Ok(Status::Finished), true) => { + state.finished.push(worker.table); + self.cancel_workers(progress, workers).await; + return Ok(Status::Cancelled); + } + (Ok(Status::Cancelled), _) => { + self.cancel_workers(progress, workers).await; + return Ok(Status::Cancelled); + } + (Err(e), _) => { + self.cancel_workers(progress, workers).await; + return Err(e); + } + } + } + }; } debug_assert!(self.conn.is_some()); From 843278aa70991063c25c63378f31bcc64c433754 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 2 Apr 2025 12:15:34 -0700 Subject: [PATCH 061/160] store: Take ownership of self in CopyConnection.copy_data This ensures that `copy_data` can't be called more than once on any instance; when copying encounters an error, it might leave the CopyConnection in an inconsistent state and should therefore not be reused Also make `copy_data_internal` private; it should never be called from the outside --- store/postgres/src/copy.rs | 7 ++----- store/postgres/src/deployment_store.rs | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index b5db91d40da..9e9ba187c6a 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -997,10 +997,7 @@ impl Connection { } } - pub async fn copy_data_internal( - &mut self, - index_list: IndexList, - ) -> Result { + async fn copy_data_internal(&mut self, index_list: IndexList) -> Result { let src = self.src.clone(); let dst = self.dst.clone(); let target_block = self.target_block.clone(); @@ -1142,7 +1139,7 @@ impl Connection { /// lower(v1.block_range) => v2.vid > v1.vid` and we can therefore stop /// the copying of each table as soon as we hit `max_vid = max { v.vid | /// lower(v.block_range) <= target_block.number }`. - pub async fn copy_data(&mut self, index_list: IndexList) -> Result { + pub async fn copy_data(mut self, index_list: IndexList) -> Result { // We require sole access to the destination site, and that we get a // consistent view of what has been copied so far. In general, that // is always true. 
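Taking `self` by value turns "this must not be called twice" from a comment into a compiler-enforced rule; after the call the instance is moved and simply gone. A tiny illustration of the idiom (names made up):

    // Sketch: a one-shot operation. Because `run` consumes `self`, calling it a
    // second time on the same value is a compile-time error, not a runtime bug.
    struct OneShot {
        payload: String,
    }

    impl OneShot {
        fn run(self) -> String {
            // ... work that may leave internal state unusable on failure ...
            self.payload
        }
    }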
It can happen though that this function runs when diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index 248ba5a5473..96dd5507f3e 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -1519,7 +1519,7 @@ impl DeploymentStore { // as adding new tables in `self`; we only need to check that tables // that actually need to be copied from the source are compatible // with the corresponding tables in `self` - let mut copy_conn = crate::copy::Connection::new( + let copy_conn = crate::copy::Connection::new( logger, self.pool.clone(), src.clone(), From 9a13debfc56edd63602295609baf93b6f7c2572a Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 3 Apr 2025 11:58:44 -0700 Subject: [PATCH 062/160] store: Check that we map all the tables that MirrorJob needs --- store/postgres/src/connection_pool.rs | 30 ++++++++++++++++++++++++- store/postgres/src/primary.rs | 32 ++++++++++++++------------- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index 31dfdfd8eb2..ace5cddd719 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -33,7 +33,7 @@ use std::{collections::HashMap, sync::RwLock}; use postgres::config::{Config, Host}; -use crate::primary::{self, NAMESPACE_PUBLIC}; +use crate::primary::{self, Mirror, NAMESPACE_PUBLIC}; use crate::{advisory_lock, catalog}; use crate::{Shard, PRIMARY_SHARD}; @@ -54,10 +54,36 @@ const SHARDED_TABLES: [(&str, &[&str]); 2] = [ "subgraph_error", "subgraph_manifest", "table_stats", + "subgraph", + "subgraph_version", + "subgraph_deployment_assignment", ], ), ]; +/// Make sure that the tables that `jobs::MirrorJob` wants to mirror are +/// actually mapped into the various shards. A failure here is simply a +/// coding mistake +fn check_mirrored_tables() { + for table in Mirror::PUBLIC_TABLES { + if !PRIMARY_TABLES.contains(&table) { + panic!("table {} is not in PRIMARY_TABLES", table); + } + } + + let subgraphs_tables = *SHARDED_TABLES + .iter() + .find(|(nsp, _)| *nsp == "subgraphs") + .map(|(_, tables)| tables) + .unwrap(); + + for table in Mirror::SUBGRAPHS_TABLES { + if !subgraphs_tables.contains(&table) { + panic!("table {} is not in SHARDED_TABLES[subgraphs]", table); + } + } +} + pub struct ForeignServer { pub name: String, pub shard: Shard, @@ -817,6 +843,8 @@ impl PoolInner { registry: Arc, state_tracker: PoolStateTracker, ) -> PoolInner { + check_mirrored_tables(); + let logger_store = logger.new(o!("component" => "Store")); let logger_pool = logger.new(o!("component" => "ConnectionPool")); let const_labels = { diff --git a/store/postgres/src/primary.rs b/store/postgres/src/primary.rs index ab6be9ee0ba..5ec81dcbd61 100644 --- a/store/postgres/src/primary.rs +++ b/store/postgres/src/primary.rs @@ -1839,6 +1839,20 @@ pub struct Mirror { } impl Mirror { + // The tables that we mirror + // + // `chains` needs to be mirrored before `deployment_schemas` because + // of the fk constraint on `deployment_schemas.network`. 
We don't + // care much about mirroring `active_copies` but it has a fk + // constraint on `deployment_schemas` and is tiny, therefore it's + // easiest to just mirror it + pub(crate) const PUBLIC_TABLES: [&str; 3] = ["chains", "deployment_schemas", "active_copies"]; + pub(crate) const SUBGRAPHS_TABLES: [&str; 3] = [ + "subgraph_deployment_assignment", + "subgraph", + "subgraph_version", + ]; + pub fn new(pools: &HashMap) -> Mirror { let primary = pools .get(&PRIMARY_SHARD) @@ -1895,18 +1909,6 @@ impl Mirror { conn: &mut PgConnection, handle: &CancelHandle, ) -> Result<(), StoreError> { - // `chains` needs to be mirrored before `deployment_schemas` because - // of the fk constraint on `deployment_schemas.network`. We don't - // care much about mirroring `active_copies` but it has a fk - // constraint on `deployment_schemas` and is tiny, therefore it's - // easiest to just mirror it - const PUBLIC_TABLES: [&str; 3] = ["chains", "deployment_schemas", "active_copies"]; - const SUBGRAPHS_TABLES: [&str; 3] = [ - "subgraph_deployment_assignment", - "subgraph", - "subgraph_version", - ]; - fn run_query(conn: &mut PgConnection, query: String) -> Result<(), StoreError> { conn.batch_execute(&query).map_err(StoreError::from) } @@ -1938,11 +1940,11 @@ impl Mirror { // Truncate all tables at once, otherwise truncation can fail // because of foreign key constraints - let tables = PUBLIC_TABLES + let tables = Self::PUBLIC_TABLES .iter() .map(|name| (NAMESPACE_PUBLIC, name)) .chain( - SUBGRAPHS_TABLES + Self::SUBGRAPHS_TABLES .iter() .map(|name| (NAMESPACE_SUBGRAPHS, name)), ) @@ -1953,7 +1955,7 @@ impl Mirror { check_cancel()?; // Repopulate `PUBLIC_TABLES` by copying their data wholesale - for table_name in PUBLIC_TABLES { + for table_name in Self::PUBLIC_TABLES { copy_table( conn, ForeignServer::PRIMARY_PUBLIC, From c765ce7a09dd93249a8541503f7c55f0a686dfc6 Mon Sep 17 00:00:00 2001 From: Krishnanand V P <44740264+incrypto32@users.noreply.github.com> Date: Mon, 7 Apr 2025 16:08:40 +0400 Subject: [PATCH 063/160] graphman: disable load management when using graphman (#5875) --- node/src/bin/manager.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/node/src/bin/manager.rs b/node/src/bin/manager.rs index 20cf93d94df..5142a2ab939 100644 --- a/node/src/bin/manager.rs +++ b/node/src/bin/manager.rs @@ -34,6 +34,7 @@ use graph_store_postgres::{ }; use itertools::Itertools; use lazy_static::lazy_static; +use std::env; use std::str::FromStr; use std::{collections::HashMap, num::ParseIntError, sync::Arc, time::Duration}; const VERSION_LABEL_KEY: &str = "version"; @@ -1030,6 +1031,9 @@ impl Context { #[tokio::main] async fn main() -> anyhow::Result<()> { + // Disable load management for graphman commands + env::set_var("GRAPH_LOAD_THRESHOLD", "0"); + let opt = Opt::parse(); Terminal::set_color_preference(&opt.color); From 8ddd068480c4754a86c6e73857204dffe1bb0232 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 4 Apr 2025 10:21:48 -0700 Subject: [PATCH 064/160] graph: Make metrics registration less noisy --- graph/src/components/metrics/registry.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/graph/src/components/metrics/registry.rs b/graph/src/components/metrics/registry.rs index e010d3a89fa..93cf51b3bd1 100644 --- a/graph/src/components/metrics/registry.rs +++ b/graph/src/components/metrics/registry.rs @@ -3,7 +3,7 @@ use std::sync::{Arc, RwLock}; use prometheus::IntGauge; use prometheus::{labels, Histogram, IntCounterVec}; -use slog::info; +use slog::debug; use 
crate::components::metrics::{counter_with_labels, gauge_with_labels}; use crate::prelude::Collector; @@ -133,7 +133,7 @@ impl MetricsRegistry { let mut result = self.registry.register(collector.clone()); if matches!(result, Err(PrometheusError::AlreadyReg)) { - info!(logger, "Resolving duplicate metric registration"); + debug!(logger, "Resolving duplicate metric registration"); // Since the current metric is a duplicate, // we can use it to unregister the previous registration. @@ -144,7 +144,6 @@ impl MetricsRegistry { match result { Ok(()) => { - info!(logger, "Successfully registered a new metric"); self.registered_metrics.inc(); } Err(err) => { From 13cce5330b19dc9488458342fb3c8a45c3dcb37f Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 3 Apr 2025 09:53:41 -0700 Subject: [PATCH 065/160] store: Allow creating special Namespaces --- store/postgres/src/primary.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/store/postgres/src/primary.rs b/store/postgres/src/primary.rs index 5ec81dcbd61..f329ae4bba2 100644 --- a/store/postgres/src/primary.rs +++ b/store/postgres/src/primary.rs @@ -266,6 +266,13 @@ impl Namespace { Namespace(format!("prune{id}")) } + /// A namespace that is not a deployment namespace. This is used for + /// special namespaces we use. No checking is done on `s` and the caller + /// must ensure it's a valid namespace name + pub fn special(s: impl Into) -> Self { + Namespace(s.into()) + } + pub fn as_str(&self) -> &str { &self.0 } From 3eac30c4e594875b8c136ec4cf27fab0faf5c0e9 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 3 Apr 2025 14:45:41 -0700 Subject: [PATCH 066/160] store: Factor the locale check into a method --- store/postgres/src/connection_pool.rs | 35 +++++++++++++++++---------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index ace5cddd719..05868815c02 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -1169,24 +1169,33 @@ impl PoolInner { .and_then(|()| pool.create_cross_shard_views(coord.servers.as_ref())); result.unwrap_or_else(|err| die(&pool.logger, "migrations failed", &err)); - // Locale check - if let Err(msg) = catalog::Locale::load(&mut conn)?.suitable() { - if &self.shard == &*PRIMARY_SHARD && primary::is_empty(&mut conn)? { - die( - &pool.logger, - "Database does not use C locale. \ - Please check the graph-node documentation for how to set up the database locale", - &msg, - ); - } else { - warn!(pool.logger, "{}.\nPlease check the graph-node documentation for how to set up the database locale", msg); - } - } + self.locale_check(&pool.logger, conn)?; debug!(&pool.logger, "Setup finished"; "setup_time_s" => start.elapsed().as_secs()); Ok(()) } + fn locale_check( + &self, + logger: &Logger, + mut conn: PooledConnection>, + ) -> Result<(), StoreError> { + Ok( + if let Err(msg) = catalog::Locale::load(&mut conn)?.suitable() { + if &self.shard == &*PRIMARY_SHARD && primary::is_empty(&mut conn)? { + const MSG: &str = + "Database does not use C locale. 
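For background, a locale check of this kind boils down to reading the database's collation and insisting on `C` (or `POSIX`); a standalone diesel sketch, with the query and struct names chosen here purely for illustration:

    use diesel::sql_types::Text;
    use diesel::{sql_query, PgConnection, RunQueryDsl};

    #[derive(diesel::QueryableByName)]
    struct Collation {
        #[diesel(sql_type = Text)]
        datcollate: String,
    }

    // Sketch: warn or fail unless the current database uses the C locale.
    fn check_c_locale(conn: &mut PgConnection) -> Result<(), String> {
        let row: Collation =
            sql_query("select datcollate from pg_database where datname = current_database()")
                .get_result(conn)
                .map_err(|e| e.to_string())?;
        match row.datcollate.as_str() {
            "C" | "POSIX" => Ok(()),
            other => Err(format!("database uses locale {other}, expected C")),
        }
    }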
\ + Please check the graph-node documentation for how to set up the database locale"; + + crit!(logger, "{}: {}", MSG, msg); + panic!("{}: {}", MSG, msg); + } else { + warn!(logger, "{}.\nPlease check the graph-node documentation for how to set up the database locale", msg); + } + }, + ) + } + pub(crate) async fn query_permit(&self) -> tokio::sync::OwnedSemaphorePermit { let start = Instant::now(); let permit = self.query_semaphore.cheap_clone().acquire_owned().await; From 034cba57a833fd4db619f5ca7120cd5d4e957e78 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 4 Apr 2025 10:22:18 -0700 Subject: [PATCH 067/160] graph: Allow returning values from task_spawn --- graph/src/task_spawn.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/graph/src/task_spawn.rs b/graph/src/task_spawn.rs index 09055ad5381..dd1477bb1c8 100644 --- a/graph/src/task_spawn.rs +++ b/graph/src/task_spawn.rs @@ -57,10 +57,11 @@ pub fn block_on(f: impl Future03) -> T { } /// Spawns a thread with access to the tokio runtime. Panics if the thread cannot be spawned. -pub fn spawn_thread( - name: impl Into, - f: impl 'static + FnOnce() + Send, -) -> std::thread::JoinHandle<()> { +pub fn spawn_thread(name: impl Into, f: F) -> std::thread::JoinHandle +where + F: 'static + FnOnce() -> R + Send, + R: 'static + Send, +{ let conf = std::thread::Builder::new().name(name.into()); let runtime = tokio::runtime::Handle::current(); conf.spawn(move || { From cbbd4e1680ec8e5089df6234dc8364400ad2ff31 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 4 Apr 2025 10:37:06 -0700 Subject: [PATCH 068/160] store: Remove dead code from connection_pool --- store/postgres/src/connection_pool.rs | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index 05868815c02..7b4bc1bafb6 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -580,16 +580,6 @@ impl ConnectionPool { .ignore_timeout(|| inner.try_get_fdw(logger, timeout)) } - pub fn connection_detail(&self) -> Result { - let pool = self.get_ready()?; - ForeignServer::new(pool.shard.clone(), &pool.postgres_url).map_err(|e| e.into()) - } - - /// Check that we can connect to the database - pub fn check(&self) -> bool { - true - } - /// Setup the database for this pool. This includes configuring foreign /// data wrappers for cross-shard communication, and running any pending /// schema migrations for this database. @@ -1027,20 +1017,6 @@ impl PoolInner { self.pool.get().map_err(|_| StoreError::DatabaseUnavailable) } - pub fn get_with_timeout_warning( - &self, - logger: &Logger, - ) -> Result>, StoreError> { - loop { - match self.pool.get_timeout(ENV_VARS.store.connection_timeout) { - Ok(conn) => return Ok(conn), - Err(e) => error!(logger, "Error checking out connection, retrying"; - "error" => brief_error_msg(&e), - ), - } - } - } - /// Get the pool for fdw connections. 
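Letting the helper return a value is just a matter of threading a generic result type through to the `JoinHandle`; the standard-library shape of the same signature looks like this (sketch, without the tokio runtime handle that the graph helper also captures):

    use std::thread::{Builder, JoinHandle};

    // Sketch: spawn a named thread and collect its result via join().
    fn spawn_named<F, R>(name: &str, f: F) -> JoinHandle<R>
    where
        F: FnOnce() -> R + Send + 'static,
        R: Send + 'static,
    {
        Builder::new()
            .name(name.to_string())
            .spawn(f)
            .expect("failed to spawn thread")
    }

    // let handle = spawn_named("database-setup", || 21 * 2);
    // assert_eq!(handle.join().unwrap(), 42);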
It is an error if none is configured fn fdw_pool( &self, From f162e2f97bbeaeba8a43de3b012e29e51bd27568 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 4 Apr 2025 21:29:21 -0700 Subject: [PATCH 069/160] store: Do not manage anything about pg_stat_statements It should be up to the operator if they use it or not, and when they want to reset it --- store/postgres/src/connection_pool.rs | 5 ----- tests/src/config.rs | 1 - 2 files changed, 6 deletions(-) diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index 7b4bc1bafb6..e00071ef138 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -1364,11 +1364,6 @@ fn migrate_schema(logger: &Logger, conn: &mut PgConnection) -> Result Date: Thu, 3 Apr 2025 09:31:51 -0700 Subject: [PATCH 070/160] store: Change db setup strategy to guard better against races The current database setup code was inerently racy when several nodes were starting up as it relied on piecemeal locking of individual steps. This change completely revamps the strategy we use: setup now takes a lock on the primary, so that only one node at a time will run the setup code. --- graph/src/components/store/err.rs | 2 +- node/src/store_builder.rs | 7 +- store/postgres/src/advisory_lock.rs | 34 ++- store/postgres/src/connection_pool.rs | 291 ++++++++++++++++++-------- 4 files changed, 227 insertions(+), 107 deletions(-) diff --git a/graph/src/components/store/err.rs b/graph/src/components/store/err.rs index 6af676f8e52..76be7c311ce 100644 --- a/graph/src/components/store/err.rs +++ b/graph/src/components/store/err.rs @@ -141,7 +141,7 @@ impl Clone for StoreError { } impl StoreError { - fn from_diesel_error(e: &DieselError) -> Option { + pub fn from_diesel_error(e: &DieselError) -> Option { const CONN_CLOSE: &str = "server closed the connection unexpectedly"; const STMT_TIMEOUT: &str = "canceling statement due to statement timeout"; let DieselError::DatabaseError(_, info) = e else { diff --git a/node/src/store_builder.rs b/node/src/store_builder.rs index 7fadf6b92c2..abaf59471fd 100644 --- a/node/src/store_builder.rs +++ b/node/src/store_builder.rs @@ -1,7 +1,6 @@ use std::iter::FromIterator; use std::{collections::HashMap, sync::Arc}; -use graph::futures03::future::join_all; use graph::prelude::{o, MetricsRegistry, NodeId}; use graph::url::Url; use graph::{ @@ -62,7 +61,7 @@ impl StoreBuilder { // attempt doesn't work for all of them because the database is // unavailable, they will try again later in the normal course of // using the pool - join_all(pools.values().map(|pool| pool.setup())).await; + coord.setup_all(logger).await; let chains = HashMap::from_iter(config.chains.chains.iter().map(|(name, chain)| { let shard = ShardName::new(chain.shard.to_string()) @@ -196,8 +195,8 @@ impl StoreBuilder { Arc::new(DieselStore::new(subgraph_store, block_store)) } - /// Create a connection pool for the main database of the primary shard - /// without connecting to all the other configured databases + /// Create a connection pool for the main (non-replica) database of a + /// shard pub fn main_pool( logger: &Logger, node: &NodeId, diff --git a/store/postgres/src/advisory_lock.rs b/store/postgres/src/advisory_lock.rs index bd60d34c634..85e2cf5a4ae 100644 --- a/store/postgres/src/advisory_lock.rs +++ b/store/postgres/src/advisory_lock.rs @@ -6,7 +6,7 @@ //! has more details on advisory locks. //! //! We use the following 64 bit locks: -//! * 1,2: to synchronize on migratons +//! 
* 1: to synchronize on migratons //! //! We use the following 2x 32-bit locks //! * 1, n: to lock copying of the deployment with id n in the destination @@ -69,17 +69,31 @@ const COPY: Scope = Scope { id: 1 }; const WRITE: Scope = Scope { id: 2 }; const PRUNE: Scope = Scope { id: 3 }; -/// Get a lock for running migrations. Blocks until we get the lock. -pub(crate) fn lock_migration(conn: &mut PgConnection) -> Result<(), StoreError> { - sql_query("select pg_advisory_lock(1)").execute(conn)?; +/// Block until we can get the migration lock, then run `f` and unlock when +/// it is done. This is used to make sure that only one node runs setup at a +/// time. +pub(crate) async fn with_migration_lock( + conn: &mut PgConnection, + f: F, +) -> Result +where + F: FnOnce(&mut PgConnection) -> Fut, + Fut: std::future::Future>, +{ + fn execute(conn: &mut PgConnection, query: &str, msg: &str) -> Result<(), StoreError> { + sql_query(query).execute(conn).map(|_| ()).map_err(|e| { + StoreError::from_diesel_error(&e) + .unwrap_or_else(|| StoreError::Unknown(anyhow::anyhow!("{}: {}", msg, e))) + }) + } - Ok(()) -} + const LOCK: &str = "select pg_advisory_lock(1)"; + const UNLOCK: &str = "select pg_advisory_unlock(1)"; -/// Release the migration lock. -pub(crate) fn unlock_migration(conn: &mut PgConnection) -> Result<(), StoreError> { - sql_query("select pg_advisory_unlock(1)").execute(conn)?; - Ok(()) + execute(conn, LOCK, "failed to acquire migration lock")?; + let res = f(conn).await; + execute(conn, UNLOCK, "failed to release migration lock")?; + res } /// Take the lock used to keep two copy operations to run simultaneously on diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index e00071ef138..171fb8dbbb6 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -10,6 +10,8 @@ use diesel_migrations::{EmbeddedMigrations, HarnessWithOutput}; use graph::cheap_clone::CheapClone; use graph::components::store::QueryPermit; use graph::constraint_violation; +use graph::futures03::future::join_all; +use graph::futures03::FutureExt as _; use graph::prelude::tokio::time::Instant; use graph::prelude::{tokio, MetricsRegistry}; use graph::slog::warn; @@ -33,8 +35,9 @@ use std::{collections::HashMap, sync::RwLock}; use postgres::config::{Config, Host}; -use crate::primary::{self, Mirror, NAMESPACE_PUBLIC}; -use crate::{advisory_lock, catalog}; +use crate::advisory_lock::with_migration_lock; +use crate::catalog; +use crate::primary::{self, Mirror, Namespace, NAMESPACE_PUBLIC}; use crate::{Shard, PRIMARY_SHARD}; /// Tables that we map from the primary into `primary_public` in each shard @@ -479,12 +482,17 @@ impl ConnectionPool { } match &*guard { - PoolState::Created(pool, servers) => { - pool.setup(servers.clone())?; - let pool2 = pool.clone(); - *guard = PoolState::Ready(pool.clone()); - self.state_tracker.mark_available(); - Ok(pool2) + PoolState::Created(pool, coord) => { + let migrated = coord.cheap_clone().setup_bg(pool.cheap_clone())?; + + if migrated { + let pool2 = pool.clone(); + *guard = PoolState::Ready(pool.clone()); + self.state_tracker.mark_available(); + Ok(pool2) + } else { + Err(StoreError::DatabaseUnavailable) + } } PoolState::Ready(pool) => Ok(pool.clone()), PoolState::Disabled => Err(StoreError::DatabaseDisabled), @@ -580,23 +588,6 @@ impl ConnectionPool { .ignore_timeout(|| inner.try_get_fdw(logger, timeout)) } - /// Setup the database for this pool. 
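Session-level advisory locks are plain SQL: a blocking `pg_advisory_lock(key)` paired with `pg_advisory_unlock(key)` issued on the same connection. Ignoring the async closure and the error mapping above, the synchronous skeleton of such a wrapper is:

    use diesel::{sql_query, PgConnection, QueryResult, RunQueryDsl};

    // Sketch: hold advisory lock 1 for the duration of `f`. Any other session
    // calling pg_advisory_lock(1) blocks until this session releases it.
    fn with_lock_1<R>(
        conn: &mut PgConnection,
        f: impl FnOnce(&mut PgConnection) -> R,
    ) -> QueryResult<R> {
        sql_query("select pg_advisory_lock(1)").execute(conn)?;
        let res = f(conn);
        sql_query("select pg_advisory_unlock(1)").execute(conn)?;
        Ok(res)
    }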
This includes configuring foreign - /// data wrappers for cross-shard communication, and running any pending - /// schema migrations for this database. - /// - /// # Panics - /// - /// If any errors happen during the migration, the process panics - pub async fn setup(&self) { - let pool = self.clone(); - graph::spawn_blocking_allow_panic(move || { - pool.get_ready().ok(); - }) - .await - // propagate panics - .unwrap(); - } - pub(crate) async fn query_permit(&self) -> Result { let pool = match &*self.inner.lock(&self.logger) { PoolState::Created(pool, _) | PoolState::Ready(pool) => pool.clone(), @@ -1096,61 +1087,6 @@ impl PoolInner { .unwrap_or(false) } - /// Setup the database for this pool. This includes configuring foreign - /// data wrappers for cross-shard communication, and running any pending - /// schema migrations for this database. - /// - /// Returns `StoreError::DatabaseUnavailable` if we can't connect to the - /// database. Any other error causes a panic. - /// - /// # Panics - /// - /// If any errors happen during the migration, the process panics - fn setup(&self, coord: Arc) -> Result<(), StoreError> { - fn die(logger: &Logger, msg: &'static str, err: &dyn std::fmt::Display) -> ! { - crit!(logger, "{}", msg; "error" => format!("{:#}", err)); - panic!("{}: {}", msg, err); - } - - let pool = self.clone(); - let mut conn = self.get().map_err(|_| StoreError::DatabaseUnavailable)?; - - let start = Instant::now(); - - advisory_lock::lock_migration(&mut conn) - .unwrap_or_else(|err| die(&pool.logger, "failed to get migration lock", &err)); - // This code can cause a race in database setup: if pool A has had - // schema changes and pool B then tries to map tables from pool A, - // but does so before the concurrent thread running this code for - // pool B has at least finished `configure_fdw`, mapping tables will - // fail. In that case, the node must be restarted. The restart is - // guaranteed because this failure will lead to a panic in the setup - // for pool A - // - // This code can also leave the table mappings in a state where they - // have not been updated if the process is killed after migrating - // the schema but before finishing remapping in all shards. - // Addressing that would require keeping track of the need to remap - // in the database instead of just in memory - let result = pool - .configure_fdw(coord.servers.as_ref()) - .and_then(|()| pool.drop_cross_shard_views()) - .and_then(|()| migrate_schema(&pool.logger, &mut conn)); - debug!(&pool.logger, "Release migration lock"); - advisory_lock::unlock_migration(&mut conn).unwrap_or_else(|err| { - die(&pool.logger, "failed to release migration lock", &err); - }); - let result = result - .and_then(|count| coord.propagate(&pool, count)) - .and_then(|()| pool.create_cross_shard_views(coord.servers.as_ref())); - result.unwrap_or_else(|err| die(&pool.logger, "migrations failed", &err)); - - self.locale_check(&pool.logger, conn)?; - - debug!(&pool.logger, "Setup finished"; "setup_time_s" => start.elapsed().as_secs()); - Ok(()) - } - fn locale_check( &self, logger: &Logger, @@ -1199,6 +1135,28 @@ impl PoolInner { }) } + /// Do the part of database setup that only affects this pool. Those + /// steps are + /// 1. Configuring foreign servers and user mappings for talking to the + /// other shards + /// 2. Migrating the schema to the latest version + /// 3. 
Checking that the locale is set to C + async fn migrate( + self: Arc, + servers: &[ForeignServer], + ) -> Result<(Arc, MigrationCount), StoreError> { + self.configure_fdw(servers)?; + let mut conn = self.get()?; + let (this, count) = conn.transaction(|conn| -> Result<_, StoreError> { + let count = migrate_schema(&self.logger, conn)?; + Ok((self, count)) + })?; + + this.locale_check(&this.logger, conn)?; + + Ok((this, count)) + } + /// If this is the primary shard, drop the namespace `CROSS_SHARD_NSP` fn drop_cross_shard_views(&self) -> Result<(), StoreError> { if self.shard != *PRIMARY_SHARD { @@ -1242,14 +1200,17 @@ impl PoolInner { return Ok(()); } - info!(&self.logger, "Creating cross-shard views"); let mut conn = self.get()?; + let sharded = Namespace::special(ForeignServer::CROSS_SHARD_NSP); + if catalog::has_namespace(&mut conn, &sharded)? { + // We dropped the namespace before, but another node must have + // recreated it in the meantime so we don't need to do anything + return Ok(()); + } + info!(&self.logger, "Creating cross-shard views"); conn.transaction(|conn| { - let query = format!( - "create schema if not exists {}", - ForeignServer::CROSS_SHARD_NSP - ); + let query = format!("create schema {}", ForeignServer::CROSS_SHARD_NSP); conn.batch_execute(&query)?; for (src_nsp, src_tables) in SHARDED_TABLES { // Pairs of (shard, nsp) for all servers @@ -1458,13 +1419,7 @@ impl PoolCoordinator { if count.had_migrations() { let server = self.server(&pool.shard)?; for pool in self.pools.lock().unwrap().values() { - let mut conn = pool.get()?; - let remap_res = { - advisory_lock::lock_migration(&mut conn)?; - let res = pool.remap(server); - advisory_lock::unlock_migration(&mut conn)?; - res - }; + let remap_res = pool.remap(server); if let Err(e) = remap_res { error!(pool.logger, "Failed to map imports from {}", server.shard; "error" => e.to_string()); return Err(e); @@ -1488,4 +1443,156 @@ impl PoolCoordinator { .find(|server| &server.shard == shard) .ok_or_else(|| constraint_violation!("unknown shard {shard}")) } + + fn primary(&self) -> Result, StoreError> { + self.pools + .lock() + .unwrap() + .get(&*PRIMARY_SHARD) + .cloned() + .ok_or_else(|| { + constraint_violation!("internal error: primary shard not found in pool coordinator") + }) + } + + /// Setup all pools the coordinator knows about and return the number of + /// pools that were successfully set up. + /// + /// # Panics + /// + /// If any errors besides a database not being available happen during + /// the migration, the process panics + pub async fn setup_all(&self, logger: &Logger) -> usize { + let pools = self + .pools + .lock() + .unwrap() + .values() + .cloned() + .collect::>(); + + let res = self.setup(pools).await; + + match res { + Ok(count) => { + info!(logger, "Setup finished"; "shards" => count); + count + } + Err(e) => { + crit!(logger, "database setup failed"; "error" => format!("{e}")); + panic!("database setup failed: {}", e); + } + } + } + + /// A helper to call `setup` from a non-async context. Returns `true` if + /// the setup was actually run, i.e. if `pool` was available + fn setup_bg(self: Arc, pool: Arc) -> Result { + let migrated = graph::spawn_thread("database-setup", move || { + graph::block_on(self.setup(vec![pool.clone()])) + }) + .join() + // unwrap: propagate panics + .unwrap()?; + Ok(migrated == 1) + } + + /// Setup all pools by doing the following steps: + /// 1. Get the migration lock in the primary. This makes sure that only + /// one node runs migrations + /// 2. 
Remove the views in `sharded` as they might interfere with + /// running migrations + /// 3. In parallel, do the following in each pool: + /// 1. Configure fdw servers + /// 2. Run migrations in all pools in parallel + /// 4. In parallel, do the following in each pool: + /// 1. Create/update the mappings in `shard__subgraphs` and in + /// `primary_public` + /// 5. Create the views in `sharded` again + /// 6. Release the migration lock + /// + /// This method tolerates databases that are not available and will + /// simply ignore them. The returned count is the number of pools that + /// were successfully set up. + async fn setup(&self, pools: Vec>) -> Result { + type MigrationCounts = Vec<(Arc, MigrationCount)>; + + /// Filter out pools that are not available. We don't want to fail + /// because one of the pools is not available. We will just ignore + /// them and continue with the others. + fn filter_unavailable( + (pool, res): (Arc, Result), + ) -> Option> { + if let Err(StoreError::DatabaseUnavailable) = res { + error!( + pool.logger, + "migrations failed because database was unavailable" + ); + None + } else { + Some(res) + } + } + + /// Migrate all pools in parallel + async fn migrate( + pools: &[Arc], + servers: &[ForeignServer], + ) -> Result { + let futures = pools + .iter() + .map(|pool| { + pool.cheap_clone() + .migrate(servers) + .map(|res| (pool.cheap_clone(), res)) + }) + .collect::>(); + join_all(futures) + .await + .into_iter() + .filter_map(filter_unavailable) + .collect::, _>>() + } + + /// Propagate the schema changes to all other pools in parallel + async fn propagate( + this: &PoolCoordinator, + migrated: MigrationCounts, + ) -> Result { + let futures = migrated + .into_iter() + .map(|(pool, count)| async move { + let res = this.propagate(&pool, count); + (pool.cheap_clone(), res) + }) + .collect::>(); + join_all(futures) + .await + .into_iter() + .filter_map(filter_unavailable) + .collect::, _>>() + .map(|v| v.len()) + } + + let primary = self.primary()?; + + let mut pconn = primary.get().map_err(|_| StoreError::DatabaseUnavailable)?; + + // Everything here happens under the migration lock. 
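The per-shard fan-out is a `join_all` over one future per pool, with unavailable databases filtered out afterwards so a single down shard cannot fail the whole setup. The generic shape, detached from pools and migrations:

    use futures::future::join_all;
    use std::future::Future;

    // Sketch: run a fallible async step for every item in parallel and keep only
    // the successes; failures (e.g. an unreachable database) are simply skipped.
    async fn run_surviving<T, R, E, Fut>(items: Vec<T>, step: impl Fn(T) -> Fut) -> Vec<R>
    where
        Fut: Future<Output = Result<R, E>>,
    {
        join_all(items.into_iter().map(step))
            .await
            .into_iter()
            .filter_map(Result::ok)
            .collect()
    }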
Anything called + // from here should not try to get that lock, otherwise the process + // will deadlock + let res = with_migration_lock(&mut pconn, |_| async { + primary.drop_cross_shard_views()?; + + let migrated = migrate(&pools, self.servers.as_ref()).await?; + + let propagated = propagate(&self, migrated).await?; + + primary.create_cross_shard_views(&self.servers)?; + Ok(propagated) + }) + .await; + + res + } } From a63e607c844875b3d211b254ff5ed2dafd246df1 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 4 Apr 2025 11:41:49 -0700 Subject: [PATCH 071/160] node, store: Give the PoolCoordinator a logger --- node/src/bin/manager.rs | 2 +- node/src/store_builder.rs | 2 +- store/postgres/src/connection_pool.rs | 9 +++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/node/src/bin/manager.rs b/node/src/bin/manager.rs index 5142a2ab939..50ee9b61958 100644 --- a/node/src/bin/manager.rs +++ b/node/src/bin/manager.rs @@ -898,7 +898,7 @@ impl Context { fn primary_pool(self) -> ConnectionPool { let primary = self.config.primary_store(); - let coord = Arc::new(PoolCoordinator::new(Arc::new(vec![]))); + let coord = Arc::new(PoolCoordinator::new(&self.logger, Arc::new(vec![]))); let pool = StoreBuilder::main_pool( &self.logger, &self.node_id, diff --git a/node/src/store_builder.rs b/node/src/store_builder.rs index abaf59471fd..2d2e56dbc69 100644 --- a/node/src/store_builder.rs +++ b/node/src/store_builder.rs @@ -110,7 +110,7 @@ impl StoreBuilder { .collect::, _>>() .expect("connection url's contain enough detail"); let servers = Arc::new(servers); - let coord = Arc::new(PoolCoordinator::new(servers)); + let coord = Arc::new(PoolCoordinator::new(logger, servers)); let shards: Vec<_> = config .stores diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index 171fb8dbbb6..f03bd75ecb0 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -1336,13 +1336,16 @@ fn migrate_schema(logger: &Logger, conn: &mut PgConnection) -> Result>>, servers: Arc>, } impl PoolCoordinator { - pub fn new(servers: Arc>) -> Self { + pub fn new(logger: &Logger, servers: Arc>) -> Self { + let logger = logger.new(o!("component" => "ConnectionPool", "component" => "Coordinator")); Self { + logger, pools: Mutex::new(HashMap::new()), servers, } @@ -1581,7 +1584,9 @@ impl PoolCoordinator { // Everything here happens under the migration lock. Anything called // from here should not try to get that lock, otherwise the process // will deadlock + debug!(self.logger, "Waiting for migration lock"); let res = with_migration_lock(&mut pconn, |_| async { + debug!(self.logger, "Migration lock acquired"); primary.drop_cross_shard_views()?; let migrated = migrate(&pools, self.servers.as_ref()).await?; @@ -1592,7 +1597,7 @@ impl PoolCoordinator { Ok(propagated) }) .await; - + debug!(self.logger, "Database setup finished"); res } } From 3282af28a63038bc41a53074b088ba6146c52108 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 4 Apr 2025 12:35:14 -0700 Subject: [PATCH 072/160] store: Encapsulate mutable state tracking in PoolState Before, PoolState was just an enum and code all over the place dealt with its interior mutability. 
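As a rough sketch of that older shape, condensed from the code this patch removes further down (not a complete listing), every caller had to lock the mutex and match on the variants itself:

    // e.g. the pre-patch wait_stats: lock the TimedMutex, then match on the enum in place
    pub(crate) fn wait_stats(&self) -> Result<PoolWaitStats, StoreError> {
        match &*self.inner.lock(&self.logger) {
            PoolState::Created(pool, _) | PoolState::Ready(pool) => Ok(pool.wait_stats.clone()),
            PoolState::Disabled => Err(StoreError::DatabaseDisabled),
        }
    }
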
Now, we encapsulate that to simplify code using the PoolState --- store/postgres/src/connection_pool.rs | 173 ++++++++++++++++++-------- 1 file changed, 118 insertions(+), 55 deletions(-) diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index f03bd75ecb0..70caf4c49fc 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -10,6 +10,7 @@ use diesel_migrations::{EmbeddedMigrations, HarnessWithOutput}; use graph::cheap_clone::CheapClone; use graph::components::store::QueryPermit; use graph::constraint_violation; +use graph::derive::CheapClone; use graph::futures03::future::join_all; use graph::futures03::FutureExt as _; use graph::prelude::tokio::time::Instant; @@ -312,7 +313,17 @@ impl ForeignServer { /// them on idle. This is much shorter than the default of 10 minutes. const FDW_IDLE_TIMEOUT: Duration = Duration::from_secs(60); -/// A pool goes through several states, and this enum tracks what state we +enum PoolStateInner { + /// A connection pool, and all the servers for which we need to + /// establish fdw mappings when we call `setup` on the pool + Created(Arc, Arc), + /// The pool has been successfully set up + Ready(Arc), + /// The pool has been disabled by setting its size to 0 + Disabled(String), +} + +/// A pool goes through several states, and this struct tracks what state we /// are in, together with the `state_tracker` field on `ConnectionPool`. /// When first created, the pool is in state `Created`; once we successfully /// called `setup` on it, it moves to state `Ready`. During use, we use the @@ -322,20 +333,96 @@ const FDW_IDLE_TIMEOUT: Duration = Duration::from_secs(60); /// database connection. That avoids overall undesirable states like buildup /// of queries; instead of queueing them until the database is available, /// they return almost immediately with an error -enum PoolState { - /// A connection pool, and all the servers for which we need to - /// establish fdw mappings when we call `setup` on the pool - Created(Arc, Arc), - /// The pool has been successfully set up - Ready(Arc), - /// The pool has been disabled by setting its size to 0 - Disabled, +#[derive(Clone, CheapClone)] +struct PoolState { + logger: Logger, + inner: Arc>, } +impl PoolState { + fn new(logger: Logger, inner: PoolStateInner, name: String) -> Self { + let pool_name = format!("pool-{}", name); + Self { + logger, + inner: Arc::new(TimedMutex::new(inner, pool_name)), + } + } + + fn disabled(logger: Logger, name: &str) -> Self { + Self::new( + logger, + PoolStateInner::Disabled(name.to_string()), + name.to_string(), + ) + } + + fn created(pool: Arc, coord: Arc) -> Self { + let logger = pool.logger.clone(); + let name = pool.shard.to_string(); + let inner = PoolStateInner::Created(pool, coord); + Self::new(logger, inner, name) + } + + fn ready(pool: Arc) -> Self { + let logger = pool.logger.clone(); + let name = pool.shard.to_string(); + let inner = PoolStateInner::Ready(pool); + Self::new(logger, inner, name) + } + + fn set_ready(&self) { + use PoolStateInner::*; + + let mut guard = self.inner.lock(&self.logger); + match &*guard { + Created(pool, _) => *guard = Ready(pool.clone()), + Ready(_) | Disabled(_) => { /* nothing to do */ } + } + } + + /// Get a connection pool that is ready, i.e., has been through setup + /// and running migrations + fn get_ready(&self) -> Result, StoreError> { + let mut guard = self.inner.lock(&self.logger); + + use PoolStateInner::*; + match &*guard { + Created(pool, coord) => { + let 
migrated = coord.cheap_clone().setup_bg(pool.cheap_clone())?; + + if migrated { + let pool2 = pool.cheap_clone(); + *guard = Ready(pool.cheap_clone()); + Ok(pool2) + } else { + Err(StoreError::DatabaseUnavailable) + } + } + Ready(pool) => Ok(pool.clone()), + Disabled(name) => Err(constraint_violation!( + "tried to access disabled database pool `{}`", + name + )), + } + } + + /// Get the inner pool, regardless of whether it has been set up or not. + /// Most uses should use `get_ready` instead + fn get_unready(&self) -> Result, StoreError> { + use PoolStateInner::*; + + match &*self.inner.lock(&self.logger) { + Created(pool, _) | Ready(pool) => Ok(pool.cheap_clone()), + Disabled(name) => Err(constraint_violation!( + "tried to access disabled database pool `{}`", + name + )), + } + } +} #[derive(Clone)] pub struct ConnectionPool { - inner: Arc>, - logger: Logger, + inner: PoolState, pub shard: Shard, state_tracker: PoolStateTracker, } @@ -428,9 +515,9 @@ impl ConnectionPool { let state_tracker = PoolStateTracker::new(); let shard = Shard::new(shard_name.to_string()).expect("shard_name is a valid name for a shard"); - let pool_state = { + let inner = { if pool_size == 0 { - PoolState::Disabled + PoolState::disabled(logger.cheap_clone(), shard_name) } else { let pool = PoolInner::create( shard.clone(), @@ -443,15 +530,14 @@ impl ConnectionPool { state_tracker.clone(), ); if pool_name.is_replica() { - PoolState::Ready(Arc::new(pool)) + PoolState::ready(Arc::new(pool)) } else { - PoolState::Created(Arc::new(pool), coord) + PoolState::created(Arc::new(pool), coord) } } }; ConnectionPool { - inner: Arc::new(TimedMutex::new(pool_state, format!("pool-{}", shard_name))), - logger: logger.clone(), + inner, shard, state_tracker, } @@ -460,11 +546,7 @@ impl ConnectionPool { /// This is only used for `graphman` to ensure it doesn't run migrations /// or other setup steps pub fn skip_setup(&self) { - let mut guard = self.inner.lock(&self.logger); - match &*guard { - PoolState::Created(pool, _) => *guard = PoolState::Ready(pool.clone()), - PoolState::Ready(_) | PoolState::Disabled => { /* nothing to do */ } - } + self.inner.set_ready(); } /// Return a pool that is ready, i.e., connected to the database. 
If the @@ -472,7 +554,6 @@ impl ConnectionPool { /// or the pool is marked as unavailable, return /// `StoreError::DatabaseUnavailable` fn get_ready(&self) -> Result, StoreError> { - let mut guard = self.inner.lock(&self.logger); if !self.state_tracker.is_available() { // We know that trying to use this pool is pointless since the // database is not available, and will only lead to other @@ -481,21 +562,12 @@ impl ConnectionPool { return Err(StoreError::DatabaseUnavailable); } - match &*guard { - PoolState::Created(pool, coord) => { - let migrated = coord.cheap_clone().setup_bg(pool.cheap_clone())?; - - if migrated { - let pool2 = pool.clone(); - *guard = PoolState::Ready(pool.clone()); - self.state_tracker.mark_available(); - Ok(pool2) - } else { - Err(StoreError::DatabaseUnavailable) - } + match self.inner.get_ready() { + Ok(pool) => { + self.state_tracker.mark_available(); + Ok(pool) } - PoolState::Ready(pool) => Ok(pool.clone()), - PoolState::Disabled => Err(StoreError::DatabaseDisabled), + Err(e) => Err(e), } } @@ -589,12 +661,7 @@ impl ConnectionPool { } pub(crate) async fn query_permit(&self) -> Result { - let pool = match &*self.inner.lock(&self.logger) { - PoolState::Created(pool, _) | PoolState::Ready(pool) => pool.clone(), - PoolState::Disabled => { - return Err(StoreError::DatabaseDisabled); - } - }; + let pool = self.inner.get_unready()?; let start = Instant::now(); let permit = pool.query_permit().await; Ok(QueryPermit { @@ -604,10 +671,9 @@ impl ConnectionPool { } pub(crate) fn wait_stats(&self) -> Result { - match &*self.inner.lock(&self.logger) { - PoolState::Created(pool, _) | PoolState::Ready(pool) => Ok(pool.wait_stats.clone()), - PoolState::Disabled => Err(StoreError::DatabaseDisabled), - } + self.inner + .get_unready() + .map(|pool| pool.wait_stats.cheap_clone()) } /// Mirror key tables from the primary into our own schema. We do this @@ -1381,14 +1447,11 @@ impl PoolCoordinator { // yet. 
We remember the `PoolInner` so that later, when we have to // call `remap()`, we do not have to take this lock as that will be // already held in `get_ready()` - match &*pool.inner.lock(logger) { - PoolState::Created(inner, _) | PoolState::Ready(inner) => { - self.pools - .lock() - .unwrap() - .insert(pool.shard.clone(), inner.clone()); - } - PoolState::Disabled => { /* nothing to do */ } + if let Some(inner) = pool.inner.get_unready().ok() { + self.pools + .lock() + .unwrap() + .insert(pool.shard.clone(), inner.clone()); } } pool From 70656a378172e8aca86a8fa1c803a8d0868d9c56 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 4 Apr 2025 12:37:21 -0700 Subject: [PATCH 073/160] node, store: Rename 'PoolName' to 'PoolRole' --- node/src/store_builder.rs | 6 +++--- store/postgres/src/connection_pool.rs | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/node/src/store_builder.rs b/node/src/store_builder.rs index 2d2e56dbc69..27dc7d5d021 100644 --- a/node/src/store_builder.rs +++ b/node/src/store_builder.rs @@ -8,7 +8,7 @@ use graph::{ util::security::SafeDisplay, }; use graph_store_postgres::connection_pool::{ - ConnectionPool, ForeignServer, PoolCoordinator, PoolName, + ConnectionPool, ForeignServer, PoolCoordinator, PoolRole, }; use graph_store_postgres::{ BlockStore as DieselBlockStore, ChainHeadUpdateListener as PostgresChainHeadUpdateListener, @@ -224,7 +224,7 @@ impl StoreBuilder { coord.create_pool( &logger, name, - PoolName::Main, + PoolRole::Main, shard.connection.clone(), pool_size, Some(fdw_pool_size), @@ -264,7 +264,7 @@ impl StoreBuilder { coord.clone().create_pool( &logger, name, - PoolName::Replica(pool), + PoolRole::Replica(pool), replica.connection.clone(), pool_size, None, diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index 70caf4c49fc..fd8c26204af 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -435,27 +435,27 @@ impl fmt::Debug for ConnectionPool { } } -/// The name of the pool, mostly for logging, and what purpose it serves. +/// The role of the pool, mostly for logging, and what purpose it serves. /// The main pool will always be called `main`, and can be used for reading /// and writing. Replica pools can only be used for reading, and don't /// require any setup (migrations etc.) -pub enum PoolName { +pub enum PoolRole { Main, Replica(String), } -impl PoolName { +impl PoolRole { fn as_str(&self) -> &str { match self { - PoolName::Main => "main", - PoolName::Replica(name) => name, + PoolRole::Main => "main", + PoolRole::Replica(name) => name, } } fn is_replica(&self) -> bool { match self { - PoolName::Main => false, - PoolName::Replica(_) => true, + PoolRole::Main => false, + PoolRole::Replica(_) => true, } } } @@ -504,7 +504,7 @@ impl PoolStateTracker { impl ConnectionPool { fn create( shard_name: &str, - pool_name: PoolName, + pool_name: PoolRole, postgres_url: String, pool_size: u32, fdw_pool_size: Option, @@ -1421,7 +1421,7 @@ impl PoolCoordinator { self: Arc, logger: &Logger, name: &str, - pool_name: PoolName, + pool_name: PoolRole, postgres_url: String, pool_size: u32, fdw_pool_size: Option, From 67108356b0614fd128a17c21f908524b82cca884 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 4 Apr 2025 15:26:54 -0700 Subject: [PATCH 074/160] all: Filter out shards with pool size 0 Instead of dealing with disabled shards (shards that have a pool size of 0 configured), filter those shards out on startup and warn about them. 
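Condensed from the store_builder change in this patch (a sketch only, not the full hunk), the startup filtering now looks roughly like this:

    // shards with a configured pool size of 0 are dropped with a warning;
    // a zero-sized primary is still a hard error
    let pool_size = shard.pool_size.size_for(node, name)
        .unwrap_or_else(|_| panic!("cannot determine the pool size for store {}", name));
    if pool_size == 0 {
        if name == PRIMARY_SHARD.as_str() {
            panic!("pool size for primary shard must be greater than 0");
        }
        warn!(logger, "pool size for shard {} is 0, ignoring this shard", name);
        return None; // the shard never becomes part of the store
    }
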
The end effect is that for that configuration, users will get an error of 'unkown shard' rather than 'shard disabled'. Since configuring a shard to have no connections is kinda pathological, and leads to an error when it is used either way, the code simplification is worth the slightly less helpful error message. Removing the 'disabled' state from pools has ripple effects to quite a few other places, simplifying them a bit --- graph/src/components/store/traits.rs | 6 +- graph/src/data/query/trace.rs | 7 +- graphql/src/execution/resolver.rs | 2 +- graphql/src/introspection/resolver.rs | 2 +- graphql/src/runner.rs | 2 +- graphql/src/store/resolver.rs | 4 +- node/src/store_builder.rs | 20 ++++- server/index-node/src/resolver.rs | 4 +- store/postgres/src/block_store.rs | 6 +- store/postgres/src/connection_pool.rs | 79 +++++++------------ store/postgres/src/deployment_store.rs | 4 +- store/postgres/src/query_store.rs | 4 +- store/postgres/src/store.rs | 4 +- .../test-store/tests/graphql/introspection.rs | 6 +- 14 files changed, 67 insertions(+), 83 deletions(-) diff --git a/graph/src/components/store/traits.rs b/graph/src/components/store/traits.rs index 27cb3768e2c..73cb22269fe 100644 --- a/graph/src/components/store/traits.rs +++ b/graph/src/components/store/traits.rs @@ -655,7 +655,7 @@ pub trait QueryStore: Send + Sync { block_hash: &BlockHash, ) -> Result, Option)>, StoreError>; - fn wait_stats(&self) -> Result; + fn wait_stats(&self) -> PoolWaitStats; /// Find the current state for the subgraph deployment `id` and /// return details about it needed for executing queries @@ -668,7 +668,7 @@ pub trait QueryStore: Send + Sync { fn network_name(&self) -> &str; /// A permit should be acquired before starting query execution. - async fn query_permit(&self) -> Result; + async fn query_permit(&self) -> QueryPermit; /// Report the name of the shard in which the subgraph is stored. This /// should only be used for reporting and monitoring @@ -683,7 +683,7 @@ pub trait QueryStore: Send + Sync { #[async_trait] pub trait StatusStore: Send + Sync + 'static { /// A permit should be acquired before starting query execution. - async fn query_permit(&self) -> Result; + async fn query_permit(&self) -> QueryPermit; fn status(&self, filter: status::Filter) -> Result, StoreError>; diff --git a/graph/src/data/query/trace.rs b/graph/src/data/query/trace.rs index cf2d153dca4..256c9cdeaf6 100644 --- a/graph/src/data/query/trace.rs +++ b/graph/src/data/query/trace.rs @@ -118,11 +118,8 @@ impl Trace { } } - pub fn query_done(&mut self, dur: Duration, permit: &Result) { - let permit_dur = match permit { - Ok(permit) => permit.wait, - Err(_) => Duration::from_millis(0), - }; + pub fn query_done(&mut self, dur: Duration, permit: &QueryPermit) { + let permit_dur = permit.wait; match self { Trace::None => { /* nothing to do */ } Trace::Root { .. 
} => { diff --git a/graphql/src/execution/resolver.rs b/graphql/src/execution/resolver.rs index ca59e401dfc..0074eb124d8 100644 --- a/graphql/src/execution/resolver.rs +++ b/graphql/src/execution/resolver.rs @@ -18,7 +18,7 @@ use super::Query; pub trait Resolver: Sized + Send + Sync + 'static { const CACHEABLE: bool; - async fn query_permit(&self) -> Result; + async fn query_permit(&self) -> QueryPermit; /// Prepare for executing a query by prefetching as much data as possible fn prefetch( diff --git a/graphql/src/introspection/resolver.rs b/graphql/src/introspection/resolver.rs index 0f67b717c5a..765b0399695 100644 --- a/graphql/src/introspection/resolver.rs +++ b/graphql/src/introspection/resolver.rs @@ -356,7 +356,7 @@ impl Resolver for IntrospectionResolver { // see `fn as_introspection_context`, so this value is irrelevant. const CACHEABLE: bool = false; - async fn query_permit(&self) -> Result { + async fn query_permit(&self) -> QueryPermit { unreachable!() } diff --git a/graphql/src/runner.rs b/graphql/src/runner.rs index 96f30e8bc9d..d2f0bc9c96c 100644 --- a/graphql/src/runner.rs +++ b/graphql/src/runner.rs @@ -143,7 +143,7 @@ where )?; self.load_manager .decide( - &store.wait_stats().map_err(QueryExecutionError::from)?, + &store.wait_stats(), store.shard(), store.deployment_id(), query.shape_hash, diff --git a/graphql/src/store/resolver.rs b/graphql/src/store/resolver.rs index 82c40420fa6..d7032740768 100644 --- a/graphql/src/store/resolver.rs +++ b/graphql/src/store/resolver.rs @@ -256,8 +256,8 @@ impl StoreResolver { impl Resolver for StoreResolver { const CACHEABLE: bool = true; - async fn query_permit(&self) -> Result { - self.store.query_permit().await.map_err(Into::into) + async fn query_permit(&self) -> QueryPermit { + self.store.query_permit().await } fn prefetch( diff --git a/node/src/store_builder.rs b/node/src/store_builder.rs index 27dc7d5d021..5294179f8eb 100644 --- a/node/src/store_builder.rs +++ b/node/src/store_builder.rs @@ -2,6 +2,7 @@ use std::iter::FromIterator; use std::{collections::HashMap, sync::Arc}; use graph::prelude::{o, MetricsRegistry, NodeId}; +use graph::slog::warn; use graph::url::Url; use graph::{ prelude::{info, CheapClone, Logger}, @@ -115,8 +116,23 @@ impl StoreBuilder { let shards: Vec<_> = config .stores .iter() - .map(|(name, shard)| { + .filter_map(|(name, shard)| { let logger = logger.new(o!("shard" => name.to_string())); + let pool_size = shard.pool_size.size_for(node, name).unwrap_or_else(|_| { + panic!("cannot determine the pool size for store {}", name) + }); + if pool_size == 0 { + if name == PRIMARY_SHARD.as_str() { + panic!("pool size for primary shard must be greater than 0"); + } else { + warn!( + logger, + "pool size for shard {} is 0, ignoring this shard", name + ); + return None; + } + } + let conn_pool = Self::main_pool( &logger, node, @@ -137,7 +153,7 @@ impl StoreBuilder { let name = ShardName::new(name.to_string()).expect("shard names have been validated"); - (name, conn_pool, read_only_conn_pools, weights) + Some((name, conn_pool, read_only_conn_pools, weights)) }) .collect(); diff --git a/server/index-node/src/resolver.rs b/server/index-node/src/resolver.rs index a60e5d35fd9..7974afe41db 100644 --- a/server/index-node/src/resolver.rs +++ b/server/index-node/src/resolver.rs @@ -777,8 +777,8 @@ fn entity_changes_to_graphql(entity_changes: Vec) -> r::Value { impl Resolver for IndexNodeResolver { const CACHEABLE: bool = false; - async fn query_permit(&self) -> Result { - self.store.query_permit().await.map_err(Into::into) + 
async fn query_permit(&self) -> QueryPermit { + self.store.query_permit().await } fn prefetch( diff --git a/store/postgres/src/block_store.rs b/store/postgres/src/block_store.rs index 9af40b8d2a0..f69267fff17 100644 --- a/store/postgres/src/block_store.rs +++ b/store/postgres/src/block_store.rs @@ -319,11 +319,7 @@ impl BlockStore { } pub(crate) async fn query_permit_primary(&self) -> QueryPermit { - self.mirror - .primary() - .query_permit() - .await - .expect("the primary is never disabled") + self.mirror.primary().query_permit().await } pub fn allocate_chain( diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index fd8c26204af..c2f5bc95a9c 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -319,8 +319,6 @@ enum PoolStateInner { Created(Arc, Arc), /// The pool has been successfully set up Ready(Arc), - /// The pool has been disabled by setting its size to 0 - Disabled(String), } /// A pool goes through several states, and this struct tracks what state we @@ -348,14 +346,6 @@ impl PoolState { } } - fn disabled(logger: Logger, name: &str) -> Self { - Self::new( - logger, - PoolStateInner::Disabled(name.to_string()), - name.to_string(), - ) - } - fn created(pool: Arc, coord: Arc) -> Self { let logger = pool.logger.clone(); let name = pool.shard.to_string(); @@ -376,7 +366,7 @@ impl PoolState { let mut guard = self.inner.lock(&self.logger); match &*guard { Created(pool, _) => *guard = Ready(pool.clone()), - Ready(_) | Disabled(_) => { /* nothing to do */ } + Ready(_) => { /* nothing to do */ } } } @@ -399,24 +389,16 @@ impl PoolState { } } Ready(pool) => Ok(pool.clone()), - Disabled(name) => Err(constraint_violation!( - "tried to access disabled database pool `{}`", - name - )), } } /// Get the inner pool, regardless of whether it has been set up or not. 
/// Most uses should use `get_ready` instead - fn get_unready(&self) -> Result, StoreError> { + fn get_unready(&self) -> Arc { use PoolStateInner::*; match &*self.inner.lock(&self.logger) { - Created(pool, _) | Ready(pool) => Ok(pool.cheap_clone()), - Disabled(name) => Err(constraint_violation!( - "tried to access disabled database pool `{}`", - name - )), + Created(pool, _) | Ready(pool) => pool.cheap_clone(), } } } @@ -516,24 +498,20 @@ impl ConnectionPool { let shard = Shard::new(shard_name.to_string()).expect("shard_name is a valid name for a shard"); let inner = { - if pool_size == 0 { - PoolState::disabled(logger.cheap_clone(), shard_name) + let pool = PoolInner::create( + shard.clone(), + pool_name.as_str(), + postgres_url, + pool_size, + fdw_pool_size, + logger, + registry, + state_tracker.clone(), + ); + if pool_name.is_replica() { + PoolState::ready(Arc::new(pool)) } else { - let pool = PoolInner::create( - shard.clone(), - pool_name.as_str(), - postgres_url, - pool_size, - fdw_pool_size, - logger, - registry, - state_tracker.clone(), - ); - if pool_name.is_replica() { - PoolState::ready(Arc::new(pool)) - } else { - PoolState::created(Arc::new(pool), coord) - } + PoolState::created(Arc::new(pool), coord) } }; ConnectionPool { @@ -660,20 +638,18 @@ impl ConnectionPool { .ignore_timeout(|| inner.try_get_fdw(logger, timeout)) } - pub(crate) async fn query_permit(&self) -> Result { - let pool = self.inner.get_unready()?; + pub(crate) async fn query_permit(&self) -> QueryPermit { + let pool = self.inner.get_unready(); let start = Instant::now(); let permit = pool.query_permit().await; - Ok(QueryPermit { + QueryPermit { permit, wait: start.elapsed(), - }) + } } - pub(crate) fn wait_stats(&self) -> Result { - self.inner - .get_unready() - .map(|pool| pool.wait_stats.cheap_clone()) + pub(crate) fn wait_stats(&self) -> PoolWaitStats { + self.inner.get_unready().wait_stats.cheap_clone() } /// Mirror key tables from the primary into our own schema. We do this @@ -1447,12 +1423,11 @@ impl PoolCoordinator { // yet. 
We remember the `PoolInner` so that later, when we have to // call `remap()`, we do not have to take this lock as that will be // already held in `get_ready()` - if let Some(inner) = pool.inner.get_unready().ok() { - self.pools - .lock() - .unwrap() - .insert(pool.shard.clone(), inner.clone()); - } + let inner = pool.inner.get_unready(); + self.pools + .lock() + .unwrap() + .insert(pool.shard.clone(), inner.clone()); } pool } diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index 96dd5507f3e..91230d63b7b 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -415,7 +415,7 @@ impl DeploymentStore { Ok(conn) } - pub(crate) async fn query_permit(&self, replica: ReplicaId) -> Result { + pub(crate) async fn query_permit(&self, replica: ReplicaId) -> QueryPermit { let pool = match replica { ReplicaId::Main => &self.pool, ReplicaId::ReadOnly(idx) => &self.read_only_pools[idx], @@ -423,7 +423,7 @@ impl DeploymentStore { pool.query_permit().await } - pub(crate) fn wait_stats(&self, replica: ReplicaId) -> Result { + pub(crate) fn wait_stats(&self, replica: ReplicaId) -> PoolWaitStats { match replica { ReplicaId::Main => self.pool.wait_stats(), ReplicaId::ReadOnly(idx) => self.read_only_pools[idx].wait_stats(), diff --git a/store/postgres/src/query_store.rs b/store/postgres/src/query_store.rs index 8fc2da822e4..fe7d084030b 100644 --- a/store/postgres/src/query_store.rs +++ b/store/postgres/src/query_store.rs @@ -112,7 +112,7 @@ impl QueryStoreTrait for QueryStore { self.chain_store.block_numbers(block_hashes).await } - fn wait_stats(&self) -> Result { + fn wait_stats(&self) -> PoolWaitStats { self.store.wait_stats(self.replica_id) } @@ -137,7 +137,7 @@ impl QueryStoreTrait for QueryStore { &self.site.network } - async fn query_permit(&self) -> Result { + async fn query_permit(&self) -> QueryPermit { self.store.query_permit(self.replica_id).await } diff --git a/store/postgres/src/store.rs b/store/postgres/src/store.rs index 50a5e4b21e0..7eb428a5058 100644 --- a/store/postgres/src/store.rs +++ b/store/postgres/src/store.rs @@ -167,8 +167,8 @@ impl StatusStore for Store { .await } - async fn query_permit(&self) -> Result { + async fn query_permit(&self) -> QueryPermit { // Status queries go to the primary shard. - Ok(self.block_store.query_permit_primary().await) + self.block_store.query_permit_primary().await } } diff --git a/store/test-store/tests/graphql/introspection.rs b/store/test-store/tests/graphql/introspection.rs index 6139e673767..8bc76213e6b 100644 --- a/store/test-store/tests/graphql/introspection.rs +++ b/store/test-store/tests/graphql/introspection.rs @@ -53,15 +53,15 @@ impl Resolver for MockResolver { Ok(r::Value::Null) } - async fn query_permit(&self) -> Result { + async fn query_permit(&self) -> QueryPermit { let permit = Arc::new(tokio::sync::Semaphore::new(1)) .acquire_owned() .await .unwrap(); - Ok(QueryPermit { + QueryPermit { permit, wait: Duration::from_secs(0), - }) + } } } From eb6fae71fd9a05c38b779a97b0bb3de8f17edd7b Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 4 Apr 2025 16:15:33 -0700 Subject: [PATCH 075/160] store: Make sure we do not run setup twice for the same pool With the previous code, we would run setup initially when creating all pools, but they would not be marked as set up. On the first access to the pool we would try to run setup again, which is not needed. 
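Concretely, the first `get_ready()` call would land in the `Created` branch and kick off setup a second time; roughly (condensed from the code this patch rewrites):

    match &*guard {
        Created(pool, coord) => {
            // runs the whole setup/migration again even though startup already did it
            let migrated = coord.cheap_clone().setup_bg(pool.cheap_clone())?;
            // ...
        }
        Ready(pool) => Ok(pool.clone()),
    }
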
This change makes it so that we remember that we ran setup successfully when pools are created --- store/postgres/src/connection_pool.rs | 132 ++++++++++++++++---------- 1 file changed, 84 insertions(+), 48 deletions(-) diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index c2f5bc95a9c..9a6afe9a37e 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -373,22 +373,27 @@ impl PoolState { /// Get a connection pool that is ready, i.e., has been through setup /// and running migrations fn get_ready(&self) -> Result, StoreError> { - let mut guard = self.inner.lock(&self.logger); + // We have to be careful here that we do not hold a lock when we + // call `setup_bg`, otherwise we will deadlock + let (pool, coord) = { + let guard = self.inner.lock(&self.logger); + + use PoolStateInner::*; + match &*guard { + Created(pool, coord) => (pool.cheap_clone(), coord.cheap_clone()), + Ready(pool) => return Ok(pool.clone()), + } + }; - use PoolStateInner::*; - match &*guard { - Created(pool, coord) => { - let migrated = coord.cheap_clone().setup_bg(pool.cheap_clone())?; + // self is `Created` and needs to have setup run + coord.setup_bg(self.cheap_clone())?; - if migrated { - let pool2 = pool.cheap_clone(); - *guard = Ready(pool.cheap_clone()); - Ok(pool2) - } else { - Err(StoreError::DatabaseUnavailable) - } - } - Ready(pool) => Ok(pool.clone()), + // We just tried to set up the pool; if it is still not set up and + // we didn't have an error, it means the database is not available + if self.needs_setup() { + return Err(StoreError::DatabaseUnavailable); + } else { + Ok(pool) } } @@ -401,6 +406,16 @@ impl PoolState { Created(pool, _) | Ready(pool) => pool.cheap_clone(), } } + + fn needs_setup(&self) -> bool { + let guard = self.inner.lock(&self.logger); + + use PoolStateInner::*; + match &*guard { + Created(_, _) => true, + Ready(_) => false, + } + } } #[derive(Clone)] pub struct ConnectionPool { @@ -1186,7 +1201,7 @@ impl PoolInner { async fn migrate( self: Arc, servers: &[ForeignServer], - ) -> Result<(Arc, MigrationCount), StoreError> { + ) -> Result { self.configure_fdw(servers)?; let mut conn = self.get()?; let (this, count) = conn.transaction(|conn| -> Result<_, StoreError> { @@ -1196,7 +1211,7 @@ impl PoolInner { this.locale_check(&this.logger, conn)?; - Ok((this, count)) + Ok(count) } /// If this is the primary shard, drop the namespace `CROSS_SHARD_NSP` @@ -1379,7 +1394,7 @@ fn migrate_schema(logger: &Logger, conn: &mut PgConnection) -> Result>>, + pools: Mutex>, servers: Arc>, } @@ -1419,16 +1434,12 @@ impl PoolCoordinator { // Ignore non-writable pools (replicas), there is no need (and no // way) to coordinate schema changes with them if is_writable { - // It is safe to take this lock here since nobody has seen the pool - // yet. 
We remember the `PoolInner` so that later, when we have to - // call `remap()`, we do not have to take this lock as that will be - // already held in `get_ready()` - let inner = pool.inner.get_unready(); self.pools .lock() .unwrap() - .insert(pool.shard.clone(), inner.clone()); + .insert(pool.shard.clone(), pool.inner.cheap_clone()); } + pool } @@ -1460,6 +1471,7 @@ impl PoolCoordinator { if count.had_migrations() { let server = self.server(&pool.shard)?; for pool in self.pools.lock().unwrap().values() { + let pool = pool.get_unready(); let remap_res = pool.remap(server); if let Err(e) = remap_res { error!(pool.logger, "Failed to map imports from {}", server.shard; "error" => e.to_string()); @@ -1470,8 +1482,15 @@ impl PoolCoordinator { Ok(()) } + /// Return a list of all pools, regardless of whether they are ready or + /// not. pub fn pools(&self) -> Vec> { - self.pools.lock().unwrap().values().cloned().collect() + self.pools + .lock() + .unwrap() + .values() + .map(|state| state.get_unready()) + .collect::>() } pub fn servers(&self) -> Arc> { @@ -1486,14 +1505,12 @@ impl PoolCoordinator { } fn primary(&self) -> Result, StoreError> { - self.pools - .lock() - .unwrap() - .get(&*PRIMARY_SHARD) - .cloned() - .ok_or_else(|| { - constraint_violation!("internal error: primary shard not found in pool coordinator") - }) + let map = self.pools.lock().unwrap(); + let pool_state = map.get(&*&PRIMARY_SHARD).ok_or_else(|| { + constraint_violation!("internal error: primary shard not found in pool coordinator") + })?; + + Ok(pool_state.get_unready()) } /// Setup all pools the coordinator knows about and return the number of @@ -1528,7 +1545,7 @@ impl PoolCoordinator { /// A helper to call `setup` from a non-async context. Returns `true` if /// the setup was actually run, i.e. if `pool` was available - fn setup_bg(self: Arc, pool: Arc) -> Result { + fn setup_bg(self: Arc, pool: PoolState) -> Result { let migrated = graph::spawn_thread("database-setup", move || { graph::block_on(self.setup(vec![pool.clone()])) }) @@ -1555,37 +1572,43 @@ impl PoolCoordinator { /// This method tolerates databases that are not available and will /// simply ignore them. The returned count is the number of pools that /// were successfully set up. - async fn setup(&self, pools: Vec>) -> Result { - type MigrationCounts = Vec<(Arc, MigrationCount)>; + /// + /// When this method returns, the entries from `states` that were + /// successfully set up will be marked as ready. The method returns the + /// number of pools that were set up + async fn setup(&self, states: Vec) -> Result { + type MigrationCounts = Vec<(PoolState, MigrationCount)>; /// Filter out pools that are not available. We don't want to fail /// because one of the pools is not available. We will just ignore /// them and continue with the others. 
fn filter_unavailable( - (pool, res): (Arc, Result), - ) -> Option> { + (state, res): (PoolState, Result), + ) -> Option> { if let Err(StoreError::DatabaseUnavailable) = res { error!( - pool.logger, + state.logger, "migrations failed because database was unavailable" ); None } else { - Some(res) + Some(res.map(|count| (state, count))) } } /// Migrate all pools in parallel async fn migrate( - pools: &[Arc], + pools: &[PoolState], servers: &[ForeignServer], ) -> Result { let futures = pools .iter() - .map(|pool| { - pool.cheap_clone() + .map(|state| { + state + .get_unready() + .cheap_clone() .migrate(servers) - .map(|res| (pool.cheap_clone(), res)) + .map(|res| (state.cheap_clone(), res)) }) .collect::>(); join_all(futures) @@ -1599,26 +1622,32 @@ impl PoolCoordinator { async fn propagate( this: &PoolCoordinator, migrated: MigrationCounts, - ) -> Result { + ) -> Result, StoreError> { let futures = migrated .into_iter() - .map(|(pool, count)| async move { + .map(|(state, count)| async move { + let pool = state.get_unready(); let res = this.propagate(&pool, count); - (pool.cheap_clone(), res) + (state.cheap_clone(), res) }) .collect::>(); join_all(futures) .await .into_iter() .filter_map(filter_unavailable) + .map(|res| res.map(|(state, ())| state)) .collect::, _>>() - .map(|v| v.len()) } let primary = self.primary()?; let mut pconn = primary.get().map_err(|_| StoreError::DatabaseUnavailable)?; + let pools: Vec<_> = states + .into_iter() + .filter(|pool| pool.needs_setup()) + .collect(); + // Everything here happens under the migration lock. Anything called // from here should not try to get that lock, otherwise the process // will deadlock @@ -1636,6 +1665,13 @@ impl PoolCoordinator { }) .await; debug!(self.logger, "Database setup finished"); - res + + // Mark all pool states that we set up completely as ready + res.map(|states| { + for state in &states { + state.set_ready(); + } + states.len() + }) } } From c23ee969d1d420d0cb331020a3ba335adce27799 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sun, 6 Apr 2025 11:14:45 -0700 Subject: [PATCH 076/160] store: Avoid running setup unnecessarily if several threads try to run it --- store/postgres/src/connection_pool.rs | 33 +++++++++++++++++++-------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index 9a6afe9a37e..6ff46649494 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -1643,10 +1643,13 @@ impl PoolCoordinator { let mut pconn = primary.get().map_err(|_| StoreError::DatabaseUnavailable)?; - let pools: Vec<_> = states + let states: Vec<_> = states .into_iter() .filter(|pool| pool.needs_setup()) .collect(); + if states.is_empty() { + return Ok(0); + } // Everything here happens under the migration lock. 
Anything called // from here should not try to get that lock, otherwise the process @@ -1654,24 +1657,34 @@ impl PoolCoordinator { debug!(self.logger, "Waiting for migration lock"); let res = with_migration_lock(&mut pconn, |_| async { debug!(self.logger, "Migration lock acquired"); + + // While we were waiting for the migration lock, another thread + // might have already run this + let states: Vec<_> = states + .into_iter() + .filter(|pool| pool.needs_setup()) + .collect(); + if states.is_empty() { + debug!(self.logger, "No pools to set up"); + return Ok(0); + } + primary.drop_cross_shard_views()?; - let migrated = migrate(&pools, self.servers.as_ref()).await?; + let migrated = migrate(&states, self.servers.as_ref()).await?; let propagated = propagate(&self, migrated).await?; primary.create_cross_shard_views(&self.servers)?; - Ok(propagated) - }) - .await; - debug!(self.logger, "Database setup finished"); - // Mark all pool states that we set up completely as ready - res.map(|states| { - for state in &states { + for state in &propagated { state.set_ready(); } - states.len() + Ok(propagated.len()) }) + .await; + debug!(self.logger, "Database setup finished"); + + res } } From 4be64c16f575c9da424ac730e26a497f829683ee Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 7 Apr 2025 17:17:43 -0700 Subject: [PATCH 077/160] store: Do not create aggregate indexes twice When index creation was postponed, we would create aggregate indexes twice. They could also be postponed, but we'll leave that for another day. --- store/postgres/src/relational/ddl.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/store/postgres/src/relational/ddl.rs b/store/postgres/src/relational/ddl.rs index e85281a5899..a3c4ed6885e 100644 --- a/store/postgres/src/relational/ddl.rs +++ b/store/postgres/src/relational/ddl.rs @@ -422,8 +422,9 @@ impl Table { } } else { self.create_attribute_indexes(out)?; + self.create_aggregate_indexes(schema, out)?; } - self.create_aggregate_indexes(schema, out) + Ok(()) } pub fn exclusion_ddl(&self, out: &mut String) -> fmt::Result { From 784150edb9b2de9b77d5ba449a5cb724ea8b230b Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 7 Apr 2025 09:17:26 -0700 Subject: [PATCH 078/160] store: Use a fdw connection for copy::is_source --- store/postgres/src/copy.rs | 25 +++++++++++++++++++++---- store/postgres/src/deployment_store.rs | 3 +-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 9e9ba187c6a..e0ae71eab3e 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -37,7 +37,7 @@ use graph::{ info, lazy_static, o, warn, BlockNumber, BlockPtr, CheapClone, Logger, StoreError, ENV_VARS, }, schema::EntityType, - slog::error, + slog::{debug, error}, }; use itertools::Itertools; @@ -113,16 +113,33 @@ table! { } /// Return `true` if the site is the source of a copy operation. The copy -/// operation might be just queued or in progress already -pub fn is_source(conn: &mut PgConnection, site: &Site) -> Result { +/// operation might be just queued or in progress already. This method will +/// block until a fdw connection becomes available. +pub fn is_source(logger: &Logger, pool: &ConnectionPool, site: &Site) -> Result { use active_copies as ac; + // We use a fdw connection to check if the site is being copied. 
If we + // used an ordinary connection and there are many calls to this method, + // postgres_fdw might open an unmanageable number of connections into + // the primary, which makes the primary run out of connections + let mut last_log = Instant::now(); + let mut conn = pool.get_fdw(&logger, || { + if last_log.elapsed() > LOG_INTERVAL { + last_log = Instant::now(); + debug!( + logger, + "Waiting for fdw connection to check if site {} is being copied", site.namespace + ); + } + false + })?; + select(diesel::dsl::exists( ac::table .filter(ac::src.eq(site.id)) .filter(ac::cancelled_at.is_null()), )) - .get_result::(conn) + .get_result::(&mut conn) .map_err(StoreError::from) } diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index 91230d63b7b..c78b06be46d 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -1235,8 +1235,7 @@ impl DeploymentStore { req: PruneRequest, ) -> Result<(), StoreError> { { - let mut conn = store.get_conn()?; - if copy::is_source(&mut conn, &site)? { + if copy::is_source(&logger, &store.pool, &site)? { debug!( logger, "Skipping pruning since this deployment is being copied" From 1a2aaf3fe07c78ec8e1b9f18022bf829a54bc48d Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 1 Apr 2025 14:07:49 -0700 Subject: [PATCH 079/160] store: Try to spawn copy workers more frequently We used to only try to spawn a new copy worker when we were finished copying a table. But that can take a long time, and in between some connections might have become available. We now check every few minutes if a connection is available and spawn a new worker if it is --- store/postgres/src/copy.rs | 113 +++++++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 24 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index e0ae71eab3e..effe2950ee2 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -32,12 +32,13 @@ use diesel::{ }; use graph::{ constraint_violation, - futures03::future::select_all, + futures03::{future::select_all, FutureExt as _}, prelude::{ info, lazy_static, o, warn, BlockNumber, BlockPtr, CheapClone, Logger, StoreError, ENV_VARS, }, schema::EntityType, slog::{debug, error}, + tokio, }; use itertools::Itertools; @@ -687,6 +688,21 @@ impl CopyProgress { } } +enum WorkerResult { + Ok(CopyTableWorker), + Err(StoreError), + Wake, +} + +impl From> for WorkerResult { + fn from(result: Result) -> Self { + match result { + Ok(worker) => WorkerResult::Ok(worker), + Err(e) => WorkerResult::Err(e), + } + } +} + /// A helper to run copying of one table. We need to thread `conn` and /// `table` from the control loop to the background worker and back again to /// the control loop. This worker facilitates that @@ -705,11 +721,7 @@ impl CopyTableWorker { } } - async fn run( - mut self, - logger: Logger, - progress: Arc, - ) -> Result { + async fn run(mut self, logger: Logger, progress: Arc) -> WorkerResult { let object = self.table.dst.object.cheap_clone(); graph::spawn_blocking_allow_panic(move || { self.result = self.run_inner(logger, &progress); @@ -717,6 +729,7 @@ impl CopyTableWorker { }) .await .map_err(|e| constraint_violation!("copy worker for {} panicked: {}", object, e)) + .into() } fn run_inner(&mut self, logger: Logger, progress: &CopyProgress) -> Result { @@ -812,6 +825,57 @@ impl CopyTableWorker { } } +/// A helper to manage the workers that are copying data. 
Besides the actual +/// workers it also keeps a worker that wakes us up periodically to give us +/// a chance to create more workers if there are database connections +/// available +struct Workers { + /// The list of workers that are currently running. This will always + /// include a future that wakes us up periodically + futures: Vec>>>, +} + +impl Workers { + fn new() -> Self { + Self { + futures: vec![Self::waker()], + } + } + + fn add(&mut self, worker: Pin>>) { + self.futures.push(worker); + } + + fn has_work(&self) -> bool { + self.futures.len() > 1 + } + + async fn select(&mut self) -> WorkerResult { + use WorkerResult::*; + + let futures = std::mem::take(&mut self.futures); + let (result, _idx, remaining) = select_all(futures).await; + self.futures = remaining; + match result { + Ok(_) | Err(_) => { /* nothing to do */ } + Wake => { + self.futures.push(Self::waker()); + } + } + result + } + + fn waker() -> Pin>> { + let sleep = tokio::time::sleep(ENV_VARS.store.batch_target_duration); + Box::pin(sleep.map(|()| WorkerResult::Wake)) + } + + /// Return the number of workers that are not the waker + fn len(&self) -> usize { + self.futures.len() - 1 + } +} + /// A helper for copying subgraphs pub struct Connection { /// The connection pool for the shard that will contain the destination @@ -926,7 +990,7 @@ impl Connection { &mut self, state: &mut CopyState, progress: &Arc, - ) -> Option>>>> { + ) -> Option>>> { let Some(conn) = self.conn.take() else { return None; }; @@ -947,7 +1011,7 @@ impl Connection { &mut self, state: &mut CopyState, progress: &Arc, - ) -> Option>>>> { + ) -> Option>>> { // It's important that we get the connection before the table since // we remove the table from the state and could drop it otherwise let Some(conn) = self @@ -989,19 +1053,15 @@ impl Connection { /// Wait for all workers to finish. This is called when we a worker has /// failed with an error that forces us to abort copying - async fn cancel_workers( - &mut self, - progress: Arc, - mut workers: Vec>>>>, - ) { + async fn cancel_workers(&mut self, progress: Arc, mut workers: Workers) { progress.cancel(); error!( self.logger, "copying encountered an error; waiting for all workers to finish" ); - while !workers.is_empty() { - let (result, _, remaining) = select_all(workers).await; - workers = remaining; + while workers.has_work() { + use WorkerResult::*; + let result = workers.select().await; match result { Ok(worker) => { self.conn = Some(worker.conn); @@ -1010,6 +1070,7 @@ impl Connection { /* Ignore; we had an error previously */ error!(self.logger, "copy worker panicked: {}", e); } + Wake => { /* Ignore; this is just a waker */ } } } } @@ -1031,14 +1092,14 @@ impl Connection { // // The loop has to be very careful about terminating early so that // we do not ever leave the loop with `self.conn == None` - let mut workers = Vec::new(); - while !state.unfinished.is_empty() || !workers.is_empty() { + let mut workers = Workers::new(); + while !state.unfinished.is_empty() || workers.has_work() { // We usually add at least one job here, except if we are out of // tables to copy. 
In that case, we go through the `while` loop // every time one of the tables we are currently copying // finishes if let Some(worker) = self.default_worker(&mut state, &progress) { - workers.push(worker); + workers.add(worker); } loop { if workers.len() >= self.workers { @@ -1047,24 +1108,24 @@ impl Connection { let Some(worker) = self.extra_worker(&mut state, &progress) else { break; }; - workers.push(worker); + workers.add(worker); } self.assert_progress(workers.len(), &state)?; - let (result, _idx, remaining) = select_all(workers).await; - workers = remaining; + let result = workers.select().await; // Analyze `result` and take another trip through the loop if // everything is ok; wait for pending workers and return if // there was an error or if copying was cancelled. + use WorkerResult as W; match result { - Err(e) => { + W::Err(e) => { // This is a panic in the background task. We need to // cancel all other tasks and return the error self.cancel_workers(progress, workers).await; return Err(e); } - Ok(worker) => { + W::Ok(worker) => { // Put the connection back into self.conn so that we can use it // in the next iteration. self.conn = Some(worker.conn); @@ -1090,6 +1151,10 @@ impl Connection { } } } + W::Wake => { + // nothing to do, just try to create more workers by + // going through the loop again + } }; } debug_assert!(self.conn.is_some()); From 16521ee8e69ab78999edecd108f2d2c06e327b9c Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 9 Apr 2025 17:45:15 -0700 Subject: [PATCH 080/160] store: Do not use a fdw connection to check active_copies Copying checks the active_copies table through the primary_public.active_copies foreign table to determine whether the copy has been cancelled and it should stop. With a large number of copies running, that causes a large number of postgres_fdw connections into the primary, which can overwhelm the primary. Instead, we now pass the connection pool for the primary into the copy code so that it can do this check without involving postgres_fdw. --- store/postgres/src/copy.rs | 73 ++++++++------------------ store/postgres/src/deployment_store.rs | 15 ++++-- store/postgres/src/primary.rs | 51 +++++++++++++++++- store/postgres/src/subgraph_store.rs | 9 +++- 4 files changed, 90 insertions(+), 58 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index effe2950ee2..a20c9b2a29d 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -37,7 +37,7 @@ use graph::{ info, lazy_static, o, warn, BlockNumber, BlockPtr, CheapClone, Logger, StoreError, ENV_VARS, }, schema::EntityType, - slog::{debug, error}, + slog::error, tokio, }; use itertools::Itertools; @@ -45,7 +45,7 @@ use itertools::Itertools; use crate::{ advisory_lock, catalog, deployment, dynds::DataSourcesTable, - primary::{DeploymentId, Site}, + primary::{DeploymentId, Primary, Site}, relational::index::IndexList, vid_batcher::{VidBatcher, VidRange}, }; @@ -104,46 +104,6 @@ table! { } } -// This is the same as primary::active_copies, but mapped into each shard -table! { - primary_public.active_copies(dst) { - src -> Integer, - dst -> Integer, - cancelled_at -> Nullable, - } -} - -/// Return `true` if the site is the source of a copy operation. The copy -/// operation might be just queued or in progress already. This method will -/// block until a fdw connection becomes available. 
-pub fn is_source(logger: &Logger, pool: &ConnectionPool, site: &Site) -> Result { - use active_copies as ac; - - // We use a fdw connection to check if the site is being copied. If we - // used an ordinary connection and there are many calls to this method, - // postgres_fdw might open an unmanageable number of connections into - // the primary, which makes the primary run out of connections - let mut last_log = Instant::now(); - let mut conn = pool.get_fdw(&logger, || { - if last_log.elapsed() > LOG_INTERVAL { - last_log = Instant::now(); - debug!( - logger, - "Waiting for fdw connection to check if site {} is being copied", site.namespace - ); - } - false - })?; - - select(diesel::dsl::exists( - ac::table - .filter(ac::src.eq(site.id)) - .filter(ac::cancelled_at.is_null()), - )) - .get_result::(&mut conn) - .map_err(StoreError::from) -} - #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum Status { Finished, @@ -161,6 +121,7 @@ struct CopyState { impl CopyState { fn new( conn: &mut PgConnection, + primary: Primary, src: Arc, dst: Arc, target_block: BlockPtr, @@ -199,9 +160,9 @@ impl CopyState { src.site.id )); } - Self::load(conn, src, dst, target_block) + Self::load(conn, primary, src, dst, target_block) } - None => Self::create(conn, src, dst, target_block), + None => Self::create(conn, primary.cheap_clone(), src, dst, target_block), }?; Ok(state) @@ -209,11 +170,12 @@ impl CopyState { fn load( conn: &mut PgConnection, + primary: Primary, src: Arc, dst: Arc, target_block: BlockPtr, ) -> Result { - let tables = TableState::load(conn, src.as_ref(), dst.as_ref())?; + let tables = TableState::load(conn, primary, src.as_ref(), dst.as_ref())?; let (finished, mut unfinished): (Vec<_>, Vec<_>) = tables.into_iter().partition(|table| table.finished()); unfinished.sort_by_key(|table| table.dst.object.to_string()); @@ -228,6 +190,7 @@ impl CopyState { fn create( conn: &mut PgConnection, + primary: Primary, src: Arc, dst: Arc, target_block: BlockPtr, @@ -253,6 +216,7 @@ impl CopyState { .map(|src_table| { TableState::init( conn, + primary.cheap_clone(), dst.site.clone(), &src, src_table.clone(), @@ -354,6 +318,7 @@ pub(crate) fn source( /// transformation. See `CopyEntityBatchQuery` for the details of what /// exactly that means struct TableState { + primary: Primary, src: Arc, dst: Arc
, dst_site: Arc, @@ -364,6 +329,7 @@ struct TableState { impl TableState { fn init( conn: &mut PgConnection, + primary: Primary, dst_site: Arc, src_layout: &Layout, src: Arc
, @@ -373,6 +339,7 @@ impl TableState { let vid_range = VidRange::for_copy(conn, &src, target_block)?; let batcher = VidBatcher::load(conn, &src_layout.site.namespace, src.as_ref(), vid_range)?; Ok(Self { + primary, src, dst, dst_site, @@ -387,6 +354,7 @@ impl TableState { fn load( conn: &mut PgConnection, + primary: Primary, src_layout: &Layout, dst_layout: &Layout, ) -> Result, StoreError> { @@ -450,6 +418,7 @@ impl TableState { .with_batch_size(size as usize); Ok(TableState { + primary: primary.cheap_clone(), src, dst, dst_site: dst_layout.site.clone(), @@ -516,13 +485,8 @@ impl TableState { } fn is_cancelled(&self, conn: &mut PgConnection) -> Result { - use active_copies as ac; - let dst = self.dst_site.as_ref(); - let canceled = ac::table - .filter(ac::dst.eq(dst.id)) - .select(ac::cancelled_at.is_not_null()) - .get_result::(conn)?; + let canceled = self.primary.is_copy_cancelled(dst)?; if canceled { use copy_state as cs; @@ -893,6 +857,7 @@ pub struct Connection { /// `self.transaction` conn: Option, pool: ConnectionPool, + primary: Primary, workers: usize, src: Arc, dst: Arc, @@ -910,6 +875,7 @@ impl Connection { /// is available. pub fn new( logger: &Logger, + primary: Primary, pool: ConnectionPool, src: Arc, dst: Arc, @@ -942,6 +908,7 @@ impl Connection { logger, conn, pool, + primary, workers: ENV_VARS.store.batch_workers, src, dst, @@ -1079,7 +1046,9 @@ impl Connection { let src = self.src.clone(); let dst = self.dst.clone(); let target_block = self.target_block.clone(); - let mut state = self.transaction(|conn| CopyState::new(conn, src, dst, target_block))?; + let primary = self.primary.cheap_clone(); + let mut state = + self.transaction(|conn| CopyState::new(conn, primary, src, dst, target_block))?; let progress = Arc::new(CopyProgress::new(self.logger.cheap_clone(), &state)); progress.start(); diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index c78b06be46d..e497430c2bf 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -51,11 +51,11 @@ use crate::block_range::{BLOCK_COLUMN, BLOCK_RANGE_COLUMN}; use crate::deployment::{self, OnSync}; use crate::detail::ErrorDetail; use crate::dynds::DataSourcesTable; -use crate::primary::DeploymentId; +use crate::primary::{DeploymentId, Primary}; use crate::relational::index::{CreateIndex, IndexList, Method}; use crate::relational::{Layout, LayoutCache, SqlName, Table}; use crate::relational_queries::FromEntityData; -use crate::{advisory_lock, catalog, copy, retry}; +use crate::{advisory_lock, catalog, retry}; use crate::{connection_pool::ConnectionPool, detail}; use crate::{dynds, primary::Site}; @@ -93,6 +93,8 @@ type PruneHandle = JoinHandle>; pub struct StoreInner { logger: Logger, + primary: Primary, + pool: ConnectionPool, read_only_pools: Vec, @@ -130,6 +132,7 @@ impl Deref for DeploymentStore { impl DeploymentStore { pub fn new( logger: &Logger, + primary: Primary, pool: ConnectionPool, read_only_pools: Vec, mut pool_weights: Vec, @@ -160,6 +163,7 @@ impl DeploymentStore { // Create the store let store = StoreInner { logger: logger.clone(), + primary, pool, read_only_pools, replica_order, @@ -1235,7 +1239,7 @@ impl DeploymentStore { req: PruneRequest, ) -> Result<(), StoreError> { { - if copy::is_source(&logger, &store.pool, &site)? { + if store.is_source(&site)? 
{ debug!( logger, "Skipping pruning since this deployment is being copied" @@ -1520,6 +1524,7 @@ impl DeploymentStore { // with the corresponding tables in `self` let copy_conn = crate::copy::Connection::new( logger, + self.primary.cheap_clone(), self.pool.clone(), src.clone(), dst.clone(), @@ -1848,6 +1853,10 @@ impl DeploymentStore { }) .await } + + fn is_source(&self, site: &Site) -> Result { + self.primary.is_source(site) + } } /// Tries to fetch a [`Table`] either by its Entity name or its SQL name. diff --git a/store/postgres/src/primary.rs b/store/postgres/src/primary.rs index f329ae4bba2..39df898ba32 100644 --- a/store/postgres/src/primary.rs +++ b/store/postgres/src/primary.rs @@ -36,6 +36,7 @@ use graph::{ store::scalar::ToPrimitive, subgraph::{status, DeploymentFeatures}, }, + derive::CheapClone, prelude::{ anyhow, chrono::{DateTime, Utc}, @@ -53,9 +54,9 @@ use maybe_owned::MaybeOwnedMut; use std::{ borrow::Borrow, collections::HashMap, - convert::TryFrom, - convert::TryInto, + convert::{TryFrom, TryInto}, fmt, + sync::Arc, time::{SystemTime, UNIX_EPOCH}, }; @@ -1826,6 +1827,52 @@ impl<'a> Connection<'a> { } } +/// A limited interface to query the primary database. +#[derive(Clone, CheapClone)] +pub struct Primary { + pool: Arc, +} + +impl Primary { + pub fn new(pool: Arc) -> Self { + // This really indicates a programming error + if pool.shard != *PRIMARY_SHARD { + panic!("Primary pool must be the primary shard"); + } + + Primary { pool } + } + + /// Return `true` if the site is the source of a copy operation. The copy + /// operation might be just queued or in progress already. This method will + /// block until a fdw connection becomes available. + pub fn is_source(&self, site: &Site) -> Result { + use active_copies as ac; + + let mut conn = self.pool.get()?; + + select(diesel::dsl::exists( + ac::table + .filter(ac::src.eq(site.id)) + .filter(ac::cancelled_at.is_null()), + )) + .get_result::(&mut conn) + .map_err(StoreError::from) + } + + pub fn is_copy_cancelled(&self, dst: &Site) -> Result { + use active_copies as ac; + + let mut conn = self.pool.get()?; + + ac::table + .filter(ac::dst.eq(dst.id)) + .select(ac::cancelled_at.is_not_null()) + .get_result::(&mut conn) + .map_err(StoreError::from) + } +} + /// Return `true` if we deem this installation to be empty, defined as /// having no deployments and no subgraph names in the database pub fn is_empty(conn: &mut PgConnection) -> Result { diff --git a/store/postgres/src/subgraph_store.rs b/store/postgres/src/subgraph_store.rs index 0beeadf345d..339c66cee3f 100644 --- a/store/postgres/src/subgraph_store.rs +++ b/store/postgres/src/subgraph_store.rs @@ -39,7 +39,7 @@ use graph::{ use crate::{ connection_pool::ConnectionPool, deployment::{OnSync, SubgraphHealth}, - primary::{self, DeploymentId, Mirror as PrimaryMirror, Site}, + primary::{self, DeploymentId, Mirror as PrimaryMirror, Primary, Site}, relational::{ index::{IndexList, Method}, Layout, @@ -360,6 +360,12 @@ impl SubgraphStoreInner { sender: Arc, registry: Arc, ) -> Self { + let primary = stores + .iter() + .find(|(name, _, _, _)| name == &*PRIMARY_SHARD) + .map(|(_, pool, _, _)| Primary::new(Arc::new(pool.clone()))) + .expect("primary shard must be present"); + let mirror = { let pools = HashMap::from_iter( stores @@ -376,6 +382,7 @@ impl SubgraphStoreInner { name, Arc::new(DeploymentStore::new( &logger, + primary.cheap_clone(), main_pool, read_only_pools, weights, From ee88833b53f548200eda2e16840511e7cbb05c29 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 
9 Apr 2025 18:33:27 -0700 Subject: [PATCH 081/160] store: Log more when copy workers have an error --- store/postgres/src/copy.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index a20c9b2a29d..fd736276cbd 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -1091,6 +1091,7 @@ impl Connection { W::Err(e) => { // This is a panic in the background task. We need to // cancel all other tasks and return the error + error!(self.logger, "copy worker panicked: {}", e); self.cancel_workers(progress, workers).await; return Err(e); } @@ -1115,6 +1116,7 @@ impl Connection { return Ok(Status::Cancelled); } (Err(e), _) => { + error!(self.logger, "copy worker had an error: {}", e); self.cancel_workers(progress, workers).await; return Err(e); } From 63b2a5c2d3a67e21ffbaac2d74a843cd049d24aa Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 9 Apr 2025 23:06:46 -0700 Subject: [PATCH 082/160] store: Make sure we use the right connection to unlock the copy lock Otherwise, the lock will linger and can block an attempt to restart a copy that failed for transient reasons --- store/postgres/src/copy.rs | 107 ++++++++++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 19 deletions(-) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index fd736276cbd..22ddee394f6 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -64,8 +64,6 @@ const ACCEPTABLE_REPLICATION_LAG: Duration = Duration::from_secs(30); /// the lag again const REPLICATION_SLEEP: Duration = Duration::from_secs(10); -type PooledPgConnection = PooledConnection>; - lazy_static! { static ref STATEMENT_TIMEOUT: Option = ENV_VARS .store @@ -667,17 +665,77 @@ impl From> for WorkerResult { } } +/// We pass connections back and forth between the control loop and various +/// workers. We need to make sure that we end up with the connection that +/// was used to acquire the copy lock in the right place so we can release +/// the copy lock which is only possible with the connection that acquired +/// it. +/// +/// This struct helps us with that. It wraps a connection and tracks whether +/// the connection was used to acquire the copy lock +struct LockTrackingConnection { + inner: PooledConnection>, + has_lock: bool, +} + +impl LockTrackingConnection { + fn new(inner: PooledConnection>) -> Self { + Self { + inner, + has_lock: false, + } + } + + fn transaction(&mut self, f: F) -> Result + where + F: FnOnce(&mut PgConnection) -> Result, + { + let conn = &mut self.inner; + conn.transaction(|conn| f(conn)) + } + + /// Put `self` into `other` if `self` has the lock. + fn extract(self, other: &mut Option) { + if self.has_lock { + *other = Some(self); + } + } + + fn lock(&mut self, logger: &Logger, dst: &Site) -> Result<(), StoreError> { + if self.has_lock { + warn!(logger, "already acquired copy lock for {}", dst); + return Ok(()); + } + advisory_lock::lock_copying(&mut self.inner, dst)?; + self.has_lock = true; + Ok(()) + } + + fn unlock(&mut self, logger: &Logger, dst: &Site) -> Result<(), StoreError> { + if !self.has_lock { + error!( + logger, + "tried to release copy lock for {} even though we are not the owner", dst + ); + return Ok(()); + } + advisory_lock::unlock_copying(&mut self.inner, dst)?; + self.has_lock = false; + Ok(()) + } +} + /// A helper to run copying of one table. We need to thread `conn` and /// `table` from the control loop to the background worker and back again to /// the control loop. 
This worker facilitates that struct CopyTableWorker { - conn: PooledPgConnection, + conn: LockTrackingConnection, table: TableState, result: Result, } impl CopyTableWorker { - fn new(conn: PooledPgConnection, table: TableState) -> Self { + fn new(conn: LockTrackingConnection, table: TableState) -> Self { Self { conn, table, @@ -699,7 +757,7 @@ impl CopyTableWorker { fn run_inner(&mut self, logger: Logger, progress: &CopyProgress) -> Result { use Status::*; - let conn = &mut self.conn; + let conn = &mut self.conn.inner; progress.start_table(&self.table); while !self.table.finished() { // It is important that this check happens outside the write @@ -855,7 +913,7 @@ pub struct Connection { /// individual table. Except for that case, this will always be /// `Some(..)`. Most code shouldn't access `self.conn` directly, but use /// `self.transaction` - conn: Option, + conn: Option, pool: ConnectionPool, primary: Primary, workers: usize, @@ -901,9 +959,9 @@ impl Connection { } false })?; - let conn = Some(conn); let src_manifest_idx_and_name = Arc::new(src_manifest_idx_and_name); let dst_manifest_idx_and_name = Arc::new(dst_manifest_idx_and_name); + let conn = Some(LockTrackingConnection::new(conn)); Ok(Self { logger, conn, @@ -990,6 +1048,7 @@ impl Connection { let Some(table) = state.unfinished.pop() else { return None; }; + let conn = LockTrackingConnection::new(conn); let worker = CopyTableWorker::new(conn, table); Some(Box::pin( @@ -1031,7 +1090,7 @@ impl Connection { let result = workers.select().await; match result { Ok(worker) => { - self.conn = Some(worker.conn); + worker.conn.extract(&mut self.conn); } Err(e) => { /* Ignore; we had an error previously */ @@ -1098,7 +1157,7 @@ impl Connection { W::Ok(worker) => { // Put the connection back into self.conn so that we can use it // in the next iteration. - self.conn = Some(worker.conn); + worker.conn.extract(&mut self.conn); match (worker.result, progress.is_cancelled()) { (Ok(Status::Finished), false) => { @@ -1207,20 +1266,30 @@ impl Connection { ); let dst_site = self.dst.site.cheap_clone(); - self.transaction(|conn| advisory_lock::lock_copying(conn, &dst_site))?; + let Some(conn) = self.conn.as_mut() else { + return Err(constraint_violation!( + "copy connection went missing (copy_data)" + )); + }; + conn.lock(&self.logger, &dst_site)?; let res = self.copy_data_internal(index_list).await; - if self.conn.is_none() { - // A background worker panicked and left us without our - // dedicated connection, but we still need to release the copy - // lock; get a normal connection, not from the fdw pool for that - // as that will be much less contended. We won't be holding on - // to the connection for long as `res` will be an error and we - // will abort starting this subgraph - self.conn = Some(self.pool.get()?); + match self.conn.as_mut() { + None => { + // A background worker panicked and left us without our + // dedicated connection; we would need to get that + // connection to unlock the advisory lock. 
We can't do that, + // so we just log an error + warn!( + self.logger, + "can't unlock copy lock since the default worker panicked; lock will linger until session ends" + ); + } + Some(conn) => { + conn.unlock(&self.logger, &dst_site)?; + } } - self.transaction(|conn| advisory_lock::unlock_copying(conn, &dst_site))?; if matches!(res, Ok(Status::Cancelled)) { warn!(&self.logger, "Copying was cancelled and is incomplete"); From 17360f56c7657e49d2b5da2fafa5d8d633d556f7 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 10 Apr 2025 09:42:21 -0700 Subject: [PATCH 083/160] graph: Allow remove followed by remove in write batches If subgraphs delete entities without checking if they exist, we get two removes in a row. In the database, the second remove would just lead to a query that changes nothing. We'll do the same when putting a write batch together. Fixes https://github.com/graphprotocol/graph-node/issues/5449 --- graph/src/components/store/write.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/graph/src/components/store/write.rs b/graph/src/components/store/write.rs index aa56fdcc910..6f899633bd8 100644 --- a/graph/src/components/store/write.rs +++ b/graph/src/components/store/write.rs @@ -439,7 +439,7 @@ impl RowGroup { // clamping an old version match (&*prev_row, &row) { (Insert { end: None, .. } | Overwrite { end: None, .. }, Insert { .. }) - | (Remove { .. }, Overwrite { .. } | Remove { .. }) + | (Remove { .. }, Overwrite { .. }) | ( Insert { end: Some(_), .. } | Overwrite { end: Some(_), .. }, Overwrite { .. } | Remove { .. }, @@ -450,6 +450,11 @@ impl RowGroup { row )) } + (Remove { .. }, Remove { .. }) => { + // Ignore the new row, since prev_row is already a + // delete. This can happen when subgraphs delete + // entities without checking if they even exist + } ( Insert { end: Some(_), .. } | Overwrite { end: Some(_), .. } | Remove { .. }, Insert { .. }, From c0516729d432b7c8aa39ec88cfb0804ca6a1a0f5 Mon Sep 17 00:00:00 2001 From: Zoran Cvetkov <36600146+zorancv@users.noreply.github.com> Date: Mon, 14 Apr 2025 12:16:53 +0300 Subject: [PATCH 084/160] test: fix reorg threshold --- chain/ethereum/src/chain.rs | 2 +- graph/src/data/subgraph/mod.rs | 4 +-- graph/src/env/mod.rs | 46 +++++++++++++++++--------- node/src/chain.rs | 2 +- node/src/manager/commands/prune.rs | 6 ++-- node/src/manager/commands/rewind.rs | 4 +-- store/postgres/src/block_store.rs | 2 +- store/postgres/src/deployment.rs | 6 +++- store/postgres/src/deployment_store.rs | 2 +- tests/src/config.rs | 3 +- tests/src/fixture/ethereum.rs | 2 +- tests/tests/runner_tests.rs | 6 +++- 12 files changed, 55 insertions(+), 30 deletions(-) diff --git a/chain/ethereum/src/chain.rs b/chain/ethereum/src/chain.rs index 117e3033b18..911d4d3ebfe 100644 --- a/chain/ethereum/src/chain.rs +++ b/chain/ethereum/src/chain.rs @@ -614,7 +614,7 @@ impl Blockchain for Chain { // present in the DB. 
Box::new(PollingBlockIngestor::new( logger, - graph::env::ENV_VARS.reorg_threshold, + graph::env::ENV_VARS.reorg_threshold(), self.chain_client(), self.chain_store().cheap_clone(), self.polling_ingestor_interval, diff --git a/graph/src/data/subgraph/mod.rs b/graph/src/data/subgraph/mod.rs index 3e7bc7061ab..77c8ba67d36 100644 --- a/graph/src/data/subgraph/mod.rs +++ b/graph/src/data/subgraph/mod.rs @@ -504,9 +504,9 @@ impl Graft { // The graft point must be at least `reorg_threshold` blocks // behind the subgraph head so that a reorg can not affect the // data that we copy for grafting - (Some(ptr), true) if self.block + ENV_VARS.reorg_threshold > ptr.number => Err(GraftBaseInvalid(format!( + (Some(ptr), true) if self.block + ENV_VARS.reorg_threshold() > ptr.number => Err(GraftBaseInvalid(format!( "failed to graft onto `{}` at block {} since it's only at block {} which is within the reorg threshold of {} blocks", - self.base, self.block, ptr.number, ENV_VARS.reorg_threshold + self.base, self.block, ptr.number, ENV_VARS.reorg_threshold() ))), // If the base deployment is failed *and* the `graft.block` is not // less than the `base.block`, the graft shouldn't be permitted. diff --git a/graph/src/env/mod.rs b/graph/src/env/mod.rs index 48fa0ba4688..eff0ebea16e 100644 --- a/graph/src/env/mod.rs +++ b/graph/src/env/mod.rs @@ -15,9 +15,16 @@ use crate::{ runtime::gas::CONST_MAX_GAS_PER_HANDLER, }; +#[cfg(debug_assertions)] +use std::sync::Mutex; + lazy_static! { pub static ref ENV_VARS: EnvVars = EnvVars::from_env().unwrap(); } +#[cfg(debug_assertions)] +lazy_static! { + pub static ref TEST_WITH_NO_REORG: Mutex = Mutex::new(false); +} /// Panics if: /// - The value is not UTF8. @@ -181,7 +188,7 @@ pub struct EnvVars { pub static_filters_threshold: usize, /// Set by the environment variable `ETHEREUM_REORG_THRESHOLD`. The default /// value is 250 blocks. - pub reorg_threshold: BlockNumber, + reorg_threshold: BlockNumber, /// The time to wait between polls when using polling block ingestor. /// The value is set by `ETHERUM_POLLING_INTERVAL` in millis and the /// default is 1000. @@ -259,16 +266,6 @@ impl EnvVars { let mapping_handlers = InnerMappingHandlers::init_from_env()?.into(); let store = InnerStore::init_from_env()?.try_into()?; - // The default reorganization (reorg) threshold is set to 250. - // For testing purposes, we need to set this threshold to 0 because: - // 1. Many tests involve reverting blocks. - // 2. Blocks cannot be reverted below the reorg threshold. - // Therefore, during tests, we want to set the reorg threshold to 0. 
- let reorg_threshold = - inner - .reorg_threshold - .unwrap_or_else(|| if cfg!(debug_assertions) { 0 } else { 250 }); - Ok(Self { graphql, mappings: mapping_handlers, @@ -322,13 +319,15 @@ impl EnvVars { external_http_base_url: inner.external_http_base_url, external_ws_base_url: inner.external_ws_base_url, static_filters_threshold: inner.static_filters_threshold, - reorg_threshold, + reorg_threshold: inner.reorg_threshold, ingestor_polling_interval: Duration::from_millis(inner.ingestor_polling_interval), subgraph_settings: inner.subgraph_settings, prefer_substreams_block_streams: inner.prefer_substreams_block_streams, enable_dips_metrics: inner.enable_dips_metrics.0, history_blocks_override: inner.history_blocks_override, - min_history_blocks: inner.min_history_blocks.unwrap_or(2 * reorg_threshold), + min_history_blocks: inner + .min_history_blocks + .unwrap_or(2 * inner.reorg_threshold), dips_metrics_object_store_url: inner.dips_metrics_object_store_url, section_map: inner.section_map, firehose_grpc_max_decode_size_mb: inner.firehose_grpc_max_decode_size_mb, @@ -375,6 +374,23 @@ impl EnvVars { .filter(|x| !x.is_empty()) .collect() } + #[cfg(debug_assertions)] + pub fn reorg_threshold(&self) -> i32 { + // The default reorganization (reorg) threshold is set to 250. + // For testing purposes, we need to set this threshold to 0 because: + // 1. Many tests involve reverting blocks. + // 2. Blocks cannot be reverted below the reorg threshold. + // Therefore, during tests, we want to set the reorg threshold to 0. + if *TEST_WITH_NO_REORG.lock().unwrap() { + 0 + } else { + self.reorg_threshold + } + } + #[cfg(not(debug_assertions))] + pub fn reorg_threshold(&self) -> i32 { + self.reorg_threshold + } } impl Default for EnvVars { @@ -473,8 +489,8 @@ struct Inner { #[envconfig(from = "GRAPH_STATIC_FILTERS_THRESHOLD", default = "10000")] static_filters_threshold: usize, // JSON-RPC specific. 
- #[envconfig(from = "ETHEREUM_REORG_THRESHOLD")] - reorg_threshold: Option, + #[envconfig(from = "ETHEREUM_REORG_THRESHOLD", default = "250")] + reorg_threshold: BlockNumber, #[envconfig(from = "ETHEREUM_POLLING_INTERVAL", default = "1000")] ingestor_polling_interval: u64, #[envconfig(from = "GRAPH_EXPERIMENTAL_SUBGRAPH_SETTINGS")] diff --git a/node/src/chain.rs b/node/src/chain.rs index 239db116e55..4ff45b8211a 100644 --- a/node/src/chain.rs +++ b/node/src/chain.rs @@ -460,7 +460,7 @@ pub async fn networks_as_chains( Arc::new(adapter_selector), Arc::new(EthereumRuntimeAdapterBuilder {}), eth_adapters, - ENV_VARS.reorg_threshold, + ENV_VARS.reorg_threshold(), polling_interval, true, ); diff --git a/node/src/manager/commands/prune.rs b/node/src/manager/commands/prune.rs index c169577ee65..2c3c2ae2386 100644 --- a/node/src/manager/commands/prune.rs +++ b/node/src/manager/commands/prune.rs @@ -188,13 +188,13 @@ pub async fn run( println!("prune {deployment}"); println!(" latest: {latest}"); - println!(" final: {}", latest - ENV_VARS.reorg_threshold); + println!(" final: {}", latest - ENV_VARS.reorg_threshold()); println!(" earliest: {}\n", latest - history); let mut req = PruneRequest::new( &deployment, history, - ENV_VARS.reorg_threshold, + ENV_VARS.reorg_threshold(), status.earliest_block_number, latest, )?; @@ -217,7 +217,7 @@ pub async fn run( store.subgraph_store().set_history_blocks( &deployment, history, - ENV_VARS.reorg_threshold, + ENV_VARS.reorg_threshold(), )?; } diff --git a/node/src/manager/commands/rewind.rs b/node/src/manager/commands/rewind.rs index 339f2ec979a..629c4b6e70f 100644 --- a/node/src/manager/commands/rewind.rs +++ b/node/src/manager/commands/rewind.rs @@ -133,13 +133,13 @@ pub async fn run( let deployment_details = deployment_store.deployment_details_for_id(locator)?; let block_number_to = block_ptr_to.as_ref().map(|b| b.number).unwrap_or(0); - if block_number_to < deployment_details.earliest_block_number + ENV_VARS.reorg_threshold { + if block_number_to < deployment_details.earliest_block_number + ENV_VARS.reorg_threshold() { bail!( "The block number {} is not safe to rewind to for deployment {}. The earliest block number of this deployment is {}. You can only safely rewind to block number {}", block_ptr_to.as_ref().map(|b| b.number).unwrap_or(0), locator, deployment_details.earliest_block_number, - deployment_details.earliest_block_number + ENV_VARS.reorg_threshold + deployment_details.earliest_block_number + ENV_VARS.reorg_threshold() ); } } diff --git a/store/postgres/src/block_store.rs b/store/postgres/src/block_store.rs index f69267fff17..762a2642524 100644 --- a/store/postgres/src/block_store.rs +++ b/store/postgres/src/block_store.rs @@ -503,7 +503,7 @@ impl BlockStore { }; if let Some(head_block) = store.remove_cursor(&&store.chain)? 
{ - let lower_bound = head_block.saturating_sub(ENV_VARS.reorg_threshold * 2); + let lower_bound = head_block.saturating_sub(ENV_VARS.reorg_threshold() * 2); info!(&self.logger, "Removed cursor for non-firehose chain, now cleaning shallow blocks"; "network" => &store.chain, "lower_bound" => lower_bound); store.cleanup_shallow_blocks(lower_bound)?; } diff --git a/store/postgres/src/deployment.rs b/store/postgres/src/deployment.rs index 92181ac5a6c..5d83a563181 100644 --- a/store/postgres/src/deployment.rs +++ b/store/postgres/src/deployment.rs @@ -546,10 +546,14 @@ pub fn revert_block_ptr( // Work around a Diesel issue with serializing BigDecimals to numeric let number = format!("{}::numeric", ptr.number); + // Intention is to revert to a block lower than the reorg threshold, on the other + // hand the earliest we can possibly go is genesys block, so go to genesys even + // if it's within the reorg threshold. + let earliest_block = i32::max(ptr.number - ENV_VARS.reorg_threshold(), 0); let affected_rows = update( d::table .filter(d::deployment.eq(id.as_str())) - .filter(d::earliest_block_number.le(ptr.number - ENV_VARS.reorg_threshold)), + .filter(d::earliest_block_number.le(earliest_block)), ) .set(( d::latest_ethereum_block_number.eq(sql(&number)), diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index e497430c2bf..e07b4659436 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -1261,7 +1261,7 @@ impl DeploymentStore { let req = PruneRequest::new( &site.as_ref().into(), history_blocks, - ENV_VARS.reorg_threshold, + ENV_VARS.reorg_threshold(), earliest_block, latest_block, )?; diff --git a/tests/src/config.rs b/tests/src/config.rs index 54b07b0a5a8..6cdd97a216f 100644 --- a/tests/src/config.rs +++ b/tests/src/config.rs @@ -175,7 +175,8 @@ impl Config { .stdout(stdout) .stderr(stderr) .args(args) - .env("GRAPH_STORE_WRITE_BATCH_DURATION", "5"); + .env("GRAPH_STORE_WRITE_BATCH_DURATION", "5") + .env("ETHEREUM_REORG_THRESHOLD", "0"); status!( "graph-node", diff --git a/tests/src/fixture/ethereum.rs b/tests/src/fixture/ethereum.rs index fc651a512db..d93ac25c235 100644 --- a/tests/src/fixture/ethereum.rs +++ b/tests/src/fixture/ethereum.rs @@ -64,7 +64,7 @@ pub async fn chain( triggers_adapter, Arc::new(NoopRuntimeAdapterBuilder {}), eth_adapters, - ENV_VARS.reorg_threshold, + ENV_VARS.reorg_threshold(), ENV_VARS.ingestor_polling_interval, // We assume the tested chain is always ingestible for now true, diff --git a/tests/tests/runner_tests.rs b/tests/tests/runner_tests.rs index ac645884b5d..261c886dfea 100644 --- a/tests/tests/runner_tests.rs +++ b/tests/tests/runner_tests.rs @@ -12,7 +12,7 @@ use graph::data::store::scalar::Bytes; use graph::data::subgraph::schema::{SubgraphError, SubgraphHealth}; use graph::data::value::Word; use graph::data_source::CausalityRegion; -use graph::env::EnvVars; +use graph::env::{EnvVars, TEST_WITH_NO_REORG}; use graph::ipfs; use graph::ipfs::test_utils::add_files_to_local_ipfs_node_for_testing; use graph::object; @@ -109,6 +109,8 @@ fn assert_eq_ignore_backtrace(err: &SubgraphError, expected: &SubgraphError) { #[tokio::test] async fn data_source_revert() -> anyhow::Result<()> { + *TEST_WITH_NO_REORG.lock().unwrap() = true; + let RunnerTestRecipe { stores, test_info } = RunnerTestRecipe::new("data_source_revert", "data-source-revert").await; @@ -179,6 +181,8 @@ async fn data_source_revert() -> anyhow::Result<()> { // since it uses the same deployment id. 
data_source_long_revert().await.unwrap(); + *TEST_WITH_NO_REORG.lock().unwrap() = false; + Ok(()) } From bafc3dd52439b458ef0e0e972558fb29f1fda42f Mon Sep 17 00:00:00 2001 From: Zoran Cvetkov <36600146+zorancv@users.noreply.github.com> Date: Tue, 15 Apr 2025 10:51:16 +0300 Subject: [PATCH 085/160] test: check grafting from pre 0.0.6 to 0.0.6 and later --- .github/workflows/ci.yml | 2 +- tests/docker-compose.yml | 2 +- .../integration-tests/base/abis/Contract.abi | 33 +++++ tests/integration-tests/base/package.json | 25 ++++ tests/integration-tests/base/schema.graphql | 5 + tests/integration-tests/base/src/mapping.ts | 9 ++ tests/integration-tests/base/subgraph.yaml | 25 ++++ .../grafted/abis/Contract.abi | 33 +++++ tests/integration-tests/grafted/package.json | 25 ++++ .../integration-tests/grafted/schema.graphql | 5 + .../integration-tests/grafted/src/mapping.ts | 9 ++ tests/integration-tests/grafted/subgraph.yaml | 30 ++++ tests/src/contract.rs | 11 +- tests/src/subgraph.rs | 2 +- tests/tests/integration_tests.rs | 140 ++++++++++++++++-- 15 files changed, 337 insertions(+), 19 deletions(-) create mode 100644 tests/integration-tests/base/abis/Contract.abi create mode 100644 tests/integration-tests/base/package.json create mode 100644 tests/integration-tests/base/schema.graphql create mode 100644 tests/integration-tests/base/src/mapping.ts create mode 100644 tests/integration-tests/base/subgraph.yaml create mode 100644 tests/integration-tests/grafted/abis/Contract.abi create mode 100644 tests/integration-tests/grafted/package.json create mode 100644 tests/integration-tests/grafted/schema.graphql create mode 100644 tests/integration-tests/grafted/src/mapping.ts create mode 100644 tests/integration-tests/grafted/subgraph.yaml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0fa6d58bbb7..24993639945 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -153,7 +153,7 @@ jobs: - name: Install Foundry uses: foundry-rs/foundry-toolchain@v1 - name: Start anvil - run: anvil --gas-limit 100000000000 --base-fee 1 --block-time 2 --port 3021 & + run: anvil --gas-limit 100000000000 --base-fee 1 --block-time 2 --timestamp 1743944919 --port 3021 & - name: Install graph CLI run: curl -sSL http://cli.thegraph.com/install.sh | sudo bash diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml index f45360fd367..7385b4b08a2 100644 --- a/tests/docker-compose.yml +++ b/tests/docker-compose.yml @@ -23,7 +23,7 @@ services: image: ghcr.io/foundry-rs/foundry:stable ports: - '3021:8545' - command: "'anvil --host 0.0.0.0 --gas-limit 100000000000 --base-fee 1 --block-time 5 --mnemonic \"test test test test test test test test test test test junk\"'" + command: "'anvil --host 0.0.0.0 --gas-limit 100000000000 --base-fee 1 --block-time 2 --timestamp 1743944919 --mnemonic \"test test test test test test test test test test test junk\"'" # graph-node ports: # json-rpc: 8020 diff --git a/tests/integration-tests/base/abis/Contract.abi b/tests/integration-tests/base/abis/Contract.abi new file mode 100644 index 00000000000..02da1a9e7f3 --- /dev/null +++ b/tests/integration-tests/base/abis/Contract.abi @@ -0,0 +1,33 @@ +[ + { + "inputs": [], + "stateMutability": "nonpayable", + "type": "constructor" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": false, + "internalType": "uint16", + "name": "x", + "type": "uint16" + } + ], + "name": "Trigger", + "type": "event" + }, + { + "inputs": [ + { + "internalType": "uint16", + "name": "x", + "type": 
"uint16" + } + ], + "name": "emitTrigger", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + } +] diff --git a/tests/integration-tests/base/package.json b/tests/integration-tests/base/package.json new file mode 100644 index 00000000000..2cfb6b94def --- /dev/null +++ b/tests/integration-tests/base/package.json @@ -0,0 +1,25 @@ +{ + "name": "base-subgraph", + "version": "0.1.0", + "scripts": { + "build-contracts": "../../common/build-contracts.sh", + "codegen": "graph codegen --skip-migrations", + "test": "yarn build-contracts && truffle test --compile-none --network test", + "create:test": "graph create test/base-subgraph --node $GRAPH_NODE_ADMIN_URI", + "deploy:test": "graph deploy test/base-subgraph --version-label v0.0.1 --ipfs $IPFS_URI --node $GRAPH_NODE_ADMIN_URI" + }, + "devDependencies": { + "@graphprotocol/graph-cli": "0.69.0", + "@graphprotocol/graph-ts": "0.34.0", + "solc": "^0.8.2" + }, + "dependencies": { + "@truffle/contract": "^4.3", + "@truffle/hdwallet-provider": "^1.2", + "apollo-fetch": "^0.7.0", + "babel-polyfill": "^6.26.0", + "babel-register": "^6.26.0", + "gluegun": "^4.6.1", + "truffle": "^5.2" + } +} \ No newline at end of file diff --git a/tests/integration-tests/base/schema.graphql b/tests/integration-tests/base/schema.graphql new file mode 100644 index 00000000000..f7034353d73 --- /dev/null +++ b/tests/integration-tests/base/schema.graphql @@ -0,0 +1,5 @@ +type BaseData @entity(immutable: true) { + id: ID! + data: String! + blockNumber: BigInt! +} \ No newline at end of file diff --git a/tests/integration-tests/base/src/mapping.ts b/tests/integration-tests/base/src/mapping.ts new file mode 100644 index 00000000000..11767070a5b --- /dev/null +++ b/tests/integration-tests/base/src/mapping.ts @@ -0,0 +1,9 @@ +import { ethereum } from '@graphprotocol/graph-ts' +import { BaseData } from '../generated/schema' + +export function handleBlock(block: ethereum.Block): void { + let entity = new BaseData(block.number.toString()) + entity.data = 'from base' + entity.blockNumber = block.number + entity.save() +} \ No newline at end of file diff --git a/tests/integration-tests/base/subgraph.yaml b/tests/integration-tests/base/subgraph.yaml new file mode 100644 index 00000000000..808b446c622 --- /dev/null +++ b/tests/integration-tests/base/subgraph.yaml @@ -0,0 +1,25 @@ +specVersion: 0.0.5 +description: Base Subgraph +repository: https://github.com/graphprotocol/graph-node +schema: + file: ./schema.graphql +dataSources: + - kind: ethereum/contract + name: SimpleContract + network: test + source: + address: "0x5FbDB2315678afecb367f032d93F642f64180aa3" + abi: SimpleContract + startBlock: 0 + mapping: + kind: ethereum/events + apiVersion: 0.0.6 + language: wasm/assemblyscript + entities: + - BaseData + abis: + - name: SimpleContract + file: ./abis/Contract.abi + blockHandlers: + - handler: handleBlock + file: ./src/mapping.ts \ No newline at end of file diff --git a/tests/integration-tests/grafted/abis/Contract.abi b/tests/integration-tests/grafted/abis/Contract.abi new file mode 100644 index 00000000000..02da1a9e7f3 --- /dev/null +++ b/tests/integration-tests/grafted/abis/Contract.abi @@ -0,0 +1,33 @@ +[ + { + "inputs": [], + "stateMutability": "nonpayable", + "type": "constructor" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": false, + "internalType": "uint16", + "name": "x", + "type": "uint16" + } + ], + "name": "Trigger", + "type": "event" + }, + { + "inputs": [ + { + "internalType": "uint16", + "name": "x", + "type": 
"uint16" + } + ], + "name": "emitTrigger", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + } +] diff --git a/tests/integration-tests/grafted/package.json b/tests/integration-tests/grafted/package.json new file mode 100644 index 00000000000..d45b6fc6727 --- /dev/null +++ b/tests/integration-tests/grafted/package.json @@ -0,0 +1,25 @@ +{ + "name": "grafted-subgraph", + "version": "0.1.0", + "scripts": { + "build-contracts": "../../common/build-contracts.sh", + "codegen": "graph codegen --skip-migrations", + "test": "yarn build-contracts && truffle test --compile-none --network test", + "create:test": "graph create test/grafted-subgraph --node $GRAPH_NODE_ADMIN_URI", + "deploy:test": "graph deploy test/grafted-subgraph --version-label v0.0.1 --ipfs $IPFS_URI --node $GRAPH_NODE_ADMIN_URI" + }, + "devDependencies": { + "@graphprotocol/graph-cli": "0.69.0", + "@graphprotocol/graph-ts": "0.34.0", + "solc": "^0.8.2" + }, + "dependencies": { + "@truffle/contract": "^4.3", + "@truffle/hdwallet-provider": "^1.2", + "apollo-fetch": "^0.7.0", + "babel-polyfill": "^6.26.0", + "babel-register": "^6.26.0", + "gluegun": "^4.6.1", + "truffle": "^5.2" + } +} \ No newline at end of file diff --git a/tests/integration-tests/grafted/schema.graphql b/tests/integration-tests/grafted/schema.graphql new file mode 100644 index 00000000000..b83083fd466 --- /dev/null +++ b/tests/integration-tests/grafted/schema.graphql @@ -0,0 +1,5 @@ +type GraftedData @entity(immutable: true) { + id: ID! + data: String! + blockNumber: BigInt! +} \ No newline at end of file diff --git a/tests/integration-tests/grafted/src/mapping.ts b/tests/integration-tests/grafted/src/mapping.ts new file mode 100644 index 00000000000..742d5d67c54 --- /dev/null +++ b/tests/integration-tests/grafted/src/mapping.ts @@ -0,0 +1,9 @@ +import { ethereum } from '@graphprotocol/graph-ts' +import { GraftedData } from '../generated/schema' + +export function handleBlock(block: ethereum.Block): void { + let entity = new GraftedData(block.number.toString()) + entity.data = 'to grafted' + entity.blockNumber = block.number + entity.save() +} \ No newline at end of file diff --git a/tests/integration-tests/grafted/subgraph.yaml b/tests/integration-tests/grafted/subgraph.yaml new file mode 100644 index 00000000000..f946f201941 --- /dev/null +++ b/tests/integration-tests/grafted/subgraph.yaml @@ -0,0 +1,30 @@ +specVersion: 0.0.6 +description: Grafted Subgraph +repository: https://github.com/graphprotocol/graph-node +schema: + file: ./schema.graphql +dataSources: + - kind: ethereum/contract + name: SimpleContract + network: test + source: + address: "0x5FbDB2315678afecb367f032d93F642f64180aa3" + abi: SimpleContract + startBlock: 0 + mapping: + kind: ethereum/events + apiVersion: 0.0.6 + language: wasm/assemblyscript + entities: + - GraftedData + abis: + - name: SimpleContract + file: ./abis/Contract.abi + blockHandlers: + - handler: handleBlock + file: ./src/mapping.ts +features: + - grafting +graft: + base: QmQpiC9bJGFssQfeZippfQ7rcTv7QA67X7jUejc8nV125F + block: 2 \ No newline at end of file diff --git a/tests/src/contract.rs b/tests/src/contract.rs index 4fdf767b041..05fda947839 100644 --- a/tests/src/contract.rs +++ b/tests/src/contract.rs @@ -7,7 +7,7 @@ use graph::prelude::{ api::{Eth, Namespace}, contract::{tokens::Tokenize, Contract as Web3Contract, Options}, transports::Http, - types::{Address, Bytes, TransactionReceipt}, + types::{Address, Block, BlockId, BlockNumber, Bytes, TransactionReceipt, H256}, }, }; // 
web3 version 0.18 does not expose this; once the graph crate updates to @@ -165,4 +165,13 @@ impl Contract { } Ok(contracts) } + + pub async fn latest_block() -> Option> { + let eth = Self::eth(); + let block = eth + .block(BlockId::Number(BlockNumber::Latest)) + .await + .unwrap_or_default(); + block + } } diff --git a/tests/src/subgraph.rs b/tests/src/subgraph.rs index 810b87cbb78..92e42836b68 100644 --- a/tests/src/subgraph.rs +++ b/tests/src/subgraph.rs @@ -164,7 +164,7 @@ impl Subgraph { } /// Make a GraphQL query to the index node API - pub async fn index_with_vars(&self, text: &str, vars: Value) -> anyhow::Result { + pub async fn query_with_vars(text: &str, vars: Value) -> anyhow::Result { let endpoint = CONFIG.graph_node.index_node_uri(); graphql_query_with_vars(&endpoint, text, vars).await } diff --git a/tests/tests/integration_tests.rs b/tests/tests/integration_tests.rs index 5c6ab96968d..9df36f7145a 100644 --- a/tests/tests/integration_tests.rs +++ b/tests/tests/integration_tests.rs @@ -11,7 +11,7 @@ use std::future::Future; use std::pin::Pin; -use std::time::{Duration, Instant}; +use std::time::{self, Duration, Instant}; use anyhow::{anyhow, bail, Context, Result}; use graph::futures03::StreamExt; @@ -25,6 +25,8 @@ use tokio::process::{Child, Command}; use tokio::task::JoinError; use tokio::time::sleep; +const SUBGRAPH_LAST_GRAFTING_BLOCK: i32 = 3; + type TestFn = Box< dyn FnOnce(TestContext) -> Pin> + Send>> + Sync @@ -110,6 +112,15 @@ impl TestCase { } } + fn new_with_grafting(name: &str, test: fn(TestContext) -> T, base_subgraph: &str) -> Self + where + T: Future> + Send + 'static, + { + let mut test_case = Self::new(name, test); + test_case.source_subgraph = Some(base_subgraph.to_string()); + test_case + } + fn new_with_source_subgraph( name: &str, test: fn(TestContext) -> T, @@ -246,7 +257,7 @@ impl TestCase { let subgraph = self.deploy_and_wait(source, contracts).await?; status!( source, - "source subgraph deployed with hash {}", + "Source subgraph deployed with hash {}", subgraph.deployment ); } @@ -456,9 +467,8 @@ async fn test_block_handlers(ctx: TestContext) -> anyhow::Result<()> { .await?; // test subgraphFeatures endpoint returns handlers correctly - let subgraph_features = subgraph - .index_with_vars( - "query GetSubgraphFeatures($deployment: String!) { + let subgraph_features = Subgraph::query_with_vars( + "query GetSubgraphFeatures($deployment: String!) 
{ subgraphFeatures(subgraphId: $deployment) { specVersion apiVersion @@ -468,9 +478,9 @@ async fn test_block_handlers(ctx: TestContext) -> anyhow::Result<()> { handlers } }", - json!({ "deployment": subgraph.deployment }), - ) - .await?; + json!({ "deployment": subgraph.deployment }), + ) + .await?; let handlers = &subgraph_features["data"]["subgraphFeatures"]["handlers"]; assert!( handlers.is_array(), @@ -697,9 +707,8 @@ async fn test_non_fatal_errors(ctx: TestContext) -> anyhow::Result<()> { } }"; - let resp = subgraph - .index_with_vars(query, json!({ "deployment" : subgraph.deployment })) - .await?; + let resp = + Subgraph::query_with_vars(query, json!({ "deployment" : subgraph.deployment })).await?; let subgraph_features = &resp["data"]["subgraphFeatures"]; let exp = json!({ "specVersion": "0.0.4", @@ -796,6 +805,82 @@ async fn test_remove_then_update(ctx: TestContext) -> anyhow::Result<()> { Ok(()) } +async fn test_subgraph_grafting(ctx: TestContext) -> anyhow::Result<()> { + async fn get_block_hash(block_number: i32) -> Option { + const FETCH_BLOCK_HASH: &str = r#" + query blockHashFromNumber($network: String!, $blockNumber: Int!) { + hash: blockHashFromNumber( + network: $network, + blockNumber: $blockNumber, + ) } "#; + let vars = json!({ + "network": "test", + "blockNumber": block_number + }); + + let resp = Subgraph::query_with_vars(FETCH_BLOCK_HASH, vars) + .await + .unwrap(); + assert_eq!(None, resp.get("errors")); + resp["data"]["hash"].as_str().map(|s| s.to_owned()) + } + + let subgraph = ctx.subgraph; + + assert!(subgraph.healthy); + + let block_hashes: Vec<&str> = vec![ + "384c705d4d1933ae8ba89026f016f09854057a267e1143e47bb7511d772a35d4", + "b90423eead33404dae0684169d35edd494b36802b721fb8de0bb8bc036c10480", + "2a6c4b65d659e0485371a93bc1ac0f0d7bc0f25a454b5f23a842335fea0638d5", + ]; + + let pois: Vec<&str> = vec![ + "0xde9e5650e22e61def6990d3fc4bd5915a4e8e0dd54af0b6830bf064aab16cc03", + "0x5d790dca3e37bd9976345d32d437b84ba5ea720a0b6ea26231a866e9f078bd52", + "0x719c04b78e01804c86f2bd809d20f481e146327af07227960e2242da365754ef", + ]; + + for i in 1..4 { + let block_hash = get_block_hash(i).await.unwrap(); + // We need to make sure that the preconditions for POI are fulfiled + // namely that the blockchain produced the proper block hashes for the + // blocks of which we will check the POI. + assert_eq!(block_hash, block_hashes[(i - 1) as usize]); + + const FETCH_POI: &str = r#" + query proofOfIndexing($subgraph: String!, $blockNumber: Int!, $blockHash: String!, $indexer: String!) { + proofOfIndexing( + subgraph: $subgraph, + blockNumber: $blockNumber, + blockHash: $blockHash, + indexer: $indexer + ) } "#; + + let zero_addr = "0000000000000000000000000000000000000000"; + let vars = json!({ + "subgraph": subgraph.deployment, + "blockNumber": i, + "blockHash": block_hash, + "indexer": zero_addr, + }); + let resp = Subgraph::query_with_vars(FETCH_POI, vars).await?; + assert_eq!(None, resp.get("errors")); + assert!(resp["data"]["proofOfIndexing"].is_string()); + let poi = resp["data"]["proofOfIndexing"].as_str().unwrap(); + // Check the expected value of the POI. The transition from the old legacy + // hashing to the new one is done in the block #2 anything before that + // should not change as the legacy code will not be updated. Any change + // after that might indicate a change in the way new POI is now calculated. 
+ // Change on the block #2 would mean a change in the transitioning + // from the old to the new algorithm hence would be reflected only + // subgraphs that are grafting from pre 0.0.5 to 0.0.6 or newer. + assert_eq!(poi, pois[(i - 1) as usize]); + } + + Ok(()) +} + async fn test_poi_for_failed_subgraph(ctx: TestContext) -> anyhow::Result<()> { let subgraph = ctx.subgraph; const INDEXING_STATUS: &str = r#" @@ -829,9 +914,9 @@ async fn test_poi_for_failed_subgraph(ctx: TestContext) -> anyhow::Result<()> { } async fn fetch_status(subgraph: &Subgraph) -> anyhow::Result { - let resp = subgraph - .index_with_vars(INDEXING_STATUS, json!({ "subgraphName": subgraph.name })) - .await?; + let resp = + Subgraph::query_with_vars(INDEXING_STATUS, json!({ "subgraphName": subgraph.name })) + .await?; assert_eq!(None, resp.get("errors")); let statuses = &resp["data"]["statuses"]; assert_eq!(1, statuses.as_array().unwrap().len()); @@ -877,7 +962,7 @@ async fn test_poi_for_failed_subgraph(ctx: TestContext) -> anyhow::Result<()> { "blockNumber": block_number, "blockHash": status.latest_block["hash"], }); - let resp = subgraph.index_with_vars(FETCH_POI, vars).await?; + let resp = Subgraph::query_with_vars(FETCH_POI, vars).await?; assert_eq!(None, resp.get("errors")); assert!(resp["data"]["proofOfIndexing"].is_string()); Ok(()) @@ -915,6 +1000,25 @@ async fn test_multiple_subgraph_datasources(ctx: TestContext) -> anyhow::Result< Ok(()) } +async fn wait_for_blockchain_block(block_number: i32) -> bool { + // Wait up to 5 minutes for the expected block to appear + const STATUS_WAIT: Duration = Duration::from_secs(300); + const REQUEST_REPEATING: Duration = time::Duration::from_secs(1); + let start = Instant::now(); + while start.elapsed() < STATUS_WAIT { + let latest_block = Contract::latest_block().await; + if let Some(latest_block) = latest_block { + if let Some(number) = latest_block.number { + if number >= block_number.into() { + return true; + } + } + } + tokio::time::sleep(REQUEST_REPEATING).await; + } + false +} + /// The main test entrypoint. #[tokio::test] async fn integration_tests() -> anyhow::Result<()> { @@ -936,6 +1040,7 @@ async fn integration_tests() -> anyhow::Result<()> { TestCase::new("timestamp", test_timestamp), TestCase::new("ethereum-api-tests", test_eth_api), TestCase::new("topic-filter", test_topic_filters), + TestCase::new_with_grafting("grafted", test_subgraph_grafting, "base"), TestCase::new_with_source_subgraph( "subgraph-data-sources", subgraph_data_sources, @@ -958,6 +1063,11 @@ async fn integration_tests() -> anyhow::Result<()> { cases }; + // Here we wait for a block in the blockchain in order not to influence + // block hashes for all the blocks until the end of the grafting tests. + // Currently the last used block for grafting test is the block 3. + assert!(wait_for_blockchain_block(SUBGRAPH_LAST_GRAFTING_BLOCK).await); + let contracts = Contract::deploy_all().await?; status!("setup", "Resetting database"); From baf33244fb1f10071f3b8387b17dbe6869c611b0 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 8 Apr 2025 16:29:03 -0700 Subject: [PATCH 086/160] graph, store: Remove GRAPH_STORE_LAST_ROLLUP_FROM_POI This flag was only meant as a safety switch in case the fixed behavior caused trouble. Since it's not been needed in several months, it's safe to remove it. 
--- graph/src/env/store.rs | 11 ---------- store/postgres/src/deployment_store.rs | 12 ++--------- store/postgres/src/relational.rs | 29 +------------------------- store/postgres/src/writable.rs | 8 +++---- 4 files changed, 6 insertions(+), 54 deletions(-) diff --git a/graph/src/env/store.rs b/graph/src/env/store.rs index 3ecf92e0388..8197d07b6bc 100644 --- a/graph/src/env/store.rs +++ b/graph/src/env/store.rs @@ -129,14 +129,6 @@ pub struct EnvVarsStore { pub use_brin_for_all_query_types: bool, /// Temporary env var to disable certain lookups in the chain store pub disable_block_cache_for_lookup: bool, - /// Temporary env var to fall back to the old broken way of determining - /// the time of the last rollup from the POI table instead of the new - /// way that fixes - /// https://github.com/graphprotocol/graph-node/issues/5530 Remove this - /// and all code that is dead as a consequence once this has been vetted - /// sufficiently, probably after 2024-12-01 - /// Defaults to `false`, i.e. using the new fixed behavior - pub last_rollup_from_poi: bool, /// Safety switch to increase the number of columns used when /// calculating the chunk size in `InsertQuery::chunk_size`. This can be /// used to work around Postgres errors complaining 'number of @@ -197,7 +189,6 @@ impl TryFrom for EnvVarsStore { create_gin_indexes: x.create_gin_indexes, use_brin_for_all_query_types: x.use_brin_for_all_query_types, disable_block_cache_for_lookup: x.disable_block_cache_for_lookup, - last_rollup_from_poi: x.last_rollup_from_poi, insert_extra_cols: x.insert_extra_cols, fdw_fetch_size: x.fdw_fetch_size, }; @@ -276,8 +267,6 @@ pub struct InnerStore { use_brin_for_all_query_types: bool, #[envconfig(from = "GRAPH_STORE_DISABLE_BLOCK_CACHE_FOR_LOOKUP", default = "false")] disable_block_cache_for_lookup: bool, - #[envconfig(from = "GRAPH_STORE_LAST_ROLLUP_FROM_POI", default = "false")] - last_rollup_from_poi: bool, #[envconfig(from = "GRAPH_STORE_INSERT_EXTRA_COLS", default = "0")] insert_extra_cols: usize, #[envconfig(from = "GRAPH_STORE_FDW_FETCH_SIZE", default = "1000")] diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index e07b4659436..92de85f316e 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -904,20 +904,12 @@ impl DeploymentStore { .await } - pub(crate) fn block_time( - &self, - site: Arc, - block: BlockNumber, - ) -> Result, StoreError> { + pub(crate) fn block_time(&self, site: Arc) -> Result, StoreError> { let store = self.cheap_clone(); let mut conn = self.get_conn()?; let layout = store.layout(&mut conn, site.cheap_clone())?; - if ENV_VARS.store.last_rollup_from_poi { - layout.block_time(&mut conn, block) - } else { - layout.last_rollup(&mut conn) - } + layout.last_rollup(&mut conn) } pub(crate) async fn get_proof_of_indexing( diff --git a/store/postgres/src/relational.rs b/store/postgres/src/relational.rs index d148060efc2..fb181b7e74d 100644 --- a/store/postgres/src/relational.rs +++ b/store/postgres/src/relational.rs @@ -32,7 +32,6 @@ use graph::blockchain::block_stream::{EntityOperationKind, EntitySourceOperation use graph::blockchain::BlockTime; use graph::cheap_clone::CheapClone; use graph::components::store::write::{RowGroup, WriteChunk}; -use graph::components::subgraph::PoICausalityRegion; use graph::constraint_violation; use graph::data::graphql::TypeExt as _; use graph::data::query::Trace; @@ -69,7 +68,7 @@ use crate::{ }, }; use graph::components::store::{AttributeNames, 
DerivedEntityQuery}; -use graph::data::store::{Id, IdList, IdType, BYTES_SCALAR}; +use graph::data::store::{IdList, IdType, BYTES_SCALAR}; use graph::data::subgraph::schema::POI_TABLE; use graph::prelude::{ anyhow, info, BlockNumber, DeploymentHash, Entity, EntityOperation, Logger, @@ -1113,32 +1112,6 @@ impl Layout { Ok(Arc::new(layout)) } - pub(crate) fn block_time( - &self, - conn: &mut PgConnection, - block: BlockNumber, - ) -> Result, StoreError> { - let block_time_name = self.input_schema.poi_block_time(); - let poi_type = self.input_schema.poi_type(); - let id = Id::String(Word::from(PoICausalityRegion::from_network( - &self.site.network, - ))); - let key = poi_type.key(id); - - let block_time = self - .find(conn, &key, block)? - .and_then(|entity| { - entity.get(&block_time_name).map(|value| { - value - .as_int8() - .ok_or_else(|| constraint_violation!("block_time must have type Int8")) - }) - }) - .transpose()? - .map(|value| BlockTime::since_epoch(value, 0)); - Ok(block_time) - } - /// Find the time of the last rollup for the subgraph. We do this by /// looking for the maximum timestamp in any aggregation table and /// adding a little bit more than the corresponding interval to it. This diff --git a/store/postgres/src/writable.rs b/store/postgres/src/writable.rs index 26e559bcbc9..3d85042d07c 100644 --- a/store/postgres/src/writable.rs +++ b/store/postgres/src/writable.rs @@ -95,8 +95,8 @@ impl LastRollup { let kind = match (has_aggregations, block) { (false, _) => LastRollup::NotNeeded, (true, None) => LastRollup::Unknown, - (true, Some(block)) => { - let block_time = store.block_time(site, block)?; + (true, Some(_)) => { + let block_time = store.block_time(site)?; block_time .map(|b| LastRollup::Some(b)) .unwrap_or(LastRollup::Unknown) @@ -240,9 +240,7 @@ impl SyncStore { firehose_cursor, )?; - let block_time = self - .writable - .block_time(self.site.cheap_clone(), block_ptr_to.number)?; + let block_time = self.writable.block_time(self.site.cheap_clone())?; self.last_rollup.set(block_time) }) } From e6f55b174da714791d920c5f53ffdf942d808566 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 11 Apr 2025 14:45:47 -0700 Subject: [PATCH 087/160] store: Do not drop the default copy connection when running out of work When the default worker finished while other workers were still copying, and there was no more work to do, we inadvertently dropped the default connection. 
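A minimal sketch of the take-and-restore pattern this fix enforces (the struct and field names here are illustrative, not the exact graph-node types): when the control loop takes its dedicated connection out of the Option to hand it to a new worker, every early-return path has to put the connection back rather than let it drop.

struct Job;

struct Controller<C> {
    conn: Option<C>,
    unfinished: Vec<Job>,
}

impl<C> Controller<C> {
    // Hand out the dedicated connection together with the next unit of work.
    fn next_worker(&mut self) -> Option<(C, Job)> {
        // Take the connection out of `self.conn` first ...
        let conn = self.conn.take()?;
        let Some(job) = self.unfinished.pop() else {
            // ... and put it back if there is nothing left to do, instead of
            // letting it fall out of scope and be dropped.
            self.conn = Some(conn);
            return None;
        };
        Some((conn, job))
    }
}

The one-line change below does exactly this for the default copy connection.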
--- store/postgres/src/copy.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 22ddee394f6..2e8807b2aa8 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -1020,6 +1020,7 @@ impl Connection { return None; }; let Some(table) = state.unfinished.pop() else { + self.conn = Some(conn); return None; }; From 23427029ea4b614c0e02a76e73f11e0ebe1e3a92 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sat, 12 Apr 2025 12:29:03 -0700 Subject: [PATCH 088/160] node: Make number of blocking threads configurable --- node/src/main.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/node/src/main.rs b/node/src/main.rs index 9b0e94250dc..b2003dff28f 100644 --- a/node/src/main.rs +++ b/node/src/main.rs @@ -78,8 +78,21 @@ fn read_expensive_queries( Ok(queries) } -#[tokio::main] -async fn main() { +fn main() { + let max_blocking: usize = std::env::var("GRAPH_MAX_BLOCKING_THREADS") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(512); + + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .max_blocking_threads(max_blocking) + .build() + .unwrap() + .block_on(async { main_inner().await }) +} + +async fn main_inner() { env_logger::init(); let env_vars = Arc::new(EnvVars::from_env().unwrap()); From 746acb9b945e7a2da4cf8b9e571afd17dd8b8f81 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 9 Apr 2025 09:33:31 -0700 Subject: [PATCH 089/160] core: Remove unused From for BlockProcessingError --- core/src/subgraph/error.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/core/src/subgraph/error.rs b/core/src/subgraph/error.rs index b3131255aed..5eddd835bd2 100644 --- a/core/src/subgraph/error.rs +++ b/core/src/subgraph/error.rs @@ -1,5 +1,5 @@ use graph::data::subgraph::schema::SubgraphError; -use graph::prelude::{thiserror, Error, StoreError}; +use graph::prelude::{thiserror, Error}; #[derive(thiserror::Error, Debug)] pub enum BlockProcessingError { @@ -20,9 +20,3 @@ impl BlockProcessingError { matches!(self, BlockProcessingError::Deterministic(_)) } } - -impl From for BlockProcessingError { - fn from(e: StoreError) -> Self { - BlockProcessingError::Unknown(e.into()) - } -} From a98f6e1e875f70dd628aae311ad0588fcbc3b829 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 11 Apr 2025 17:08:47 -0700 Subject: [PATCH 090/160] core: Remove '*' import in subgraph runner and list imports explicitly --- core/src/subgraph/runner.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index 922c7a4003c..1d6c3b4cda5 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -3,6 +3,8 @@ use crate::subgraph::error::BlockProcessingError; use crate::subgraph::inputs::IndexingInputs; use crate::subgraph::state::IndexingState; use crate::subgraph::stream::new_block_stream; +use anyhow::Context as _; +use async_trait::async_trait; use graph::blockchain::block_stream::{ BlockStreamError, BlockStreamEvent, BlockWithTriggers, FirehoseCursor, }; @@ -27,8 +29,14 @@ use graph::data_source::{ use graph::env::EnvVars; use graph::futures03::stream::StreamExt; use graph::futures03::TryStreamExt; -use graph::prelude::*; +use graph::prelude::{ + anyhow, hex, retry, thiserror, BlockNumber, BlockPtr, BlockState, CancelGuard, CancelHandle, + CancelToken as _, CancelableError, CheapClone as _, EntityCache, EntityModification, Error, + InstanceDSTemplateInfo, LogCode, 
RunnerMetrics, RuntimeHostBuilder, StopwatchMetrics, + StoreError, StreamExtension, UnfailOutcome, Value, ENV_VARS, +}; use graph::schema::EntityKey; +use graph::slog::{debug, error, info, o, trace, warn, Logger}; use graph::util::{backoff::ExponentialBackoff, lfu_cache::LfuCache}; use std::sync::Arc; use std::time::{Duration, Instant}; From 4ec6eea995d842f868c94c5edea3d27e4680822e Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 11 Apr 2025 17:28:54 -0700 Subject: [PATCH 091/160] core: Force marking non-deterministic errors explicitly Instead of implicitly converting anyhow::Error to a non-deterministic error, force an explicit call to make it clear where non-deterministic errors come from --- core/src/subgraph/error.rs | 20 ++++++++++++-- core/src/subgraph/runner.rs | 55 +++++++++++++++++++++---------------- 2 files changed, 50 insertions(+), 25 deletions(-) diff --git a/core/src/subgraph/error.rs b/core/src/subgraph/error.rs index 5eddd835bd2..f4cf7e2308d 100644 --- a/core/src/subgraph/error.rs +++ b/core/src/subgraph/error.rs @@ -1,10 +1,10 @@ use graph::data::subgraph::schema::SubgraphError; -use graph::prelude::{thiserror, Error}; +use graph::prelude::{thiserror, Error, StoreError}; #[derive(thiserror::Error, Debug)] pub enum BlockProcessingError { #[error("{0:#}")] - Unknown(#[from] Error), + Unknown(Error), // The error had a deterministic cause but, for a possibly non-deterministic reason, we chose to // halt processing due to the error. @@ -20,3 +20,19 @@ impl BlockProcessingError { matches!(self, BlockProcessingError::Deterministic(_)) } } + +pub(crate) trait ErrorHelper { + fn non_deterministic(self: Self) -> Result; +} + +impl ErrorHelper for Result { + fn non_deterministic(self) -> Result { + self.map_err(|e| BlockProcessingError::Unknown(e)) + } +} + +impl ErrorHelper for Result { + fn non_deterministic(self) -> Result { + self.map_err(|e| BlockProcessingError::Unknown(Error::from(e))) + } +} diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index 1d6c3b4cda5..779018cea08 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -1,5 +1,5 @@ use crate::subgraph::context::IndexingContext; -use crate::subgraph::error::BlockProcessingError; +use crate::subgraph::error::{BlockProcessingError, ErrorHelper as _}; use crate::subgraph::inputs::IndexingInputs; use crate::subgraph::state::IndexingState; use crate::subgraph::stream::new_block_stream; @@ -524,7 +524,8 @@ where let chain = chain.cheap_clone(); async move { chain.refetch_firehose_block(&log, cur).await } }) - .await?, + .await + .non_deterministic()?, ) } else { block.cheap_clone() @@ -535,7 +536,8 @@ where .inputs .triggers_adapter .triggers_in_block(&logger, block.as_ref().clone(), filter) - .await?; + .await + .non_deterministic()?; let triggers = block_with_triggers.trigger_data; @@ -641,7 +643,8 @@ where &self.metrics.host.stopwatch, &mut block_state.entity_cache, ) - .await?; + .await + .non_deterministic()?; } let section = self @@ -671,10 +674,15 @@ where // Check for offchain events and process them, including their entity modifications in the // set to be transacted. 
- let offchain_events = self.ctx.offchain_monitor.ready_offchain_events()?; + let offchain_events = self + .ctx + .offchain_monitor + .ready_offchain_events() + .non_deterministic()?; let (offchain_mods, processed_offchain_data_sources, persisted_off_chain_data_sources) = self.handle_offchain_triggers(offchain_events, &block) - .await?; + .await + .non_deterministic()?; mods.extend(offchain_mods); // Put the cache back in the state, asserting that the placeholder cache was not used. @@ -720,7 +728,7 @@ where let first_error = deterministic_errors.first().cloned(); - let is_caught_up = self.is_caught_up(&block_ptr).await?; + let is_caught_up = self.is_caught_up(&block_ptr).await.non_deterministic()?; persisted_data_sources.extend(persisted_off_chain_data_sources); self.inputs @@ -738,7 +746,7 @@ where is_caught_up, ) .await - .context("Failed to transact block operations")?; + .non_deterministic()?; // For subgraphs with `nonFatalErrors` feature disabled, we consider // any error as fatal. @@ -758,11 +766,9 @@ where .block_ops_transaction_duration .observe(elapsed); - block_state_metrics.flush_metrics_to_store( - &logger, - block_ptr, - self.inputs.deployment.id, - )?; + block_state_metrics + .flush_metrics_to_store(&logger, block_ptr, self.inputs.deployment.id) + .non_deterministic()?; // To prevent a buggy pending version from replacing a current version, if errors are // present the subgraph will be unassigned. @@ -817,7 +823,7 @@ where fn create_dynamic_data_sources( &mut self, created_data_sources: Vec, - ) -> Result<(Vec>, Vec>), Error> { + ) -> Result<(Vec>, Vec>), BlockProcessingError> { let mut data_sources = vec![]; let mut runtime_hosts = vec![]; @@ -825,15 +831,15 @@ where let manifest_idx = info .template .manifest_idx() - .ok_or_else(|| anyhow!("Expected template to have an idx"))?; + .ok_or_else(|| anyhow!("Expected template to have an idx")) + .non_deterministic()?; let created_ds_template = self .inputs .templates .iter() .find(|t| t.manifest_idx() == manifest_idx) - .ok_or_else(|| { - anyhow!("Expected to find a template for this dynamic data source") - })?; + .ok_or_else(|| anyhow!("Expected to find a template for this dynamic data source")) + .non_deterministic()?; // Try to instantiate a data source from the template let data_source = { @@ -855,14 +861,15 @@ where warn!(self.logger, "{}", e.to_string()); continue; } - Err(DataSourceCreationError::Unknown(e)) => return Err(e), + Err(DataSourceCreationError::Unknown(e)) => return Err(e).non_deterministic(), } }; // Try to create a runtime host for the data source let host = self .ctx - .add_dynamic_data_source(&self.logger, data_source.clone())?; + .add_dynamic_data_source(&self.logger, data_source.clone()) + .non_deterministic()?; match host { Some(host) => { @@ -1381,7 +1388,8 @@ where &self.metrics.host.stopwatch, &mut block_state.entity_cache, ) - .await?; + .await + .non_deterministic()?; } let section = self @@ -1451,7 +1459,7 @@ where let first_error = deterministic_errors.first().cloned(); // We consider a subgraph caught up when it's at most 1 blocks behind the chain head. - let is_caught_up = self.is_caught_up(&block_ptr).await?; + let is_caught_up = self.is_caught_up(&block_ptr).await.non_deterministic()?; self.inputs .store @@ -1468,7 +1476,8 @@ where is_caught_up, ) .await - .context("Failed to transact block operations")?; + .context("Failed to transact block operations") + .non_deterministic()?; // For subgraphs with `nonFatalErrors` feature disabled, we consider // any error as fatal. 
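Sketch of what this change looks like at a call site, with the types reduced to the essentials. The error type, trait and method names mirror the ones added in this patch; the stand-in functions are purely illustrative. The point is that the blanket From<anyhow::Error> conversion is gone, so a bare `?` no longer classifies an error and the caller has to opt in explicitly.

use anyhow::{anyhow, Error};

// Reduced stand-ins for the real types; only the shape of the conversion matters here.
#[derive(Debug)]
enum BlockProcessingError {
    Unknown(Error),
}

trait ErrorHelper<T> {
    fn non_deterministic(self) -> Result<T, BlockProcessingError>;
}

impl<T> ErrorHelper<T> for Result<T, Error> {
    fn non_deterministic(self) -> Result<T, BlockProcessingError> {
        self.map_err(BlockProcessingError::Unknown)
    }
}

fn load_triggers(ok: bool) -> Result<u32, Error> {
    if ok {
        Ok(3)
    } else {
        Err(anyhow!("transient RPC failure"))
    }
}

fn process_block(ok: bool) -> Result<u32, BlockProcessingError> {
    // Before this change `load_triggers(ok)?` compiled because of the
    // `From<Error>` impl; now the classification has to be written out,
    // and forgetting it is a compile error rather than a silent default.
    load_triggers(ok).non_deterministic()
}

Only the catch-all conversion becomes explicit; deterministic subgraph errors still go through the separate Deterministic variant.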
From 8be5b736750dbcd2619bed3606aceb5a6324318d Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 11 Apr 2025 17:56:24 -0700 Subject: [PATCH 092/160] core: Make BlockProcessingError::Deterministic more general All we care about is that the enclosed error implements Display, really --- core/src/subgraph/error.rs | 6 +++++- core/src/subgraph/runner.rs | 8 ++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/core/src/subgraph/error.rs b/core/src/subgraph/error.rs index f4cf7e2308d..2bab4eef6fd 100644 --- a/core/src/subgraph/error.rs +++ b/core/src/subgraph/error.rs @@ -1,6 +1,10 @@ use graph::data::subgraph::schema::SubgraphError; use graph::prelude::{thiserror, Error, StoreError}; +pub trait DeterministicError: std::fmt::Debug + std::fmt::Display + Send + Sync + 'static {} + +impl DeterministicError for SubgraphError {} + #[derive(thiserror::Error, Debug)] pub enum BlockProcessingError { #[error("{0:#}")] @@ -9,7 +13,7 @@ pub enum BlockProcessingError { // The error had a deterministic cause but, for a possibly non-deterministic reason, we chose to // halt processing due to the error. #[error("{0}")] - Deterministic(SubgraphError), + Deterministic(Box), #[error("subgraph stopped while processing triggers")] Canceled, diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index 779018cea08..117c91b1d47 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -757,7 +757,9 @@ where // all of the others are discarded. if has_errors && !is_non_fatal_errors_active { // Only the first error is reported. - return Err(BlockProcessingError::Deterministic(first_error.unwrap())); + return Err(BlockProcessingError::Deterministic(Box::new( + first_error.unwrap(), + ))); } let elapsed = start.elapsed().as_secs_f64(); @@ -1488,7 +1490,9 @@ where // all of the others are discarded. if has_errors && !is_non_fatal_errors_active { // Only the first error is reported. 
- return Err(BlockProcessingError::Deterministic(first_error.unwrap()).into()); + return Err(BlockProcessingError::Deterministic(Box::new( + first_error.unwrap(), + ))); } let elapsed = start.elapsed().as_secs_f64(); From f224562103c5159702643fe83eaa57afed61cea5 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 11 Apr 2025 18:09:32 -0700 Subject: [PATCH 093/160] core: Rename BlockProcessingError to ProcessingError --- core/src/subgraph/error.rs | 16 +++++++------- core/src/subgraph/runner.rs | 42 ++++++++++++++++++------------------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/core/src/subgraph/error.rs b/core/src/subgraph/error.rs index 2bab4eef6fd..e1035bb2290 100644 --- a/core/src/subgraph/error.rs +++ b/core/src/subgraph/error.rs @@ -6,7 +6,7 @@ pub trait DeterministicError: std::fmt::Debug + std::fmt::Display + Send + Sync impl DeterministicError for SubgraphError {} #[derive(thiserror::Error, Debug)] -pub enum BlockProcessingError { +pub enum ProcessingError { #[error("{0:#}")] Unknown(Error), @@ -19,24 +19,24 @@ pub enum BlockProcessingError { Canceled, } -impl BlockProcessingError { +impl ProcessingError { pub fn is_deterministic(&self) -> bool { - matches!(self, BlockProcessingError::Deterministic(_)) + matches!(self, ProcessingError::Deterministic(_)) } } pub(crate) trait ErrorHelper { - fn non_deterministic(self: Self) -> Result; + fn non_deterministic(self: Self) -> Result; } impl ErrorHelper for Result { - fn non_deterministic(self) -> Result { - self.map_err(|e| BlockProcessingError::Unknown(e)) + fn non_deterministic(self) -> Result { + self.map_err(|e| ProcessingError::Unknown(e)) } } impl ErrorHelper for Result { - fn non_deterministic(self) -> Result { - self.map_err(|e| BlockProcessingError::Unknown(Error::from(e))) + fn non_deterministic(self) -> Result { + self.map_err(|e| ProcessingError::Unknown(Error::from(e))) } } diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index 117c91b1d47..86a2fad6b53 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -1,5 +1,5 @@ use crate::subgraph::context::IndexingContext; -use crate::subgraph::error::{BlockProcessingError, ErrorHelper as _}; +use crate::subgraph::error::{ErrorHelper as _, ProcessingError}; use crate::subgraph::inputs::IndexingInputs; use crate::subgraph::state::IndexingState; use crate::subgraph::stream::new_block_stream; @@ -361,7 +361,7 @@ where block_stream_cancel_handle: &CancelHandle, block: BlockWithTriggers, firehose_cursor: FirehoseCursor, - ) -> Result { + ) -> Result { let triggers = block.trigger_data; let block = Arc::new(block.block); let block_ptr = block.ptr(); @@ -449,7 +449,7 @@ where Ok(state) => block_state = state, // Some form of unknown or non-deterministic error ocurred. - Err(MappingError::Unknown(e)) => return Err(BlockProcessingError::Unknown(e)), + Err(MappingError::Unknown(e)) => return Err(ProcessingError::Unknown(e)), Err(MappingError::PossibleReorg(e)) => { info!(logger, "Possible reorg detected, retrying"; @@ -616,7 +616,7 @@ where // clean context as in b21fa73b-6453-4340-99fb-1a78ec62efb1. 
match e { MappingError::PossibleReorg(e) | MappingError::Unknown(e) => { - BlockProcessingError::Unknown(e) + ProcessingError::Unknown(e) } } })?; @@ -633,7 +633,7 @@ where // Avoid writing to store if block stream has been canceled if block_stream_cancel_handle.is_canceled() { - return Err(BlockProcessingError::Canceled); + return Err(ProcessingError::Canceled); } if let Some(proof_of_indexing) = proof_of_indexing.into_inner() { @@ -659,7 +659,7 @@ where } = block_state .entity_cache .as_modifications(block.number()) - .map_err(|e| BlockProcessingError::Unknown(e.into()))?; + .map_err(|e| ProcessingError::Unknown(e.into()))?; section.end(); trace!(self.logger, "Entity cache statistics"; @@ -757,7 +757,7 @@ where // all of the others are discarded. if has_errors && !is_non_fatal_errors_active { // Only the first error is reported. - return Err(BlockProcessingError::Deterministic(Box::new( + return Err(ProcessingError::Deterministic(Box::new( first_error.unwrap(), ))); } @@ -778,11 +778,11 @@ where if has_errors && !ENV_VARS.disable_fail_fast && !store.is_deployment_synced() { store .unassign_subgraph() - .map_err(|e| BlockProcessingError::Unknown(e.into()))?; + .map_err(|e| ProcessingError::Unknown(e.into()))?; // Use `Canceled` to avoiding setting the subgraph health to failed, an error was // just transacted so it will be already be set to unhealthy. - return Err(BlockProcessingError::Canceled); + return Err(ProcessingError::Canceled); } match needs_restart { @@ -825,7 +825,7 @@ where fn create_dynamic_data_sources( &mut self, created_data_sources: Vec, - ) -> Result<(Vec>, Vec>), BlockProcessingError> { + ) -> Result<(Vec>, Vec>), ProcessingError> { let mut data_sources = vec![]; let mut runtime_hosts = vec![]; @@ -899,7 +899,7 @@ where &mut self, start: Instant, block_ptr: BlockPtr, - action: Result, + action: Result, ) -> Result { self.state.skip_ptr_updates_timer = Instant::now(); @@ -951,7 +951,7 @@ where return Ok(action); } - Err(BlockProcessingError::Canceled) => { + Err(ProcessingError::Canceled) => { debug!(self.logger, "Subgraph block stream shut down cleanly"); return Ok(Action::Stop); } @@ -1280,7 +1280,7 @@ trait StreamEventHandler { handler: String, cursor: FirehoseCursor, cancel_handle: &CancelHandle, - ) -> Result; + ) -> Result; async fn handle_process_block( &mut self, block: BlockWithTriggers, @@ -1314,7 +1314,7 @@ where handler: String, cursor: FirehoseCursor, cancel_handle: &CancelHandle, - ) -> Result { + ) -> Result { let logger = self.logger.new(o!( "block_number" => format!("{:?}", block_ptr.number), "block_hash" => format!("{}", block_ptr.hash) @@ -1349,9 +1349,7 @@ where Ok(block_state) => block_state, // Some form of unknown or non-deterministic error ocurred. 
- Err(MappingError::Unknown(e)) => { - return Err(BlockProcessingError::Unknown(e).into()) - } + Err(MappingError::Unknown(e)) => return Err(ProcessingError::Unknown(e).into()), Err(MappingError::PossibleReorg(e)) => { info!(logger, "Possible reorg detected, retrying"; @@ -1380,7 +1378,7 @@ where // Avoid writing to store if block stream has been canceled if cancel_handle.is_canceled() { - return Err(BlockProcessingError::Canceled.into()); + return Err(ProcessingError::Canceled.into()); } if let Some(proof_of_indexing) = proof_of_indexing.into_inner() { @@ -1406,7 +1404,7 @@ where } = block_state .entity_cache .as_modifications(block_ptr.number) - .map_err(|e| BlockProcessingError::Unknown(e.into()))?; + .map_err(|e| ProcessingError::Unknown(e.into()))?; section.end(); trace!(self.logger, "Entity cache statistics"; @@ -1490,7 +1488,7 @@ where // all of the others are discarded. if has_errors && !is_non_fatal_errors_active { // Only the first error is reported. - return Err(BlockProcessingError::Deterministic(Box::new( + return Err(ProcessingError::Deterministic(Box::new( first_error.unwrap(), ))); } @@ -1507,11 +1505,11 @@ where if has_errors && !ENV_VARS.disable_fail_fast && !store.is_deployment_synced() { store .unassign_subgraph() - .map_err(|e| BlockProcessingError::Unknown(e.into()))?; + .map_err(|e| ProcessingError::Unknown(e.into()))?; // Use `Canceled` to avoiding setting the subgraph health to failed, an error was // just transacted so it will be already be set to unhealthy. - return Err(BlockProcessingError::Canceled.into()); + return Err(ProcessingError::Canceled.into()); }; Ok(Action::Continue) From d5286bfb856a901af198b04454a62e3270ce97d8 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 11 Apr 2025 18:21:18 -0700 Subject: [PATCH 094/160] graph: Remove unused variants from StoreError --- graph/src/components/store/err.rs | 11 +---------- store/postgres/src/fork.rs | 12 +++++------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/graph/src/components/store/err.rs b/graph/src/components/store/err.rs index 76be7c311ce..b3021f0c9de 100644 --- a/graph/src/components/store/err.rs +++ b/graph/src/components/store/err.rs @@ -1,6 +1,6 @@ use super::{BlockNumber, DeploymentSchemaVersion}; +use crate::prelude::DeploymentHash; use crate::prelude::QueryExecutionError; -use crate::{data::store::EntityValidationError, prelude::DeploymentHash}; use anyhow::{anyhow, Error}; use diesel::result::Error as DieselError; @@ -11,8 +11,6 @@ use tokio::task::JoinError; pub enum StoreError { #[error("store error: {0:#}")] Unknown(Error), - #[error("Entity validation failed: {0}")] - EntityValidationError(EntityValidationError), #[error( "tried to set entity of type `{0}` with ID \"{1}\" but an entity of type `{2}`, \ which has an interface in common with `{0}`, exists with the same ID" @@ -24,8 +22,6 @@ pub enum StoreError { UnknownTable(String), #[error("entity type '{0}' does not have an attribute '{0}'")] UnknownAttribute(String, String), - #[error("malformed directive '{0}'")] - MalformedDirective(String), #[error("query execution failed: {0}")] QueryExecutionError(String), #[error("Child filter nesting not supported by value `{0}`: `{1}`")] @@ -54,8 +50,6 @@ pub enum StoreError { Canceled, #[error("database unavailable")] DatabaseUnavailable, - #[error("database disabled")] - DatabaseDisabled, #[error("subgraph forking failed: {0}")] ForkFailure(String), #[error("subgraph writer poisoned by previous error")] @@ -96,7 +90,6 @@ impl Clone for StoreError { fn clone(&self) -> 
Self { match self { Self::Unknown(arg0) => Self::Unknown(anyhow!("{}", arg0)), - Self::EntityValidationError(arg0) => Self::EntityValidationError(arg0.clone()), Self::ConflictingId(arg0, arg1, arg2) => { Self::ConflictingId(arg0.clone(), arg1.clone(), arg2.clone()) } @@ -105,7 +98,6 @@ impl Clone for StoreError { Self::UnknownAttribute(arg0, arg1) => { Self::UnknownAttribute(arg0.clone(), arg1.clone()) } - Self::MalformedDirective(arg0) => Self::MalformedDirective(arg0.clone()), Self::QueryExecutionError(arg0) => Self::QueryExecutionError(arg0.clone()), Self::ChildFilterNestingNotSupportedError(arg0, arg1) => { Self::ChildFilterNestingNotSupportedError(arg0.clone(), arg1.clone()) @@ -121,7 +113,6 @@ impl Clone for StoreError { Self::FulltextColumnMissingConfig => Self::FulltextColumnMissingConfig, Self::Canceled => Self::Canceled, Self::DatabaseUnavailable => Self::DatabaseUnavailable, - Self::DatabaseDisabled => Self::DatabaseDisabled, Self::ForkFailure(arg0) => Self::ForkFailure(arg0.clone()), Self::Poisoned => Self::Poisoned, Self::WriterPanic(arg0) => Self::Unknown(anyhow!("writer panic: {}", arg0)), diff --git a/store/postgres/src/fork.rs b/store/postgres/src/fork.rs index 1a8e7a7c4ec..4bb064f87c6 100644 --- a/store/postgres/src/fork.rs +++ b/store/postgres/src/fork.rs @@ -9,8 +9,8 @@ use graph::{ components::store::SubgraphFork as SubgraphForkTrait, constraint_violation, prelude::{ - info, r::Value as RValue, reqwest, serde_json, DeploymentHash, Entity, Logger, Serialize, - StoreError, Value, ValueType, + anyhow, info, r::Value as RValue, reqwest, serde_json, DeploymentHash, Entity, Logger, + Serialize, StoreError, Value, ValueType, }, schema::Field, url::Url, @@ -211,11 +211,9 @@ query Query ($id: String) {{ map }; - Ok(Some( - schema - .make_entity(map) - .map_err(|e| StoreError::EntityValidationError(e))?, - )) + Ok(Some(schema.make_entity(map).map_err(|e| { + StoreError::Unknown(anyhow!("entity validation failed: {e}")) + })?)) } } From 9f65b2249330a2d34bee0255c00bd5df5fbac6d8 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 11 Apr 2025 18:43:43 -0700 Subject: [PATCH 095/160] graph: Classify StoreError into non/deterministic --- graph/src/components/store/err.rs | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/graph/src/components/store/err.rs b/graph/src/components/store/err.rs index b3021f0c9de..0fe8ae43f2b 100644 --- a/graph/src/components/store/err.rs +++ b/graph/src/components/store/err.rs @@ -161,6 +161,42 @@ impl StoreError { StoreError::WriteFailure(entity.to_string(), block, error.to_string(), query) }) } + + pub fn is_deterministic(&self) -> bool { + use StoreError::*; + + // This classification tries to err on the side of caution. If in doubt, + // assume the error is non-deterministic. 
+ match self { + // deterministic errors + ConflictingId(_, _, _) + | UnknownField(_, _) + | UnknownTable(_) + | UnknownAttribute(_, _) + | InvalidIdentifier(_) + | UnsupportedFilter(_, _) => true, + + // non-deterministic errors + Unknown(_) + | QueryExecutionError(_) + | ChildFilterNestingNotSupportedError(_, _) + | DuplicateBlockProcessing(_, _) + | ConstraintViolation(_) + | DeploymentNotFound(_) + | UnknownShard(_) + | FulltextSearchNonDeterministic + | FulltextColumnMissingConfig + | Canceled + | DatabaseUnavailable + | ForkFailure(_) + | Poisoned + | WriterPanic(_) + | UnsupportedDeploymentSchemaVersion(_) + | PruneFailure(_) + | WriteFailure(_, _, _, _) + | StatementTimeout => false, + } + } } impl From for StoreError { From 284b4ce4188187cc07e45b9673b6d4f7f6ba79d8 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 11 Apr 2025 18:45:58 -0700 Subject: [PATCH 096/160] core: Rename ErrorHelper --- core/src/subgraph/error.rs | 11 +++++++---- core/src/subgraph/runner.rs | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/core/src/subgraph/error.rs b/core/src/subgraph/error.rs index e1035bb2290..1ab79348be0 100644 --- a/core/src/subgraph/error.rs +++ b/core/src/subgraph/error.rs @@ -5,6 +5,9 @@ pub trait DeterministicError: std::fmt::Debug + std::fmt::Display + Send + Sync impl DeterministicError for SubgraphError {} +/// An error happened during processing and we need to classify errors into +/// deterministic and non-deterministic errors. This struct holds the result +/// of that classification #[derive(thiserror::Error, Debug)] pub enum ProcessingError { #[error("{0:#}")] @@ -24,18 +27,18 @@ impl ProcessingError { matches!(self, ProcessingError::Deterministic(_)) } } - -pub(crate) trait ErrorHelper { +/// Implement this for errors that are always non-deterministic. +pub(crate) trait NonDeterministicErrorHelper { fn non_deterministic(self: Self) -> Result; } -impl ErrorHelper for Result { +impl NonDeterministicErrorHelper for Result { fn non_deterministic(self) -> Result { self.map_err(|e| ProcessingError::Unknown(e)) } } -impl ErrorHelper for Result { +impl NonDeterministicErrorHelper for Result { fn non_deterministic(self) -> Result { self.map_err(|e| ProcessingError::Unknown(Error::from(e))) } diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index 86a2fad6b53..3dd712f1a89 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -1,5 +1,5 @@ use crate::subgraph::context::IndexingContext; -use crate::subgraph::error::{ErrorHelper as _, ProcessingError}; +use crate::subgraph::error::{NonDeterministicErrorHelper as _, ProcessingError}; use crate::subgraph::inputs::IndexingInputs; use crate::subgraph::state::IndexingState; use crate::subgraph::stream::new_block_stream; From efda72e52149d63a0ce89abd404bb4a35edd4549 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 9 Apr 2025 10:11:04 -0700 Subject: [PATCH 097/160] graph: Switch to turn off deterministic store errors --- graph/src/env/mappings.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/graph/src/env/mappings.rs b/graph/src/env/mappings.rs index 41499056b5b..c1bbb8565e5 100644 --- a/graph/src/env/mappings.rs +++ b/graph/src/env/mappings.rs @@ -62,6 +62,13 @@ pub struct EnvVarsMapping { /// eth calls before running triggers; instead eth calls happen when /// mappings call `ethereum.call`. Off by default. pub disable_declared_calls: bool, + + /// Set by the flag `GRAPH_STORE_ERRORS_ARE_NON_DETERMINISTIC`. Off by + /// default. 
Setting this to `true` will revert to the old behavior of + /// treating all store errors as nondeterministic. This is a temporary + /// measure and can be removed after 2025-07-01, once we are sure the + /// new behavior works as intended. + pub store_errors_are_nondeterministic: bool, } // This does not print any values avoid accidentally leaking any sensitive env vars @@ -89,6 +96,7 @@ impl From for EnvVarsMapping { ipfs_request_limit: x.ipfs_request_limit, allow_non_deterministic_ipfs: x.allow_non_deterministic_ipfs.0, disable_declared_calls: x.disable_declared_calls.0, + store_errors_are_nondeterministic: x.store_errors_are_nondeterministic.0, } } } @@ -123,4 +131,6 @@ pub struct InnerMappingHandlers { allow_non_deterministic_ipfs: EnvVarBoolean, #[envconfig(from = "GRAPH_DISABLE_DECLARED_CALLS", default = "false")] disable_declared_calls: EnvVarBoolean, + #[envconfig(from = "GRAPH_STORE_ERRORS_ARE_NON_DETERMINISTIC", default = "false")] + store_errors_are_nondeterministic: EnvVarBoolean, } From 8a4cc057801c05ee17e69b0fd4b63c74f11225d8 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 11 Apr 2025 18:50:50 -0700 Subject: [PATCH 098/160] core: Classify StoreError as non/deterministic --- core/src/subgraph/error.rs | 57 ++++++++++++++++++++++++++++++++++++- core/src/subgraph/runner.rs | 11 ++++--- 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/core/src/subgraph/error.rs b/core/src/subgraph/error.rs index 1ab79348be0..c50712c08db 100644 --- a/core/src/subgraph/error.rs +++ b/core/src/subgraph/error.rs @@ -1,10 +1,15 @@ use graph::data::subgraph::schema::SubgraphError; -use graph::prelude::{thiserror, Error, StoreError}; +use graph::env::ENV_VARS; +use graph::prelude::{anyhow, thiserror, Error, StoreError}; pub trait DeterministicError: std::fmt::Debug + std::fmt::Display + Send + Sync + 'static {} impl DeterministicError for SubgraphError {} +impl DeterministicError for StoreError {} + +impl DeterministicError for anyhow::Error {} + /// An error happened during processing and we need to classify errors into /// deterministic and non-deterministic errors. This struct holds the result /// of that classification @@ -26,7 +31,34 @@ impl ProcessingError { pub fn is_deterministic(&self) -> bool { matches!(self, ProcessingError::Deterministic(_)) } + + pub fn detail(self, ctx: &str) -> ProcessingError { + match self { + ProcessingError::Unknown(e) => { + let x = e.context(ctx.to_string()); + ProcessingError::Unknown(x) + } + ProcessingError::Deterministic(e) => { + ProcessingError::Deterministic(Box::new(anyhow!("{e}").context(ctx.to_string()))) + } + ProcessingError::Canceled => ProcessingError::Canceled, + } + } } + +/// Similar to `anyhow::Context`, but for `Result`. We +/// call the method `detail` to avoid ambiguity with anyhow's `context` +/// method +pub trait DetailHelper { + fn detail(self: Self, ctx: &str) -> Result; +} + +impl DetailHelper for Result { + fn detail(self, ctx: &str) -> Result { + self.map_err(|e| e.detail(ctx)) + } +} + /// Implement this for errors that are always non-deterministic. pub(crate) trait NonDeterministicErrorHelper { fn non_deterministic(self: Self) -> Result; @@ -43,3 +75,26 @@ impl NonDeterministicErrorHelper for Result { self.map_err(|e| ProcessingError::Unknown(Error::from(e))) } } + +/// Implement this for errors where it depends on the details whether they +/// are deterministic or not. 
+pub(crate) trait ClassifyErrorHelper { + fn classify(self: Self) -> Result; +} + +impl ClassifyErrorHelper for Result { + fn classify(self) -> Result { + self.map_err(|e| { + if ENV_VARS.mappings.store_errors_are_nondeterministic { + // Old behavior, just in case the new behavior causes issues + ProcessingError::Unknown(Error::from(e)) + } else { + if e.is_deterministic() { + ProcessingError::Deterministic(Box::new(e)) + } else { + ProcessingError::Unknown(Error::from(e)) + } + } + }) + } +} diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index 3dd712f1a89..71c36886d2e 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -1,5 +1,7 @@ use crate::subgraph::context::IndexingContext; -use crate::subgraph::error::{NonDeterministicErrorHelper as _, ProcessingError}; +use crate::subgraph::error::{ + ClassifyErrorHelper as _, DetailHelper as _, NonDeterministicErrorHelper as _, ProcessingError, +}; use crate::subgraph::inputs::IndexingInputs; use crate::subgraph::state::IndexingState; use crate::subgraph::stream::new_block_stream; @@ -746,7 +748,8 @@ where is_caught_up, ) .await - .non_deterministic()?; + .classify() + .detail("Failed to transact block operations")?; // For subgraphs with `nonFatalErrors` feature disabled, we consider // any error as fatal. @@ -1476,8 +1479,8 @@ where is_caught_up, ) .await - .context("Failed to transact block operations") - .non_deterministic()?; + .classify() + .detail("Failed to transact block operations")?; // For subgraphs with `nonFatalErrors` feature disabled, we consider // any error as fatal. From ba3c1a968b121260de8e00e9bb6483ec92d4890c Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 11 Apr 2025 18:57:50 -0700 Subject: [PATCH 099/160] all: Rename 'constraint violation' to 'internal error' --- graph/src/components/server/query.rs | 2 +- graph/src/components/store/err.rs | 16 ++++---- graph/src/components/store/mod.rs | 10 ++--- graph/src/components/store/write.rs | 20 +++++----- graph/src/data/query/error.rs | 8 ++-- graph/src/data/store/id.rs | 16 ++++---- graph/src/util/ogive.rs | 10 ++--- graphql/src/store/prefetch.rs | 2 +- graphql/src/store/resolver.rs | 2 +- store/postgres/src/block_store.rs | 12 +++--- store/postgres/src/chain_store.rs | 10 ++--- store/postgres/src/connection_pool.rs | 8 ++-- store/postgres/src/copy.rs | 26 ++++++------- store/postgres/src/deployment.rs | 16 ++++---- store/postgres/src/deployment_store.rs | 14 +++---- store/postgres/src/detail.rs | 16 ++++---- store/postgres/src/dynds/mod.rs | 4 +- store/postgres/src/dynds/private.rs | 6 +-- store/postgres/src/dynds/shared.rs | 18 ++++----- store/postgres/src/fork.rs | 6 +-- store/postgres/src/primary.rs | 27 ++++++------- store/postgres/src/relational.rs | 18 ++++----- store/postgres/src/relational/rollup.rs | 4 +- store/postgres/src/relational_queries.rs | 48 ++++++++++++------------ store/postgres/src/store.rs | 4 +- store/postgres/src/subgraph_store.rs | 14 +++---- store/postgres/src/writable.rs | 12 +++--- 27 files changed, 164 insertions(+), 185 deletions(-) diff --git a/graph/src/components/server/query.rs b/graph/src/components/server/query.rs index 6bf83ffbf76..4a9fe1557c2 100644 --- a/graph/src/components/server/query.rs +++ b/graph/src/components/server/query.rs @@ -28,7 +28,7 @@ impl From for ServerError { impl From for ServerError { fn from(e: StoreError) -> Self { match e { - StoreError::ConstraintViolation(s) => ServerError::InternalError(s), + StoreError::InternalError(s) => ServerError::InternalError(s), _ 
=> ServerError::ClientError(e.to_string()), } } diff --git a/graph/src/components/store/err.rs b/graph/src/components/store/err.rs index 0fe8ae43f2b..4c093d5f4d6 100644 --- a/graph/src/components/store/err.rs +++ b/graph/src/components/store/err.rs @@ -36,8 +36,8 @@ pub enum StoreError { /// An internal error where we expected the application logic to enforce /// some constraint, e.g., that subgraph names are unique, but found that /// constraint to not hold - #[error("internal constraint violated: {0}")] - ConstraintViolation(String), + #[error("internal error: {0}")] + InternalError(String), #[error("deployment not found: {0}")] DeploymentNotFound(String), #[error("shard not found: {0} (this usually indicates a misconfiguration)")] @@ -72,14 +72,14 @@ pub enum StoreError { StatementTimeout, } -// Convenience to report a constraint violation +// Convenience to report an internal error #[macro_export] -macro_rules! constraint_violation { +macro_rules! internal_error { ($msg:expr) => {{ - $crate::prelude::StoreError::ConstraintViolation(format!("{}", $msg)) + $crate::prelude::StoreError::InternalError(format!("{}", $msg)) }}; ($fmt:expr, $($arg:tt)*) => {{ - $crate::prelude::StoreError::ConstraintViolation(format!($fmt, $($arg)*)) + $crate::prelude::StoreError::InternalError(format!($fmt, $($arg)*)) }} } @@ -106,7 +106,7 @@ impl Clone for StoreError { Self::DuplicateBlockProcessing(arg0, arg1) => { Self::DuplicateBlockProcessing(arg0.clone(), arg1.clone()) } - Self::ConstraintViolation(arg0) => Self::ConstraintViolation(arg0.clone()), + Self::InternalError(arg0) => Self::InternalError(arg0.clone()), Self::DeploymentNotFound(arg0) => Self::DeploymentNotFound(arg0.clone()), Self::UnknownShard(arg0) => Self::UnknownShard(arg0.clone()), Self::FulltextSearchNonDeterministic => Self::FulltextSearchNonDeterministic, @@ -181,7 +181,7 @@ impl StoreError { | QueryExecutionError(_) | ChildFilterNestingNotSupportedError(_, _) | DuplicateBlockProcessing(_, _) - | ConstraintViolation(_) + | InternalError(_) | DeploymentNotFound(_) | UnknownShard(_) | FulltextSearchNonDeterministic diff --git a/graph/src/components/store/mod.rs b/graph/src/components/store/mod.rs index b64f8b35964..efe16c90ee6 100644 --- a/graph/src/components/store/mod.rs +++ b/graph/src/components/store/mod.rs @@ -26,13 +26,13 @@ use std::time::Duration; use crate::blockchain::{Block, BlockHash, BlockPtr}; use crate::cheap_clone::CheapClone; use crate::components::store::write::EntityModification; -use crate::constraint_violation; use crate::data::store::scalar::Bytes; use crate::data::store::{Id, IdList, Value}; use crate::data::value::Word; use crate::data_source::CausalityRegion; use crate::derive::CheapClone; use crate::env::ENV_VARS; +use crate::internal_error; use crate::prelude::{s, Attribute, DeploymentHash, ValueType}; use crate::schema::{ast as sast, EntityKey, EntityType, InputSchema}; use crate::util::stats::MovingStats; @@ -1000,17 +1000,17 @@ impl PruneRequest { let rebuild_threshold = ENV_VARS.store.rebuild_threshold; let delete_threshold = ENV_VARS.store.delete_threshold; if rebuild_threshold < 0.0 || rebuild_threshold > 1.0 { - return Err(constraint_violation!( + return Err(internal_error!( "the copy threshold must be between 0 and 1 but is {rebuild_threshold}" )); } if delete_threshold < 0.0 || delete_threshold > 1.0 { - return Err(constraint_violation!( + return Err(internal_error!( "the delete threshold must be between 0 and 1 but is {delete_threshold}" )); } if history_blocks <= reorg_threshold { - return 
Err(constraint_violation!( + return Err(internal_error!( "the deployment {} needs to keep at least {} blocks \ of history and can't be pruned to only {} blocks of history", deployment, @@ -1019,7 +1019,7 @@ impl PruneRequest { )); } if first_block >= latest_block { - return Err(constraint_violation!( + return Err(internal_error!( "the earliest block {} must be before the latest block {}", first_block, latest_block diff --git a/graph/src/components/store/write.rs b/graph/src/components/store/write.rs index 6f899633bd8..2c470fd32be 100644 --- a/graph/src/components/store/write.rs +++ b/graph/src/components/store/write.rs @@ -5,10 +5,10 @@ use crate::{ blockchain::{block_stream::FirehoseCursor, BlockPtr, BlockTime}, cheap_clone::CheapClone, components::subgraph::Entity, - constraint_violation, data::{store::Id, subgraph::schema::SubgraphError}, data_source::CausalityRegion, derive::CacheWeight, + internal_error, util::cache_weight::CacheWeight, }; @@ -182,7 +182,7 @@ impl EntityModification { match self { Insert { end, .. } | Overwrite { end, .. } => { if end.is_some() { - return Err(constraint_violation!( + return Err(internal_error!( "can not clamp {:?} to block {}", self, block @@ -191,7 +191,7 @@ impl EntityModification { *end = Some(block); } Remove { .. } => { - return Err(constraint_violation!( + return Err(internal_error!( "can not clamp block range for removal of {:?} to {}", self, block @@ -219,7 +219,7 @@ impl EntityModification { end, }), Remove { key, .. } => { - return Err(constraint_violation!( + return Err(internal_error!( "a remove for {}[{}] can not be converted into an insert", entity_type, key.entity_id @@ -330,7 +330,7 @@ impl RowGroup { if !is_forward { // unwrap: we only get here when `last()` is `Some` let last_block = self.rows.last().map(|emod| emod.block()).unwrap(); - return Err(constraint_violation!( + return Err(internal_error!( "we already have a modification for block {}, can not append {:?}", last_block, emod @@ -412,7 +412,7 @@ impl RowGroup { self.rows.push(row); } EntityModification::Overwrite { .. } | EntityModification::Remove { .. } => { - return Err(constraint_violation!( + return Err(internal_error!( "immutable entity type {} only allows inserts, not {:?}", self.entity_type, row @@ -426,7 +426,7 @@ impl RowGroup { use EntityModification::*; if row.block() <= prev_row.block() { - return Err(constraint_violation!( + return Err(internal_error!( "can not append operations that go backwards from {:?} to {:?}", prev_row, row @@ -444,7 +444,7 @@ impl RowGroup { Insert { end: Some(_), .. } | Overwrite { end: Some(_), .. }, Overwrite { .. } | Remove { .. }, ) => { - return Err(constraint_violation!( + return Err(internal_error!( "impossible combination of entity operations: {:?} and then {:?}", prev_row, row @@ -481,7 +481,7 @@ impl RowGroup { fn append(&mut self, group: RowGroup) -> Result<(), StoreError> { if self.entity_type != group.entity_type { - return Err(constraint_violation!( + return Err(internal_error!( "Can not append a row group for {} to a row group for {}", group.entity_type, self.entity_type @@ -710,7 +710,7 @@ impl Batch { fn append_inner(&mut self, mut batch: Batch) -> Result<(), StoreError> { if batch.block_ptr.number <= self.block_ptr.number { - return Err(constraint_violation!("Batches must go forward. Can't append a batch with block pointer {} to one with block pointer {}", batch.block_ptr, self.block_ptr)); + return Err(internal_error!("Batches must go forward. 
Can't append a batch with block pointer {} to one with block pointer {}", batch.block_ptr, self.block_ptr)); } self.block_ptr = batch.block_ptr; diff --git a/graph/src/data/query/error.rs b/graph/src/data/query/error.rs index 65fc1bcd259..d02b1c9c4bd 100644 --- a/graph/src/data/query/error.rs +++ b/graph/src/data/query/error.rs @@ -74,7 +74,7 @@ pub enum QueryExecutionError { DeploymentNotFound(String), IdMissing, IdNotString, - ConstraintViolation(String), + InternalError(String), } impl QueryExecutionError { @@ -132,7 +132,7 @@ impl QueryExecutionError { | DeploymentNotFound(_) | IdMissing | IdNotString - | ConstraintViolation(_) => false, + | InternalError(_) => false, } } } @@ -274,7 +274,7 @@ impl fmt::Display for QueryExecutionError { DeploymentNotFound(id_or_name) => write!(f, "deployment `{}` does not exist", id_or_name), IdMissing => write!(f, "entity is missing an `id` attribute"), IdNotString => write!(f, "entity `id` attribute is not a string"), - ConstraintViolation(msg) => write!(f, "internal constraint violated: {}", msg), + InternalError(msg) => write!(f, "internal error: {}", msg), } } } @@ -306,7 +306,7 @@ impl From for QueryExecutionError { StoreError::ChildFilterNestingNotSupportedError(attr, filter) => { QueryExecutionError::ChildFilterNestingNotSupportedError(attr, filter) } - StoreError::ConstraintViolation(msg) => QueryExecutionError::ConstraintViolation(msg), + StoreError::InternalError(msg) => QueryExecutionError::InternalError(msg), _ => QueryExecutionError::StoreError(CloneableAnyhowError(Arc::new(e.into()))), } } diff --git a/graph/src/data/store/id.rs b/graph/src/data/store/id.rs index 64be7545621..9726141e2d6 100644 --- a/graph/src/data/store/id.rs +++ b/graph/src/data/store/id.rs @@ -20,9 +20,9 @@ use crate::{ use crate::{ components::store::StoreError, - constraint_violation, data::value::Word, derive::CacheWeight, + internal_error, prelude::QueryExecutionError, runtime::gas::{Gas, GasSizeOf}, }; @@ -367,7 +367,7 @@ impl IdList { ids.push(id); Ok(ids) } - _ => Err(constraint_violation!( + _ => Err(internal_error!( "expected string id, got {}: {}", id.id_type(), id, @@ -381,7 +381,7 @@ impl IdList { ids.push(id); Ok(ids) } - _ => Err(constraint_violation!( + _ => Err(internal_error!( "expected bytes id, got {}: {}", id.id_type(), id, @@ -395,7 +395,7 @@ impl IdList { ids.push(id); Ok(ids) } - _ => Err(constraint_violation!( + _ => Err(internal_error!( "expected int8 id, got {}: {}", id.id_type(), id, @@ -423,7 +423,7 @@ impl IdList { ids.push(Word::from(id)); Ok(ids) } - _ => Err(constraint_violation!( + _ => Err(internal_error!( "expected string id, got {}: 0x{}", id.id_type(), id, @@ -438,7 +438,7 @@ impl IdList { ids.push(scalar::Bytes::from(id)); Ok(ids) } - _ => Err(constraint_violation!( + _ => Err(internal_error!( "expected bytes id, got {}: {}", id.id_type(), id, @@ -452,7 +452,7 @@ impl IdList { ids.push(id); Ok(ids) } - _ => Err(constraint_violation!( + _ => Err(internal_error!( "expected int8 id, got {}: {}", id.id_type(), id, @@ -533,7 +533,7 @@ impl IdList { ids.push(id); Ok(()) } - (list, id) => Err(constraint_violation!( + (list, id) => Err(internal_error!( "expected id of type {}, but got {}[{}]", list.id_type(), id.id_type(), diff --git a/graph/src/util/ogive.rs b/graph/src/util/ogive.rs index 476bfd76ce8..38300e088e6 100644 --- a/graph/src/util/ogive.rs +++ b/graph/src/util/ogive.rs @@ -1,6 +1,6 @@ use std::ops::RangeInclusive; -use crate::{constraint_violation, prelude::StoreError}; +use crate::{internal_error, prelude::StoreError}; /// A 
helper to deal with cumulative histograms, also known as ogives. This /// implementation is restricted to histograms where each bin has the same @@ -37,9 +37,7 @@ impl Ogive { /// and deduplicated, i.e., they don't have to be in ascending order. pub fn from_equi_histogram(mut points: Vec, total: usize) -> Result { if points.is_empty() { - return Err(constraint_violation!( - "histogram must have at least one point" - )); + return Err(internal_error!("histogram must have at least one point")); } points.sort_unstable(); @@ -124,7 +122,7 @@ impl Ogive { fn inverse(&self, value: i64) -> Result { let value = value as f64; if value < 0.0 { - return Err(constraint_violation!("value {} can not be negative", value)); + return Err(internal_error!("value {} can not be negative", value)); } let idx = (value / self.bin_size) as usize; if idx >= self.points.len() - 1 { @@ -138,7 +136,7 @@ impl Ogive { fn check_in_range(&self, point: i64) -> Result<(), StoreError> { if !self.range.contains(&point) { - return Err(constraint_violation!( + return Err(internal_error!( "point {} is outside of the range [{}, {}]", point, self.range.start(), diff --git a/graphql/src/store/prefetch.rs b/graphql/src/store/prefetch.rs index 33f0b67452b..95f51d51944 100644 --- a/graphql/src/store/prefetch.rs +++ b/graphql/src/store/prefetch.rs @@ -632,7 +632,7 @@ impl<'a> Loader<'a> { let object_type = input_schema .object_or_aggregation(&object_type.name, parent_interval) .ok_or_else(|| { - vec![QueryExecutionError::ConstraintViolation(format!( + vec![QueryExecutionError::InternalError(format!( "the type `{}`(interval {}) is not an object type", object_type.name, parent_interval diff --git a/graphql/src/store/resolver.rs b/graphql/src/store/resolver.rs index d7032740768..8f5eaaccbd1 100644 --- a/graphql/src/store/resolver.rs +++ b/graphql/src/store/resolver.rs @@ -327,7 +327,7 @@ impl Resolver for StoreResolver { None => { let child0_id = child_id(&children[0]); let child1_id = child_id(&children[1]); - QueryExecutionError::ConstraintViolation(format!( + QueryExecutionError::InternalError(format!( "expected only one child for {}.{} but got {}. One child has id {}, another has id {}", object_type.name(), field.name, children.len(), child0_id, child1_id diff --git a/store/postgres/src/block_store.rs b/store/postgres/src/block_store.rs index 762a2642524..84a19b601e5 100644 --- a/store/postgres/src/block_store.rs +++ b/store/postgres/src/block_store.rs @@ -17,7 +17,7 @@ use graph::{ prelude::{error, info, BlockNumber, BlockPtr, Logger, ENV_VARS}, slog::o, }; -use graph::{constraint_violation, prelude::CheapClone}; +use graph::{internal_error, prelude::CheapClone}; use graph::{prelude::StoreError, util::timed_cache::TimedCache}; use crate::{ @@ -55,7 +55,7 @@ pub mod primary { }; use graph::{ blockchain::{BlockHash, ChainIdentifier}, - constraint_violation, + internal_error, prelude::StoreError, }; @@ -92,7 +92,7 @@ pub mod primary { net_version: self.net_version.clone(), genesis_block_hash: BlockHash::try_from(self.genesis_block.as_str()).map_err( |e| { - constraint_violation!( + internal_error!( "the genesis block hash `{}` for chain `{}` is not a valid hash: {}", self.genesis_block, self.name, @@ -366,7 +366,7 @@ impl BlockStore { let pool = self .pools .get(&chain.shard) - .ok_or_else(|| constraint_violation!("there is no pool for shard {}", chain.shard))? + .ok_or_else(|| internal_error!("there is no pool for shard {}", chain.shard))? 
.clone(); let sender = ChainHeadUpdateSender::new( self.mirror.primary().clone(), @@ -427,7 +427,7 @@ impl BlockStore { pub fn chain_head_block(&self, chain: &str) -> Result, StoreError> { let store = self .store(chain) - .ok_or_else(|| constraint_violation!("unknown network `{}`", chain))?; + .ok_or_else(|| internal_error!("unknown network `{}`", chain))?; store.chain_head_block(chain) } @@ -466,7 +466,7 @@ impl BlockStore { pub fn drop_chain(&self, chain: &str) -> Result<(), StoreError> { let chain_store = self .store(chain) - .ok_or_else(|| constraint_violation!("unknown chain {}", chain))?; + .ok_or_else(|| internal_error!("unknown chain {}", chain))?; // Delete from the primary first since that's where // deployment_schemas has a fk constraint on chains diff --git a/store/postgres/src/chain_store.rs b/store/postgres/src/chain_store.rs index 097aa799eff..0ec347d2bd5 100644 --- a/store/postgres/src/chain_store.rs +++ b/store/postgres/src/chain_store.rs @@ -30,7 +30,7 @@ use graph::prelude::{ BlockPtr, CachedEthereumCall, CancelableError, ChainStore as ChainStoreTrait, Error, EthereumCallCache, StoreError, }; -use graph::{constraint_violation, ensure}; +use graph::{ensure, internal_error}; use self::recent_blocks_cache::RecentBlocksCache; use crate::{ @@ -98,8 +98,8 @@ mod data { update, }; use graph::blockchain::{Block, BlockHash}; - use graph::constraint_violation; use graph::data::store::scalar::Bytes; + use graph::internal_error; use graph::prelude::ethabi::ethereum_types::H160; use graph::prelude::transaction_receipt::LightTransactionReceipt; use graph::prelude::web3::types::H256; @@ -176,7 +176,7 @@ mod data { if bytes.len() == H256::len_bytes() { Ok(H256::from_slice(bytes)) } else { - Err(constraint_violation!( + Err(internal_error!( "invalid H256 value `{}` has {} bytes instead of {}", graph::prelude::hex::encode(bytes), bytes.len(), @@ -1840,7 +1840,7 @@ impl ChainStore { number.map(|number| number.try_into()).transpose().map_err( |e: std::num::TryFromIntError| { - constraint_violation!( + internal_error!( "head block number for {} is {:?} which does not fit into a u32: {}", chain, number, @@ -2792,7 +2792,7 @@ impl EthereumCallCache for ChainStore { let mut resps = Vec::new(); for (id, retval, _) in rows { let idx = ids.iter().position(|i| i.as_ref() == id).ok_or_else(|| { - constraint_violation!( + internal_error!( "get_calls returned a call id that was not requested: {}", hex::encode(id) ) diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/connection_pool.rs index 6ff46649494..abe9109e1d6 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/connection_pool.rs @@ -9,10 +9,10 @@ use diesel::{sql_query, RunQueryDsl}; use diesel_migrations::{EmbeddedMigrations, HarnessWithOutput}; use graph::cheap_clone::CheapClone; use graph::components::store::QueryPermit; -use graph::constraint_violation; use graph::derive::CheapClone; use graph::futures03::future::join_all; use graph::futures03::FutureExt as _; +use graph::internal_error; use graph::prelude::tokio::time::Instant; use graph::prelude::{tokio, MetricsRegistry}; use graph::slog::warn; @@ -1076,7 +1076,7 @@ impl PoolInner { const MSG: &str = "internal error: trying to get fdw connection on a pool that doesn't have any"; error!(logger, "{}", MSG); - return Err(constraint_violation!(MSG)); + return Err(internal_error!(MSG)); } }; Ok(pool) @@ -1501,13 +1501,13 @@ impl PoolCoordinator { self.servers .iter() .find(|server| &server.shard == shard) - .ok_or_else(|| 
constraint_violation!("unknown shard {shard}")) + .ok_or_else(|| internal_error!("unknown shard {shard}")) } fn primary(&self) -> Result, StoreError> { let map = self.pools.lock().unwrap(); let pool_state = map.get(&*&PRIMARY_SHARD).ok_or_else(|| { - constraint_violation!("internal error: primary shard not found in pool coordinator") + internal_error!("internal error: primary shard not found in pool coordinator") })?; Ok(pool_state.get_unready()) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 2e8807b2aa8..75cc80fb3f6 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -31,8 +31,8 @@ use diesel::{ QueryDsl, RunQueryDsl, }; use graph::{ - constraint_violation, futures03::{future::select_all, FutureExt as _}, + internal_error, prelude::{ info, lazy_static, o, warn, BlockNumber, BlockPtr, CheapClone, Logger, StoreError, ENV_VARS, }, @@ -140,7 +140,7 @@ impl CopyState { Some((src_id, hash, number)) => { let stored_target_block = BlockPtr::from((hash, number)); if stored_target_block != target_block { - return Err(constraint_violation!( + return Err(internal_error!( "CopyState {} for copying {} to {} has incompatible block pointer {} instead of {}", dst.site.id, src.site.deployment, @@ -149,7 +149,7 @@ impl CopyState { target_block)); } if src_id != src.site.id { - return Err(constraint_violation!( + return Err(internal_error!( "CopyState {} for copying {} to {} has incompatible source {} instead of {}", dst.site.id, src.site.deployment, @@ -275,7 +275,7 @@ impl CopyState { // drop_foreign_schema does), see that we do not have // metadata for `src` if crate::deployment::exists(conn, &self.src.site)? { - return Err(constraint_violation!( + return Err(internal_error!( "we think we are copying {}[{}] across shards from {} to {}, but the \ source subgraph is actually in this shard", self.src.site.deployment, @@ -368,7 +368,7 @@ impl TableState { layout .table_for_entity(entity_type) .map_err(|e| { - constraint_violation!( + internal_error!( "invalid {} table {} in CopyState {} (table {}): {}", kind, entity_type, @@ -750,7 +750,7 @@ impl CopyTableWorker { self }) .await - .map_err(|e| constraint_violation!("copy worker for {} panicked: {}", object, e)) + .map_err(|e| internal_error!("copy worker for {} panicked: {}", object, e)) .into() } @@ -944,7 +944,7 @@ impl Connection { let logger = logger.new(o!("dst" => dst.site.namespace.to_string())); if src.site.schema_version != dst.site.schema_version { - return Err(StoreError::ConstraintViolation(format!( + return Err(StoreError::InternalError(format!( "attempted to copy between different schema versions, \ source version is {} but destination version is {}", src.site.schema_version, dst.site.schema_version @@ -981,7 +981,7 @@ impl Connection { F: FnOnce(&mut PgConnection) -> Result, { let Some(conn) = self.conn.as_mut() else { - return Err(constraint_violation!( + return Err(internal_error!( "copy connection has been handed to background task but not returned yet (transaction)" )); }; @@ -1066,13 +1066,11 @@ impl Connection { // Something bad happened. 
We should have at least one // worker if there are still tables to copy if self.conn.is_none() { - return Err(constraint_violation!( + return Err(internal_error!( "copy connection has been handed to background task but not returned yet (copy_data_internal)" )); } else { - return Err(constraint_violation!( - "no workers left but still tables to copy" - )); + return Err(internal_error!("no workers left but still tables to copy")); } } Ok(()) @@ -1268,9 +1266,7 @@ impl Connection { let dst_site = self.dst.site.cheap_clone(); let Some(conn) = self.conn.as_mut() else { - return Err(constraint_violation!( - "copy connection went missing (copy_data)" - )); + return Err(internal_error!("copy connection went missing (copy_data)")); }; conn.lock(&self.logger, &dst_site)?; diff --git a/store/postgres/src/deployment.rs b/store/postgres/src/deployment.rs index 5d83a563181..d58b26370c8 100644 --- a/store/postgres/src/deployment.rs +++ b/store/postgres/src/deployment.rs @@ -42,7 +42,7 @@ use std::{str::FromStr, sync::Arc}; use crate::connection_pool::ForeignServer; use crate::{block_range::BLOCK_RANGE_COLUMN, primary::Site}; -use graph::constraint_violation; +use graph::internal_error; #[derive(DbEnum, Debug, Clone, Copy)] #[PgType = "text"] @@ -92,7 +92,7 @@ impl TryFrom> for OnSync { None => Ok(OnSync::None), Some("activate") => Ok(OnSync::Activate), Some("replace") => Ok(OnSync::Replace), - _ => Err(constraint_violation!("illegal value for on_sync: {value}")), + _ => Err(internal_error!("illegal value for on_sync: {value}")), } } } @@ -466,7 +466,7 @@ pub fn transact_block( ))), // More than one matching row was found. - _ => Err(StoreError::ConstraintViolation( + _ => Err(StoreError::InternalError( "duplicate deployments in shard".to_owned(), )), } @@ -515,7 +515,7 @@ pub fn forward_block_ptr( }, // More than one matching row was found. 
- _ => Err(StoreError::ConstraintViolation( + _ => Err(StoreError::InternalError( "duplicate deployments in shard".to_owned(), )), } @@ -612,7 +612,7 @@ pub fn initialize_block_ptr(conn: &mut PgConnection, site: &Site) -> Result<(), .select(d::latest_ethereum_block_hash) .first::>>(conn) .map_err(|e| { - constraint_violation!( + internal_error!( "deployment sgd{} must have been created before calling initialize_block_ptr but we got {}", site.id, e ) @@ -645,10 +645,10 @@ pub fn initialize_block_ptr(conn: &mut PgConnection, site: &Site) -> Result<(), fn convert_to_u32(number: Option, field: &str, subgraph: &str) -> Result { number - .ok_or_else(|| constraint_violation!("missing {} for subgraph `{}`", field, subgraph)) + .ok_or_else(|| internal_error!("missing {} for subgraph `{}`", field, subgraph)) .and_then(|number| { u32::try_from(number).map_err(|_| { - constraint_violation!( + internal_error!( "invalid value {:?} for {} in subgraph {}", number, field, @@ -1330,7 +1330,7 @@ pub fn set_on_sync( match n { 0 => Err(StoreError::DeploymentNotFound(site.to_string())), 1 => Ok(()), - _ => Err(constraint_violation!( + _ => Err(internal_error!( "multiple manifests for deployment {}", site.to_string() )), diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index 92de85f316e..948b6e94410 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -37,8 +37,8 @@ use std::time::{Duration, Instant}; use graph::components::store::EntityCollection; use graph::components::subgraph::{ProofOfIndexingFinisher, ProofOfIndexingVersion}; -use graph::constraint_violation; use graph::data::subgraph::schema::{DeploymentCreate, SubgraphError}; +use graph::internal_error; use graph::prelude::{ anyhow, debug, info, o, warn, web3, AttributeNames, BlockNumber, BlockPtr, CheapClone, DeploymentHash, DeploymentState, Entity, EntityQuery, Error, Logger, QueryExecutionError, @@ -806,7 +806,7 @@ impl DeploymentStore { reorg_threshold: BlockNumber, ) -> Result<(), StoreError> { if history_blocks <= reorg_threshold { - return Err(constraint_violation!( + return Err(internal_error!( "the amount of history to keep for sgd{} can not be set to \ {history_blocks} since it must be more than the \ reorg threshold {reorg_threshold}", @@ -1208,9 +1208,7 @@ impl DeploymentStore { Some(Ok(Ok(()))) => Ok(false), Some(Ok(Err(err))) => Err(StoreError::PruneFailure(err.to_string())), Some(Err(join_err)) => Err(StoreError::PruneFailure(join_err.to_string())), - None => Err(constraint_violation!( - "prune handle is finished but not ready" - )), + None => Err(internal_error!("prune handle is finished but not ready")), } } Some(false) => { @@ -1324,7 +1322,7 @@ impl DeploymentStore { // Sanity check on block numbers let from_number = block_ptr_from.map(|ptr| ptr.number); if from_number <= Some(block_ptr_to.number) { - constraint_violation!( + internal_error!( "truncate must go backwards, but would go from block {} to block {}", from_number.unwrap_or(0), block_ptr_to.number @@ -1350,7 +1348,7 @@ impl DeploymentStore { // Sanity check on block numbers let from_number = block_ptr_from.map(|ptr| ptr.number); if from_number <= Some(block_ptr_to.number) { - constraint_violation!( + internal_error!( "rewind must go backwards, but would go from block {} to block {}", from_number.unwrap_or(0), block_ptr_to.number @@ -1387,7 +1385,7 @@ impl DeploymentStore { let info = self.subgraph_info_with_conn(&mut conn, site.cheap_clone())?; if let Some(graft_block) = info.graft_block 
{ if graft_block > block_ptr_to.number { - return Err(constraint_violation!( + return Err(internal_error!( "Can not revert subgraph `{}` to block {} as it was \ grafted at block {} and reverting past a graft point \ is not possible", diff --git a/store/postgres/src/detail.rs b/store/postgres/src/detail.rs index 807e238f4fe..168af5b5d51 100644 --- a/store/postgres/src/detail.rs +++ b/store/postgres/src/detail.rs @@ -17,7 +17,7 @@ use graph::prelude::{ BigDecimal, BlockPtr, DeploymentHash, StoreError, SubgraphDeploymentEntity, }; use graph::schema::InputSchema; -use graph::{constraint_violation, data::subgraph::status, prelude::web3::types::H256}; +use graph::{data::subgraph::status, internal_error, prelude::web3::types::H256}; use itertools::Itertools; use std::collections::HashMap; use std::convert::TryFrom; @@ -134,7 +134,7 @@ impl TryFrom for SubgraphError { _ => None, }; let subgraph_id = DeploymentHash::new(subgraph_id).map_err(|id| { - StoreError::ConstraintViolation(format!("invalid subgraph id `{}` in fatal error", id)) + StoreError::InternalError(format!("invalid subgraph id `{}` in fatal error", id)) })?; Ok(SubgraphError { subgraph_id, @@ -155,7 +155,7 @@ pub(crate) fn block( match (hash, number) { (Some(hash), Some(number)) => { let number = number.to_i32().ok_or_else(|| { - constraint_violation!( + internal_error!( "the block number {} for {} in {} is not representable as an i32", number, name, @@ -168,7 +168,7 @@ pub(crate) fn block( ))) } (None, None) => Ok(None), - (hash, number) => Err(constraint_violation!( + (hash, number) => Err(internal_error!( "the hash and number \ of a block pointer must either both be null or both have a \ value, but for `{}` the hash of {} is `{:?}` and the number is `{:?}`", @@ -208,7 +208,7 @@ pub(crate) fn info_from_details( let site = sites .iter() .find(|site| site.deployment.as_str() == deployment) - .ok_or_else(|| constraint_violation!("missing site for subgraph `{}`", deployment))?; + .ok_or_else(|| internal_error!("missing site for subgraph `{}`", deployment))?; // This needs to be filled in later since it lives in a // different shard @@ -227,7 +227,7 @@ pub(crate) fn info_from_details( latest_block, }; let entity_count = entity_count.to_u64().ok_or_else(|| { - constraint_violation!( + internal_error!( "the entityCount for {} is not representable as a u64", deployment ) @@ -438,13 +438,13 @@ impl StoredDeploymentEntity { .graft_base .map(DeploymentHash::new) .transpose() - .map_err(|b| constraint_violation!("invalid graft base `{}`", b))?; + .map_err(|b| internal_error!("invalid graft base `{}`", b))?; let debug_fork = detail .debug_fork .map(DeploymentHash::new) .transpose() - .map_err(|b| constraint_violation!("invalid debug fork `{}`", b))?; + .map_err(|b| internal_error!("invalid debug fork `{}`", b))?; Ok(SubgraphDeploymentEntity { manifest: manifest.as_manifest(schema), diff --git a/store/postgres/src/dynds/mod.rs b/store/postgres/src/dynds/mod.rs index 09385fb8a7d..27ab4e78a10 100644 --- a/store/postgres/src/dynds/mod.rs +++ b/store/postgres/src/dynds/mod.rs @@ -7,8 +7,8 @@ use crate::primary::Site; use diesel::PgConnection; use graph::{ components::store::{write, StoredDynamicDataSource}, - constraint_violation, data_source::CausalityRegion, + internal_error, prelude::{BlockNumber, StoreError}, }; @@ -60,7 +60,7 @@ pub(crate) fn update_offchain_status( true => { DataSourcesTable::new(site.namespace.clone()).update_offchain_status(conn, data_sources) } - false => Err(constraint_violation!( + false => Err(internal_error!( "shared 
schema does not support data source offchain_found", )), } diff --git a/store/postgres/src/dynds/private.rs b/store/postgres/src/dynds/private.rs index 243a7dc5a57..d4d21ad39c1 100644 --- a/store/postgres/src/dynds/private.rs +++ b/store/postgres/src/dynds/private.rs @@ -12,8 +12,8 @@ use diesel::{ use graph::{ anyhow::{anyhow, Context}, components::store::{write, StoredDynamicDataSource}, - constraint_violation, data_source::CausalityRegion, + internal_error, prelude::{serde_json, BlockNumber, StoreError}, }; @@ -164,7 +164,7 @@ impl DataSourcesTable { // Nested offchain data sources might not pass this check, as their `creation_block` // will be their parent's `creation_block`, not necessarily `block`. if causality_region == &CausalityRegion::ONCHAIN && creation_block != &Some(block) { - return Err(constraint_violation!( + return Err(internal_error!( "mismatching creation blocks `{:?}` and `{}`", creation_block, block @@ -293,7 +293,7 @@ impl DataSourcesTable { .execute(conn)?; if count > 1 { - return Err(constraint_violation!( + return Err(internal_error!( "expected to remove at most one offchain data source but would remove {}, causality region: {}", count, ds.causality_region diff --git a/store/postgres/src/dynds/shared.rs b/store/postgres/src/dynds/shared.rs index 34615a720e3..5a2af316fcf 100644 --- a/store/postgres/src/dynds/shared.rs +++ b/store/postgres/src/dynds/shared.rs @@ -11,9 +11,9 @@ use diesel::{insert_into, pg::PgConnection}; use graph::{ components::store::{write, StoredDynamicDataSource}, - constraint_violation, data::store::scalar::ToPrimitive, data_source::CausalityRegion, + internal_error, prelude::{serde_json, BigDecimal, BlockNumber, DeploymentHash, StoreError}, }; @@ -62,7 +62,7 @@ pub(super) fn load( let mut data_sources: Vec = Vec::new(); for (vid, name, context, address, creation_block) in dds.into_iter() { if address.len() != 20 { - return Err(constraint_violation!( + return Err(internal_error!( "Data source address `0x{:?}` for dynamic data source {} should be 20 bytes long but is {} bytes long", address, vid, address.len() @@ -72,7 +72,7 @@ pub(super) fn load( let manifest_idx = manifest_idx_and_name .iter() .find(|(_, manifest_name)| manifest_name == &name) - .ok_or_else(|| constraint_violation!("data source name {} not found", name))? + .ok_or_else(|| internal_error!("data source name {} not found", name))? .0; let creation_block = creation_block.to_i32(); let data_source = StoredDynamicDataSource { @@ -88,7 +88,7 @@ pub(super) fn load( }; if data_sources.last().and_then(|d| d.creation_block) > data_source.creation_block { - return Err(StoreError::ConstraintViolation( + return Err(StoreError::InternalError( "data sources not ordered by creation block".to_string(), )); } @@ -126,7 +126,7 @@ pub(super) fn insert( } = ds; if causality_region != &CausalityRegion::ONCHAIN { - return Err(constraint_violation!( + return Err(internal_error!( "using shared data source schema with file data sources" )); } @@ -134,17 +134,13 @@ pub(super) fn insert( let address = match param { Some(param) => param, None => { - return Err(constraint_violation!( - "dynamic data sources must have an address", - )); + return Err(internal_error!("dynamic data sources must have an address",)); } }; let name = manifest_idx_and_name .iter() .find(|(idx, _)| *idx == ds.manifest_idx) - .ok_or_else(|| { - constraint_violation!("manifest idx {} not found", ds.manifest_idx) - })? + .ok_or_else(|| internal_error!("manifest idx {} not found", ds.manifest_idx))? 
.1 .clone(); Ok(( diff --git a/store/postgres/src/fork.rs b/store/postgres/src/fork.rs index 4bb064f87c6..40457fb1739 100644 --- a/store/postgres/src/fork.rs +++ b/store/postgres/src/fork.rs @@ -7,7 +7,7 @@ use std::{ use graph::{ block_on, components::store::SubgraphFork as SubgraphForkTrait, - constraint_violation, + internal_error, prelude::{ anyhow, info, r::Value as RValue, reqwest, serde_json, DeploymentHash, Entity, Logger, Serialize, StoreError, Value, ValueType, @@ -69,9 +69,7 @@ impl SubgraphForkTrait for SubgraphFork { let entity_type = self.schema.entity_type(&entity_type_name)?; let fields = &entity_type .object_type() - .map_err(|_| { - constraint_violation!("no object type called `{}` found", entity_type_name) - })? + .map_err(|_| internal_error!("no object type called `{}` found", entity_type_name))? .fields; let query = Query { diff --git a/store/postgres/src/primary.rs b/store/postgres/src/primary.rs index 39df898ba32..2d4b2624289 100644 --- a/store/postgres/src/primary.rs +++ b/store/postgres/src/primary.rs @@ -31,12 +31,12 @@ use diesel::{ }; use graph::{ components::store::DeploymentLocator, - constraint_violation, data::{ store::scalar::ToPrimitive, subgraph::{status, DeploymentFeatures}, }, derive::CheapClone, + internal_error, prelude::{ anyhow, chrono::{DateTime, Utc}, @@ -384,9 +384,9 @@ impl TryFrom for Site { fn try_from(schema: Schema) -> Result { let deployment = DeploymentHash::new(&schema.subgraph) - .map_err(|s| constraint_violation!("Invalid deployment id {}", s))?; + .map_err(|s| internal_error!("Invalid deployment id {}", s))?; let namespace = Namespace::new(schema.name.clone()).map_err(|nsp| { - constraint_violation!( + internal_error!( "Invalid schema name {} for deployment {}", nsp, &schema.subgraph @@ -450,8 +450,8 @@ mod queries { use diesel::sql_types::Text; use graph::prelude::NodeId; use graph::{ - constraint_violation, data::subgraph::status, + internal_error, prelude::{DeploymentHash, StoreError, SubgraphName}, }; use std::{collections::HashMap, convert::TryFrom, convert::TryInto}; @@ -510,7 +510,7 @@ mod queries { .optional()?; match id { Some(id) => DeploymentHash::new(id) - .map_err(|id| constraint_violation!("illegal deployment id: {}", id)), + .map_err(|id| internal_error!("illegal deployment id: {}", id)), None => Err(StoreError::DeploymentNotFound(name.to_string())), } } @@ -673,7 +673,7 @@ mod queries { .optional()? .map(|node| { NodeId::new(&node).map_err(|()| { - constraint_violation!( + internal_error!( "invalid node id `{}` in assignment for `{}`", node, site.deployment @@ -698,7 +698,7 @@ mod queries { .optional()? 
.map(|(node, ts)| { let node_id = NodeId::new(&node).map_err(|()| { - constraint_violation!( + internal_error!( "invalid node id `{}` in assignment for `{}`", node, site.deployment @@ -837,7 +837,7 @@ impl<'a> Connection<'a> { DeploymentHash::new(hash) .map(|hash| AssignmentChange::removed(DeploymentLocator::new(id.into(), hash))) .map_err(|id| { - StoreError::ConstraintViolation(format!( + StoreError::InternalError(format!( "invalid id `{}` for deployment assignment", id )) @@ -1318,7 +1318,7 @@ impl<'a> Connection<'a> { .cloned() .ok_or_else(|| anyhow!("failed to read schema name for {} back", deployment))?; let namespace = Namespace::new(namespace).map_err(|name| { - constraint_violation!("Generated database schema name {} is invalid", name) + internal_error!("Generated database schema name {} is invalid", name) })?; Ok(Site { @@ -1522,7 +1522,7 @@ impl<'a> Connection<'a> { .transpose() // This can't really happen since we filtered by valid NodeId's .map_err(|node| { - constraint_violation!("database has assignment for illegal node name {:?}", node) + internal_error!("database has assignment for illegal node name {:?}", node) }) } @@ -1559,7 +1559,7 @@ impl<'a> Connection<'a> { .map(|(shard, _)| Shard::new(shard.to_string())) .transpose() // This can't really happen since we filtered by valid shards - .map_err(|e| constraint_violation!("database has illegal shard name: {}", e)) + .map_err(|e| internal_error!("database has illegal shard name: {}", e)) } #[cfg(debug_assertions)] @@ -1729,10 +1729,7 @@ impl<'a> Connection<'a> { let ts = chrono::offset::Local::now() .checked_sub_signed(duration) .ok_or_else(|| { - StoreError::ConstraintViolation(format!( - "duration {} is too large", - duration - )) + StoreError::InternalError(format!("duration {} is too large", duration)) })?; Ok(u::table .filter(u::removed_at.is_null()) diff --git a/store/postgres/src/relational.rs b/store/postgres/src/relational.rs index fb181b7e74d..6bf8759a202 100644 --- a/store/postgres/src/relational.rs +++ b/store/postgres/src/relational.rs @@ -32,11 +32,11 @@ use graph::blockchain::block_stream::{EntityOperationKind, EntitySourceOperation use graph::blockchain::BlockTime; use graph::cheap_clone::CheapClone; use graph::components::store::write::{RowGroup, WriteChunk}; -use graph::constraint_violation; use graph::data::graphql::TypeExt as _; use graph::data::query::Trace; use graph::data::value::Word; use graph::data_source::CausalityRegion; +use graph::internal_error; use graph::prelude::{q, EntityQuery, StopwatchMetrics, ENV_VARS}; use graph::schema::{ EntityKey, EntityType, Field, FulltextConfig, FulltextDefinition, InputSchema, @@ -503,7 +503,7 @@ impl Layout { let key = entity_type.key_in(entity_data.id(), CausalityRegion::from_entity(&entity_data)); if entities.contains_key(&key) { - return Err(constraint_violation!( + return Err(internal_error!( "duplicate entity {}[{}] in result set, block = {}", key.entity_type, key.entity_id, @@ -910,7 +910,7 @@ impl Layout { .map(|id| id.to_string()) .collect::>() .join(", "); - return Err(constraint_violation!( + return Err(internal_error!( "entities of type `{}` can not be updated since they are immutable. Entity ids are [{}]", group.entity_type, ids @@ -968,7 +968,7 @@ impl Layout { let table = self.table_for_entity(&group.entity_type)?; if table.immutable { - return Err(constraint_violation!( + return Err(internal_error!( "entities of type `{}` can not be deleted since they are immutable. 
Entity ids are [{}]", table.object, group.ids().join(", ") )); @@ -1138,11 +1138,11 @@ impl Layout { let source_type = mapping.source_type(schema); let source_table = tables .get(&source_type) - .ok_or_else(|| constraint_violation!("Table for {source_type} is missing"))?; + .ok_or_else(|| internal_error!("Table for {source_type} is missing"))?; let agg_type = mapping.agg_type(schema); let agg_table = tables .get(&agg_type) - .ok_or_else(|| constraint_violation!("Table for {agg_type} is missing"))?; + .ok_or_else(|| internal_error!("Table for {agg_type} is missing"))?; let aggregation = mapping.aggregation(schema); let rollup = Rollup::new( mapping.interval, @@ -1612,9 +1612,9 @@ impl Table { ) -> Result { SqlName::check_valid_identifier(defn.as_str(), "object")?; - let object_type = defn.object_type().map_err(|_| { - constraint_violation!("The type `{}` is not an object type", defn.as_str()) - })?; + let object_type = defn + .object_type() + .map_err(|_| internal_error!("The type `{}` is not an object type", defn.as_str()))?; let table_name = SqlName::from(defn.as_str()); let columns = object_type diff --git a/store/postgres/src/relational/rollup.rs b/store/postgres/src/relational/rollup.rs index b9177a0052b..9a9830f6b5a 100644 --- a/store/postgres/src/relational/rollup.rs +++ b/store/postgres/src/relational/rollup.rs @@ -63,8 +63,8 @@ use diesel::{sql_query, PgConnection, RunQueryDsl as _}; use diesel::sql_types::{Integer, Nullable, Timestamptz}; use graph::blockchain::BlockTime; use graph::components::store::{BlockNumber, StoreError}; -use graph::constraint_violation; use graph::data::store::IdType; +use graph::internal_error; use graph::schema::{ Aggregate, AggregateFn, Aggregation, AggregationInterval, ExprVisitor, VisitExpr, }; @@ -111,7 +111,7 @@ fn rewrite<'a>(table: &'a Table, expr: &str) -> Result<(String, Vec<&'a str>), S fn not_supported(&mut self, msg: String) { if self.error.is_none() { - self.error = Some(constraint_violation!( + self.error = Some(internal_error!( "Schema validation should have found expression errors: {}", msg )); diff --git a/store/postgres/src/relational_queries.rs b/store/postgres/src/relational_queries.rs index 028f6044c34..533990c42b9 100644 --- a/store/postgres/src/relational_queries.rs +++ b/store/postgres/src/relational_queries.rs @@ -94,9 +94,9 @@ impl From for diesel::result::Error { } } -// Similar to graph::prelude::constraint_violation, but returns a Diesel +// Similar to graph::prelude::internal_error, but returns a Diesel // error for use in the guts of query generation -macro_rules! constraint_violation { +macro_rules! 
internal_error { ($msg:expr) => {{ diesel::result::Error::QueryBuilderError(anyhow!("{}", $msg).into()) }}; @@ -431,7 +431,7 @@ pub fn parse_id(id_type: IdType, json: serde_json::Value) -> Result SqlValue<'a> { String(s) => match column_type { ColumnType::String|ColumnType::Enum(_)|ColumnType::TSVector(_) => S::Text(s), ColumnType::Int8 => S::Int8(s.parse::().map_err(|e| { - constraint_violation!("failed to convert `{}` to an Int8: {}", s, e.to_string()) + internal_error!("failed to convert `{}` to an Int8: {}", s, e.to_string()) })?), ColumnType::Bytes => { let bytes = scalar::Bytes::from_str(s) @@ -913,7 +913,7 @@ impl PrefixType { match column.column_type() { ColumnType::String => Ok(PrefixType::String), ColumnType::Bytes => Ok(PrefixType::Bytes), - _ => Err(constraint_violation!( + _ => Err(internal_error!( "cannot setup prefix comparison for column {} of type {}", column, column.column_type().sql_type() @@ -1086,7 +1086,7 @@ impl<'a> QueryFragment for PrefixComparison<'a> { // For `op` either `<=` or `>=`, we can write (using '<=' as an example) // uv <= st <=> u < s || u = s && uv <= st let large = self.kind.is_large(&self.value).map_err(|()| { - constraint_violation!( + internal_error!( "column {} has type {} and can't be compared with the value `{}` using {}", self.column, self.column.column_type().sql_type(), @@ -2237,7 +2237,7 @@ impl<'a> InsertRow<'a> { .filter_map(|field| row.entity.get(field)) .map(|value| match value { Value::String(s) => Ok(s), - _ => Err(constraint_violation!( + _ => Err(internal_error!( "fulltext fields must be strings but got {:?}", value )), @@ -3178,7 +3178,7 @@ impl<'a> FilterCollection<'a> { if windows.iter().map(FilterWindow::parent_type).all_equal() { Ok(Some(windows[0].parent_type()?)) } else { - Err(graph::constraint_violation!( + Err(graph::internal_error!( "all implementors of an interface must use the same type for their `id`" )) } @@ -3448,7 +3448,7 @@ impl<'a> SortKey<'a> { true => ( parent_table.primary_key(), child_table.column_for_field(&join_attribute).map_err(|_| { - graph::constraint_violation!( + graph::internal_error!( "Column for a join attribute `{}` of `{}` table not found", join_attribute, child_table.name() @@ -3459,7 +3459,7 @@ impl<'a> SortKey<'a> { parent_table .column_for_field(&join_attribute) .map_err(|_| { - graph::constraint_violation!( + graph::internal_error!( "Column for a join attribute `{}` of `{}` table not found", join_attribute, parent_table.name() @@ -3535,7 +3535,7 @@ impl<'a> SortKey<'a> { child_table .column_for_field(&child.join_attribute) .map_err(|_| { - graph::constraint_violation!( + graph::internal_error!( "Column for a join attribute `{}` of `{}` table not found", child.join_attribute, child_table.name() @@ -3546,7 +3546,7 @@ impl<'a> SortKey<'a> { parent_table .column_for_field(&child.join_attribute) .map_err(|_| { - graph::constraint_violation!( + graph::internal_error!( "Column for a join attribute `{}` of `{}` table not found", child.join_attribute, parent_table.name() @@ -3586,7 +3586,7 @@ impl<'a> SortKey<'a> { direction: SortDirection, ) -> Result, QueryExecutionError> { if entity_types.is_empty() { - return Err(QueryExecutionError::ConstraintViolation( + return Err(QueryExecutionError::InternalError( "Cannot order by child interface with no implementing entity types".to_string(), )); } @@ -3744,7 +3744,7 @@ impl<'a> SortKey<'a> { direction: _, } => { if column.is_primary_key() { - return Err(constraint_violation!("SortKey::Key never uses 'id'")); + return Err(internal_error!("SortKey::Key never 
uses 'id'")); } match select_statement_level { @@ -3764,7 +3764,7 @@ impl<'a> SortKey<'a> { match nested { ChildKey::Single(child) => { if child.sort_by_column.is_primary_key() { - return Err(constraint_violation!("SortKey::Key never uses 'id'")); + return Err(internal_error!("SortKey::Key never uses 'id'")); } match select_statement_level { @@ -3781,7 +3781,7 @@ impl<'a> SortKey<'a> { ChildKey::Many(_, children) => { for child in children.iter() { if child.sort_by_column.is_primary_key() { - return Err(constraint_violation!("SortKey::Key never uses 'id'")); + return Err(internal_error!("SortKey::Key never uses 'id'")); } out.push_sql(", "); child.sort_by_column.walk_ast(out.reborrow())?; @@ -3930,9 +3930,7 @@ impl<'a> SortKey<'a> { ) -> QueryResult<()> { if column.is_primary_key() { // This shouldn't happen since we'd use SortKey::IdAsc/Desc - return Err(constraint_violation!( - "sort_expr called with primary key column" - )); + return Err(internal_error!("sort_expr called with primary key column")); } fn push_prefix(prefix: Option<&str>, out: &mut AstPass) { @@ -3990,14 +3988,14 @@ impl<'a> SortKey<'a> { let sort_by = &child.sort_by_column; if sort_by.is_primary_key() { // This shouldn't happen since we'd use SortKey::ManyIdAsc/ManyDesc - return Err(constraint_violation!( + return Err(internal_error!( "multi_sort_expr called with primary key column" )); } match sort_by.column_type() { ColumnType::TSVector(_) => { - return Err(constraint_violation!("TSVector is not supported")); + return Err(internal_error!("TSVector is not supported")); } _ => {} } @@ -4565,7 +4563,7 @@ impl<'a> ClampRangeQuery<'a> { block: BlockNumber, ) -> Result { if table.immutable { - Err(graph::constraint_violation!( + Err(graph::internal_error!( "immutable entities can not be deleted or updated (table `{}`)", table.qualified_name )) @@ -4674,7 +4672,7 @@ pub struct RevertClampQuery<'a> { impl<'a> RevertClampQuery<'a> { pub(crate) fn new(table: &'a Table, block: BlockNumber) -> Result { if table.immutable { - Err(graph::constraint_violation!( + Err(graph::internal_error!( "can not revert clamping in immutable table `{}`", table.qualified_name )) @@ -4894,7 +4892,7 @@ impl<'a> QueryFragment for CopyEntityBatchQuery<'a> { out.push_sql(", 0"); } (true, false) => { - return Err(constraint_violation!( + return Err(internal_error!( "can not copy entity type {} to {} because the src has a causality region but the dst does not", self.src.object.as_str(), self.dst.object.as_str() diff --git a/store/postgres/src/store.rs b/store/postgres/src/store.rs index 7eb428a5058..bda5b2da136 100644 --- a/store/postgres/src/store.rs +++ b/store/postgres/src/store.rs @@ -9,8 +9,8 @@ use graph::{ StatusStore, Store as StoreTrait, }, }, - constraint_violation, data::subgraph::status, + internal_error, prelude::{ web3::types::Address, BlockNumber, BlockPtr, CheapClone, DeploymentHash, PartialBlockPtr, QueryExecutionError, StoreError, @@ -87,7 +87,7 @@ impl QueryStoreManager for Store { .and_then(|x| x)?; let chain_store = self.block_store.chain_store(&site.network).ok_or_else(|| { - constraint_violation!( + internal_error!( "Subgraphs index a known network, but {} indexes `{}` which we do not know about. 
This is most likely a configuration error.", site.deployment, site.network diff --git a/store/postgres/src/subgraph_store.rs b/store/postgres/src/subgraph_store.rs index 339c66cee3f..5564a7d1726 100644 --- a/store/postgres/src/subgraph_store.rs +++ b/store/postgres/src/subgraph_store.rs @@ -21,9 +21,9 @@ use graph::{ PruneReporter, PruneRequest, SubgraphFork, }, }, - constraint_violation, data::query::QueryTarget, data::subgraph::{schema::DeploymentCreate, status, DeploymentFeatures}, + internal_error, prelude::{ anyhow, lazy_static, o, web3::types::Address, ApiVersion, BlockNumber, BlockPtr, ChainStore, DeploymentHash, EntityOperation, Logger, MetricsRegistry, NodeId, @@ -443,7 +443,7 @@ impl SubgraphStoreInner { fn evict(&self, id: &DeploymentHash) -> Result<(), StoreError> { if let Some((site, _)) = self.sites.remove(id) { let store = self.stores.get(&site.shard).ok_or_else(|| { - constraint_violation!( + internal_error!( "shard {} for deployment sgd{} not found when evicting", site.shard, site.id @@ -540,9 +540,7 @@ impl SubgraphStoreInner { let placement = self .placer .place(name.as_str(), network_name) - .map_err(|msg| { - constraint_violation!("illegal indexer name in deployment rule: {}", msg) - })?; + .map_err(|msg| internal_error!("illegal indexer name in deployment rule: {}", msg))?; match placement { None => Ok((PRIMARY_SHARD.clone(), default_node)), @@ -985,7 +983,7 @@ impl SubgraphStoreInner { pub(crate) fn version_info(&self, version: &str) -> Result { if let Some((deployment_id, created_at)) = self.mirror.version_info(version)? { let id = DeploymentHash::new(deployment_id.clone()) - .map_err(|id| constraint_violation!("illegal deployment id {}", id))?; + .map_err(|id| internal_error!("illegal deployment id {}", id))?; let (store, site) = self.store(&id)?; let statuses = store.deployment_statuses(&[site.clone()])?; let status = statuses @@ -994,7 +992,7 @@ impl SubgraphStoreInner { let chain = status .chains .first() - .ok_or_else(|| constraint_violation!("no chain info for {}", deployment_id))?; + .ok_or_else(|| internal_error!("no chain info for {}", deployment_id))?; let latest_ethereum_block_number = chain.latest_block.as_ref().map(|block| block.number()); let subgraph_info = store.subgraph_info(site.cheap_clone())?; @@ -1601,7 +1599,7 @@ impl SubgraphStoreTrait for SubgraphStore { fn active_locator(&self, hash: &str) -> Result, StoreError> { let sites = self.mirror.find_sites(&[hash.to_string()], true)?; if sites.len() > 1 { - return Err(constraint_violation!( + return Err(internal_error!( "There are {} active deployments for {hash}, there should only be one", sites.len() )); diff --git a/store/postgres/src/writable.rs b/store/postgres/src/writable.rs index 3d85042d07c..628b1741e24 100644 --- a/store/postgres/src/writable.rs +++ b/store/postgres/src/writable.rs @@ -9,10 +9,10 @@ use async_trait::async_trait; use graph::blockchain::block_stream::{EntitySourceOperation, FirehoseCursor}; use graph::blockchain::BlockTime; use graph::components::store::{Batch, DeploymentCursorTracker, DerivedEntityQuery, ReadStore}; -use graph::constraint_violation; use graph::data::store::IdList; use graph::data::subgraph::schema; use graph::data_source::CausalityRegion; +use graph::internal_error; use graph::prelude::{ BlockNumber, CacheWeight, Entity, MetricsRegistry, SubgraphDeploymentEntity, SubgraphStore as _, BLOCK_NUMBER_MAX, @@ -133,7 +133,7 @@ impl LastRollupTracker { *last = LastRollup::Some(block_time); } (LastRollup::Some(_) | LastRollup::Unknown, None) => { - 
constraint_violation!("block time cannot be unset"); + internal_error!("block time cannot be unset"); } } @@ -684,8 +684,8 @@ impl Request { let batch = batch.read().unwrap(); if let Some(err) = &batch.error { // This can happen when appending to the batch failed - // because of a constraint violation. Returning an `Err` - // here will poison and shut down the queue + // because of an internal error. Returning an `Err` here + // will poison and shut down the queue return Err(err.clone()); } let res = store @@ -1342,7 +1342,7 @@ impl Writer { // If there was an error, report that instead of a naked 'writer not running' queue.check_err()?; if join_handle.is_finished() { - Err(constraint_violation!( + Err(internal_error!( "Subgraph writer for {} is not running", queue.store.site )) @@ -1679,7 +1679,7 @@ impl WritableStoreTrait for WritableStore { if let Some(block_ptr) = self.block_ptr.lock().unwrap().as_ref() { if block_ptr_to.number <= block_ptr.number { - return Err(constraint_violation!( + return Err(internal_error!( "transact_block_operations called for block {} but its head is already at {}", block_ptr_to, block_ptr From 7d8ad8a3ada9e8b168b7710082dbdca696aa9ec0 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 11 Apr 2025 19:27:02 -0700 Subject: [PATCH 100/160] graph: Detect unique constraint violations and mark as deterministic --- graph/src/components/store/err.rs | 13 ++++++++++++- store/test-store/tests/postgres/writable.rs | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/graph/src/components/store/err.rs b/graph/src/components/store/err.rs index 4c093d5f4d6..264c1b80df2 100644 --- a/graph/src/components/store/err.rs +++ b/graph/src/components/store/err.rs @@ -70,6 +70,8 @@ pub enum StoreError { WriteFailure(String, BlockNumber, String, String), #[error("database query timed out")] StatementTimeout, + #[error("database constraint violated: {0}")] + ConstraintViolation(String), } // Convenience to report an internal error @@ -127,6 +129,7 @@ impl Clone for StoreError { Self::WriteFailure(arg0.clone(), arg1.clone(), arg2.clone(), arg3.clone()) } Self::StatementTimeout => Self::StatementTimeout, + Self::ConstraintViolation(arg0) => Self::ConstraintViolation(arg0.clone()), } } } @@ -135,6 +138,7 @@ impl StoreError { pub fn from_diesel_error(e: &DieselError) -> Option { const CONN_CLOSE: &str = "server closed the connection unexpectedly"; const STMT_TIMEOUT: &str = "canceling statement due to statement timeout"; + const UNIQUE_CONSTR: &str = "duplicate key value violates unique constraint"; let DieselError::DatabaseError(_, info) = e else { return None; }; @@ -146,6 +150,12 @@ impl StoreError { Some(StoreError::DatabaseUnavailable) } else if info.message().contains(STMT_TIMEOUT) { Some(StoreError::StatementTimeout) + } else if info.message().contains(UNIQUE_CONSTR) { + let msg = match info.details() { + Some(details) => format!("{}: {}", info.message(), details.replace('\n', " ")), + None => info.message().to_string(), + }; + Some(StoreError::ConstraintViolation(msg)) } else { None } @@ -174,7 +184,8 @@ impl StoreError { | UnknownTable(_) | UnknownAttribute(_, _) | InvalidIdentifier(_) - | UnsupportedFilter(_, _) => true, + | UnsupportedFilter(_, _) + | ConstraintViolation(_) => true, // non-deterministic errors Unknown(_) diff --git a/store/test-store/tests/postgres/writable.rs b/store/test-store/tests/postgres/writable.rs index 2e3e138d567..d83ec8cbf48 100644 --- a/store/test-store/tests/postgres/writable.rs +++ 
b/store/test-store/tests/postgres/writable.rs @@ -449,6 +449,7 @@ fn read_range_pool_created_test() { let pool_created_type = TEST_SUBGRAPH_SCHEMA.entity_type("PoolCreated").unwrap(); let entity_types = vec![pool_created_type.clone()]; + let mut last_op: Option = None; for count in (1..=2).map(|x| x as i64) { let id = if count == 1 { "0xff80818283848586" @@ -478,6 +479,7 @@ fn read_range_pool_created_test() { data, }; + last_op = Some(op.clone()); transact_entity_operations( &subgraph_store, &deployment, @@ -500,5 +502,21 @@ fn read_range_pool_created_test() { let a = result_entities[index as usize].clone(); assert_eq!(a, format!("{:?}", en)); } + + // Make sure we get a constraint violation + let op = last_op.take().unwrap(); + + transact_entity_operations(&subgraph_store, &deployment, block_pointer(3), vec![op]) + .await + .unwrap(); + let res = writable.flush().await; + let exp = "duplicate key value violates unique constraint \"pool_created_pkey\": Key (vid)=(2) already exists."; + match res { + Ok(_) => panic!("Expected error, but got success"), + Err(StoreError::ConstraintViolation(msg)) => { + assert_eq!(msg, exp); + } + Err(e) => panic!("Expected constraint violation, but got {:?}", e), + } }) } From be6c9402dbd2eb14898fe948a5a7bfd633f1c42e Mon Sep 17 00:00:00 2001 From: Shiyas Mohammed <83513144+shiyasmohd@users.noreply.github.com> Date: Thu, 17 Apr 2025 13:40:17 +0530 Subject: [PATCH 101/160] core,node,server: display current node when reassigning subgraph --- .../src/commands/deployment/reassign.rs | 21 +++++++++++++------ .../manager/commands/deployment/reassign.rs | 21 +++++++++++++++---- .../resolvers/deployment_mutation/reassign.rs | 3 +++ 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/core/graphman/src/commands/deployment/reassign.rs b/core/graphman/src/commands/deployment/reassign.rs index 5d3d633e082..3950c8d192f 100644 --- a/core/graphman/src/commands/deployment/reassign.rs +++ b/core/graphman/src/commands/deployment/reassign.rs @@ -24,6 +24,18 @@ impl Deployment { pub fn locator(&self) -> &DeploymentLocator { &self.locator } + + pub fn assigned_node( + &self, + primary_pool: ConnectionPool, + ) -> Result, GraphmanError> { + let primary_conn = primary_pool.get().map_err(GraphmanError::from)?; + let mut catalog_conn = catalog::Connection::new(primary_conn); + let node = catalog_conn + .assigned_node(&self.site) + .map_err(GraphmanError::from)?; + Ok(node) + } } #[derive(Debug, Error)] @@ -70,16 +82,13 @@ pub fn reassign_deployment( notification_sender: Arc, deployment: &Deployment, node: &NodeId, + curr_node: Option, ) -> Result { let primary_conn = primary_pool.get().map_err(GraphmanError::from)?; let mut catalog_conn = catalog::Connection::new(primary_conn); - - let changes: Vec = match catalog_conn - .assigned_node(&deployment.site) - .map_err(GraphmanError::from)? 
- { + let changes: Vec = match &curr_node { Some(curr) => { - if &curr == node { + if &curr == &node { vec![] } else { catalog_conn diff --git a/node/src/manager/commands/deployment/reassign.rs b/node/src/manager/commands/deployment/reassign.rs index 60528f16206..bd87ee22f31 100644 --- a/node/src/manager/commands/deployment/reassign.rs +++ b/node/src/manager/commands/deployment/reassign.rs @@ -16,11 +16,24 @@ pub fn run( node: &NodeId, ) -> Result<()> { let deployment = load_deployment(primary_pool.clone(), &deployment)?; + let curr_node = deployment.assigned_node(primary_pool.clone())?; + let reassign_msg = match &curr_node { + Some(curr_node) => format!( + "Reassigning deployment {} (was {})", + deployment.locator(), + curr_node + ), + None => format!("Reassigning deployment {}", deployment.locator()), + }; + println!("{}", reassign_msg); - println!("Reassigning deployment {}", deployment.locator()); - - let reassign_result = - reassign_deployment(primary_pool, notification_sender, &deployment, node)?; + let reassign_result = reassign_deployment( + primary_pool, + notification_sender, + &deployment, + node, + curr_node, + )?; match reassign_result { ReassignResult::EmptyResponse => { diff --git a/server/graphman/src/resolvers/deployment_mutation/reassign.rs b/server/graphman/src/resolvers/deployment_mutation/reassign.rs index 3887d67032a..026ef94ed9f 100644 --- a/server/graphman/src/resolvers/deployment_mutation/reassign.rs +++ b/server/graphman/src/resolvers/deployment_mutation/reassign.rs @@ -14,11 +14,14 @@ pub fn run( node: &NodeId, ) -> Result { let deployment = load_deployment(ctx.primary_pool.clone(), deployment)?; + let curr_node = deployment.assigned_node(ctx.primary_pool.clone())?; + let reassign_result = reassign_deployment( ctx.primary_pool.clone(), ctx.notification_sender.clone(), &deployment, &node, + curr_node, )?; Ok(reassign_result) } From 703acae366fb5e80ae4eb5115d8ac6a4bfb3fb6f Mon Sep 17 00:00:00 2001 From: Shiyas Mohammed <83513144+shiyasmohd@users.noreply.github.com> Date: Thu, 17 Apr 2025 18:21:23 +0530 Subject: [PATCH 102/160] graph: enhance err msg for source subgraph manifest resolution in composition --- graph/src/data_source/subgraph.rs | 30 ++++++++++++++----- .../tests/chain/ethereum/manifest.rs | 3 +- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/graph/src/data_source/subgraph.rs b/graph/src/data_source/subgraph.rs index d8ef847aee4..87b44e66174 100644 --- a/graph/src/data_source/subgraph.rs +++ b/graph/src/data_source/subgraph.rs @@ -262,20 +262,32 @@ impl UnresolvedDataSource { let source_raw = resolver .cat(logger, &self.source.address.to_ipfs_link()) .await - .context("Failed to resolve source subgraph manifest")?; + .context(format!( + "Failed to resolve source subgraph [{}] manifest", + self.source.address, + ))?; - let source_raw: serde_yaml::Mapping = serde_yaml::from_slice(&source_raw) - .context("Failed to parse source subgraph manifest as YAML")?; + let source_raw: serde_yaml::Mapping = + serde_yaml::from_slice(&source_raw).context(format!( + "Failed to parse source subgraph [{}] manifest as YAML", + self.source.address + ))?; let deployment_hash = self.source.address.clone(); let source_manifest = UnresolvedSubgraphManifest::::parse(deployment_hash, source_raw) - .context("Failed to parse source subgraph manifest")?; + .context(format!( + "Failed to parse source subgraph [{}] manifest", + self.source.address + ))?; source_manifest .resolve(resolver, logger, LATEST_VERSION.clone()) .await - .context("Failed to resolve source 
subgraph manifest") + .context(format!( + "Failed to resolve source subgraph [{}] manifest", + self.source.address + )) .map(Arc::new) } @@ -346,7 +358,8 @@ impl UnresolvedDataSource { let source_spec_version = &source_manifest.spec_version; if source_spec_version < &SPEC_VERSION_1_3_0 { return Err(anyhow!( - "Source subgraph manifest spec version {} is not supported, minimum supported version is {}", + "Source subgraph [{}] manifest spec version {} is not supported, minimum supported version is {}", + self.source.address, source_spec_version, SPEC_VERSION_1_3_0 )); @@ -367,7 +380,10 @@ impl UnresolvedDataSource { .iter() .any(|ds| matches!(ds, crate::data_source::DataSource::Subgraph(_))) { - return Err(anyhow!("Nested subgraph data sources are not supported.")); + return Err(anyhow!( + "Nested subgraph data sources [{}] are not supported.", + self.name + )); } let mapping_entities: Vec = self diff --git a/store/test-store/tests/chain/ethereum/manifest.rs b/store/test-store/tests/chain/ethereum/manifest.rs index 02f4e1413f9..f025be2e626 100644 --- a/store/test-store/tests/chain/ethereum/manifest.rs +++ b/store/test-store/tests/chain/ethereum/manifest.rs @@ -1854,7 +1854,8 @@ specVersion: 1.3.0 assert!(matches!(e, SubgraphManifestResolveError::ResolveError(_))); let error_msg = e.to_string(); println!("{}", error_msg); - assert!(error_msg.contains("Nested subgraph data sources are not supported.")); + assert!(error_msg + .contains("Nested subgraph data sources [SubgraphSource] are not supported.")); } } }) From 601267772662385074de6dbdcba9891599c076d3 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 10:14:38 -0700 Subject: [PATCH 103/160] store: Make module connection_pool private No functional change, just changing imports --- core/graphman/src/commands/deployment/info.rs | 2 +- core/graphman/src/commands/deployment/pause.rs | 2 +- core/graphman/src/commands/deployment/reassign.rs | 2 +- core/graphman/src/commands/deployment/resume.rs | 2 +- core/graphman/src/commands/deployment/unassign.rs | 2 +- node/src/bin/manager.rs | 6 ++---- node/src/main.rs | 6 +++--- node/src/manager/commands/assign.rs | 4 +--- node/src/manager/commands/chain.rs | 6 ++---- node/src/manager/commands/copy.rs | 2 +- node/src/manager/commands/database.rs | 2 +- node/src/manager/commands/deployment/info.rs | 2 +- node/src/manager/commands/deployment/pause.rs | 2 +- node/src/manager/commands/deployment/reassign.rs | 2 +- node/src/manager/commands/deployment/restart.rs | 2 +- node/src/manager/commands/deployment/resume.rs | 2 +- node/src/manager/commands/deployment/unassign.rs | 2 +- node/src/manager/commands/drop.rs | 2 +- node/src/manager/commands/index.rs | 3 +-- node/src/manager/commands/prune.rs | 2 +- node/src/manager/commands/rewind.rs | 2 +- node/src/manager/commands/stats.rs | 2 +- node/src/manager/commands/txn_speed.rs | 2 +- node/src/manager/deployment.rs | 2 +- node/src/store_builder.rs | 8 +++----- server/graphman/src/resolvers/context.rs | 2 +- server/graphman/src/server.rs | 2 +- store/postgres/src/lib.rs | 3 ++- store/test-store/src/store.rs | 5 ++--- 29 files changed, 37 insertions(+), 46 deletions(-) diff --git a/core/graphman/src/commands/deployment/info.rs b/core/graphman/src/commands/deployment/info.rs index 2d3f58d5dc9..f4087b3a5e0 100644 --- a/core/graphman/src/commands/deployment/info.rs +++ b/core/graphman/src/commands/deployment/info.rs @@ -7,7 +7,7 @@ use graph::components::store::BlockNumber; use graph::components::store::DeploymentId; use 
graph::components::store::StatusStore; use graph::data::subgraph::schema::SubgraphHealth; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::Store; use itertools::Itertools; diff --git a/core/graphman/src/commands/deployment/pause.rs b/core/graphman/src/commands/deployment/pause.rs index 2fe4d136613..d7197d42fb3 100644 --- a/core/graphman/src/commands/deployment/pause.rs +++ b/core/graphman/src/commands/deployment/pause.rs @@ -5,7 +5,7 @@ use graph::components::store::DeploymentLocator; use graph::components::store::StoreEvent; use graph_store_postgres::command_support::catalog; use graph_store_postgres::command_support::catalog::Site; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use thiserror::Error; diff --git a/core/graphman/src/commands/deployment/reassign.rs b/core/graphman/src/commands/deployment/reassign.rs index 3950c8d192f..f2b7f9f6479 100644 --- a/core/graphman/src/commands/deployment/reassign.rs +++ b/core/graphman/src/commands/deployment/reassign.rs @@ -7,7 +7,7 @@ use graph::prelude::AssignmentChange; use graph::prelude::NodeId; use graph_store_postgres::command_support::catalog; use graph_store_postgres::command_support::catalog::Site; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use thiserror::Error; diff --git a/core/graphman/src/commands/deployment/resume.rs b/core/graphman/src/commands/deployment/resume.rs index 7eb0ff6e235..ab394ef4791 100644 --- a/core/graphman/src/commands/deployment/resume.rs +++ b/core/graphman/src/commands/deployment/resume.rs @@ -5,7 +5,7 @@ use graph::components::store::DeploymentLocator; use graph::prelude::StoreEvent; use graph_store_postgres::command_support::catalog; use graph_store_postgres::command_support::catalog::Site; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use thiserror::Error; diff --git a/core/graphman/src/commands/deployment/unassign.rs b/core/graphman/src/commands/deployment/unassign.rs index 5233e61ada1..0061fac49b6 100644 --- a/core/graphman/src/commands/deployment/unassign.rs +++ b/core/graphman/src/commands/deployment/unassign.rs @@ -5,7 +5,7 @@ use graph::components::store::DeploymentLocator; use graph::components::store::StoreEvent; use graph_store_postgres::command_support::catalog; use graph_store_postgres::command_support::catalog::Site; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use thiserror::Error; diff --git a/node/src/bin/manager.rs b/node/src/bin/manager.rs index 50ee9b61958..29ef3ff290e 100644 --- a/node/src/bin/manager.rs +++ b/node/src/bin/manager.rs @@ -26,11 +26,9 @@ use graph_node::network_setup::Networks; use graph_node::{ manager::deployment::DeploymentSearch, store_builder::StoreBuilder, MetricsContext, }; -use graph_store_postgres::connection_pool::PoolCoordinator; -use graph_store_postgres::ChainStore; use graph_store_postgres::{ - connection_pool::ConnectionPool, BlockStore, NotificationSender, Shard, Store, SubgraphStore, - SubscriptionManager, PRIMARY_SHARD, + BlockStore, ChainStore, ConnectionPool, NotificationSender, PoolCoordinator, Shard, Store, + SubgraphStore, SubscriptionManager, PRIMARY_SHARD, }; use 
itertools::Itertools; use lazy_static::lazy_static; diff --git a/node/src/main.rs b/node/src/main.rs index b2003dff28f..6cd892079c1 100644 --- a/node/src/main.rs +++ b/node/src/main.rs @@ -28,9 +28,9 @@ use graph_server_http::GraphQLServer as GraphQLQueryServer; use graph_server_index_node::IndexNodeServer; use graph_server_json_rpc::JsonRpcServer; use graph_server_metrics::PrometheusMetricsServer; -use graph_store_postgres::connection_pool::ConnectionPool; -use graph_store_postgres::Store; -use graph_store_postgres::{register_jobs as register_store_jobs, NotificationSender}; +use graph_store_postgres::{ + register_jobs as register_store_jobs, ConnectionPool, NotificationSender, Store, +}; use graphman_server::GraphmanServer; use graphman_server::GraphmanServerConfig; use std::io::{BufRead, BufReader}; diff --git a/node/src/manager/commands/assign.rs b/node/src/manager/commands/assign.rs index 568856f1f9f..01260538a74 100644 --- a/node/src/manager/commands/assign.rs +++ b/node/src/manager/commands/assign.rs @@ -1,8 +1,6 @@ use graph::components::store::DeploymentLocator; use graph::prelude::{anyhow::anyhow, Error, NodeId, StoreEvent}; -use graph_store_postgres::{ - command_support::catalog, connection_pool::ConnectionPool, NotificationSender, -}; +use graph_store_postgres::{command_support::catalog, ConnectionPool, NotificationSender}; use std::thread; use std::time::Duration; diff --git a/node/src/manager/commands/chain.rs b/node/src/manager/commands/chain.rs index f1bdf7d39b9..90f428b6562 100644 --- a/node/src/manager/commands/chain.rs +++ b/node/src/manager/commands/chain.rs @@ -16,16 +16,14 @@ use graph::prelude::{anyhow, anyhow::bail}; use graph::slog::Logger; use graph::{components::store::BlockStore as _, prelude::anyhow::Error}; use graph_store_postgres::add_chain; -use graph_store_postgres::connection_pool::PoolCoordinator; use graph_store_postgres::find_chain; use graph_store_postgres::update_chain_name; use graph_store_postgres::BlockStore; use graph_store_postgres::ChainStatus; use graph_store_postgres::ChainStore; +use graph_store_postgres::PoolCoordinator; use graph_store_postgres::Shard; -use graph_store_postgres::{ - command_support::catalog::block_store, connection_pool::ConnectionPool, -}; +use graph_store_postgres::{command_support::catalog::block_store, ConnectionPool}; use crate::network_setup::Networks; diff --git a/node/src/manager/commands/copy.rs b/node/src/manager/commands/copy.rs index 9ca80bc9b20..a7857476c58 100644 --- a/node/src/manager/commands/copy.rs +++ b/node/src/manager/commands/copy.rs @@ -17,7 +17,7 @@ use graph_store_postgres::{ }, PRIMARY_SHARD, }; -use graph_store_postgres::{connection_pool::ConnectionPool, Shard, Store, SubgraphStore}; +use graph_store_postgres::{ConnectionPool, Shard, Store, SubgraphStore}; use crate::manager::deployment::DeploymentSearch; use crate::manager::display::List; diff --git a/node/src/manager/commands/database.rs b/node/src/manager/commands/database.rs index 17d11c041cf..bb1f3b195e3 100644 --- a/node/src/manager/commands/database.rs +++ b/node/src/manager/commands/database.rs @@ -1,7 +1,7 @@ use std::{io::Write, time::Instant}; use graph::prelude::anyhow; -use graph_store_postgres::connection_pool::PoolCoordinator; +use graph_store_postgres::PoolCoordinator; pub async fn remap( coord: &PoolCoordinator, diff --git a/node/src/manager/commands/deployment/info.rs b/node/src/manager/commands/deployment/info.rs index 417092d6e2d..27a69c3841a 100644 --- a/node/src/manager/commands/deployment/info.rs +++ 
b/node/src/manager/commands/deployment/info.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use anyhow::bail; use anyhow::Result; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::Store; use graphman::commands::deployment::info::load_deployment_statuses; use graphman::commands::deployment::info::load_deployments; diff --git a/node/src/manager/commands/deployment/pause.rs b/node/src/manager/commands/deployment/pause.rs index 2a690ea688a..3e35496113e 100644 --- a/node/src/manager/commands/deployment/pause.rs +++ b/node/src/manager/commands/deployment/pause.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use anyhow::Result; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graphman::commands::deployment::pause::{ load_active_deployment, pause_active_deployment, PauseDeploymentError, diff --git a/node/src/manager/commands/deployment/reassign.rs b/node/src/manager/commands/deployment/reassign.rs index bd87ee22f31..afe4147a21e 100644 --- a/node/src/manager/commands/deployment/reassign.rs +++ b/node/src/manager/commands/deployment/reassign.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use anyhow::Result; use graph::prelude::NodeId; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graphman::commands::deployment::reassign::{ load_deployment, reassign_deployment, ReassignResult, diff --git a/node/src/manager/commands/deployment/restart.rs b/node/src/manager/commands/deployment/restart.rs index 4febf81b63c..5f3783b3e92 100644 --- a/node/src/manager/commands/deployment/restart.rs +++ b/node/src/manager/commands/deployment/restart.rs @@ -3,7 +3,7 @@ use std::thread::sleep; use std::time::Duration; use anyhow::Result; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graphman::deployment::DeploymentSelector; diff --git a/node/src/manager/commands/deployment/resume.rs b/node/src/manager/commands/deployment/resume.rs index 7e57d60cd48..01a9924ad51 100644 --- a/node/src/manager/commands/deployment/resume.rs +++ b/node/src/manager/commands/deployment/resume.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use anyhow::Result; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graphman::commands::deployment::resume::load_paused_deployment; use graphman::commands::deployment::resume::resume_paused_deployment; diff --git a/node/src/manager/commands/deployment/unassign.rs b/node/src/manager/commands/deployment/unassign.rs index 45567e81f63..0c27a2f5944 100644 --- a/node/src/manager/commands/deployment/unassign.rs +++ b/node/src/manager/commands/deployment/unassign.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use anyhow::Result; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graphman::commands::deployment::unassign::load_assigned_deployment; use graphman::commands::deployment::unassign::unassign_deployment; diff --git a/node/src/manager/commands/drop.rs b/node/src/manager/commands/drop.rs index 2c86e88e23a..b0d10d0ff63 100644 --- a/node/src/manager/commands/drop.rs +++ b/node/src/manager/commands/drop.rs @@ -4,7 +4,7 @@ use crate::manager::{ 
prompt::prompt_for_confirmation, }; use graph::anyhow::{self, bail}; -use graph_store_postgres::{connection_pool::ConnectionPool, NotificationSender, SubgraphStore}; +use graph_store_postgres::{ConnectionPool, NotificationSender, SubgraphStore}; use std::sync::Arc; /// Finds, unassigns, record and remove matching deployments. diff --git a/node/src/manager/commands/index.rs b/node/src/manager/commands/index.rs index a20ce74e9ea..6aa68137ad1 100644 --- a/node/src/manager/commands/index.rs +++ b/node/src/manager/commands/index.rs @@ -6,8 +6,7 @@ use graph::{ }; use graph_store_postgres::{ command_support::index::{CreateIndex, Method}, - connection_pool::ConnectionPool, - SubgraphStore, + ConnectionPool, SubgraphStore, }; use std::io::Write as _; use std::{collections::HashSet, sync::Arc}; diff --git a/node/src/manager/commands/prune.rs b/node/src/manager/commands/prune.rs index 2c3c2ae2386..dbf114453e8 100644 --- a/node/src/manager/commands/prune.rs +++ b/node/src/manager/commands/prune.rs @@ -14,7 +14,7 @@ use graph::{ data::subgraph::status, prelude::{anyhow, BlockNumber}, }; -use graph_store_postgres::{connection_pool::ConnectionPool, Store}; +use graph_store_postgres::{ConnectionPool, Store}; use crate::manager::{ commands::stats::{abbreviate_table_name, show_stats}, diff --git a/node/src/manager/commands/rewind.rs b/node/src/manager/commands/rewind.rs index 629c4b6e70f..51d432dfd49 100644 --- a/node/src/manager/commands/rewind.rs +++ b/node/src/manager/commands/rewind.rs @@ -10,8 +10,8 @@ use graph::components::store::{BlockStore as _, ChainStore as _, DeploymentLocat use graph::env::ENV_VARS; use graph::prelude::{anyhow, BlockNumber, BlockPtr}; use graph_store_postgres::command_support::catalog::{self as store_catalog}; -use graph_store_postgres::{connection_pool::ConnectionPool, Store}; use graph_store_postgres::{BlockStore, NotificationSender}; +use graph_store_postgres::{ConnectionPool, Store}; async fn block_ptr( store: Arc, diff --git a/node/src/manager/commands/stats.rs b/node/src/manager/commands/stats.rs index d1c2635bf4a..bb3a928b1ad 100644 --- a/node/src/manager/commands/stats.rs +++ b/node/src/manager/commands/stats.rs @@ -11,7 +11,7 @@ use graph::components::store::VersionStats; use graph::prelude::anyhow; use graph_store_postgres::command_support::catalog as store_catalog; use graph_store_postgres::command_support::catalog::Site; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::Shard; use graph_store_postgres::SubgraphStore; use graph_store_postgres::PRIMARY_SHARD; diff --git a/node/src/manager/commands/txn_speed.rs b/node/src/manager/commands/txn_speed.rs index f36aa2dac41..480d4669a9f 100644 --- a/node/src/manager/commands/txn_speed.rs +++ b/node/src/manager/commands/txn_speed.rs @@ -2,7 +2,7 @@ use diesel::PgConnection; use std::{collections::HashMap, thread::sleep, time::Duration}; use graph::prelude::anyhow; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use crate::manager::catalog; diff --git a/node/src/manager/deployment.rs b/node/src/manager/deployment.rs index fc1a3e0e5a7..f4f2a3b2533 100644 --- a/node/src/manager/deployment.rs +++ b/node/src/manager/deployment.rs @@ -11,8 +11,8 @@ use graph::{ prelude::{anyhow, lazy_static, regex::Regex, DeploymentHash}, }; use graph_store_postgres::command_support::catalog as store_catalog; -use graph_store_postgres::connection_pool::ConnectionPool; use graph_store_postgres::unused; +use 
graph_store_postgres::ConnectionPool; lazy_static! { // `Qm...` optionally follow by `:$shard` diff --git a/node/src/store_builder.rs b/node/src/store_builder.rs index 5294179f8eb..e1d1d38635f 100644 --- a/node/src/store_builder.rs +++ b/node/src/store_builder.rs @@ -8,13 +8,11 @@ use graph::{ prelude::{info, CheapClone, Logger}, util::security::SafeDisplay, }; -use graph_store_postgres::connection_pool::{ - ConnectionPool, ForeignServer, PoolCoordinator, PoolRole, -}; use graph_store_postgres::{ BlockStore as DieselBlockStore, ChainHeadUpdateListener as PostgresChainHeadUpdateListener, - ChainStoreMetrics, NotificationSender, Shard as ShardName, Store as DieselStore, SubgraphStore, - SubscriptionManager, PRIMARY_SHARD, + ChainStoreMetrics, ConnectionPool, ForeignServer, NotificationSender, PoolCoordinator, + PoolRole, Shard as ShardName, Store as DieselStore, SubgraphStore, SubscriptionManager, + PRIMARY_SHARD, }; use crate::config::{Config, Shard}; diff --git a/server/graphman/src/resolvers/context.rs b/server/graphman/src/resolvers/context.rs index 8cc3e819c6d..14726b2ae30 100644 --- a/server/graphman/src/resolvers/context.rs +++ b/server/graphman/src/resolvers/context.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use async_graphql::Context; use async_graphql::Result; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graph_store_postgres::Store; diff --git a/server/graphman/src/server.rs b/server/graphman/src/server.rs index ea71e7c2228..a969433cdea 100644 --- a/server/graphman/src/server.rs +++ b/server/graphman/src/server.rs @@ -10,8 +10,8 @@ use axum::Router; use graph::log::factory::LoggerFactory; use graph::prelude::ComponentLoggerConfig; use graph::prelude::ElasticComponentLoggerConfig; -use graph_store_postgres::connection_pool::ConnectionPool; use graph_store_postgres::graphman::GraphmanStore; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graph_store_postgres::Store; use slog::{info, Logger}; diff --git a/store/postgres/src/lib.rs b/store/postgres/src/lib.rs index 759e8601313..ddf920d7663 100644 --- a/store/postgres/src/lib.rs +++ b/store/postgres/src/lib.rs @@ -17,7 +17,7 @@ mod block_store; mod catalog; mod chain_head_listener; mod chain_store; -pub mod connection_pool; +mod connection_pool; mod copy; mod deployment; mod deployment_store; @@ -60,6 +60,7 @@ pub use self::block_store::BlockStore; pub use self::block_store::ChainStatus; pub use self::chain_head_listener::ChainHeadUpdateListener; pub use self::chain_store::{ChainStore, ChainStoreMetrics, Storage}; +pub use self::connection_pool::{ConnectionPool, ForeignServer, PoolCoordinator, PoolRole}; pub use self::detail::DeploymentDetail; pub use self::jobs::register as register_jobs; pub use self::notification_listener::NotificationSender; diff --git a/store/test-store/src/store.rs b/store/test-store/src/store.rs index 2fa96148ba9..b191916a9b6 100644 --- a/store/test-store/src/store.rs +++ b/store/test-store/src/store.rs @@ -25,10 +25,9 @@ use graph_graphql::prelude::{ use graph_graphql::test_support::GraphQLMetrics; use graph_node::config::{Config, Opt}; use graph_node::store_builder::StoreBuilder; -use graph_store_postgres::layout_for_tests::FAKE_NETWORK_SHARED; -use graph_store_postgres::{connection_pool::ConnectionPool, Shard, SubscriptionManager}; use graph_store_postgres::{ - BlockStore as DieselBlockStore, DeploymentPlacer, SubgraphStore as DieselSubgraphStore, + 
layout_for_tests::FAKE_NETWORK_SHARED, BlockStore as DieselBlockStore, ConnectionPool, + DeploymentPlacer, Shard, SubgraphStore as DieselSubgraphStore, SubscriptionManager, PRIMARY_SHARD, }; use hex_literal::hex; From a6dbe5996aeb5dadb2610ab592ec531e1f6001cf Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 10:24:06 -0700 Subject: [PATCH 104/160] store: Move connection_pool.rs to its own module No functional change --- store/postgres/src/block_store.rs | 4 ++-- store/postgres/src/catalog.rs | 2 +- store/postgres/src/chain_head_listener.rs | 2 +- store/postgres/src/chain_store.rs | 3 +-- store/postgres/src/copy.rs | 6 +++--- store/postgres/src/deployment.rs | 2 +- store/postgres/src/deployment_store.rs | 2 +- store/postgres/src/dynds/shared.rs | 2 +- store/postgres/src/graphman/mod.rs | 2 +- store/postgres/src/jobs.rs | 2 +- store/postgres/src/lib.rs | 4 ++-- store/postgres/src/{connection_pool.rs => pool/mod.rs} | 0 store/postgres/src/primary.rs | 3 +-- store/postgres/src/relational.rs | 2 +- store/postgres/src/subgraph_store.rs | 3 +-- 15 files changed, 18 insertions(+), 21 deletions(-) rename store/postgres/src/{connection_pool.rs => pool/mod.rs} (100%) diff --git a/store/postgres/src/block_store.rs b/store/postgres/src/block_store.rs index 84a19b601e5..d34915248b3 100644 --- a/store/postgres/src/block_store.rs +++ b/store/postgres/src/block_store.rs @@ -23,7 +23,7 @@ use graph::{prelude::StoreError, util::timed_cache::TimedCache}; use crate::{ chain_head_listener::ChainHeadUpdateSender, chain_store::{ChainStoreMetrics, Storage}, - connection_pool::ConnectionPool, + pool::ConnectionPool, primary::Mirror as PrimaryMirror, ChainStore, NotificationSender, Shard, PRIMARY_SHARD, }; @@ -60,7 +60,7 @@ pub mod primary { }; use crate::chain_store::Storage; - use crate::{connection_pool::ConnectionPool, Shard}; + use crate::{ConnectionPool, Shard}; table! 
{ chains(id) { diff --git a/store/postgres/src/catalog.rs b/store/postgres/src/catalog.rs index ba532dd53ff..a6767082555 100644 --- a/store/postgres/src/catalog.rs +++ b/store/postgres/src/catalog.rs @@ -22,8 +22,8 @@ use graph::{ prelude::{lazy_static, StoreError}, }; -use crate::connection_pool::ForeignServer; use crate::{ + pool::ForeignServer, primary::{Namespace, Site, NAMESPACE_PUBLIC}, relational::SqlName, }; diff --git a/store/postgres/src/chain_head_listener.rs b/store/postgres/src/chain_head_listener.rs index b10ab46529f..301c1f19209 100644 --- a/store/postgres/src/chain_head_listener.rs +++ b/store/postgres/src/chain_head_listener.rs @@ -11,8 +11,8 @@ use std::sync::Arc; use lazy_static::lazy_static; use crate::{ - connection_pool::ConnectionPool, notification_listener::{JsonNotification, NotificationListener, SafeChannelName}, + pool::ConnectionPool, NotificationSender, }; use graph::blockchain::ChainHeadUpdateListener as ChainHeadUpdateListenerTrait; diff --git a/store/postgres/src/chain_store.rs b/store/postgres/src/chain_store.rs index 0ec347d2bd5..a94c44a8870 100644 --- a/store/postgres/src/chain_store.rs +++ b/store/postgres/src/chain_store.rs @@ -34,8 +34,7 @@ use graph::{ensure, internal_error}; use self::recent_blocks_cache::RecentBlocksCache; use crate::{ - block_store::ChainStatus, chain_head_listener::ChainHeadUpdateSender, - connection_pool::ConnectionPool, + block_store::ChainStatus, chain_head_listener::ChainHeadUpdateSender, pool::ConnectionPool, }; /// Our own internal notion of a block diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index 75cc80fb3f6..58420b053a5 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -46,11 +46,11 @@ use crate::{ advisory_lock, catalog, deployment, dynds::DataSourcesTable, primary::{DeploymentId, Primary, Site}, - relational::index::IndexList, + relational::{index::IndexList, Layout, Table}, + relational_queries as rq, vid_batcher::{VidBatcher, VidRange}, + ConnectionPool, }; -use crate::{connection_pool::ConnectionPool, relational::Layout}; -use crate::{relational::Table, relational_queries as rq}; const LOG_INTERVAL: Duration = Duration::from_secs(3 * 60); diff --git a/store/postgres/src/deployment.rs b/store/postgres/src/deployment.rs index d58b26370c8..49f42ba07b9 100644 --- a/store/postgres/src/deployment.rs +++ b/store/postgres/src/deployment.rs @@ -40,7 +40,7 @@ use stable_hash_legacy::crypto::SetHasher; use std::{collections::BTreeSet, convert::TryFrom, ops::Bound, time::Duration}; use std::{str::FromStr, sync::Arc}; -use crate::connection_pool::ForeignServer; +use crate::ForeignServer; use crate::{block_range::BLOCK_RANGE_COLUMN, primary::Site}; use graph::internal_error; diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index 948b6e94410..bb7f842843b 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -56,7 +56,7 @@ use crate::relational::index::{CreateIndex, IndexList, Method}; use crate::relational::{Layout, LayoutCache, SqlName, Table}; use crate::relational_queries::FromEntityData; use crate::{advisory_lock, catalog, retry}; -use crate::{connection_pool::ConnectionPool, detail}; +use crate::{detail, ConnectionPool}; use crate::{dynds, primary::Site}; /// When connected to read replicas, this allows choosing which DB server to use for an operation. 
diff --git a/store/postgres/src/dynds/shared.rs b/store/postgres/src/dynds/shared.rs index 5a2af316fcf..7fdec556ada 100644 --- a/store/postgres/src/dynds/shared.rs +++ b/store/postgres/src/dynds/shared.rs @@ -17,8 +17,8 @@ use graph::{ prelude::{serde_json, BigDecimal, BlockNumber, DeploymentHash, StoreError}, }; -use crate::connection_pool::ForeignServer; use crate::primary::Site; +use crate::ForeignServer; table! { subgraphs.dynamic_ethereum_contract_data_source (vid) { diff --git a/store/postgres/src/graphman/mod.rs b/store/postgres/src/graphman/mod.rs index c9aba751f50..4f538cd6e23 100644 --- a/store/postgres/src/graphman/mod.rs +++ b/store/postgres/src/graphman/mod.rs @@ -6,7 +6,7 @@ use graphman_store::Execution; use graphman_store::ExecutionId; use graphman_store::ExecutionStatus; -use crate::connection_pool::ConnectionPool; +use crate::ConnectionPool; mod schema; diff --git a/store/postgres/src/jobs.rs b/store/postgres/src/jobs.rs index 17d2d279ce3..a150598427e 100644 --- a/store/postgres/src/jobs.rs +++ b/store/postgres/src/jobs.rs @@ -10,7 +10,7 @@ use graph::prelude::{error, Logger, MetricsRegistry, StoreError, ENV_VARS}; use graph::prometheus::Gauge; use graph::util::jobs::{Job, Runner}; -use crate::connection_pool::ConnectionPool; +use crate::ConnectionPool; use crate::{unused, Store, SubgraphStore}; pub fn register( diff --git a/store/postgres/src/lib.rs b/store/postgres/src/lib.rs index ddf920d7663..42c439d2f3c 100644 --- a/store/postgres/src/lib.rs +++ b/store/postgres/src/lib.rs @@ -17,7 +17,6 @@ mod block_store; mod catalog; mod chain_head_listener; mod chain_store; -mod connection_pool; mod copy; mod deployment; mod deployment_store; @@ -27,6 +26,7 @@ mod fork; mod functions; mod jobs; mod notification_listener; +mod pool; mod primary; pub mod query_store; mod relational; @@ -60,10 +60,10 @@ pub use self::block_store::BlockStore; pub use self::block_store::ChainStatus; pub use self::chain_head_listener::ChainHeadUpdateListener; pub use self::chain_store::{ChainStore, ChainStoreMetrics, Storage}; -pub use self::connection_pool::{ConnectionPool, ForeignServer, PoolCoordinator, PoolRole}; pub use self::detail::DeploymentDetail; pub use self::jobs::register as register_jobs; pub use self::notification_listener::NotificationSender; +pub use self::pool::{ConnectionPool, ForeignServer, PoolCoordinator, PoolRole}; pub use self::primary::{db_version, UnusedDeployment}; pub use self::store::Store; pub use self::store_events::SubscriptionManager; diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/pool/mod.rs similarity index 100% rename from store/postgres/src/connection_pool.rs rename to store/postgres/src/pool/mod.rs diff --git a/store/postgres/src/primary.rs b/store/postgres/src/primary.rs index 2d4b2624289..08f5a62300b 100644 --- a/store/postgres/src/primary.rs +++ b/store/postgres/src/primary.rs @@ -3,10 +3,9 @@ //! for the primary shard. 
use crate::{ block_range::UNVERSIONED_RANGE, - connection_pool::{ConnectionPool, ForeignServer}, detail::DeploymentDetail, subgraph_store::{unused, Shard, PRIMARY_SHARD}, - NotificationSender, + ConnectionPool, ForeignServer, NotificationSender, }; use diesel::{ connection::SimpleConnection, diff --git a/store/postgres/src/relational.rs b/store/postgres/src/relational.rs index 6bf8759a202..27cee515265 100644 --- a/store/postgres/src/relational.rs +++ b/store/postgres/src/relational.rs @@ -77,7 +77,7 @@ use graph::prelude::{ use crate::block_range::{BoundSide, BLOCK_COLUMN, BLOCK_RANGE_COLUMN}; pub use crate::catalog::Catalog; -use crate::connection_pool::ForeignServer; +use crate::ForeignServer; use crate::{catalog, deployment}; use self::rollup::Rollup; diff --git a/store/postgres/src/subgraph_store.rs b/store/postgres/src/subgraph_store.rs index 5564a7d1726..a4e7ffda659 100644 --- a/store/postgres/src/subgraph_store.rs +++ b/store/postgres/src/subgraph_store.rs @@ -37,7 +37,6 @@ use graph::{ }; use crate::{ - connection_pool::ConnectionPool, deployment::{OnSync, SubgraphHealth}, primary::{self, DeploymentId, Mirror as PrimaryMirror, Primary, Site}, relational::{ @@ -45,7 +44,7 @@ use crate::{ Layout, }, writable::{SourceableStore, WritableStore}, - NotificationSender, + ConnectionPool, NotificationSender, }; use crate::{ deployment_store::{DeploymentStore, ReplicaId}, From 5d3d4655b65b5720970d3535fb2c21300df9b786 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 10:30:34 -0700 Subject: [PATCH 105/160] store: Move pool::ForeignServer to its own module No functional change --- store/postgres/src/pool/foreign_server.rs | 237 +++++++++++++++++++ store/postgres/src/pool/mod.rs | 265 ++-------------------- store/postgres/src/primary.rs | 8 +- 3 files changed, 262 insertions(+), 248 deletions(-) create mode 100644 store/postgres/src/pool/foreign_server.rs diff --git a/store/postgres/src/pool/foreign_server.rs b/store/postgres/src/pool/foreign_server.rs new file mode 100644 index 00000000000..3f8daf64b54 --- /dev/null +++ b/store/postgres/src/pool/foreign_server.rs @@ -0,0 +1,237 @@ +use diesel::{connection::SimpleConnection, pg::PgConnection}; + +use graph::{ + prelude::{ + anyhow::{self, anyhow, bail}, + StoreError, ENV_VARS, + }, + util::security::SafeDisplay, +}; + +use std::fmt::Write; + +use postgres::config::{Config, Host}; + +use crate::catalog; +use crate::primary::NAMESPACE_PUBLIC; +use crate::{Shard, PRIMARY_SHARD}; + +use super::{PRIMARY_PUBLIC, PRIMARY_TABLES, SHARDED_TABLES}; + +pub struct ForeignServer { + pub name: String, + pub shard: Shard, + pub user: String, + pub password: String, + pub host: String, + pub port: u16, + pub dbname: String, +} + +impl ForeignServer { + /// The name of the foreign server under which data for `shard` is + /// accessible + pub fn name(shard: &Shard) -> String { + format!("shard_{}", shard.as_str()) + } + + /// The name of the schema under which the `subgraphs` schema for + /// `shard` is accessible in shards that are not `shard`. In most cases + /// you actually want to use `metadata_schema_in` + pub fn metadata_schema(shard: &Shard) -> String { + format!("{}_subgraphs", Self::name(shard)) + } + + /// The name of the schema under which the `subgraphs` schema for + /// `shard` is accessible in the shard `current`. It is permissible for + /// `shard` and `current` to be the same. 
+ pub fn metadata_schema_in(shard: &Shard, current: &Shard) -> String { + if shard == current { + "subgraphs".to_string() + } else { + Self::metadata_schema(&shard) + } + } + + pub fn new_from_raw(shard: String, postgres_url: &str) -> Result { + Self::new(Shard::new(shard)?, postgres_url) + } + + pub fn new(shard: Shard, postgres_url: &str) -> Result { + let config: Config = match postgres_url.parse() { + Ok(config) => config, + Err(e) => panic!( + "failed to parse Postgres connection string `{}`: {}", + SafeDisplay(postgres_url), + e + ), + }; + + let host = match config.get_hosts().get(0) { + Some(Host::Tcp(host)) => host.to_string(), + _ => bail!("can not find host name in `{}`", SafeDisplay(postgres_url)), + }; + + let user = config + .get_user() + .ok_or_else(|| anyhow!("could not find user in `{}`", SafeDisplay(postgres_url)))? + .to_string(); + let password = String::from_utf8( + config + .get_password() + .ok_or_else(|| { + anyhow!( + "could not find password in `{}`; you must provide one.", + SafeDisplay(postgres_url) + ) + })? + .into(), + )?; + let port = config.get_ports().first().cloned().unwrap_or(5432u16); + let dbname = config + .get_dbname() + .map(|s| s.to_string()) + .ok_or_else(|| anyhow!("could not find user in `{}`", SafeDisplay(postgres_url)))?; + + Ok(Self { + name: Self::name(&shard), + shard, + user, + password, + host, + port, + dbname, + }) + } + + /// Create a new foreign server and user mapping on `conn` for this foreign + /// server + pub(super) fn create(&self, conn: &mut PgConnection) -> Result<(), StoreError> { + let query = format!( + "\ + create server \"{name}\" + foreign data wrapper postgres_fdw + options (host '{remote_host}', \ + port '{remote_port}', \ + dbname '{remote_db}', \ + fetch_size '{fetch_size}', \ + updatable 'false'); + create user mapping + for current_user server \"{name}\" + options (user '{remote_user}', password '{remote_password}');", + name = self.name, + remote_host = self.host, + remote_port = self.port, + remote_db = self.dbname, + remote_user = self.user, + remote_password = self.password, + fetch_size = ENV_VARS.store.fdw_fetch_size, + ); + Ok(conn.batch_execute(&query)?) + } + + /// Update an existing user mapping with possibly new details + pub(super) fn update(&self, conn: &mut PgConnection) -> Result<(), StoreError> { + let options = catalog::server_options(conn, &self.name)?; + let set_or_add = |option: &str| -> &'static str { + if options.contains_key(option) { + "set" + } else { + "add" + } + }; + + let query = format!( + "\ + alter server \"{name}\" + options (set host '{remote_host}', \ + {set_port} port '{remote_port}', \ + set dbname '{remote_db}', \ + {set_fetch_size} fetch_size '{fetch_size}'); + alter user mapping + for current_user server \"{name}\" + options (set user '{remote_user}', set password '{remote_password}');", + name = self.name, + remote_host = self.host, + set_port = set_or_add("port"), + set_fetch_size = set_or_add("fetch_size"), + remote_port = self.port, + remote_db = self.dbname, + remote_user = self.user, + remote_password = self.password, + fetch_size = ENV_VARS.store.fdw_fetch_size, + ); + Ok(conn.batch_execute(&query)?) + } + + /// Map key tables from the primary into our local schema. If we are the + /// primary, set them up as views. 
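+    ///
+    /// Concretely, every table listed in `PRIMARY_TABLES` ends up in the
+    /// `primary_public` schema, either as a plain view over `public` (when
+    /// this shard is the primary) or as a foreign table pointing at the
+    /// primary shard.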
+ pub(super) fn map_primary(conn: &mut PgConnection, shard: &Shard) -> Result<(), StoreError> { + catalog::recreate_schema(conn, PRIMARY_PUBLIC)?; + + let mut query = String::new(); + for table_name in PRIMARY_TABLES { + let create_stmt = if shard == &*PRIMARY_SHARD { + format!( + "create view {nsp}.{table_name} as select * from public.{table_name};", + nsp = PRIMARY_PUBLIC, + table_name = table_name + ) + } else { + catalog::create_foreign_table( + conn, + NAMESPACE_PUBLIC, + table_name, + PRIMARY_PUBLIC, + Self::name(&PRIMARY_SHARD).as_str(), + )? + }; + write!(query, "{}", create_stmt)?; + } + conn.batch_execute(&query)?; + Ok(()) + } + + /// Map the `subgraphs` schema from the foreign server `self` into the + /// database accessible through `conn` + pub(super) fn map_metadata(&self, conn: &mut PgConnection) -> Result<(), StoreError> { + let nsp = Self::metadata_schema(&self.shard); + catalog::recreate_schema(conn, &nsp)?; + let mut query = String::new(); + for (src_nsp, src_tables) in SHARDED_TABLES { + for src_table in src_tables { + let create_stmt = + catalog::create_foreign_table(conn, src_nsp, src_table, &nsp, &self.name)?; + write!(query, "{}", create_stmt)?; + } + } + Ok(conn.batch_execute(&query)?) + } + + pub(super) fn needs_remap(&self, conn: &mut PgConnection) -> Result { + fn different(mut existing: Vec, mut needed: Vec) -> bool { + existing.sort(); + needed.sort(); + existing != needed + } + + if &self.shard == &*PRIMARY_SHARD { + let existing = catalog::foreign_tables(conn, PRIMARY_PUBLIC)?; + let needed = PRIMARY_TABLES + .into_iter() + .map(String::from) + .collect::>(); + if different(existing, needed) { + return Ok(true); + } + } + + let existing = catalog::foreign_tables(conn, &Self::metadata_schema(&self.shard))?; + let needed = SHARDED_TABLES + .iter() + .flat_map(|(_, tables)| *tables) + .map(|table| table.to_string()) + .collect::>(); + Ok(different(existing, needed)) + } +} diff --git a/store/postgres/src/pool/mod.rs b/store/postgres/src/pool/mod.rs index abe9109e1d6..1ce05bc92fc 100644 --- a/store/postgres/src/pool/mod.rs +++ b/store/postgres/src/pool/mod.rs @@ -14,36 +14,41 @@ use graph::futures03::future::join_all; use graph::futures03::FutureExt as _; use graph::internal_error; use graph::prelude::tokio::time::Instant; +use graph::prelude::{ + anyhow::anyhow, crit, debug, error, info, o, tokio::sync::Semaphore, CancelGuard, CancelHandle, + CancelToken as _, CancelableError, Counter, Gauge, Logger, MovingStats, PoolWaitStats, + StoreError, ENV_VARS, +}; use graph::prelude::{tokio, MetricsRegistry}; use graph::slog::warn; use graph::util::timed_rw_lock::TimedMutex; -use graph::{ - prelude::{ - anyhow::{self, anyhow, bail}, - crit, debug, error, info, o, - tokio::sync::Semaphore, - CancelGuard, CancelHandle, CancelToken as _, CancelableError, Counter, Gauge, Logger, - MovingStats, PoolWaitStats, StoreError, ENV_VARS, - }, - util::security::SafeDisplay, -}; -use std::fmt::{self, Write}; +use std::fmt::{self}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex}; use std::time::Duration; use std::{collections::HashMap, sync::RwLock}; -use postgres::config::{Config, Host}; - use crate::advisory_lock::with_migration_lock; use crate::catalog; -use crate::primary::{self, Mirror, Namespace, NAMESPACE_PUBLIC}; +use crate::primary::{self, Mirror, Namespace}; use crate::{Shard, PRIMARY_SHARD}; +mod foreign_server; + +pub use foreign_server::ForeignServer; + +/// The namespace under which the `PRIMARY_TABLES` are mapped into each +/// shard 
+pub(crate) const PRIMARY_PUBLIC: &'static str = "primary_public"; + /// Tables that we map from the primary into `primary_public` in each shard const PRIMARY_TABLES: [&str; 3] = ["deployment_schemas", "chains", "active_copies"]; +/// The namespace under which we create views in the primary that union all +/// the `SHARDED_TABLES` +pub(crate) const CROSS_SHARD_NSP: &'static str = "sharded"; + /// Tables that we map from each shard into each other shard into the /// `shard__subgraphs` namespace const SHARDED_TABLES: [(&str, &[&str]); 2] = [ @@ -88,227 +93,6 @@ fn check_mirrored_tables() { } } -pub struct ForeignServer { - pub name: String, - pub shard: Shard, - pub user: String, - pub password: String, - pub host: String, - pub port: u16, - pub dbname: String, -} - -impl ForeignServer { - pub(crate) const PRIMARY_PUBLIC: &'static str = "primary_public"; - pub(crate) const CROSS_SHARD_NSP: &'static str = "sharded"; - - /// The name of the foreign server under which data for `shard` is - /// accessible - pub fn name(shard: &Shard) -> String { - format!("shard_{}", shard.as_str()) - } - - /// The name of the schema under which the `subgraphs` schema for - /// `shard` is accessible in shards that are not `shard`. In most cases - /// you actually want to use `metadata_schema_in` - pub fn metadata_schema(shard: &Shard) -> String { - format!("{}_subgraphs", Self::name(shard)) - } - - /// The name of the schema under which the `subgraphs` schema for - /// `shard` is accessible in the shard `current`. It is permissible for - /// `shard` and `current` to be the same. - pub fn metadata_schema_in(shard: &Shard, current: &Shard) -> String { - if shard == current { - "subgraphs".to_string() - } else { - Self::metadata_schema(&shard) - } - } - - pub fn new_from_raw(shard: String, postgres_url: &str) -> Result { - Self::new(Shard::new(shard)?, postgres_url) - } - - pub fn new(shard: Shard, postgres_url: &str) -> Result { - let config: Config = match postgres_url.parse() { - Ok(config) => config, - Err(e) => panic!( - "failed to parse Postgres connection string `{}`: {}", - SafeDisplay(postgres_url), - e - ), - }; - - let host = match config.get_hosts().get(0) { - Some(Host::Tcp(host)) => host.to_string(), - _ => bail!("can not find host name in `{}`", SafeDisplay(postgres_url)), - }; - - let user = config - .get_user() - .ok_or_else(|| anyhow!("could not find user in `{}`", SafeDisplay(postgres_url)))? - .to_string(); - let password = String::from_utf8( - config - .get_password() - .ok_or_else(|| { - anyhow!( - "could not find password in `{}`; you must provide one.", - SafeDisplay(postgres_url) - ) - })? 
- .into(), - )?; - let port = config.get_ports().first().cloned().unwrap_or(5432u16); - let dbname = config - .get_dbname() - .map(|s| s.to_string()) - .ok_or_else(|| anyhow!("could not find user in `{}`", SafeDisplay(postgres_url)))?; - - Ok(Self { - name: Self::name(&shard), - shard, - user, - password, - host, - port, - dbname, - }) - } - - /// Create a new foreign server and user mapping on `conn` for this foreign - /// server - fn create(&self, conn: &mut PgConnection) -> Result<(), StoreError> { - let query = format!( - "\ - create server \"{name}\" - foreign data wrapper postgres_fdw - options (host '{remote_host}', \ - port '{remote_port}', \ - dbname '{remote_db}', \ - fetch_size '{fetch_size}', \ - updatable 'false'); - create user mapping - for current_user server \"{name}\" - options (user '{remote_user}', password '{remote_password}');", - name = self.name, - remote_host = self.host, - remote_port = self.port, - remote_db = self.dbname, - remote_user = self.user, - remote_password = self.password, - fetch_size = ENV_VARS.store.fdw_fetch_size, - ); - Ok(conn.batch_execute(&query)?) - } - - /// Update an existing user mapping with possibly new details - fn update(&self, conn: &mut PgConnection) -> Result<(), StoreError> { - let options = catalog::server_options(conn, &self.name)?; - let set_or_add = |option: &str| -> &'static str { - if options.contains_key(option) { - "set" - } else { - "add" - } - }; - - let query = format!( - "\ - alter server \"{name}\" - options (set host '{remote_host}', \ - {set_port} port '{remote_port}', \ - set dbname '{remote_db}', \ - {set_fetch_size} fetch_size '{fetch_size}'); - alter user mapping - for current_user server \"{name}\" - options (set user '{remote_user}', set password '{remote_password}');", - name = self.name, - remote_host = self.host, - set_port = set_or_add("port"), - set_fetch_size = set_or_add("fetch_size"), - remote_port = self.port, - remote_db = self.dbname, - remote_user = self.user, - remote_password = self.password, - fetch_size = ENV_VARS.store.fdw_fetch_size, - ); - Ok(conn.batch_execute(&query)?) - } - - /// Map key tables from the primary into our local schema. If we are the - /// primary, set them up as views. - fn map_primary(conn: &mut PgConnection, shard: &Shard) -> Result<(), StoreError> { - catalog::recreate_schema(conn, Self::PRIMARY_PUBLIC)?; - - let mut query = String::new(); - for table_name in PRIMARY_TABLES { - let create_stmt = if shard == &*PRIMARY_SHARD { - format!( - "create view {nsp}.{table_name} as select * from public.{table_name};", - nsp = Self::PRIMARY_PUBLIC, - table_name = table_name - ) - } else { - catalog::create_foreign_table( - conn, - NAMESPACE_PUBLIC, - table_name, - Self::PRIMARY_PUBLIC, - Self::name(&PRIMARY_SHARD).as_str(), - )? - }; - write!(query, "{}", create_stmt)?; - } - conn.batch_execute(&query)?; - Ok(()) - } - - /// Map the `subgraphs` schema from the foreign server `self` into the - /// database accessible through `conn` - fn map_metadata(&self, conn: &mut PgConnection) -> Result<(), StoreError> { - let nsp = Self::metadata_schema(&self.shard); - catalog::recreate_schema(conn, &nsp)?; - let mut query = String::new(); - for (src_nsp, src_tables) in SHARDED_TABLES { - for src_table in src_tables { - let create_stmt = - catalog::create_foreign_table(conn, src_nsp, src_table, &nsp, &self.name)?; - write!(query, "{}", create_stmt)?; - } - } - Ok(conn.batch_execute(&query)?) 
- } - - fn needs_remap(&self, conn: &mut PgConnection) -> Result { - fn different(mut existing: Vec, mut needed: Vec) -> bool { - existing.sort(); - needed.sort(); - existing != needed - } - - if &self.shard == &*PRIMARY_SHARD { - let existing = catalog::foreign_tables(conn, Self::PRIMARY_PUBLIC)?; - let needed = PRIMARY_TABLES - .into_iter() - .map(String::from) - .collect::>(); - if different(existing, needed) { - return Ok(true); - } - } - - let existing = catalog::foreign_tables(conn, &Self::metadata_schema(&self.shard))?; - let needed = SHARDED_TABLES - .iter() - .flat_map(|(_, tables)| *tables) - .map(|table| table.to_string()) - .collect::>(); - Ok(different(existing, needed)) - } -} - /// How long to keep connections in the `fdw_pool` around before closing /// them on idle. This is much shorter than the default of 10 minutes. const FDW_IDLE_TIMEOUT: Duration = Duration::from_secs(60); @@ -1223,10 +1007,7 @@ impl PoolInner { info!(&self.logger, "Dropping cross-shard views"); let mut conn = self.get()?; conn.transaction(|conn| { - let query = format!( - "drop schema if exists {} cascade", - ForeignServer::CROSS_SHARD_NSP - ); + let query = format!("drop schema if exists {} cascade", CROSS_SHARD_NSP); conn.batch_execute(&query)?; Ok(()) }) @@ -1258,7 +1039,7 @@ impl PoolInner { } let mut conn = self.get()?; - let sharded = Namespace::special(ForeignServer::CROSS_SHARD_NSP); + let sharded = Namespace::special(CROSS_SHARD_NSP); if catalog::has_namespace(&mut conn, &sharded)? { // We dropped the namespace before, but another node must have // recreated it in the meantime so we don't need to do anything @@ -1267,7 +1048,7 @@ impl PoolInner { info!(&self.logger, "Creating cross-shard views"); conn.transaction(|conn| { - let query = format!("create schema {}", ForeignServer::CROSS_SHARD_NSP); + let query = format!("create schema {}", CROSS_SHARD_NSP); conn.batch_execute(&query)?; for (src_nsp, src_tables) in SHARDED_TABLES { // Pairs of (shard, nsp) for all servers @@ -1277,7 +1058,7 @@ impl PoolInner { conn, src_nsp, src_table, - ForeignServer::CROSS_SHARD_NSP, + CROSS_SHARD_NSP, &nsps, )?; conn.batch_execute(&create_view)?; diff --git a/store/postgres/src/primary.rs b/store/postgres/src/primary.rs index 08f5a62300b..8d49153d214 100644 --- a/store/postgres/src/primary.rs +++ b/store/postgres/src/primary.rs @@ -4,6 +4,7 @@ use crate::{ block_range::UNVERSIONED_RANGE, detail::DeploymentDetail, + pool::PRIMARY_PUBLIC, subgraph_store::{unused, Shard, PRIMARY_SHARD}, ConnectionPool, ForeignServer, NotificationSender, }; @@ -2006,12 +2007,7 @@ impl Mirror { // Repopulate `PUBLIC_TABLES` by copying their data wholesale for table_name in Self::PUBLIC_TABLES { - copy_table( - conn, - ForeignServer::PRIMARY_PUBLIC, - NAMESPACE_PUBLIC, - table_name, - )?; + copy_table(conn, PRIMARY_PUBLIC, NAMESPACE_PUBLIC, table_name)?; check_cancel()?; } From 54ae8727b17400f96d323ce656fba4ce915dfd3d Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 10:46:30 -0700 Subject: [PATCH 106/160] store: Move PoolCoordinator to its own module --- store/postgres/src/pool/coordinator.rs | 315 +++++++++++++++++++++++++ store/postgres/src/pool/mod.rs | 309 +----------------------- 2 files changed, 319 insertions(+), 305 deletions(-) create mode 100644 store/postgres/src/pool/coordinator.rs diff --git a/store/postgres/src/pool/coordinator.rs b/store/postgres/src/pool/coordinator.rs new file mode 100644 index 00000000000..f58a553b693 --- /dev/null +++ b/store/postgres/src/pool/coordinator.rs @@ -0,0 +1,315 @@ 
+use graph::cheap_clone::CheapClone; +use graph::futures03::future::join_all; +use graph::futures03::FutureExt as _; +use graph::internal_error; +use graph::prelude::MetricsRegistry; +use graph::prelude::{crit, debug, error, info, o, StoreError}; +use graph::slog::Logger; + +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; + +use crate::advisory_lock::with_migration_lock; +use crate::{Shard, PRIMARY_SHARD}; + +use super::{ConnectionPool, ForeignServer, MigrationCount, PoolInner, PoolRole, PoolState}; + +/// Helper to coordinate propagating schema changes from the database that +/// changes schema to all other shards so they can update their fdw mappings +/// of tables imported from that shard +pub struct PoolCoordinator { + logger: Logger, + pools: Mutex>, + servers: Arc>, +} + +impl PoolCoordinator { + pub fn new(logger: &Logger, servers: Arc>) -> Self { + let logger = logger.new(o!("component" => "ConnectionPool", "component" => "Coordinator")); + Self { + logger, + pools: Mutex::new(HashMap::new()), + servers, + } + } + + pub fn create_pool( + self: Arc, + logger: &Logger, + name: &str, + pool_name: PoolRole, + postgres_url: String, + pool_size: u32, + fdw_pool_size: Option, + registry: Arc, + ) -> ConnectionPool { + let is_writable = !pool_name.is_replica(); + + let pool = ConnectionPool::create( + name, + pool_name, + postgres_url, + pool_size, + fdw_pool_size, + logger, + registry, + self.cheap_clone(), + ); + + // Ignore non-writable pools (replicas), there is no need (and no + // way) to coordinate schema changes with them + if is_writable { + self.pools + .lock() + .unwrap() + .insert(pool.shard.clone(), pool.inner.cheap_clone()); + } + + pool + } + + /// Propagate changes to the schema in `shard` to all other pools. Those + /// other pools will then recreate any tables that they imported from + /// `shard`. If `pool` is a new shard, we also map all other shards into + /// it. + /// + /// This tries to take the migration lock and must therefore be run from + /// code that does _not_ hold the migration lock as it will otherwise + /// deadlock + fn propagate(&self, pool: &PoolInner, count: MigrationCount) -> Result<(), StoreError> { + // We need to remap all these servers into `pool` if the list of + // tables that are mapped have changed from the code of the previous + // version. Since dropping and recreating the foreign table + // definitions can slow the startup of other nodes down because of + // locking, we try to only do this when it is actually needed + for server in self.servers.iter() { + if pool.needs_remap(server)? { + pool.remap(server)?; + } + } + + // pool had schema changes, refresh the import from pool into all + // other shards. This makes sure that schema changes to + // already-mapped tables are propagated to all other shards. Since + // we run `propagate` after migrations have been applied to `pool`, + // we can be sure that these mappings use the correct schema + if count.had_migrations() { + let server = self.server(&pool.shard)?; + for pool in self.pools.lock().unwrap().values() { + let pool = pool.get_unready(); + let remap_res = pool.remap(server); + if let Err(e) = remap_res { + error!(pool.logger, "Failed to map imports from {}", server.shard; "error" => e.to_string()); + return Err(e); + } + } + } + Ok(()) + } + + /// Return a list of all pools, regardless of whether they are ready or + /// not. 
+ pub fn pools(&self) -> Vec> { + self.pools + .lock() + .unwrap() + .values() + .map(|state| state.get_unready()) + .collect::>() + } + + pub fn servers(&self) -> Arc> { + self.servers.clone() + } + + fn server(&self, shard: &Shard) -> Result<&ForeignServer, StoreError> { + self.servers + .iter() + .find(|server| &server.shard == shard) + .ok_or_else(|| internal_error!("unknown shard {shard}")) + } + + fn primary(&self) -> Result, StoreError> { + let map = self.pools.lock().unwrap(); + let pool_state = map.get(&*&PRIMARY_SHARD).ok_or_else(|| { + internal_error!("internal error: primary shard not found in pool coordinator") + })?; + + Ok(pool_state.get_unready()) + } + + /// Setup all pools the coordinator knows about and return the number of + /// pools that were successfully set up. + /// + /// # Panics + /// + /// If any errors besides a database not being available happen during + /// the migration, the process panics + pub async fn setup_all(&self, logger: &Logger) -> usize { + let pools = self + .pools + .lock() + .unwrap() + .values() + .cloned() + .collect::>(); + + let res = self.setup(pools).await; + + match res { + Ok(count) => { + info!(logger, "Setup finished"; "shards" => count); + count + } + Err(e) => { + crit!(logger, "database setup failed"; "error" => format!("{e}")); + panic!("database setup failed: {}", e); + } + } + } + + /// A helper to call `setup` from a non-async context. Returns `true` if + /// the setup was actually run, i.e. if `pool` was available + pub(crate) fn setup_bg(self: Arc, pool: PoolState) -> Result { + let migrated = graph::spawn_thread("database-setup", move || { + graph::block_on(self.setup(vec![pool.clone()])) + }) + .join() + // unwrap: propagate panics + .unwrap()?; + Ok(migrated == 1) + } + + /// Setup all pools by doing the following steps: + /// 1. Get the migration lock in the primary. This makes sure that only + /// one node runs migrations + /// 2. Remove the views in `sharded` as they might interfere with + /// running migrations + /// 3. In parallel, do the following in each pool: + /// 1. Configure fdw servers + /// 2. Run migrations in all pools in parallel + /// 4. In parallel, do the following in each pool: + /// 1. Create/update the mappings in `shard__subgraphs` and in + /// `primary_public` + /// 5. Create the views in `sharded` again + /// 6. Release the migration lock + /// + /// This method tolerates databases that are not available and will + /// simply ignore them. The returned count is the number of pools that + /// were successfully set up. + /// + /// When this method returns, the entries from `states` that were + /// successfully set up will be marked as ready. The method returns the + /// number of pools that were set up + async fn setup(&self, states: Vec) -> Result { + type MigrationCounts = Vec<(PoolState, MigrationCount)>; + + /// Filter out pools that are not available. We don't want to fail + /// because one of the pools is not available. We will just ignore + /// them and continue with the others. 
+ fn filter_unavailable( + (state, res): (PoolState, Result), + ) -> Option> { + if let Err(StoreError::DatabaseUnavailable) = res { + error!( + state.logger, + "migrations failed because database was unavailable" + ); + None + } else { + Some(res.map(|count| (state, count))) + } + } + + /// Migrate all pools in parallel + async fn migrate( + pools: &[PoolState], + servers: &[ForeignServer], + ) -> Result { + let futures = pools + .iter() + .map(|state| { + state + .get_unready() + .cheap_clone() + .migrate(servers) + .map(|res| (state.cheap_clone(), res)) + }) + .collect::>(); + join_all(futures) + .await + .into_iter() + .filter_map(filter_unavailable) + .collect::, _>>() + } + + /// Propagate the schema changes to all other pools in parallel + async fn propagate( + this: &PoolCoordinator, + migrated: MigrationCounts, + ) -> Result, StoreError> { + let futures = migrated + .into_iter() + .map(|(state, count)| async move { + let pool = state.get_unready(); + let res = this.propagate(&pool, count); + (state.cheap_clone(), res) + }) + .collect::>(); + join_all(futures) + .await + .into_iter() + .filter_map(filter_unavailable) + .map(|res| res.map(|(state, ())| state)) + .collect::, _>>() + } + + let primary = self.primary()?; + + let mut pconn = primary.get().map_err(|_| StoreError::DatabaseUnavailable)?; + + let states: Vec<_> = states + .into_iter() + .filter(|pool| pool.needs_setup()) + .collect(); + if states.is_empty() { + return Ok(0); + } + + // Everything here happens under the migration lock. Anything called + // from here should not try to get that lock, otherwise the process + // will deadlock + debug!(self.logger, "Waiting for migration lock"); + let res = with_migration_lock(&mut pconn, |_| async { + debug!(self.logger, "Migration lock acquired"); + + // While we were waiting for the migration lock, another thread + // might have already run this + let states: Vec<_> = states + .into_iter() + .filter(|pool| pool.needs_setup()) + .collect(); + if states.is_empty() { + debug!(self.logger, "No pools to set up"); + return Ok(0); + } + + primary.drop_cross_shard_views()?; + + let migrated = migrate(&states, self.servers.as_ref()).await?; + + let propagated = propagate(&self, migrated).await?; + + primary.create_cross_shard_views(&self.servers)?; + + for state in &propagated { + state.set_ready(); + } + Ok(propagated.len()) + }) + .await; + debug!(self.logger, "Database setup finished"); + + res + } +} diff --git a/store/postgres/src/pool/mod.rs b/store/postgres/src/pool/mod.rs index 1ce05bc92fc..9d5b92d019e 100644 --- a/store/postgres/src/pool/mod.rs +++ b/store/postgres/src/pool/mod.rs @@ -10,8 +10,6 @@ use diesel_migrations::{EmbeddedMigrations, HarnessWithOutput}; use graph::cheap_clone::CheapClone; use graph::components::store::QueryPermit; use graph::derive::CheapClone; -use graph::futures03::future::join_all; -use graph::futures03::FutureExt as _; use graph::internal_error; use graph::prelude::tokio::time::Instant; use graph::prelude::{ @@ -25,17 +23,18 @@ use graph::util::timed_rw_lock::TimedMutex; use std::fmt::{self}; use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use std::time::Duration; use std::{collections::HashMap, sync::RwLock}; -use crate::advisory_lock::with_migration_lock; use crate::catalog; use crate::primary::{self, Mirror, Namespace}; use crate::{Shard, PRIMARY_SHARD}; +mod coordinator; mod foreign_server; +pub use coordinator::PoolCoordinator; pub use foreign_server::ForeignServer; /// The namespace under 
which the `PRIMARY_TABLES` are mapped into each @@ -116,7 +115,7 @@ enum PoolStateInner { /// of queries; instead of queueing them until the database is available, /// they return almost immediately with an error #[derive(Clone, CheapClone)] -struct PoolState { +pub(super) struct PoolState { logger: Logger, inner: Arc>, } @@ -1169,303 +1168,3 @@ fn migrate_schema(logger: &Logger, conn: &mut PgConnection) -> Result>, - servers: Arc>, -} - -impl PoolCoordinator { - pub fn new(logger: &Logger, servers: Arc>) -> Self { - let logger = logger.new(o!("component" => "ConnectionPool", "component" => "Coordinator")); - Self { - logger, - pools: Mutex::new(HashMap::new()), - servers, - } - } - - pub fn create_pool( - self: Arc, - logger: &Logger, - name: &str, - pool_name: PoolRole, - postgres_url: String, - pool_size: u32, - fdw_pool_size: Option, - registry: Arc, - ) -> ConnectionPool { - let is_writable = !pool_name.is_replica(); - - let pool = ConnectionPool::create( - name, - pool_name, - postgres_url, - pool_size, - fdw_pool_size, - logger, - registry, - self.cheap_clone(), - ); - - // Ignore non-writable pools (replicas), there is no need (and no - // way) to coordinate schema changes with them - if is_writable { - self.pools - .lock() - .unwrap() - .insert(pool.shard.clone(), pool.inner.cheap_clone()); - } - - pool - } - - /// Propagate changes to the schema in `shard` to all other pools. Those - /// other pools will then recreate any tables that they imported from - /// `shard`. If `pool` is a new shard, we also map all other shards into - /// it. - /// - /// This tries to take the migration lock and must therefore be run from - /// code that does _not_ hold the migration lock as it will otherwise - /// deadlock - fn propagate(&self, pool: &PoolInner, count: MigrationCount) -> Result<(), StoreError> { - // We need to remap all these servers into `pool` if the list of - // tables that are mapped have changed from the code of the previous - // version. Since dropping and recreating the foreign table - // definitions can slow the startup of other nodes down because of - // locking, we try to only do this when it is actually needed - for server in self.servers.iter() { - if pool.needs_remap(server)? { - pool.remap(server)?; - } - } - - // pool had schema changes, refresh the import from pool into all - // other shards. This makes sure that schema changes to - // already-mapped tables are propagated to all other shards. Since - // we run `propagate` after migrations have been applied to `pool`, - // we can be sure that these mappings use the correct schema - if count.had_migrations() { - let server = self.server(&pool.shard)?; - for pool in self.pools.lock().unwrap().values() { - let pool = pool.get_unready(); - let remap_res = pool.remap(server); - if let Err(e) = remap_res { - error!(pool.logger, "Failed to map imports from {}", server.shard; "error" => e.to_string()); - return Err(e); - } - } - } - Ok(()) - } - - /// Return a list of all pools, regardless of whether they are ready or - /// not. 
- pub fn pools(&self) -> Vec> { - self.pools - .lock() - .unwrap() - .values() - .map(|state| state.get_unready()) - .collect::>() - } - - pub fn servers(&self) -> Arc> { - self.servers.clone() - } - - fn server(&self, shard: &Shard) -> Result<&ForeignServer, StoreError> { - self.servers - .iter() - .find(|server| &server.shard == shard) - .ok_or_else(|| internal_error!("unknown shard {shard}")) - } - - fn primary(&self) -> Result, StoreError> { - let map = self.pools.lock().unwrap(); - let pool_state = map.get(&*&PRIMARY_SHARD).ok_or_else(|| { - internal_error!("internal error: primary shard not found in pool coordinator") - })?; - - Ok(pool_state.get_unready()) - } - - /// Setup all pools the coordinator knows about and return the number of - /// pools that were successfully set up. - /// - /// # Panics - /// - /// If any errors besides a database not being available happen during - /// the migration, the process panics - pub async fn setup_all(&self, logger: &Logger) -> usize { - let pools = self - .pools - .lock() - .unwrap() - .values() - .cloned() - .collect::>(); - - let res = self.setup(pools).await; - - match res { - Ok(count) => { - info!(logger, "Setup finished"; "shards" => count); - count - } - Err(e) => { - crit!(logger, "database setup failed"; "error" => format!("{e}")); - panic!("database setup failed: {}", e); - } - } - } - - /// A helper to call `setup` from a non-async context. Returns `true` if - /// the setup was actually run, i.e. if `pool` was available - fn setup_bg(self: Arc, pool: PoolState) -> Result { - let migrated = graph::spawn_thread("database-setup", move || { - graph::block_on(self.setup(vec![pool.clone()])) - }) - .join() - // unwrap: propagate panics - .unwrap()?; - Ok(migrated == 1) - } - - /// Setup all pools by doing the following steps: - /// 1. Get the migration lock in the primary. This makes sure that only - /// one node runs migrations - /// 2. Remove the views in `sharded` as they might interfere with - /// running migrations - /// 3. In parallel, do the following in each pool: - /// 1. Configure fdw servers - /// 2. Run migrations in all pools in parallel - /// 4. In parallel, do the following in each pool: - /// 1. Create/update the mappings in `shard__subgraphs` and in - /// `primary_public` - /// 5. Create the views in `sharded` again - /// 6. Release the migration lock - /// - /// This method tolerates databases that are not available and will - /// simply ignore them. The returned count is the number of pools that - /// were successfully set up. - /// - /// When this method returns, the entries from `states` that were - /// successfully set up will be marked as ready. The method returns the - /// number of pools that were set up - async fn setup(&self, states: Vec) -> Result { - type MigrationCounts = Vec<(PoolState, MigrationCount)>; - - /// Filter out pools that are not available. We don't want to fail - /// because one of the pools is not available. We will just ignore - /// them and continue with the others. 
- fn filter_unavailable( - (state, res): (PoolState, Result), - ) -> Option> { - if let Err(StoreError::DatabaseUnavailable) = res { - error!( - state.logger, - "migrations failed because database was unavailable" - ); - None - } else { - Some(res.map(|count| (state, count))) - } - } - - /// Migrate all pools in parallel - async fn migrate( - pools: &[PoolState], - servers: &[ForeignServer], - ) -> Result { - let futures = pools - .iter() - .map(|state| { - state - .get_unready() - .cheap_clone() - .migrate(servers) - .map(|res| (state.cheap_clone(), res)) - }) - .collect::>(); - join_all(futures) - .await - .into_iter() - .filter_map(filter_unavailable) - .collect::, _>>() - } - - /// Propagate the schema changes to all other pools in parallel - async fn propagate( - this: &PoolCoordinator, - migrated: MigrationCounts, - ) -> Result, StoreError> { - let futures = migrated - .into_iter() - .map(|(state, count)| async move { - let pool = state.get_unready(); - let res = this.propagate(&pool, count); - (state.cheap_clone(), res) - }) - .collect::>(); - join_all(futures) - .await - .into_iter() - .filter_map(filter_unavailable) - .map(|res| res.map(|(state, ())| state)) - .collect::, _>>() - } - - let primary = self.primary()?; - - let mut pconn = primary.get().map_err(|_| StoreError::DatabaseUnavailable)?; - - let states: Vec<_> = states - .into_iter() - .filter(|pool| pool.needs_setup()) - .collect(); - if states.is_empty() { - return Ok(0); - } - - // Everything here happens under the migration lock. Anything called - // from here should not try to get that lock, otherwise the process - // will deadlock - debug!(self.logger, "Waiting for migration lock"); - let res = with_migration_lock(&mut pconn, |_| async { - debug!(self.logger, "Migration lock acquired"); - - // While we were waiting for the migration lock, another thread - // might have already run this - let states: Vec<_> = states - .into_iter() - .filter(|pool| pool.needs_setup()) - .collect(); - if states.is_empty() { - debug!(self.logger, "No pools to set up"); - return Ok(0); - } - - primary.drop_cross_shard_views()?; - - let migrated = migrate(&states, self.servers.as_ref()).await?; - - let propagated = propagate(&self, migrated).await?; - - primary.create_cross_shard_views(&self.servers)?; - - for state in &propagated { - state.set_ready(); - } - Ok(propagated.len()) - }) - .await; - debug!(self.logger, "Database setup finished"); - - res - } -} From 69c182b81d7f471a7493af8f360a00117c331efe Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 11:01:04 -0700 Subject: [PATCH 107/160] store: Move PoolStateTracker and handlers to own module Also, rename it to just StateTracker --- store/postgres/src/pool/mod.rs | 219 +--------------------- store/postgres/src/pool/state_tracker.rs | 224 +++++++++++++++++++++++ 2 files changed, 232 insertions(+), 211 deletions(-) create mode 100644 store/postgres/src/pool/state_tracker.rs diff --git a/store/postgres/src/pool/mod.rs b/store/postgres/src/pool/mod.rs index 9d5b92d019e..5fcc7b0cd1c 100644 --- a/store/postgres/src/pool/mod.rs +++ b/store/postgres/src/pool/mod.rs @@ -1,7 +1,7 @@ use diesel::r2d2::Builder; use diesel::{connection::SimpleConnection, pg::PgConnection}; use diesel::{ - r2d2::{self, event as e, ConnectionManager, HandleEvent, Pool, PooledConnection}, + r2d2::{ConnectionManager, Pool, PooledConnection}, Connection, }; use diesel::{sql_query, RunQueryDsl}; @@ -14,15 +14,14 @@ use graph::internal_error; use graph::prelude::tokio::time::Instant; use 
graph::prelude::{ anyhow::anyhow, crit, debug, error, info, o, tokio::sync::Semaphore, CancelGuard, CancelHandle, - CancelToken as _, CancelableError, Counter, Gauge, Logger, MovingStats, PoolWaitStats, - StoreError, ENV_VARS, + CancelToken as _, CancelableError, Gauge, Logger, MovingStats, PoolWaitStats, StoreError, + ENV_VARS, }; use graph::prelude::{tokio, MetricsRegistry}; use graph::slog::warn; use graph::util::timed_rw_lock::TimedMutex; use std::fmt::{self}; -use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::time::Duration; use std::{collections::HashMap, sync::RwLock}; @@ -33,9 +32,11 @@ use crate::{Shard, PRIMARY_SHARD}; mod coordinator; mod foreign_server; +mod state_tracker; pub use coordinator::PoolCoordinator; pub use foreign_server::ForeignServer; +use state_tracker::{ErrorHandler, EventHandler, StateTracker}; /// The namespace under which the `PRIMARY_TABLES` are mapped into each /// shard @@ -204,7 +205,7 @@ impl PoolState { pub struct ConnectionPool { inner: PoolState, pub shard: Shard, - state_tracker: PoolStateTracker, + state_tracker: StateTracker, } impl fmt::Debug for ConnectionPool { @@ -240,47 +241,6 @@ impl PoolRole { } } -#[derive(Clone)] -struct PoolStateTracker { - available: Arc, - ignore_timeout: Arc, -} - -impl PoolStateTracker { - fn new() -> Self { - Self { - available: Arc::new(AtomicBool::new(true)), - ignore_timeout: Arc::new(AtomicBool::new(false)), - } - } - - fn mark_available(&self) { - self.available.store(true, Ordering::Relaxed); - } - - fn mark_unavailable(&self) { - self.available.store(false, Ordering::Relaxed); - } - - fn is_available(&self) -> bool { - self.available.load(Ordering::Relaxed) - } - - fn timeout_is_ignored(&self) -> bool { - self.ignore_timeout.load(Ordering::Relaxed) - } - - fn ignore_timeout(&self, f: F) -> R - where - F: FnOnce() -> R, - { - self.ignore_timeout.store(true, Ordering::Relaxed); - let res = f(); - self.ignore_timeout.store(false, Ordering::Relaxed); - res - } -} - impl ConnectionPool { fn create( shard_name: &str, @@ -292,7 +252,7 @@ impl ConnectionPool { registry: Arc, coord: Arc, ) -> ConnectionPool { - let state_tracker = PoolStateTracker::new(); + let state_tracker = StateTracker::new(); let shard = Shard::new(shard_name.to_string()).expect("shard_name is a valid name for a shard"); let inner = { @@ -461,169 +421,6 @@ impl ConnectionPool { } } -fn brief_error_msg(error: &dyn std::error::Error) -> String { - // For 'Connection refused' errors, Postgres includes the IP and - // port number in the error message. We want to suppress that and - // only use the first line from the error message. For more detailed - // analysis, 'Connection refused' manifests as a - // `ConnectionError(BadConnection("could not connect to server: - // Connection refused.."))` - error - .to_string() - .split('\n') - .next() - .unwrap_or("no error details provided") - .to_string() -} - -#[derive(Clone)] -struct ErrorHandler { - logger: Logger, - counter: Counter, - state_tracker: PoolStateTracker, -} - -impl ErrorHandler { - fn new(logger: Logger, counter: Counter, state_tracker: PoolStateTracker) -> Self { - Self { - logger, - counter, - state_tracker, - } - } -} -impl std::fmt::Debug for ErrorHandler { - fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result { - fmt::Result::Ok(()) - } -} - -impl r2d2::HandleError for ErrorHandler { - fn handle_error(&self, error: r2d2::Error) { - let msg = brief_error_msg(&error); - - // Don't count canceling statements for timeouts etc. as a - // connection error. 
Unfortunately, we only have the textual error - // and need to infer whether the error indicates that the database - // is down or if something else happened. When querying a replica, - // these messages indicate that a query was canceled because it - // conflicted with replication, but does not indicate that there is - // a problem with the database itself. - // - // This check will break if users run Postgres (or even graph-node) - // in a locale other than English. In that case, their database will - // be marked as unavailable even though it is perfectly fine. - if msg.contains("canceling statement") - || msg.contains("terminating connection due to conflict with recovery") - { - return; - } - - self.counter.inc(); - if self.state_tracker.is_available() { - error!(self.logger, "Postgres connection error"; "error" => msg); - } - self.state_tracker.mark_unavailable(); - } -} - -#[derive(Clone)] -struct EventHandler { - logger: Logger, - count_gauge: Gauge, - wait_gauge: Gauge, - size_gauge: Gauge, - wait_stats: PoolWaitStats, - state_tracker: PoolStateTracker, -} - -impl EventHandler { - fn new( - logger: Logger, - registry: Arc, - wait_stats: PoolWaitStats, - const_labels: HashMap, - state_tracker: PoolStateTracker, - ) -> Self { - let count_gauge = registry - .global_gauge( - "store_connection_checkout_count", - "The number of Postgres connections currently checked out", - const_labels.clone(), - ) - .expect("failed to create `store_connection_checkout_count` counter"); - let wait_gauge = registry - .global_gauge( - "store_connection_wait_time_ms", - "Average connection wait time", - const_labels.clone(), - ) - .expect("failed to create `store_connection_wait_time_ms` counter"); - let size_gauge = registry - .global_gauge( - "store_connection_pool_size_count", - "Overall size of the connection pool", - const_labels, - ) - .expect("failed to create `store_connection_pool_size_count` counter"); - EventHandler { - logger, - count_gauge, - wait_gauge, - wait_stats, - size_gauge, - state_tracker, - } - } - - fn add_conn_wait_time(&self, duration: Duration) { - self.wait_stats - .write() - .unwrap() - .add_and_register(duration, &self.wait_gauge); - } -} - -impl std::fmt::Debug for EventHandler { - fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result { - fmt::Result::Ok(()) - } -} - -impl HandleEvent for EventHandler { - fn handle_acquire(&self, _: e::AcquireEvent) { - self.size_gauge.inc(); - self.state_tracker.mark_available(); - } - - fn handle_release(&self, _: e::ReleaseEvent) { - self.size_gauge.dec(); - } - - fn handle_checkout(&self, event: e::CheckoutEvent) { - self.count_gauge.inc(); - self.add_conn_wait_time(event.duration()); - self.state_tracker.mark_available(); - } - - fn handle_timeout(&self, event: e::TimeoutEvent) { - if self.state_tracker.timeout_is_ignored() { - return; - } - self.add_conn_wait_time(event.timeout()); - if self.state_tracker.is_available() { - error!(self.logger, "Connection checkout timed out"; - "wait_ms" => event.timeout().as_millis() - ) - } - self.state_tracker.mark_unavailable(); - } - - fn handle_checkin(&self, _: e::CheckinEvent) { - self.count_gauge.dec(); - } -} - #[derive(Clone)] pub struct PoolInner { logger: Logger, @@ -662,7 +459,7 @@ impl PoolInner { fdw_pool_size: Option, logger: &Logger, registry: Arc, - state_tracker: PoolStateTracker, + state_tracker: StateTracker, ) -> PoolInner { check_mirrored_tables(); diff --git a/store/postgres/src/pool/state_tracker.rs b/store/postgres/src/pool/state_tracker.rs new file mode 100644 index 
00000000000..231a66a9292 --- /dev/null +++ b/store/postgres/src/pool/state_tracker.rs @@ -0,0 +1,224 @@ +//! Event/error handlers for our r2d2 pools + +use diesel::r2d2::{self, event as e, HandleEvent}; + +use graph::prelude::error; +use graph::prelude::Counter; +use graph::prelude::Gauge; +use graph::prelude::MetricsRegistry; +use graph::prelude::PoolWaitStats; +use graph::slog::Logger; + +use std::collections::HashMap; +use std::fmt; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; +use std::sync::Arc; +use std::time::Duration; + +/// Track whether a database is available or not using the event and error +/// handlers from this module. The pool must be set up with these handlers +/// when it is created +#[derive(Clone)] +pub(super) struct StateTracker { + available: Arc, + ignore_timeout: Arc, +} + +impl StateTracker { + pub(super) fn new() -> Self { + Self { + available: Arc::new(AtomicBool::new(true)), + ignore_timeout: Arc::new(AtomicBool::new(false)), + } + } + + pub(super) fn mark_available(&self) { + self.available.store(true, Ordering::Relaxed); + } + + fn mark_unavailable(&self) { + self.available.store(false, Ordering::Relaxed); + } + + pub(super) fn is_available(&self) -> bool { + self.available.load(Ordering::Relaxed) + } + + fn timeout_is_ignored(&self) -> bool { + self.ignore_timeout.load(Ordering::Relaxed) + } + + pub(super) fn ignore_timeout(&self, f: F) -> R + where + F: FnOnce() -> R, + { + self.ignore_timeout.store(true, Ordering::Relaxed); + let res = f(); + self.ignore_timeout.store(false, Ordering::Relaxed); + res + } +} + +#[derive(Clone)] +pub(super) struct ErrorHandler { + logger: Logger, + counter: Counter, + state_tracker: StateTracker, +} + +impl ErrorHandler { + pub(super) fn new(logger: Logger, counter: Counter, state_tracker: StateTracker) -> Self { + Self { + logger, + counter, + state_tracker, + } + } +} +impl std::fmt::Debug for ErrorHandler { + fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result { + fmt::Result::Ok(()) + } +} + +impl r2d2::HandleError for ErrorHandler { + fn handle_error(&self, error: r2d2::Error) { + let msg = brief_error_msg(&error); + + // Don't count canceling statements for timeouts etc. as a + // connection error. Unfortunately, we only have the textual error + // and need to infer whether the error indicates that the database + // is down or if something else happened. When querying a replica, + // these messages indicate that a query was canceled because it + // conflicted with replication, but does not indicate that there is + // a problem with the database itself. + // + // This check will break if users run Postgres (or even graph-node) + // in a locale other than English. In that case, their database will + // be marked as unavailable even though it is perfectly fine. 
+ if msg.contains("canceling statement") + || msg.contains("terminating connection due to conflict with recovery") + { + return; + } + + self.counter.inc(); + if self.state_tracker.is_available() { + error!(self.logger, "Postgres connection error"; "error" => msg); + } + self.state_tracker.mark_unavailable(); + } +} + +#[derive(Clone)] +pub(super) struct EventHandler { + logger: Logger, + count_gauge: Gauge, + wait_gauge: Gauge, + size_gauge: Gauge, + wait_stats: PoolWaitStats, + state_tracker: StateTracker, +} + +impl EventHandler { + pub(super) fn new( + logger: Logger, + registry: Arc, + wait_stats: PoolWaitStats, + const_labels: HashMap, + state_tracker: StateTracker, + ) -> Self { + let count_gauge = registry + .global_gauge( + "store_connection_checkout_count", + "The number of Postgres connections currently checked out", + const_labels.clone(), + ) + .expect("failed to create `store_connection_checkout_count` counter"); + let wait_gauge = registry + .global_gauge( + "store_connection_wait_time_ms", + "Average connection wait time", + const_labels.clone(), + ) + .expect("failed to create `store_connection_wait_time_ms` counter"); + let size_gauge = registry + .global_gauge( + "store_connection_pool_size_count", + "Overall size of the connection pool", + const_labels, + ) + .expect("failed to create `store_connection_pool_size_count` counter"); + EventHandler { + logger, + count_gauge, + wait_gauge, + wait_stats, + size_gauge, + state_tracker, + } + } + + fn add_conn_wait_time(&self, duration: Duration) { + self.wait_stats + .write() + .unwrap() + .add_and_register(duration, &self.wait_gauge); + } +} + +impl std::fmt::Debug for EventHandler { + fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result { + fmt::Result::Ok(()) + } +} + +impl HandleEvent for EventHandler { + fn handle_acquire(&self, _: e::AcquireEvent) { + self.size_gauge.inc(); + self.state_tracker.mark_available(); + } + + fn handle_release(&self, _: e::ReleaseEvent) { + self.size_gauge.dec(); + } + + fn handle_checkout(&self, event: e::CheckoutEvent) { + self.count_gauge.inc(); + self.add_conn_wait_time(event.duration()); + self.state_tracker.mark_available(); + } + + fn handle_timeout(&self, event: e::TimeoutEvent) { + if self.state_tracker.timeout_is_ignored() { + return; + } + self.add_conn_wait_time(event.timeout()); + if self.state_tracker.is_available() { + error!(self.logger, "Connection checkout timed out"; + "wait_ms" => event.timeout().as_millis() + ) + } + self.state_tracker.mark_unavailable(); + } + + fn handle_checkin(&self, _: e::CheckinEvent) { + self.count_gauge.dec(); + } +} + +fn brief_error_msg(error: &dyn std::error::Error) -> String { + // For 'Connection refused' errors, Postgres includes the IP and + // port number in the error message. We want to suppress that and + // only use the first line from the error message. 
For more detailed + // analysis, 'Connection refused' manifests as a + // `ConnectionError(BadConnection("could not connect to server: + // Connection refused.."))` + error + .to_string() + .split('\n') + .next() + .unwrap_or("no error details provided") + .to_string() +} From ea250b9a8c2375bc4695fdbbebd6bc6805bc6e07 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 14 Apr 2025 11:18:42 -0700 Subject: [PATCH 108/160] node, store: Add 'graphman chain ingest' command --- node/src/bin/manager.rs | 21 ++++++++++++++++++- node/src/manager/commands/chain.rs | 33 ++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/node/src/bin/manager.rs b/node/src/bin/manager.rs index 29ef3ff290e..0b531ee004b 100644 --- a/node/src/bin/manager.rs +++ b/node/src/bin/manager.rs @@ -8,7 +8,7 @@ use graph::components::network_provider::ChainName; use graph::endpoint::EndpointMetrics; use graph::env::ENV_VARS; use graph::log::logger_with_levels; -use graph::prelude::{MetricsRegistry, BLOCK_NUMBER_MAX}; +use graph::prelude::{BlockNumber, MetricsRegistry, BLOCK_NUMBER_MAX}; use graph::{data::graphql::load_manager::LoadManager, prelude::chrono, prometheus::Registry}; use graph::{ prelude::{ @@ -585,6 +585,19 @@ pub enum ChainCommand { #[clap(value_parser = clap::builder::NonEmptyStringValueParser::new())] chain_name: String, }, + + /// Ingest a block into the block cache. + /// + /// This will overwrite any blocks we may already have in the block + /// cache, and can therefore be used to get rid of duplicate blocks in + /// the block cache as well as making sure that a certain block is in + /// the cache + Ingest { + /// The name of the chain + name: String, + /// The block number to ingest + number: BlockNumber, + }, } #[derive(Clone, Debug, Subcommand)] @@ -1450,6 +1463,12 @@ async fn main() -> anyhow::Result<()> { } } } + Ingest { name, number } => { + let logger = ctx.logger.cheap_clone(); + let (chain_store, ethereum_adapter) = + ctx.chain_store_and_adapter(&name).await?; + commands::chain::ingest(&logger, chain_store, ethereum_adapter, number).await + } } } Stats(cmd) => { diff --git a/node/src/manager/commands/chain.rs b/node/src/manager/commands/chain.rs index 90f428b6562..2c07c3d37b8 100644 --- a/node/src/manager/commands/chain.rs +++ b/node/src/manager/commands/chain.rs @@ -10,11 +10,16 @@ use graph::cheap_clone::CheapClone; use graph::components::network_provider::ChainIdentifierStore; use graph::components::network_provider::ChainName; use graph::components::store::StoreError; +use graph::futures03::compat::Future01CompatExt as _; use graph::prelude::BlockNumber; use graph::prelude::ChainStore as _; +use graph::prelude::LightEthereumBlockExt; use graph::prelude::{anyhow, anyhow::bail}; use graph::slog::Logger; use graph::{components::store::BlockStore as _, prelude::anyhow::Error}; +use graph_chain_ethereum::chain::BlockFinality; +use graph_chain_ethereum::EthereumAdapter; +use graph_chain_ethereum::EthereumAdapterTrait as _; use graph_store_postgres::add_chain; use graph_store_postgres::find_chain; use graph_store_postgres::update_chain_name; @@ -259,3 +264,31 @@ pub fn change_block_cache_shard( Ok(()) } + +pub async fn ingest( + logger: &Logger, + chain_store: Arc, + ethereum_adapter: Arc, + number: BlockNumber, +) -> Result<(), Error> { + let Some(block) = ethereum_adapter + .block_by_number(logger, number) + .compat() + .await + .map_err(|e| anyhow!("error getting block number {number}: {}", e))? 
+ else { + bail!("block number {number} not found"); + }; + let ptr = block.block_ptr(); + // For inserting the block, it doesn't matter whether the block is final or not. + let block = Arc::new(BlockFinality::Final(Arc::new(block))); + chain_store.upsert_block(block).await?; + + let rows = chain_store.confirm_block_hash(ptr.number, &ptr.hash)?; + + println!("Inserted block {}", ptr); + if rows > 0 { + println!(" (also deleted {rows} duplicate row(s) with that number)"); + } + Ok(()) +} From feaea62d0cc701db9c41321bb9ff05c26f4d728d Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 14 Apr 2025 13:53:19 -0700 Subject: [PATCH 109/160] node: Allow connecting to only one chain 'graphman chain ingest' is taking forever since it connects to all chains, but we only need an adapter for a single chain --- node/src/bin/manager.rs | 14 +++++++++- node/src/chain.rs | 50 +++++++++++++++++++++++++++++++--- node/src/network_setup.rs | 57 ++++++++++++++++++++++++++++++++++----- 3 files changed, 111 insertions(+), 10 deletions(-) diff --git a/node/src/bin/manager.rs b/node/src/bin/manager.rs index 0b531ee004b..803625a6021 100644 --- a/node/src/bin/manager.rs +++ b/node/src/bin/manager.rs @@ -1026,7 +1026,19 @@ impl Context { self, chain_name: &str, ) -> anyhow::Result<(Arc, Arc)> { - let networks = self.networks().await?; + let logger = self.logger.clone(); + let registry = self.metrics_registry(); + let metrics = Arc::new(EndpointMetrics::mock()); + let networks = Networks::from_config_for_chain( + logger, + &self.config, + registry, + metrics, + &[], + chain_name, + ) + .await?; + let chain_store = self.chain_store(chain_name)?; let ethereum_adapter = networks .ethereum_rpcs(chain_name.into()) diff --git a/node/src/chain.rs b/node/src/chain.rs index 4ff45b8211a..e2325aa6c7a 100644 --- a/node/src/chain.rs +++ b/node/src/chain.rs @@ -48,10 +48,39 @@ pub enum ProviderNetworkStatus { }, } +pub trait ChainFilter: Send + Sync { + fn filter(&self, chain_name: &str) -> bool; +} + +pub struct AnyChainFilter; + +impl ChainFilter for AnyChainFilter { + fn filter(&self, _: &str) -> bool { + true + } +} + +pub struct OneChainFilter { + chain_name: String, +} + +impl OneChainFilter { + pub fn new(chain_name: String) -> Self { + Self { chain_name } + } +} + +impl ChainFilter for OneChainFilter { + fn filter(&self, chain_name: &str) -> bool { + self.chain_name == chain_name + } +} + pub fn create_substreams_networks( logger: Logger, config: &Config, endpoint_metrics: Arc, + chain_filter: &dyn ChainFilter, ) -> Vec { debug!( logger, @@ -63,7 +92,13 @@ pub fn create_substreams_networks( let mut networks_by_kind: BTreeMap<(BlockchainKind, ChainName), Vec>> = BTreeMap::new(); - for (name, chain) in &config.chains.chains { + let filtered_chains = config + .chains + .chains + .iter() + .filter(|(name, _)| chain_filter.filter(name)); + + for (name, chain) in filtered_chains { let name: ChainName = name.as_str().into(); for provider in &chain.providers { if let ProviderDetails::Substreams(ref firehose) = provider.details { @@ -113,6 +148,7 @@ pub fn create_firehose_networks( logger: Logger, config: &Config, endpoint_metrics: Arc, + chain_filter: &dyn ChainFilter, ) -> Vec { debug!( logger, @@ -124,7 +160,13 @@ pub fn create_firehose_networks( let mut networks_by_kind: BTreeMap<(BlockchainKind, ChainName), Vec>> = BTreeMap::new(); - for (name, chain) in &config.chains.chains { + let filtered_chains = config + .chains + .chains + .iter() + .filter(|(name, _)| chain_filter.filter(name)); + + for (name, chain) in 
filtered_chains { let name: ChainName = name.as_str().into(); for provider in &chain.providers { let logger = logger.cheap_clone(); @@ -179,11 +221,12 @@ pub fn create_firehose_networks( /// Parses all Ethereum connection strings and returns their network names and /// `EthereumAdapter`. -pub async fn create_all_ethereum_networks( +pub async fn create_ethereum_networks( logger: Logger, registry: Arc, config: &Config, endpoint_metrics: Arc, + chain_filter: &dyn ChainFilter, ) -> anyhow::Result> { let eth_rpc_metrics = Arc::new(ProviderEthRpcMetrics::new(registry)); let eth_networks_futures = config @@ -191,6 +234,7 @@ pub async fn create_all_ethereum_networks( .chains .iter() .filter(|(_, chain)| chain.protocol == BlockchainKind::Ethereum) + .filter(|(name, _)| chain_filter.filter(name)) .map(|(name, _)| { create_ethereum_networks_for_chain( &logger, diff --git a/node/src/network_setup.rs b/node/src/network_setup.rs index 1ebe2b5109c..55a4995eb6b 100644 --- a/node/src/network_setup.rs +++ b/node/src/network_setup.rs @@ -30,8 +30,8 @@ use graph_store_postgres::{BlockStore, ChainHeadUpdateListener}; use std::{any::Any, cmp::Ordering, sync::Arc, time::Duration}; use crate::chain::{ - create_all_ethereum_networks, create_firehose_networks, create_substreams_networks, - networks_as_chains, + create_ethereum_networks, create_firehose_networks, create_substreams_networks, + networks_as_chains, AnyChainFilter, ChainFilter, OneChainFilter, }; #[derive(Debug, Clone)] @@ -183,31 +183,38 @@ impl Networks { .await } - pub async fn from_config( + async fn from_config_inner( logger: Logger, config: &crate::config::Config, registry: Arc, endpoint_metrics: Arc, provider_checks: &[Arc], + chain_filter: &dyn ChainFilter, ) -> Result { if config.query_only(&config.node) { return Ok(Networks::noop()); } - let eth = create_all_ethereum_networks( + let eth = create_ethereum_networks( logger.cheap_clone(), registry, &config, endpoint_metrics.cheap_clone(), + chain_filter, ) .await?; let firehose = create_firehose_networks( logger.cheap_clone(), &config, endpoint_metrics.cheap_clone(), + chain_filter, + ); + let substreams = create_substreams_networks( + logger.cheap_clone(), + &config, + endpoint_metrics, + chain_filter, ); - let substreams = - create_substreams_networks(logger.cheap_clone(), &config, endpoint_metrics); let adapters: Vec<_> = eth .into_iter() .chain(firehose.into_iter()) @@ -217,6 +224,44 @@ impl Networks { Ok(Networks::new(&logger, adapters, provider_checks)) } + pub async fn from_config_for_chain( + logger: Logger, + config: &crate::config::Config, + registry: Arc, + endpoint_metrics: Arc, + provider_checks: &[Arc], + chain_name: &str, + ) -> Result { + let filter = OneChainFilter::new(chain_name.to_string()); + Self::from_config_inner( + logger, + config, + registry, + endpoint_metrics, + provider_checks, + &filter, + ) + .await + } + + pub async fn from_config( + logger: Logger, + config: &crate::config::Config, + registry: Arc, + endpoint_metrics: Arc, + provider_checks: &[Arc], + ) -> Result { + Self::from_config_inner( + logger, + config, + registry, + endpoint_metrics, + provider_checks, + &AnyChainFilter, + ) + .await + } + fn new( logger: &Logger, adapters: Vec, From 91a1208fa18165ce8c782dfb06499fa87c1b58ec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 09:23:54 +0100 Subject: [PATCH 110/160] build(deps): bump tokio from 1.38.0 to 1.44.2 (#5946) Bumps 
[tokio](https://github.com/tokio-rs/tokio) from 1.38.0 to 1.44.2. - [Release notes](https://github.com/tokio-rs/tokio/releases) - [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.38.0...tokio-1.44.2) --- updated-dependencies: - dependency-name: tokio dependency-version: 1.44.2 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 21 ++++++++++----------- Cargo.toml | 2 +- graph/Cargo.toml | 2 +- tests/Cargo.toml | 2 +- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 33815c70807..1f0af14b722 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3095,9 +3095,9 @@ checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "libredox" @@ -3264,13 +3264,13 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.11" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -5294,28 +5294,27 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.38.0" +version = "1.44.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index b938992bc30..18b28fa2e2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,7 +85,7 @@ strum = { version = "0.26", features = ["derive"] } syn = { version = "2.0.87", features = ["full"] } test-store = { path = "./store/test-store" } thiserror = "1.0.25" -tokio = { version = "1.38.0", features = ["full"] } +tokio = { version = "1.44.2", features = ["full"] } tonic = { version = "0.12.3", features = ["tls-roots", "gzip"] } tonic-build = { version = "0.12.3", features = ["prost"] } tower-http = { version = "0.5.2", features = ["cors"] } diff --git a/graph/Cargo.toml b/graph/Cargo.toml index dc4bd6e42e9..5823159bd6f 100644 --- a/graph/Cargo.toml +++ b/graph/Cargo.toml @@ -65,7 +65,7 @@ slog-envlogger = "2.1.0" slog-term = "2.7.0" petgraph = "0.6.5" tiny-keccak = "1.5.0" -tokio = { version = 
"1.38.0", features = [ +tokio = { version = "1.44.2", features = [ "time", "sync", "macros", diff --git a/tests/Cargo.toml b/tests/Cargo.toml index ad4a4a9c785..6f5e317fa8b 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -19,7 +19,7 @@ graph-runtime-wasm = { path = "../runtime/wasm" } serde = { workspace = true } serde_yaml = { workspace = true } slog = { version = "2.7.0", features = ["release_max_level_trace", "max_level_trace"] } -tokio = { version = "1.38.0", features = ["rt", "macros", "process"] } +tokio = { version = "1.44.2", features = ["rt", "macros", "process"] } # Once graph upgrades to web3 0.19, we don't need this anymore. The version # here needs to be kept in sync with the web3 version that the graph crate # uses until then From fedf07ddd048cabd9d4c67e0a453f1107f498bb4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 09:24:11 +0100 Subject: [PATCH 111/160] build(deps): bump crossbeam-channel from 0.5.13 to 0.5.15 (#5939) Bumps [crossbeam-channel](https://github.com/crossbeam-rs/crossbeam) from 0.5.13 to 0.5.15. - [Release notes](https://github.com/crossbeam-rs/crossbeam/releases) - [Changelog](https://github.com/crossbeam-rs/crossbeam/blob/master/CHANGELOG.md) - [Commits](https://github.com/crossbeam-rs/crossbeam/compare/crossbeam-channel-0.5.13...crossbeam-channel-0.5.15) --- updated-dependencies: - dependency-name: crossbeam-channel dependency-version: 0.5.15 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1f0af14b722..2dd0edfb55f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -977,9 +977,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" dependencies = [ "crossbeam-utils", ] From 0edc5bff2aeeb60078a1f315f1fc2ad7c741dc5e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 09:24:57 +0100 Subject: [PATCH 112/160] build(deps): bump petgraph from 0.6.5 to 0.8.1 (#5931) Bumps [petgraph](https://github.com/petgraph/petgraph) from 0.6.5 to 0.8.1. - [Release notes](https://github.com/petgraph/petgraph/releases) - [Changelog](https://github.com/petgraph/petgraph/blob/master/CHANGELOG.md) - [Commits](https://github.com/petgraph/petgraph/compare/petgraph@v0.6.5...petgraph@v0.8.1) --- updated-dependencies: - dependency-name: petgraph dependency-version: 0.8.1 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 91 +++++++++++++++++++++++++++++++++--------------- graph/Cargo.toml | 2 +- 2 files changed, 64 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2dd0edfb55f..626d018d94f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,6 +57,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -199,7 +205,7 @@ dependencies = [ "futures-util", "handlebars", "http 1.1.0", - "indexmap 2.2.6", + "indexmap 2.9.0", "mime", "multer", "num-traits", @@ -266,7 +272,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "741110dda927420a28fbc1c310543d3416f789a6ba96859c2c265843a0a96887" dependencies = [ "bytes", - "indexmap 2.2.6", + "indexmap 2.9.0", "serde", "serde_json", ] @@ -1553,9 +1559,9 @@ dependencies = [ [[package]] name = "fixedbitset" -version = "0.4.2" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flate2" @@ -1573,6 +1579,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "foreign-types" version = "0.3.2" @@ -1767,7 +1779,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" dependencies = [ "fallible-iterator 0.3.0", - "indexmap 2.2.6", + "indexmap 2.9.0", "stable_deref_trait", ] @@ -1857,7 +1869,7 @@ dependencies = [ "num-traits", "object_store", "parking_lot", - "petgraph", + "petgraph 0.8.1", "priority-queue", "prometheus", "prost", @@ -2283,7 +2295,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.2.6", + "indexmap 2.9.0", "slab", "tokio", "tokio-util 0.7.11", @@ -2302,7 +2314,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap 2.2.6", + "indexmap 2.9.0", "slab", "tokio", "tokio-util 0.7.11", @@ -2347,6 +2359,17 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + [[package]] name = "hdrhistogram" version = "7.5.4" @@ -2868,12 +2891,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - 
"hashbrown 0.14.5", + "hashbrown 0.15.2", "serde", ] @@ -3404,7 +3427,7 @@ checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "crc32fast", "hashbrown 0.14.5", - "indexmap 2.2.6", + "indexmap 2.9.0", "memchr", ] @@ -3632,12 +3655,24 @@ dependencies = [ [[package]] name = "petgraph" -version = "0.6.5" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset", + "indexmap 2.9.0", +] + +[[package]] +name = "petgraph" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +checksum = "7a98c6720655620a521dcc722d0ad66cd8afd5d86e34a89ef691c50b7b24de06" dependencies = [ "fixedbitset", - "indexmap 2.2.6", + "hashbrown 0.15.2", + "indexmap 2.9.0", + "serde", ] [[package]] @@ -3814,7 +3849,7 @@ checksum = "70c501afe3a2e25c9bd219aa56ec1e04cdb3fcdd763055be268778c13fa82c1f" dependencies = [ "autocfg", "equivalent", - "indexmap 2.2.6", + "indexmap 2.9.0", ] [[package]] @@ -3884,7 +3919,7 @@ dependencies = [ "log", "multimap", "once_cell", - "petgraph", + "petgraph 0.7.1", "prettyplease", "prost", "prost-types", @@ -3939,7 +3974,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "322330e133eab455718444b4e033ebfac7c6528972c784fcde28d2cc783c6257" dependencies = [ "anyhow", - "indexmap 2.2.6", + "indexmap 2.9.0", "log", "protobuf 3.7.1", "protobuf-support", @@ -4618,7 +4653,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.2.6", + "indexmap 2.9.0", "itoa", "ryu", "serde", @@ -5493,7 +5528,7 @@ version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" dependencies = [ - "indexmap 2.2.6", + "indexmap 2.9.0", "toml_datetime", "winnow 0.5.40", ] @@ -5504,7 +5539,7 @@ version = "0.22.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f21c7aaf97f1bd9ca9d4f9e73b0a6c74bd5afef56f2bc931943a6e1c37e04e38" dependencies = [ - "indexmap 2.2.6", + "indexmap 2.9.0", "serde", "serde_spanned", "toml_datetime", @@ -6065,7 +6100,7 @@ version = "0.116.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a58e28b80dd8340cb07b8242ae654756161f6fc8d0038123d679b7b99964fa50" dependencies = [ - "indexmap 2.2.6", + "indexmap 2.9.0", "semver", ] @@ -6075,7 +6110,7 @@ version = "0.118.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77f1154f1ab868e2a01d9834a805faca7bf8b50d041b4ca714d005d0dab1c50c" dependencies = [ - "indexmap 2.2.6", + "indexmap 2.9.0", "semver", ] @@ -6091,7 +6126,7 @@ dependencies = [ "bumpalo", "cfg-if 1.0.0", "fxprof-processed-profile", - "indexmap 2.2.6", + "indexmap 2.9.0", "libc", "log", "object 0.32.2", @@ -6216,7 +6251,7 @@ dependencies = [ "anyhow", "cranelift-entity", "gimli 0.28.1", - "indexmap 2.2.6", + "indexmap 2.9.0", "log", "object 0.32.2", "serde", @@ -6301,7 +6336,7 @@ dependencies = [ "anyhow", "cc", "cfg-if 1.0.0", - "indexmap 2.2.6", + "indexmap 2.9.0", "libc", "log", 
"mach", @@ -6353,7 +6388,7 @@ checksum = "4b804dfd3d0c0d6d37aa21026fe7772ba1a769c89ee4f5c4f13b82d91d75216f" dependencies = [ "anyhow", "heck 0.4.1", - "indexmap 2.2.6", + "indexmap 2.9.0", "wit-parser", ] @@ -6713,7 +6748,7 @@ checksum = "316b36a9f0005f5aa4b03c39bc3728d045df136f8c13a73b7db4510dec725e08" dependencies = [ "anyhow", "id-arena", - "indexmap 2.2.6", + "indexmap 2.9.0", "log", "semver", "serde", diff --git a/graph/Cargo.toml b/graph/Cargo.toml index 5823159bd6f..190f671c4e6 100644 --- a/graph/Cargo.toml +++ b/graph/Cargo.toml @@ -63,7 +63,7 @@ strum_macros = "0.26.4" slog-async = "2.5.0" slog-envlogger = "2.1.0" slog-term = "2.7.0" -petgraph = "0.6.5" +petgraph = "0.8.1" tiny-keccak = "1.5.0" tokio = { version = "1.44.2", features = [ "time", From 647aac795ca0019be4f79a7d5f58c4bf09e61416 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 09:25:16 +0100 Subject: [PATCH 113/160] build(deps): bump openssl from 0.10.71 to 0.10.72 (#5928) Bumps [openssl](https://github.com/sfackler/rust-openssl) from 0.10.71 to 0.10.72. - [Release notes](https://github.com/sfackler/rust-openssl/releases) - [Commits](https://github.com/sfackler/rust-openssl/compare/openssl-v0.10.71...openssl-v0.10.72) --- updated-dependencies: - dependency-name: openssl dependency-version: 0.10.72 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 8 ++++---- store/postgres/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 626d018d94f..54eabed0833 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3484,9 +3484,9 @@ checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" [[package]] name = "openssl" -version = "0.10.71" +version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd" +checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ "bitflags 2.6.0", "cfg-if 1.0.0", @@ -3516,9 +3516,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.106" +version = "0.9.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb61ea9811cc39e3c2069f40b8b8e2e70d8569b361f879786cc7ed48b777cdd" +checksum = "8288979acd84749c744a9014b4382d42b8f7b2592847b5afb2ed29e5d16ede07" dependencies = [ "cc", "libc", diff --git a/store/postgres/Cargo.toml b/store/postgres/Cargo.toml index 9a746646807..9d678971ef0 100644 --- a/store/postgres/Cargo.toml +++ b/store/postgres/Cargo.toml @@ -21,7 +21,7 @@ lazy_static = "1.5" lru_time_cache = "0.11" maybe-owned = "0.3.4" postgres = "0.19.1" -openssl = "0.10.71" +openssl = "0.10.72" postgres-openssl = "0.5.0" rand = "0.8.4" serde = { workspace = true } From 838b952365bd78cfcf0a2405cc90188629714ae0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 09:25:46 +0100 Subject: [PATCH 114/160] build(deps): bump reqwest from 0.12.5 to 0.12.15 (#5906) Bumps [reqwest](https://github.com/seanmonstar/reqwest) from 0.12.5 to 0.12.15. 
- [Release notes](https://github.com/seanmonstar/reqwest/releases) - [Changelog](https://github.com/seanmonstar/reqwest/blob/master/CHANGELOG.md) - [Commits](https://github.com/seanmonstar/reqwest/compare/v0.12.5...v0.12.15) --- updated-dependencies: - dependency-name: reqwest dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 164 +++++++++++++++++++++++++++++++++++++---------- Cargo.toml | 2 +- graph/Cargo.toml | 2 +- 3 files changed, 133 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 54eabed0833..2c29c805f4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2990,10 +2990,11 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -4243,9 +4244,9 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" -version = "0.12.5" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +checksum = "d19c46a6fdd48bc4dab94b6103fccc55d34c67cc0ad04653aad4ea2a07cd7bbb" dependencies = [ "base64 0.22.1", "bytes", @@ -4272,7 +4273,7 @@ dependencies = [ "pin-project-lite", "quinn", "rustls", - "rustls-native-certs 0.7.1", + "rustls-native-certs 0.8.1", "rustls-pemfile", "rustls-pki-types", "serde", @@ -4284,13 +4285,14 @@ dependencies = [ "tokio-native-tls", "tokio-rustls", "tokio-util 0.7.11", + "tower 0.5.2", "tower-service 0.3.3", "url", "wasm-bindgen", "wasm-bindgen-futures", "wasm-streams", "web-sys", - "winreg", + "windows-registry", ] [[package]] @@ -5093,6 +5095,9 @@ name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] [[package]] name = "synstructure" @@ -5107,20 +5112,20 @@ dependencies = [ [[package]] name = "system-configuration" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", "core-foundation 0.9.4", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" dependencies = [ "core-foundation-sys", "libc", @@ -5990,23 +5995,24 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if 1.0.0", + "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", "syn 2.0.87", @@ -6027,9 +6033,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -6037,9 +6043,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", @@ -6050,9 +6056,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "wasm-encoder" @@ -6540,6 +6549,41 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + +[[package]] +name = "windows-registry" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.53.0", +] + +[[package]] +name = "windows-result" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -6582,13 +6626,29 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -6601,6 +6661,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -6613,6 +6679,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -6625,12 +6697,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -6643,6 +6727,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -6655,6 +6745,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -6667,6 +6763,12 @@ version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -6679,6 +6781,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "winnow" version = "0.5.40" @@ -6697,16 +6805,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if 1.0.0", - "windows-sys 0.48.0", -] - [[package]] name = "wiremock" version = "0.6.2" diff --git a/Cargo.toml b/Cargo.toml index 18b28fa2e2b..ffc3961d405 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,7 +70,7 @@ lazy_static = "1.5.0" prost = "0.13" prost-types = "0.13" regex = "1.5.4" -reqwest = "0.12.5" +reqwest = "0.12.15" serde = { version = "1.0.126", features = ["rc"] } serde_derive = "1.0.125" serde_json = { version = "1.0", features = ["arbitrary_precision"] } diff --git a/graph/Cargo.toml b/graph/Cargo.toml index 190f671c4e6..ecfc4251f32 100644 --- a/graph/Cargo.toml +++ b/graph/Cargo.toml @@ -24,7 +24,7 @@ chrono = "0.4.38" envconfig = "0.10.0" Inflector = "0.11.3" isatty = "0.1.9" -reqwest = { version = "0.12.5", features = ["json", "stream", "multipart"] } +reqwest = { version = "0.12.15", features = ["json", "stream", "multipart"] } ethabi = "17.2" hex = "0.4.3" http0 = { version = "0", package = "http" } From ca43f5080c4945c417ca4f2f7a707f954ea54b20 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 09:26:03 +0100 Subject: [PATCH 115/160] build(deps): bump protobuf-parse from 3.7.1 to 3.7.2 (#5905) Bumps [protobuf-parse](https://github.com/stepancheg/rust-protobuf) from 3.7.1 to 3.7.2. - [Changelog](https://github.com/stepancheg/rust-protobuf/blob/master/CHANGELOG.md) - [Commits](https://github.com/stepancheg/rust-protobuf/compare/v3.7.1...v3.7.2) --- updated-dependencies: - dependency-name: protobuf-parse dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 16 ++++++++-------- chain/common/Cargo.toml | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2c29c805f4a..39d3f9f66ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1929,7 +1929,7 @@ version = "0.36.0" dependencies = [ "anyhow", "heck 0.5.0", - "protobuf 3.7.1", + "protobuf 3.7.2", "protobuf-parse", ] @@ -3959,9 +3959,9 @@ checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" [[package]] name = "protobuf" -version = "3.7.1" +version = "3.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3a7c64d9bf75b1b8d981124c14c179074e8caa7dfe7b6a12e6222ddcd0c8f72" +checksum = "d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4" dependencies = [ "once_cell", "protobuf-support", @@ -3970,14 +3970,14 @@ dependencies = [ [[package]] name = "protobuf-parse" -version = "3.7.1" +version = "3.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322330e133eab455718444b4e033ebfac7c6528972c784fcde28d2cc783c6257" +checksum = "b4aeaa1f2460f1d348eeaeed86aea999ce98c1bded6f089ff8514c9d9dbdc973" dependencies = [ "anyhow", "indexmap 2.9.0", "log", - "protobuf 3.7.1", + "protobuf 3.7.2", "protobuf-support", "tempfile", "thiserror 1.0.61", @@ -3986,9 +3986,9 @@ dependencies = [ [[package]] name = "protobuf-support" -version = "3.7.1" +version = "3.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b088fd20b938a875ea00843b6faf48579462630015c3788d397ad6a786663252" +checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6" dependencies = [ "thiserror 1.0.61", ] diff --git a/chain/common/Cargo.toml b/chain/common/Cargo.toml index 6c1cfd9dc03..eef11ed85a3 100644 --- a/chain/common/Cargo.toml +++ b/chain/common/Cargo.toml @@ -7,6 +7,6 @@ edition.workspace = true [dependencies] protobuf = "3.0.2" -protobuf-parse = "3.7.1" +protobuf-parse = "3.7.2" anyhow = "1" heck = "0.5" From 718963dca7ff800ef3544cc8a9ad9749054c6423 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 09:26:22 +0100 Subject: [PATCH 116/160] build(deps): bump envconfig from 0.10.0 to 0.11.0 (#5890) Bumps [envconfig](https://github.com/greyblake/envconfig-rs) from 0.10.0 to 0.11.0. - [Changelog](https://github.com/greyblake/envconfig-rs/blob/master/CHANGELOG.md) - [Commits](https://github.com/greyblake/envconfig-rs/compare/v0.10.0...v0.11.0) --- updated-dependencies: - dependency-name: envconfig dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 10 +++++----- chain/ethereum/Cargo.toml | 2 +- graph/Cargo.toml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 39d3f9f66ed..1585276c7c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1428,22 +1428,22 @@ dependencies = [ [[package]] name = "envconfig" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea81cc7e21f55a9d9b1efb6816904978d0bfbe31a50347cb24b2e75564bcac9b" +checksum = "3c1d02ec9fdd0a585580bdc8fb7ad01675eee5e3b7336cedbabe3aab4a026dbc" dependencies = [ "envconfig_derive", ] [[package]] name = "envconfig_derive" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dfca278e5f84b45519acaaff758ebfa01f18e96998bc24b8f1b722dd804b9bf" +checksum = "d4291f0c7220b67ad15e9d5300ba2f215cee504f0924d60e77c9d1c77e7a69b1" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.87", ] [[package]] diff --git a/chain/ethereum/Cargo.toml b/chain/ethereum/Cargo.toml index 43d1afb9bd3..c72772aaa95 100644 --- a/chain/ethereum/Cargo.toml +++ b/chain/ethereum/Cargo.toml @@ -4,7 +4,7 @@ version.workspace = true edition.workspace = true [dependencies] -envconfig = "0.10.0" +envconfig = "0.11.0" jsonrpc-core = "18.0.0" graph = { path = "../../graph" } serde = { workspace = true } diff --git a/graph/Cargo.toml b/graph/Cargo.toml index ecfc4251f32..9e05b88c48b 100644 --- a/graph/Cargo.toml +++ b/graph/Cargo.toml @@ -21,7 +21,7 @@ graph_derive = { path = "./derive" } diesel = { workspace = true } diesel_derives = { workspace = true } chrono = "0.4.38" -envconfig = "0.10.0" +envconfig = "0.11.0" Inflector = "0.11.3" isatty = "0.1.9" reqwest = { version = "0.12.15", features = ["json", "stream", "multipart"] } From 6efa1a9945c0d37376028b3b226a635747d0ed94 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 09:26:42 +0100 Subject: [PATCH 117/160] build(deps): bump pretty_assertions from 1.4.0 to 1.4.1 (#5887) Bumps [pretty_assertions](https://github.com/rust-pretty-assertions/rust-pretty-assertions) from 1.4.0 to 1.4.1. - [Release notes](https://github.com/rust-pretty-assertions/rust-pretty-assertions/releases) - [Changelog](https://github.com/rust-pretty-assertions/rust-pretty-assertions/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-pretty-assertions/rust-pretty-assertions/compare/v1.4.0...v1.4.1) --- updated-dependencies: - dependency-name: pretty_assertions dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 8 ++++---- store/postgres/Cargo.toml | 2 +- store/test-store/Cargo.toml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1585276c7c5..7a7a50ff07b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3811,9 +3811,9 @@ dependencies = [ [[package]] name = "pretty_assertions" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" dependencies = [ "diff", "yansi", @@ -6884,9 +6884,9 @@ checksum = "63658493314859b4dfdf3fb8c1defd61587839def09582db50b8a4e93afca6bb" [[package]] name = "yansi" -version = "0.5.1" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "yoke" diff --git a/store/postgres/Cargo.toml b/store/postgres/Cargo.toml index 9d678971ef0..fc16a9f6cda 100644 --- a/store/postgres/Cargo.toml +++ b/store/postgres/Cargo.toml @@ -31,7 +31,7 @@ anyhow = "1.0.86" git-testament = "0.2.5" itertools = "0.13.0" hex = "0.4.3" -pretty_assertions = "1.4.0" +pretty_assertions = "1.4.1" [dev-dependencies] clap.workspace = true diff --git a/store/test-store/Cargo.toml b/store/test-store/Cargo.toml index fe05f12233e..2435b447570 100644 --- a/store/test-store/Cargo.toml +++ b/store/test-store/Cargo.toml @@ -18,4 +18,4 @@ prost-types = { workspace = true } [dev-dependencies] hex = "0.4.3" -pretty_assertions = "1.4.0" +pretty_assertions = "1.4.1" From 7e44e9da9a4343f27e843231fd067b16018bc82f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 09:26:59 +0100 Subject: [PATCH 118/160] build(deps): bump proc-macro2 from 1.0.86 to 1.0.94 (#5886) Bumps [proc-macro2](https://github.com/dtolnay/proc-macro2) from 1.0.86 to 1.0.94. - [Release notes](https://github.com/dtolnay/proc-macro2/releases) - [Commits](https://github.com/dtolnay/proc-macro2/compare/1.0.86...1.0.94) --- updated-dependencies: - dependency-name: proc-macro2 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- graph/derive/Cargo.toml | 2 +- runtime/derive/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a7a50ff07b..22a49d067ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3875,9 +3875,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" dependencies = [ "unicode-ident", ] diff --git a/graph/derive/Cargo.toml b/graph/derive/Cargo.toml index 3598e9022a6..f43691ba463 100644 --- a/graph/derive/Cargo.toml +++ b/graph/derive/Cargo.toml @@ -14,7 +14,7 @@ proc-macro = true [dependencies] syn = { workspace = true } quote = "1.0" -proc-macro2 = "1.0.85" +proc-macro2 = "1.0.94" heck = "0.5" [dev-dependencies] diff --git a/runtime/derive/Cargo.toml b/runtime/derive/Cargo.toml index 9019e5ad36e..bc3f74ec9f6 100644 --- a/runtime/derive/Cargo.toml +++ b/runtime/derive/Cargo.toml @@ -9,5 +9,5 @@ proc-macro = true [dependencies] syn = { workspace = true } quote = "1.0" -proc-macro2 = "1.0.85" +proc-macro2 = "1.0.94" heck = "0.5" From 92312d2782975b4d1019de1fc9eedbe09eecb97e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 09:27:19 +0100 Subject: [PATCH 119/160] build(deps): bump strum_macros from 0.26.4 to 0.27.1 (#5884) Bumps [strum_macros](https://github.com/Peternator7/strum) from 0.26.4 to 0.27.1. - [Release notes](https://github.com/Peternator7/strum/releases) - [Changelog](https://github.com/Peternator7/strum/blob/master/CHANGELOG.md) - [Commits](https://github.com/Peternator7/strum/commits/v0.27.1) --- updated-dependencies: - dependency-name: strum_macros dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 17 +++++++++++++++-- graph/Cargo.toml | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 22a49d067ab..e6e0f3bdff8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1892,7 +1892,7 @@ dependencies = [ "sqlparser", "stable-hash 0.3.4", "stable-hash 0.4.4", - "strum_macros", + "strum_macros 0.27.1", "thiserror 1.0.61", "tiny-keccak 1.5.0", "tokio", @@ -4962,7 +4962,7 @@ version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" dependencies = [ - "strum_macros", + "strum_macros 0.26.4", ] [[package]] @@ -4978,6 +4978,19 @@ dependencies = [ "syn 2.0.87", ] +[[package]] +name = "strum_macros" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.87", +] + [[package]] name = "substreams" version = "0.6.0" diff --git a/graph/Cargo.toml b/graph/Cargo.toml index 9e05b88c48b..4459cd316c1 100644 --- a/graph/Cargo.toml +++ b/graph/Cargo.toml @@ -59,7 +59,7 @@ sqlparser = { workspace = true } # stable-hash = { version = "0.4.2" } stable-hash = { git = "https://github.com/graphprotocol/stable-hash", branch = "main" } stable-hash_legacy = { git = "https://github.com/graphprotocol/stable-hash", branch = "old", package = "stable-hash", doc = false } -strum_macros = "0.26.4" +strum_macros = "0.27.1" slog-async = "2.5.0" slog-envlogger = "2.1.0" slog-term = "2.7.0" From 6c989037c24cc7a1a064d4831c431a2507197ef4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 09:28:06 +0100 Subject: [PATCH 120/160] build(deps): bump git-testament from 0.2.5 to 0.2.6 (#5882) Bumps [git-testament](https://github.com/kinnison/git-testament) from 0.2.5 to 0.2.6. - [Commits](https://github.com/kinnison/git-testament/compare/0.2.5...0.2.6) --- updated-dependencies: - dependency-name: git-testament dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 8 ++++---- server/index-node/Cargo.toml | 2 +- store/postgres/Cargo.toml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e6e0f3bdff8..29a2cefe90d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1791,18 +1791,18 @@ checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" [[package]] name = "git-testament" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "710c78d2b68e46e62f5ba63ba0a7a2986640f37f9ecc07903b9ad4e7b2dbfc8e" +checksum = "5a74999c921479f919c87a9d2e6922a79a18683f18105344df8e067149232e51" dependencies = [ "git-testament-derive", ] [[package]] name = "git-testament-derive" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b31494efbbe1a6730f6943759c21b92c8dc431cb4df177e6f2a6429c3c96842" +checksum = "bbeac967e71eb3dc1656742fc7521ec7cd3b6b88738face65bf1fddf702bc4c0" dependencies = [ "log", "proc-macro2", diff --git a/server/index-node/Cargo.toml b/server/index-node/Cargo.toml index 72b7ff869f7..63c68a311a8 100644 --- a/server/index-node/Cargo.toml +++ b/server/index-node/Cargo.toml @@ -11,4 +11,4 @@ graph-chain-arweave = { path = "../../chain/arweave" } graph-chain-ethereum = { path = "../../chain/ethereum" } graph-chain-near = { path = "../../chain/near" } graph-chain-substreams = { path = "../../chain/substreams" } -git-testament = "0.2.5" +git-testament = "0.2.6" diff --git a/store/postgres/Cargo.toml b/store/postgres/Cargo.toml index fc16a9f6cda..7574eaf80a6 100644 --- a/store/postgres/Cargo.toml +++ b/store/postgres/Cargo.toml @@ -28,7 +28,7 @@ serde = { workspace = true } serde_json = { workspace = true } stable-hash_legacy = { git = "https://github.com/graphprotocol/stable-hash", branch = "old", package = "stable-hash" } anyhow = "1.0.86" -git-testament = "0.2.5" +git-testament = "0.2.6" itertools = "0.13.0" hex = "0.4.3" pretty_assertions = "1.4.1" From 35f94971cd8d66167534deeafed5ac6afc20fa9a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 09:39:35 +0100 Subject: [PATCH 121/160] build(deps): bump priority-queue from 2.0.3 to 2.3.1 (#5904) Bumps [priority-queue](https://github.com/garro95/priority-queue) from 2.0.3 to 2.3.1. - [Release notes](https://github.com/garro95/priority-queue/releases) - [Commits](https://github.com/garro95/priority-queue/compare/2.0.3...2.3.1) --- updated-dependencies: - dependency-name: priority-queue dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- graph/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 29a2cefe90d..88389d7dd73 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3844,9 +3844,9 @@ dependencies = [ [[package]] name = "priority-queue" -version = "2.0.3" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70c501afe3a2e25c9bd219aa56ec1e04cdb3fcdd763055be268778c13fa82c1f" +checksum = "ef08705fa1589a1a59aa924ad77d14722cb0cd97b67dd5004ed5f4a4873fce8d" dependencies = [ "autocfg", "equivalent", diff --git a/graph/Cargo.toml b/graph/Cargo.toml index 4459cd316c1..6547d0281c6 100644 --- a/graph/Cargo.toml +++ b/graph/Cargo.toml @@ -78,7 +78,7 @@ tokio-retry = "0.3.0" toml = "0.8.8" url = "2.5.4" prometheus = "0.13.4" -priority-queue = "2.0.3" +priority-queue = "2.3.1" tonic = { workspace = true } prost = { workspace = true } prost-types = { workspace = true } From c19de01a21bc5484dc8fb6f5a21540db69bd733e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Apr 2025 10:09:55 +0100 Subject: [PATCH 122/160] build(deps): bump derive_more from 0.99.18 to 2.0.1 (#5903) * build(deps): bump derive_more from 0.99.18 to 2.0.1 Bumps [derive_more](https://github.com/JelteF/derive_more) from 0.99.18 to 2.0.1. - [Release notes](https://github.com/JelteF/derive_more/releases) - [Changelog](https://github.com/JelteF/derive_more/blob/master/CHANGELOG.md) - [Commits](https://github.com/JelteF/derive_more/compare/v0.99.18...v2.0.1) --- updated-dependencies: - dependency-name: derive_more dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] * fix version --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Filipe Azevedo --- Cargo.lock | 47 +++++++++++++++++++++++++++---- store/postgres/Cargo.toml | 2 +- store/postgres/src/block_range.rs | 1 + store/postgres/src/lib.rs | 2 -- 4 files changed, 44 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 88389d7dd73..ec8c31e1233 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -797,6 +797,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "convert_case" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb402b8d4c85569410425650ce3eddc7d698ed96d39a73f941b08fb63082f1e7" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -1188,17 +1197,39 @@ dependencies = [ [[package]] name = "derive_more" -version = "0.99.18" +version = "0.99.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" +checksum = "3da29a38df43d6f156149c9b43ded5e018ddff2a855cf2cfd62e8cd7d079c69f" dependencies = [ - "convert_case", + "convert_case 0.4.0", "proc-macro2", "quote", "rustc_version", "syn 2.0.87", ] +[[package]] +name = "derive_more" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +dependencies = [ + "convert_case 0.7.1", + "proc-macro2", + "quote", + "syn 2.0.87", + "unicode-xid", +] + [[package]] name = "diesel" version = "2.2.7" @@ -2150,7 +2181,7 @@ dependencies = [ "blake3 1.6.1", "chrono", "clap", - "derive_more", + "derive_more 2.0.1", "diesel", "diesel-derive-enum", "diesel-dynamic-schema", @@ -5862,6 +5893,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4259d9d4425d9f0661581b804cb85fe66a4c631cadd8f490d1c13a35d5d9291" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-width" version = "0.1.13" @@ -6460,7 +6497,7 @@ dependencies = [ "arrayvec 0.7.4", "base64 0.13.1", "bytes", - "derive_more", + "derive_more 0.99.19", "ethabi", "ethereum-types", "futures 0.3.30", diff --git a/store/postgres/Cargo.toml b/store/postgres/Cargo.toml index 7574eaf80a6..c95b3cb83a4 100644 --- a/store/postgres/Cargo.toml +++ b/store/postgres/Cargo.toml @@ -7,7 +7,7 @@ edition.workspace = true async-trait = "0.1.50" blake3 = "1.6" chrono = { workspace = true } -derive_more = { version = "0.99.18" } +derive_more = { version = "2.0.1", features = ["full"] } diesel = { workspace = true } diesel-dynamic-schema = { workspace = true } diesel-derive-enum = { workspace = true } diff --git a/store/postgres/src/block_range.rs 
b/store/postgres/src/block_range.rs index 7dbcaa29c00..d6044c644ad 100644 --- a/store/postgres/src/block_range.rs +++ b/store/postgres/src/block_range.rs @@ -1,3 +1,4 @@ +use derive_more::Constructor; use diesel::pg::Pg; use diesel::query_builder::{AstPass, QueryFragment}; use diesel::result::QueryResult; diff --git a/store/postgres/src/lib.rs b/store/postgres/src/lib.rs index 42c439d2f3c..25c8c285910 100644 --- a/store/postgres/src/lib.rs +++ b/store/postgres/src/lib.rs @@ -2,8 +2,6 @@ //! [Store] for the details of how the store is organized across //! different databases/shards. -#[macro_use] -extern crate derive_more; #[macro_use] extern crate diesel; #[macro_use] From c72998b93230e2e84f5618818020b87f36da8c36 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 12:24:11 -0700 Subject: [PATCH 123/160] core: Extract transact_block_state into function Reduce the duplication between normal and wasm block processing by factoring the common part into a function --- core/src/subgraph/runner.rs | 433 ++++++++++++++---------------------- 1 file changed, 173 insertions(+), 260 deletions(-) diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index 71c36886d2e..fef9b6c0c2e 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -356,6 +356,157 @@ where } } + async fn transact_block_state( + &mut self, + logger: &Logger, + block_ptr: BlockPtr, + firehose_cursor: FirehoseCursor, + block_time: BlockTime, + mut block_state: BlockState, + proof_of_indexing: SharedProofOfIndexing, + offchain_mods: Vec, + processed_offchain_data_sources: Vec, + cancel_handle: &CancelHandle, + ) -> Result { + let has_errors = block_state.has_errors(); + let is_non_fatal_errors_active = self + .inputs + .features + .contains(&SubgraphFeature::NonFatalErrors); + + // Avoid writing to store if block stream has been canceled + if cancel_handle.is_canceled() { + return Err(ProcessingError::Canceled); + } + + if let Some(proof_of_indexing) = proof_of_indexing.into_inner() { + update_proof_of_indexing( + proof_of_indexing, + block_time, + &self.metrics.host.stopwatch, + &mut block_state.entity_cache, + ) + .await + .non_deterministic()?; + } + + let section = self + .metrics + .host + .stopwatch + .start_section("as_modifications"); + let ModificationsAndCache { + modifications: mut mods, + entity_lfu_cache: cache, + evict_stats, + } = block_state + .entity_cache + .as_modifications(block_ptr.number) + .map_err(|e| ProcessingError::Unknown(e.into()))?; + section.end(); + + trace!(self.logger, "Entity cache statistics"; + "weight" => evict_stats.new_weight, + "evicted_weight" => evict_stats.evicted_weight, + "count" => evict_stats.new_count, + "evicted_count" => evict_stats.evicted_count, + "stale_update" => evict_stats.stale_update, + "hit_rate" => format!("{:.0}%", evict_stats.hit_rate_pct()), + "accesses" => evict_stats.accesses, + "evict_time_ms" => evict_stats.evict_time.as_millis()); + + mods.extend(offchain_mods); + + // Put the cache back in the state, asserting that the placeholder cache was not used. 
+ assert!(self.state.entity_lfu_cache.is_empty()); + self.state.entity_lfu_cache = cache; + + if !mods.is_empty() { + info!(&logger, "Applying {} entity operation(s)", mods.len()); + } + + let err_count = block_state.deterministic_errors.len(); + for (i, e) in block_state.deterministic_errors.iter().enumerate() { + let message = format!("{:#}", e).replace('\n', "\t"); + error!(&logger, "Subgraph error {}/{}", i + 1, err_count; + "error" => message, + "code" => LogCode::SubgraphSyncingFailure + ); + } + + // Transact entity operations into the store and update the + // subgraph's block stream pointer + let _section = self.metrics.host.stopwatch.start_section("transact_block"); + let start = Instant::now(); + + // If a deterministic error has happened, make the PoI to be the only entity that'll be stored. + if has_errors && !is_non_fatal_errors_active { + let is_poi_entity = + |entity_mod: &EntityModification| entity_mod.key().entity_type.is_poi(); + mods.retain(is_poi_entity); + // Confidence check + assert!( + mods.len() == 1, + "There should be only one PoI EntityModification" + ); + } + + let BlockState { + deterministic_errors, + persisted_data_sources, + metrics: block_state_metrics, + .. + } = block_state; + + let first_error = deterministic_errors.first().cloned(); + + let is_caught_up = self.is_caught_up(&block_ptr).await.non_deterministic()?; + + self.inputs + .store + .transact_block_operations( + block_ptr.clone(), + block_time, + firehose_cursor, + mods, + &self.metrics.host.stopwatch, + persisted_data_sources, + deterministic_errors, + processed_offchain_data_sources, + is_non_fatal_errors_active, + is_caught_up, + ) + .await + .classify() + .detail("Failed to transact block operations")?; + + // For subgraphs with `nonFatalErrors` feature disabled, we consider + // any error as fatal. + // + // So we do an early return to make the subgraph stop processing blocks. + // + // In this scenario the only entity that is stored/transacted is the PoI, + // all of the others are discarded. + if has_errors && !is_non_fatal_errors_active { + // Only the first error is reported. + return Err(ProcessingError::Deterministic(Box::new( + first_error.unwrap(), + ))); + } + + let elapsed = start.elapsed().as_secs_f64(); + self.metrics + .subgraph + .block_ops_transaction_duration + .observe(elapsed); + + block_state_metrics + .flush_metrics_to_store(&logger, block_ptr, self.inputs.deployment.id) + .non_deterministic()?; + + Ok(has_errors) + } + /// Processes a block and returns the updated context and a boolean flag indicating /// whether new dynamic data sources have been added to the subgraph. 
async fn process_block( @@ -625,55 +776,6 @@ where } } - let has_errors = block_state.has_errors(); - let is_non_fatal_errors_active = self - .inputs - .features - .contains(&SubgraphFeature::NonFatalErrors); - - // Apply entity operations and advance the stream - - // Avoid writing to store if block stream has been canceled - if block_stream_cancel_handle.is_canceled() { - return Err(ProcessingError::Canceled); - } - - if let Some(proof_of_indexing) = proof_of_indexing.into_inner() { - update_proof_of_indexing( - proof_of_indexing, - block.timestamp(), - &self.metrics.host.stopwatch, - &mut block_state.entity_cache, - ) - .await - .non_deterministic()?; - } - - let section = self - .metrics - .host - .stopwatch - .start_section("as_modifications"); - let ModificationsAndCache { - modifications: mut mods, - entity_lfu_cache: cache, - evict_stats, - } = block_state - .entity_cache - .as_modifications(block.number()) - .map_err(|e| ProcessingError::Unknown(e.into()))?; - section.end(); - - trace!(self.logger, "Entity cache statistics"; - "weight" => evict_stats.new_weight, - "evicted_weight" => evict_stats.evicted_weight, - "count" => evict_stats.new_count, - "evicted_count" => evict_stats.evicted_count, - "stale_update" => evict_stats.stale_update, - "hit_rate" => format!("{:.0}%", evict_stats.hit_rate_pct()), - "accesses" => evict_stats.accesses, - "evict_time_ms" => evict_stats.evict_time.as_millis()); - // Check for offchain events and process them, including their entity modifications in the // set to be transacted. let offchain_events = self @@ -685,95 +787,23 @@ where self.handle_offchain_triggers(offchain_events, &block) .await .non_deterministic()?; - mods.extend(offchain_mods); - - // Put the cache back in the state, asserting that the placeholder cache was not used. - assert!(self.state.entity_lfu_cache.is_empty()); - self.state.entity_lfu_cache = cache; + block_state + .persisted_data_sources + .extend(persisted_off_chain_data_sources); - if !mods.is_empty() { - info!(&logger, "Applying {} entity operation(s)", mods.len()); - } - - let err_count = block_state.deterministic_errors.len(); - for (i, e) in block_state.deterministic_errors.iter().enumerate() { - let message = format!("{:#}", e).replace('\n', "\t"); - error!(&logger, "Subgraph error {}/{}", i + 1, err_count; - "error" => message, - "code" => LogCode::SubgraphSyncingFailure - ); - } - - // Transact entity operations into the store and update the - // subgraph's block stream pointer - let _section = self.metrics.host.stopwatch.start_section("transact_block"); - let start = Instant::now(); - - // If a deterministic error has happened, make the PoI to be the only entity that'll be stored. - if has_errors && !is_non_fatal_errors_active { - let is_poi_entity = - |entity_mod: &EntityModification| entity_mod.key().entity_type.is_poi(); - mods.retain(is_poi_entity); - // Confidence check - assert!( - mods.len() == 1, - "There should be only one PoI EntityModification" - ); - } - - let BlockState { - deterministic_errors, - mut persisted_data_sources, - metrics: block_state_metrics, - .. 
- } = block_state; - - let first_error = deterministic_errors.first().cloned(); - - let is_caught_up = self.is_caught_up(&block_ptr).await.non_deterministic()?; - - persisted_data_sources.extend(persisted_off_chain_data_sources); - self.inputs - .store - .transact_block_operations( + let has_errors = self + .transact_block_state( + &logger, block_ptr.clone(), + firehose_cursor.clone(), block.timestamp(), - firehose_cursor, - mods, - &self.metrics.host.stopwatch, - persisted_data_sources, - deterministic_errors, + block_state, + proof_of_indexing, + offchain_mods, processed_offchain_data_sources, - is_non_fatal_errors_active, - is_caught_up, + block_stream_cancel_handle, ) - .await - .classify() - .detail("Failed to transact block operations")?; - - // For subgraphs with `nonFatalErrors` feature disabled, we consider - // any error as fatal. - // - // So we do an early return to make the subgraph stop processing blocks. - // - // In this scenario the only entity that is stored/transacted is the PoI, - // all of the others are discarded. - if has_errors && !is_non_fatal_errors_active { - // Only the first error is reported. - return Err(ProcessingError::Deterministic(Box::new( - first_error.unwrap(), - ))); - } - - let elapsed = start.elapsed().as_secs_f64(); - self.metrics - .subgraph - .block_ops_transaction_duration - .observe(elapsed); - - block_state_metrics - .flush_metrics_to_store(&logger, block_ptr, self.inputs.deployment.id) - .non_deterministic()?; + .await?; // To prevent a buggy pending version from replacing a current version, if errors are // present the subgraph will be unassigned. @@ -1336,7 +1366,7 @@ where // Causality region for onchain triggers. let causality_region = PoICausalityRegion::from_network(&self.inputs.network); - let mut block_state = { + let block_state = { match self .process_wasm_block( &proof_of_indexing, @@ -1371,136 +1401,19 @@ where } }; - let has_errors = block_state.has_errors(); - let is_non_fatal_errors_active = self - .inputs - .features - .contains(&SubgraphFeature::NonFatalErrors); - - // Apply entity operations and advance the stream - - // Avoid writing to store if block stream has been canceled - if cancel_handle.is_canceled() { - return Err(ProcessingError::Canceled.into()); - } - - if let Some(proof_of_indexing) = proof_of_indexing.into_inner() { - update_proof_of_indexing( - proof_of_indexing, - block_time, - &self.metrics.host.stopwatch, - &mut block_state.entity_cache, - ) - .await - .non_deterministic()?; - } - - let section = self - .metrics - .host - .stopwatch - .start_section("as_modifications"); - let ModificationsAndCache { - modifications: mut mods, - entity_lfu_cache: cache, - evict_stats, - } = block_state - .entity_cache - .as_modifications(block_ptr.number) - .map_err(|e| ProcessingError::Unknown(e.into()))?; - section.end(); - - trace!(self.logger, "Entity cache statistics"; - "weight" => evict_stats.new_weight, - "evicted_weight" => evict_stats.evicted_weight, - "count" => evict_stats.new_count, - "evicted_count" => evict_stats.evicted_count, - "stale_update" => evict_stats.stale_update, - "hit_rate" => format!("{:.0}%", evict_stats.hit_rate_pct()), - "accesses" => evict_stats.accesses, - "evict_time_ms" => evict_stats.evict_time.as_millis()); - - // Put the cache back in the state, asserting that the placeholder cache was not used. 
- assert!(self.state.entity_lfu_cache.is_empty()); - self.state.entity_lfu_cache = cache; - - if !mods.is_empty() { - info!(&logger, "Applying {} entity operation(s)", mods.len()); - } - - let err_count = block_state.deterministic_errors.len(); - for (i, e) in block_state.deterministic_errors.iter().enumerate() { - let message = format!("{:#}", e).replace('\n', "\t"); - error!(&logger, "Subgraph error {}/{}", i + 1, err_count; - "error" => message, - "code" => LogCode::SubgraphSyncingFailure - ); - } - - // Transact entity operations into the store and update the - // subgraph's block stream pointer - let _section = self.metrics.host.stopwatch.start_section("transact_block"); - let start = Instant::now(); - - // If a deterministic error has happened, make the PoI to be the only entity that'll be stored. - if has_errors && !is_non_fatal_errors_active { - let is_poi_entity = - |entity_mod: &EntityModification| entity_mod.key().entity_type.is_poi(); - mods.retain(is_poi_entity); - // Confidence check - assert!( - mods.len() == 1, - "There should be only one PoI EntityModification" - ); - } - - let BlockState { - deterministic_errors, - .. - } = block_state; - - let first_error = deterministic_errors.first().cloned(); - - // We consider a subgraph caught up when it's at most 1 blocks behind the chain head. - let is_caught_up = self.is_caught_up(&block_ptr).await.non_deterministic()?; - - self.inputs - .store - .transact_block_operations( - block_ptr, + let has_errors = self + .transact_block_state( + &logger, + block_ptr.clone(), + cursor.clone(), block_time, - cursor, - mods, - &self.metrics.host.stopwatch, + block_state, + proof_of_indexing, vec![], - deterministic_errors, vec![], - is_non_fatal_errors_active, - is_caught_up, + cancel_handle, ) - .await - .classify() - .detail("Failed to transact block operations")?; - - // For subgraphs with `nonFatalErrors` feature disabled, we consider - // any error as fatal. - // - // So we do an early return to make the subgraph stop processing blocks. - // - // In this scenario the only entity that is stored/transacted is the PoI, - // all of the others are discarded. - if has_errors && !is_non_fatal_errors_active { - // Only the first error is reported. - return Err(ProcessingError::Deterministic(Box::new( - first_error.unwrap(), - ))); - } - - let elapsed = start.elapsed().as_secs_f64(); - self.metrics - .subgraph - .block_ops_transaction_duration - .observe(elapsed); + .await?; // To prevent a buggy pending version from replacing a current version, if errors are // present the subgraph will be unassigned. 
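The extracted transact_block_state above also centralizes the rule that a deterministic error, with the nonFatalErrors feature disabled, discards every entity modification except the proof of indexing before the store write and then returns the first error so block processing stops. A minimal standalone sketch of just that rule, using made-up stand-in names (Mod, mods_to_transact) rather than the real EntityModification and BlockState types:

// Illustration only: Mod stands in for EntityModification.
#[derive(Debug, PartialEq)]
enum Mod {
    Poi,
    Data(&'static str),
}

fn mods_to_transact(mut mods: Vec<Mod>, has_errors: bool, non_fatal_errors_active: bool) -> Vec<Mod> {
    if has_errors && !non_fatal_errors_active {
        // Keep only the PoI entity; everything else from this block is discarded.
        mods.retain(|m| *m == Mod::Poi);
        assert!(
            mods.len() == 1,
            "There should be only one PoI EntityModification"
        );
    }
    mods
}

fn main() {
    let mods = vec![Mod::Data("Token"), Mod::Poi, Mod::Data("Pair")];
    assert_eq!(mods_to_transact(mods, true, false), vec![Mod::Poi]);
}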
From 6f558d6bd1d9805f27508c1b1854b92f6f3b451d Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 15:38:01 -0700 Subject: [PATCH 124/160] core: block_state does not need to be mutable in transact_block_state --- core/src/subgraph/runner.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index fef9b6c0c2e..df589fc8a1a 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -362,7 +362,7 @@ where block_ptr: BlockPtr, firehose_cursor: FirehoseCursor, block_time: BlockTime, - mut block_state: BlockState, + block_state: BlockState, proof_of_indexing: SharedProofOfIndexing, offchain_mods: Vec, processed_offchain_data_sources: Vec, @@ -374,6 +374,14 @@ where .features .contains(&SubgraphFeature::NonFatalErrors); + let BlockState { + deterministic_errors, + persisted_data_sources, + metrics: block_state_metrics, + mut entity_cache, + .. + } = block_state; + // Avoid writing to store if block stream has been canceled if cancel_handle.is_canceled() { return Err(ProcessingError::Canceled); @@ -384,7 +392,7 @@ where proof_of_indexing, block_time, &self.metrics.host.stopwatch, - &mut block_state.entity_cache, + &mut entity_cache, ) .await .non_deterministic()?; @@ -399,8 +407,7 @@ where modifications: mut mods, entity_lfu_cache: cache, evict_stats, - } = block_state - .entity_cache + } = entity_cache .as_modifications(block_ptr.number) .map_err(|e| ProcessingError::Unknown(e.into()))?; section.end(); @@ -425,8 +432,8 @@ where info!(&logger, "Applying {} entity operation(s)", mods.len()); } - let err_count = block_state.deterministic_errors.len(); - for (i, e) in block_state.deterministic_errors.iter().enumerate() { + let err_count = deterministic_errors.len(); + for (i, e) in deterministic_errors.iter().enumerate() { let message = format!("{:#}", e).replace('\n', "\t"); error!(&logger, "Subgraph error {}/{}", i + 1, err_count; "error" => message, @@ -451,13 +458,6 @@ where ); } - let BlockState { - deterministic_errors, - persisted_data_sources, - metrics: block_state_metrics, - .. 
- } = block_state; - let first_error = deterministic_errors.first().cloned(); let is_caught_up = self.is_caught_up(&block_ptr).await.non_deterministic()?; From c67f109a9d00e07b38704019ef316fe66d13eb0b Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 15:43:18 -0700 Subject: [PATCH 125/160] core: Make the handling of nonfatal errors a little less mindbending --- core/src/subgraph/inputs.rs | 8 ++++++++ core/src/subgraph/runner.rs | 16 ++++------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/core/src/subgraph/inputs.rs b/core/src/subgraph/inputs.rs index ca52073ab06..91bbdd131f4 100644 --- a/core/src/subgraph/inputs.rs +++ b/core/src/subgraph/inputs.rs @@ -75,4 +75,12 @@ impl IndexingInputs { instrument: *instrument, } } + + pub fn errors_are_non_fatal(&self) -> bool { + self.features.contains(&SubgraphFeature::NonFatalErrors) + } + + pub fn errors_are_fatal(&self) -> bool { + !self.features.contains(&SubgraphFeature::NonFatalErrors) + } } diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index df589fc8a1a..808d7318784 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -21,10 +21,7 @@ use graph::components::{ subgraph::{MappingError, PoICausalityRegion, ProofOfIndexing, SharedProofOfIndexing}, }; use graph::data::store::scalar::Bytes; -use graph::data::subgraph::{ - schema::{SubgraphError, SubgraphHealth}, - SubgraphFeature, -}; +use graph::data::subgraph::schema::{SubgraphError, SubgraphHealth}; use graph::data_source::{ offchain, CausalityRegion, DataSource, DataSourceCreationError, TriggerData, }; @@ -369,11 +366,6 @@ where cancel_handle: &CancelHandle, ) -> Result { let has_errors = block_state.has_errors(); - let is_non_fatal_errors_active = self - .inputs - .features - .contains(&SubgraphFeature::NonFatalErrors); - let BlockState { deterministic_errors, persisted_data_sources, @@ -447,7 +439,7 @@ where let start = Instant::now(); // If a deterministic error has happened, make the PoI to be the only entity that'll be stored. - if has_errors && !is_non_fatal_errors_active { + if has_errors && self.inputs.errors_are_fatal() { let is_poi_entity = |entity_mod: &EntityModification| entity_mod.key().entity_type.is_poi(); mods.retain(is_poi_entity); @@ -473,7 +465,7 @@ where persisted_data_sources, deterministic_errors, processed_offchain_data_sources, - is_non_fatal_errors_active, + self.inputs.errors_are_non_fatal(), is_caught_up, ) .await @@ -487,7 +479,7 @@ where // // In this scenario the only entity that is stored/transacted is the PoI, // all of the others are discarded. - if has_errors && !is_non_fatal_errors_active { + if has_errors && self.inputs.errors_are_fatal() { // Only the first error is reported. 
return Err(ProcessingError::Deterministic(Box::new( first_error.unwrap(), From 91bd6a7446546f6e37487d90bbeeb74e3e550a9a Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 16:32:43 -0700 Subject: [PATCH 126/160] core, graph: Specialize Cancelable a little to what we actually use This is so we can write down types involving cancelables --- core/src/subgraph/registrar.rs | 4 +-- core/src/subgraph/runner.rs | 2 +- graph/src/blockchain/block_stream.rs | 2 +- graph/src/ext/futures.rs | 51 ++++++++++------------------ 4 files changed, 21 insertions(+), 38 deletions(-) diff --git a/core/src/subgraph/registrar.rs b/core/src/subgraph/registrar.rs index 3a712b6daa9..325c4c4560a 100644 --- a/core/src/subgraph/registrar.rs +++ b/core/src/subgraph/registrar.rs @@ -120,9 +120,7 @@ where .compat() .map_err(SubgraphAssignmentProviderError::Unknown) .map_err(CancelableError::Error) - .cancelable(&assignment_event_stream_cancel_handle, || { - Err(CancelableError::Cancel) - }) + .cancelable(&assignment_event_stream_cancel_handle) .compat() .for_each(move |assignment_event| { assert_eq!(assignment_event.node_id(), &node_id); diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index 808d7318784..3fd068188f6 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -267,7 +267,7 @@ where ) .await? .map_err(CancelableError::from) - .cancelable(&block_stream_canceler, || Err(CancelableError::Cancel)); + .cancelable(&block_stream_canceler); // Keep the stream's cancel guard around to be able to shut it down when the subgraph // deployment is unassigned diff --git a/graph/src/blockchain/block_stream.rs b/graph/src/blockchain/block_stream.rs index b9f602d802c..99f2dabd1ac 100644 --- a/graph/src/blockchain/block_stream.rs +++ b/graph/src/blockchain/block_stream.rs @@ -1024,7 +1024,7 @@ mod test { let mut stream = BufferedBlockStream::spawn_from_stream(buffer_size, stream) .map_err(CancelableError::Error) - .cancelable(&guard, || Err(CancelableError::Cancel)); + .cancelable(&guard); let mut blocks = HashSet::::new(); let mut count = 0; diff --git a/graph/src/ext/futures.rs b/graph/src/ext/futures.rs index c25550a426f..7c5eb0fc96e 100644 --- a/graph/src/ext/futures.rs +++ b/graph/src/ext/futures.rs @@ -12,42 +12,45 @@ use std::time::Duration; /// /// Created by calling `cancelable` extension method. /// Can be canceled through the corresponding `CancelGuard`. -pub struct Cancelable { +pub struct Cancelable { inner: T, cancel_receiver: Fuse>, - on_cancel: C, } -impl Cancelable { +impl Cancelable { pub fn get_mut(&mut self) -> &mut T { &mut self.inner } } /// It's not viable to use `select` directly, so we do a custom implementation. -impl S::Item + Unpin> Stream for Cancelable { - type Item = S::Item; +impl> + Unpin, R, E: Display + Debug> Stream for Cancelable { + type Item = Result>; fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { // Error if the stream was canceled by dropping the sender. 
match self.cancel_receiver.poll_unpin(cx) { Poll::Ready(Ok(_)) => unreachable!(), - Poll::Ready(Err(_)) => Poll::Ready(Some((self.on_cancel)())), - Poll::Pending => Pin::new(&mut self.inner).poll_next(cx), + Poll::Ready(Err(_)) => Poll::Ready(Some(Err(CancelableError::Cancel))), + Poll::Pending => Pin::new(&mut self.inner) + .poll_next(cx) + .map_err(|x| CancelableError::Error(x)), } } } -impl F::Output + Unpin> Future for Cancelable { - type Output = F::Output; +impl> + Unpin, R, E: Display + Debug> Future for Cancelable { + type Output = Result>; fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { // Error if the future was canceled by dropping the sender. // `canceled` is fused so we may ignore `Ok`s. match self.cancel_receiver.poll_unpin(cx) { Poll::Ready(Ok(_)) => unreachable!(), - Poll::Ready(Err(_)) => Poll::Ready((self.on_cancel)()), - Poll::Pending => Pin::new(&mut self.inner).poll(cx), + Poll::Ready(Err(_)) => Poll::Ready(Err(CancelableError::Cancel)), + Poll::Pending => Pin::new(&mut self.inner) + .poll(cx) + .map_err(|x| CancelableError::Error(x)), } } } @@ -209,25 +212,16 @@ pub trait StreamExtension: Stream + Sized { /// When `cancel` is called on a `CancelGuard` or it is dropped, /// `Cancelable` receives an error. /// - fn cancelable Self::Item>( - self, - guard: &impl Canceler, - on_cancel: C, - ) -> Cancelable; + fn cancelable(self, guard: &impl Canceler) -> Cancelable; } impl StreamExtension for S { - fn cancelable S::Item>( - self, - guard: &impl Canceler, - on_cancel: C, - ) -> Cancelable { + fn cancelable(self, guard: &impl Canceler) -> Cancelable { let (canceler, cancel_receiver) = oneshot::channel(); guard.add_cancel_sender(canceler); Cancelable { inner: self, cancel_receiver: cancel_receiver.fuse(), - on_cancel, } } } @@ -237,27 +231,18 @@ pub trait FutureExtension: Future + Sized { /// `Cancelable` receives an error. /// /// `on_cancel` is called to make an error value upon cancelation. 
- fn cancelable Self::Output>( - self, - guard: &impl Canceler, - on_cancel: C, - ) -> Cancelable; + fn cancelable(self, guard: &impl Canceler) -> Cancelable; fn timeout(self, dur: Duration) -> tokio::time::Timeout; } impl FutureExtension for F { - fn cancelable F::Output>( - self, - guard: &impl Canceler, - on_cancel: C, - ) -> Cancelable { + fn cancelable(self, guard: &impl Canceler) -> Cancelable { let (canceler, cancel_receiver) = oneshot::channel(); guard.add_cancel_sender(canceler); Cancelable { inner: self, cancel_receiver: cancel_receiver.fuse(), - on_cancel, } } From c91a09efcc0b87102bec102aa4d14af4b1f7cf09 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 16:54:37 -0700 Subject: [PATCH 127/160] core: Reduce noise by storing CancelHandle in Runner --- core/src/subgraph/registrar.rs | 1 - core/src/subgraph/runner.rs | 115 ++++++++++++++++----------------- 2 files changed, 54 insertions(+), 62 deletions(-) diff --git a/core/src/subgraph/registrar.rs b/core/src/subgraph/registrar.rs index 325c4c4560a..6f7ae17425f 100644 --- a/core/src/subgraph/registrar.rs +++ b/core/src/subgraph/registrar.rs @@ -119,7 +119,6 @@ where assignment_event_stream .compat() .map_err(SubgraphAssignmentProviderError::Unknown) - .map_err(CancelableError::Error) .cancelable(&assignment_event_stream_cancel_handle) .compat() .for_each(move |assignment_event| { diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index 3fd068188f6..e69f23ee67d 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -8,7 +8,7 @@ use crate::subgraph::stream::new_block_stream; use anyhow::Context as _; use async_trait::async_trait; use graph::blockchain::block_stream::{ - BlockStreamError, BlockStreamEvent, BlockWithTriggers, FirehoseCursor, + BlockStream, BlockStreamError, BlockStreamEvent, BlockWithTriggers, FirehoseCursor, }; use graph::blockchain::{ Block, BlockTime, Blockchain, DataSource as _, SubgraphFilter, Trigger, TriggerFilter as _, @@ -26,8 +26,8 @@ use graph::data_source::{ offchain, CausalityRegion, DataSource, DataSourceCreationError, TriggerData, }; use graph::env::EnvVars; +use graph::ext::futures::Cancelable; use graph::futures03::stream::StreamExt; -use graph::futures03::TryStreamExt; use graph::prelude::{ anyhow, hex, retry, thiserror, BlockNumber, BlockPtr, BlockState, CancelGuard, CancelHandle, CancelToken as _, CancelableError, CheapClone as _, EntityCache, EntityModification, Error, @@ -60,6 +60,7 @@ where inputs: Arc>, logger: Logger, pub metrics: RunnerMetrics, + cancel_handle: Option, } #[derive(Debug, thiserror::Error)] @@ -99,6 +100,7 @@ where }, logger, metrics, + cancel_handle: None, } } @@ -206,6 +208,39 @@ where self.build_filter() } + async fn start_block_stream(&mut self) -> Result>>, Error> { + let block_stream_canceler = CancelGuard::new(); + let block_stream_cancel_handle = block_stream_canceler.handle(); + // TriggerFilter needs to be rebuilt eveytime the blockstream is restarted + self.ctx.filter = Some(self.build_filter()); + + let block_stream = new_block_stream( + &self.inputs, + self.ctx.filter.clone().unwrap(), // Safe to unwrap as we just called `build_filter` in the previous line + &self.metrics.subgraph, + ) + .await? 
+ .cancelable(&block_stream_canceler); + + self.cancel_handle = Some(block_stream_cancel_handle); + + // Keep the stream's cancel guard around to be able to shut it down when the subgraph + // deployment is unassigned + self.ctx + .instances + .insert(self.inputs.deployment.id, block_stream_canceler); + + Ok(block_stream) + } + + fn is_canceled(&self) -> bool { + if let Some(ref cancel_handle) = self.cancel_handle { + cancel_handle.is_canceled() + } else { + false + } + } + pub async fn run(self) -> Result<(), SubgraphRunnerError> { self.run_inner(false).await.map(|_| ()) } @@ -255,27 +290,9 @@ where loop { debug!(self.logger, "Starting or restarting subgraph"); - let block_stream_canceler = CancelGuard::new(); - let block_stream_cancel_handle = block_stream_canceler.handle(); - // TriggerFilter needs to be rebuilt eveytime the blockstream is restarted - self.ctx.filter = Some(self.build_filter()); + let mut block_stream = self.start_block_stream().await?; - let mut block_stream = new_block_stream( - &self.inputs, - self.ctx.filter.clone().unwrap(), // Safe to unwrap as we just called `build_filter` in the previous line - &self.metrics.subgraph, - ) - .await? - .map_err(CancelableError::from) - .cancelable(&block_stream_canceler); - - // Keep the stream's cancel guard around to be able to shut it down when the subgraph - // deployment is unassigned - self.ctx - .instances - .insert(self.inputs.deployment.id, block_stream_canceler); - - debug!(self.logger, "Starting block stream"); + debug!(self.logger, "Started block stream"); self.metrics.subgraph.deployment_status.running(); @@ -291,21 +308,18 @@ where // This will require some code refactor in how the BlockStream is created let block_start = Instant::now(); - let action = self - .handle_stream_event(event, &block_stream_cancel_handle) - .await - .map(|res| { - self.metrics - .subgraph - .observe_block_processed(block_start.elapsed(), res.block_finished()); - res - })?; + let action = self.handle_stream_event(event).await.map(|res| { + self.metrics + .subgraph + .observe_block_processed(block_start.elapsed(), res.block_finished()); + res + })?; self.update_deployment_synced_metric(); // It is possible that the subgraph was unassigned, but the runner was in // a retry delay state and did not observe the cancel signal. - if block_stream_cancel_handle.is_canceled() { + if self.is_canceled() { // It is also possible that the runner was in a retry delay state while // the subgraph was reassigned and a new runner was started. if self.ctx.instances.contains(&self.inputs.deployment.id) { @@ -363,7 +377,6 @@ where proof_of_indexing: SharedProofOfIndexing, offchain_mods: Vec, processed_offchain_data_sources: Vec, - cancel_handle: &CancelHandle, ) -> Result { let has_errors = block_state.has_errors(); let BlockState { @@ -375,7 +388,7 @@ where } = block_state; // Avoid writing to store if block stream has been canceled - if cancel_handle.is_canceled() { + if self.is_canceled() { return Err(ProcessingError::Canceled); } @@ -503,7 +516,6 @@ where /// whether new dynamic data sources have been added to the subgraph. 
async fn process_block( &mut self, - block_stream_cancel_handle: &CancelHandle, block: BlockWithTriggers, firehose_cursor: FirehoseCursor, ) -> Result { @@ -793,7 +805,6 @@ where proof_of_indexing, offchain_mods, processed_offchain_data_sources, - block_stream_cancel_handle, ) .await?; @@ -1114,7 +1125,6 @@ where async fn handle_stream_event( &mut self, event: Option, CancelableError>>, - cancel_handle: &CancelHandle, ) -> Result { let action = match event { Some(Ok(BlockStreamEvent::ProcessWasmBlock( @@ -1130,14 +1140,7 @@ where .stopwatch .start_section(PROCESS_WASM_BLOCK_SECTION_NAME); let res = self - .handle_process_wasm_block( - block_ptr.clone(), - block_time, - data, - handler, - cursor, - cancel_handle, - ) + .handle_process_wasm_block(block_ptr.clone(), block_time, data, handler, cursor) .await; let start = Instant::now(); self.handle_action(start, block_ptr, res).await? @@ -1148,8 +1151,7 @@ where .stream .stopwatch .start_section(PROCESS_BLOCK_SECTION_NAME); - self.handle_process_block(block, cursor, cancel_handle) - .await? + self.handle_process_block(block, cursor).await? } Some(Ok(BlockStreamEvent::Revert(revert_to_ptr, cursor))) => { let _section = self @@ -1161,7 +1163,7 @@ where } // Log and drop the errors from the block_stream // The block stream will continue attempting to produce blocks - Some(Err(e)) => self.handle_err(e, cancel_handle).await?, + Some(Err(e)) => self.handle_err(e).await?, // If the block stream ends, that means that there is no more indexing to do. // Typically block streams produce indefinitely, but tests are an example of finite block streams. None => Action::Stop, @@ -1304,24 +1306,19 @@ trait StreamEventHandler { block_data: Box<[u8]>, handler: String, cursor: FirehoseCursor, - cancel_handle: &CancelHandle, ) -> Result; async fn handle_process_block( &mut self, block: BlockWithTriggers, cursor: FirehoseCursor, - cancel_handle: &CancelHandle, ) -> Result; async fn handle_revert( &mut self, revert_to_ptr: BlockPtr, cursor: FirehoseCursor, ) -> Result; - async fn handle_err( - &mut self, - err: CancelableError, - cancel_handle: &CancelHandle, - ) -> Result; + async fn handle_err(&mut self, err: CancelableError) + -> Result; fn needs_restart(&self, revert_to_ptr: BlockPtr, subgraph_ptr: BlockPtr) -> bool; } @@ -1338,7 +1335,6 @@ where block_data: Box<[u8]>, handler: String, cursor: FirehoseCursor, - cancel_handle: &CancelHandle, ) -> Result { let logger = self.logger.new(o!( "block_number" => format!("{:?}", block_ptr.number), @@ -1403,7 +1399,6 @@ where proof_of_indexing, vec![], vec![], - cancel_handle, ) .await?; @@ -1427,7 +1422,6 @@ where &mut self, block: BlockWithTriggers, cursor: FirehoseCursor, - cancel_handle: &CancelHandle, ) -> Result { let block_ptr = block.ptr(); self.metrics @@ -1460,7 +1454,7 @@ where let start = Instant::now(); - let res = self.process_block(cancel_handle, block, cursor).await; + let res = self.process_block(block, cursor).await; self.handle_action(start, block_ptr, res).await } @@ -1519,9 +1513,8 @@ where async fn handle_err( &mut self, err: CancelableError, - cancel_handle: &CancelHandle, ) -> Result { - if cancel_handle.is_canceled() { + if self.is_canceled() { debug!(&self.logger, "Subgraph block stream shut down cleanly"); return Ok(Action::Stop); } From a8cacba38733f58680ac43c72670d8724dac7743 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 17:28:25 -0700 Subject: [PATCH 128/160] core, graph: Factor out the decision to cancel a buggy subgraph --- core/src/subgraph/runner.rs | 131 
+++++++++++----------- graph/src/components/subgraph/instance.rs | 4 - 2 files changed, 64 insertions(+), 71 deletions(-) diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index e69f23ee67d..b56d2289b3f 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -36,6 +36,7 @@ use graph::prelude::{ }; use graph::schema::EntityKey; use graph::slog::{debug, error, info, o, trace, warn, Logger}; +use graph::util::lfu_cache::EvictStats; use graph::util::{backoff::ExponentialBackoff, lfu_cache::LfuCache}; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -377,8 +378,19 @@ where proof_of_indexing: SharedProofOfIndexing, offchain_mods: Vec, processed_offchain_data_sources: Vec, - ) -> Result { - let has_errors = block_state.has_errors(); + ) -> Result<(), ProcessingError> { + fn log_evict_stats(logger: &Logger, evict_stats: &EvictStats) { + trace!(logger, "Entity cache statistics"; + "weight" => evict_stats.new_weight, + "evicted_weight" => evict_stats.evicted_weight, + "count" => evict_stats.new_count, + "evicted_count" => evict_stats.evicted_count, + "stale_update" => evict_stats.stale_update, + "hit_rate" => format!("{:.0}%", evict_stats.hit_rate_pct()), + "accesses" => evict_stats.accesses, + "evict_time_ms" => evict_stats.evict_time.as_millis()); + } + let BlockState { deterministic_errors, persisted_data_sources, @@ -386,6 +398,8 @@ where mut entity_cache, .. } = block_state; + let first_error = deterministic_errors.first().cloned(); + let has_errors = first_error.is_some(); // Avoid writing to store if block stream has been canceled if self.is_canceled() { @@ -412,20 +426,10 @@ where modifications: mut mods, entity_lfu_cache: cache, evict_stats, - } = entity_cache - .as_modifications(block_ptr.number) - .map_err(|e| ProcessingError::Unknown(e.into()))?; + } = entity_cache.as_modifications(block_ptr.number).classify()?; section.end(); - trace!(self.logger, "Entity cache statistics"; - "weight" => evict_stats.new_weight, - "evicted_weight" => evict_stats.evicted_weight, - "count" => evict_stats.new_count, - "evicted_count" => evict_stats.evicted_count, - "stale_update" => evict_stats.stale_update, - "hit_rate" => format!("{:.0}%", evict_stats.hit_rate_pct()), - "accesses" => evict_stats.accesses, - "evict_time_ms" => evict_stats.evict_time.as_millis()); + log_evict_stats(&self.logger, &evict_stats); mods.extend(offchain_mods); @@ -463,8 +467,6 @@ where ); } - let first_error = deterministic_errors.first().cloned(); - let is_caught_up = self.is_caught_up(&block_ptr).await.non_deterministic()?; self.inputs @@ -509,7 +511,30 @@ where .flush_metrics_to_store(&logger, block_ptr, self.inputs.deployment.id) .non_deterministic()?; - Ok(has_errors) + if has_errors { + self.maybe_cancel()?; + } + + Ok(()) + } + + /// Cancel the subgraph if `disable_fail_fast` is not set and it is not + /// synced + fn maybe_cancel(&self) -> Result<(), ProcessingError> { + // To prevent a buggy pending version from replacing a current version, if errors are + // present the subgraph will be unassigned. + let store = &self.inputs.store; + if !ENV_VARS.disable_fail_fast && !store.is_deployment_synced() { + store + .unassign_subgraph() + .map_err(|e| ProcessingError::Unknown(e.into()))?; + + // Use `Canceled` to avoiding setting the subgraph health to failed, an error was + // just transacted so it will be already be set to unhealthy. 
+ Err(ProcessingError::Canceled.into()) + } else { + Ok(()) + } } /// Processes a block and returns the updated context and a boolean flag indicating @@ -795,31 +820,17 @@ where .persisted_data_sources .extend(persisted_off_chain_data_sources); - let has_errors = self - .transact_block_state( - &logger, - block_ptr.clone(), - firehose_cursor.clone(), - block.timestamp(), - block_state, - proof_of_indexing, - offchain_mods, - processed_offchain_data_sources, - ) - .await?; - - // To prevent a buggy pending version from replacing a current version, if errors are - // present the subgraph will be unassigned. - let store = &self.inputs.store; - if has_errors && !ENV_VARS.disable_fail_fast && !store.is_deployment_synced() { - store - .unassign_subgraph() - .map_err(|e| ProcessingError::Unknown(e.into()))?; - - // Use `Canceled` to avoiding setting the subgraph health to failed, an error was - // just transacted so it will be already be set to unhealthy. - return Err(ProcessingError::Canceled); - } + self.transact_block_state( + &logger, + block_ptr.clone(), + firehose_cursor.clone(), + block.timestamp(), + block_state, + proof_of_indexing, + offchain_mods, + processed_offchain_data_sources, + ) + .await?; match needs_restart { true => Ok(Action::Restart), @@ -1389,31 +1400,17 @@ where } }; - let has_errors = self - .transact_block_state( - &logger, - block_ptr.clone(), - cursor.clone(), - block_time, - block_state, - proof_of_indexing, - vec![], - vec![], - ) - .await?; - - // To prevent a buggy pending version from replacing a current version, if errors are - // present the subgraph will be unassigned. - let store = &self.inputs.store; - if has_errors && !ENV_VARS.disable_fail_fast && !store.is_deployment_synced() { - store - .unassign_subgraph() - .map_err(|e| ProcessingError::Unknown(e.into()))?; - - // Use `Canceled` to avoiding setting the subgraph health to failed, an error was - // just transacted so it will be already be set to unhealthy. 
- return Err(ProcessingError::Canceled.into()); - }; + self.transact_block_state( + &logger, + block_ptr.clone(), + cursor.clone(), + block_time, + block_state, + proof_of_indexing, + vec![], + vec![], + ) + .await?; Ok(Action::Continue) } diff --git a/graph/src/components/subgraph/instance.rs b/graph/src/components/subgraph/instance.rs index 11b473a878d..c6d3f0c7e85 100644 --- a/graph/src/components/subgraph/instance.rs +++ b/graph/src/components/subgraph/instance.rs @@ -131,10 +131,6 @@ impl BlockState { write_capacity_remaining.saturating_sub(other.write_capacity_remaining); } - pub fn has_errors(&self) -> bool { - !self.deterministic_errors.is_empty() - } - pub fn has_created_data_sources(&self) -> bool { assert!(!self.in_handler); !self.created_data_sources.is_empty() From c3194635c7ef07832671f2ca234ce5200cc43cc3 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 17:43:38 -0700 Subject: [PATCH 129/160] core: Factor refetching blocks into a method --- core/src/subgraph/runner.rs | 66 ++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index b56d2289b3f..c6347aba664 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -689,29 +689,18 @@ where vec![], )); - let block: Arc = if self.inputs.chain.is_refetch_block_required() { - let cur = firehose_cursor.clone(); - let log = logger.cheap_clone(); - let chain = self.inputs.chain.cheap_clone(); - Arc::new( - retry( - "refetch firehose block after dynamic datasource was added", - &logger, - ) - .limit(5) - .no_timeout() - .run(move || { - let cur = cur.clone(); - let log = log.cheap_clone(); - let chain = chain.cheap_clone(); - async move { chain.refetch_firehose_block(&log, cur).await } - }) - .await - .non_deterministic()?, - ) - } else { - block.cheap_clone() - }; + // TODO: We have to pass a reference to `block` to + // `refetch_block`, otherwise the call to + // handle_offchain_triggers below gets an error that `block` + // has moved. That is extremely fishy since it means that + // `handle_offchain_triggers` uses the non-refetched block + // + // It's also not clear why refetching needs to happen inside + // the loop; will firehose really return something diffrent + // each time even though the cursor doesn't change? + let block = self + .refetch_block(&logger, &block, &firehose_cursor) + .await?; // Reprocess the triggers from this block that match the new data sources let block_with_triggers = self @@ -838,6 +827,37 @@ where } } + /// Refetch the block if it that is needed. Otherwise return the block as is. 
+ async fn refetch_block( + &mut self, + logger: &Logger, + block: &Arc, + firehose_cursor: &FirehoseCursor, + ) -> Result, ProcessingError> { + if !self.inputs.chain.is_refetch_block_required() { + return Ok(block.cheap_clone()); + } + + let cur = firehose_cursor.clone(); + let log = logger.cheap_clone(); + let chain = self.inputs.chain.cheap_clone(); + let block = retry( + "refetch firehose block after dynamic datasource was added", + logger, + ) + .limit(5) + .no_timeout() + .run(move || { + let cur = cur.clone(); + let log = log.cheap_clone(); + let chain = chain.cheap_clone(); + async move { chain.refetch_firehose_block(&log, cur).await } + }) + .await + .non_deterministic()?; + Ok(Arc::new(block)) + } + async fn process_wasm_block( &mut self, proof_of_indexing: &SharedProofOfIndexing, From f4232ec640da630f2830f9efc83898a0b45cc69b Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 17:48:29 -0700 Subject: [PATCH 130/160] core: A little more noise reduction --- core/src/subgraph/runner.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index c6347aba664..925d61101af 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -544,6 +544,14 @@ where block: BlockWithTriggers, firehose_cursor: FirehoseCursor, ) -> Result { + fn log_triggers_found(logger: &Logger, triggers: &[Trigger]) { + if triggers.len() == 1 { + info!(logger, "1 trigger found in this block"); + } else if triggers.len() > 1 { + info!(logger, "{} triggers found in this block", triggers.len()); + } + } + let triggers = block.trigger_data; let block = Arc::new(block.block); let block_ptr = block.ptr(); @@ -711,19 +719,7 @@ where .non_deterministic()?; let triggers = block_with_triggers.trigger_data; - - if triggers.len() == 1 { - info!( - &logger, - "1 trigger found in this block for the new data sources" - ); - } else if triggers.len() > 1 { - info!( - &logger, - "{} triggers found in this block for the new data sources", - triggers.len() - ); - } + log_triggers_found(&logger, &triggers); // Add entity operations for the new data sources to the block state // and add runtimes for the data sources to the subgraph instance. 
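The refetch_block helper above relies on graph's retry builder with limit(5) and no_timeout(), exactly as the diff shows. As a rough mental model only, and not the real implementation (the sketch below is synchronous, uses the made-up name retry_up_to, and omits the backoff and logging the real helper provides), a bounded retry reduces to:

// Try an operation up to `limit` times, returning the last error if all
// attempts fail. Purely illustrative.
fn retry_up_to<T, E, F>(limit: usize, mut op: F) -> Result<T, E>
where
    F: FnMut() -> Result<T, E>,
{
    let mut last_err = None;
    for _ in 0..limit {
        match op() {
            Ok(value) => return Ok(value),
            Err(e) => last_err = Some(e),
        }
    }
    Err(last_err.expect("limit must be greater than zero"))
}

fn main() {
    let mut attempts = 0;
    let refetched: Result<&str, &str> = retry_up_to(5, || {
        attempts += 1;
        if attempts < 3 { Err("transient firehose error") } else { Ok("block") }
    });
    assert_eq!(refetched, Ok("block"));
    assert_eq!(attempts, 3);
}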
From 2f365c3d5df1fad750421fde6ba2446ab5f43d84 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 18:31:44 -0700 Subject: [PATCH 131/160] core: Reduce some noise around match_and_decode_many --- core/src/subgraph/runner.rs | 57 +++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index 925d61101af..fcd8fa30fbb 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -16,6 +16,7 @@ use graph::blockchain::{ }; use graph::components::store::{EmptyStore, GetScope, ReadStore, StoredDynamicDataSource}; use graph::components::subgraph::InstanceDSTemplate; +use graph::components::trigger_processor::RunnableTriggers; use graph::components::{ store::ModificationsAndCache, subgraph::{MappingError, PoICausalityRegion, ProofOfIndexing, SharedProofOfIndexing}, @@ -537,6 +538,33 @@ where } } + async fn match_and_decode_many<'a, F>( + &'a self, + logger: &Logger, + block: &Arc, + triggers: Vec>, + hosts_filter: F, + ) -> Result>, MappingError> + where + F: Fn(&TriggerData) -> Box + Send + 'a>, + { + let triggers = triggers.into_iter().map(|t| match t { + Trigger::Chain(t) => TriggerData::Onchain(t), + Trigger::Subgraph(t) => TriggerData::Subgraph(t), + }); + + self.ctx + .decoder + .match_and_decode_many( + &logger, + &block, + triggers, + hosts_filter, + &self.metrics.subgraph, + ) + .await + } + /// Processes a block and returns the updated context and a boolean flag indicating /// whether new dynamic data sources have been added to the subgraph. async fn process_block( @@ -584,18 +612,7 @@ where // Match and decode all triggers in the block let hosts_filter = |trigger: &TriggerData| self.ctx.instance.hosts_for_trigger(trigger); let match_res = self - .ctx - .decoder - .match_and_decode_many( - &logger, - &block, - triggers.into_iter().map(|t| match t { - Trigger::Chain(t) => TriggerData::Onchain(t), - Trigger::Subgraph(t) => TriggerData::Subgraph(t), - }), - hosts_filter, - &self.metrics.subgraph, - ) + .match_and_decode_many(&logger, &block, triggers, hosts_filter) .await; // Process events one after the other, passing in entity operations @@ -727,19 +744,11 @@ where // Process the triggers in each host in the same order the // corresponding data sources have been created. 
+ let hosts_filter = |_: &'_ TriggerData| -> Box + Send> { + Box::new(runtime_hosts.iter().map(Arc::as_ref)) + }; let match_res: Result, _> = self - .ctx - .decoder - .match_and_decode_many( - &logger, - &block, - triggers.into_iter().map(|t| match t { - Trigger::Chain(t) => TriggerData::Onchain(t), - Trigger::Subgraph(_) => unreachable!(), // TODO(krishna): Re-evaulate this - }), - |_| Box::new(runtime_hosts.iter().map(Arc::as_ref)), - &self.metrics.subgraph, - ) + .match_and_decode_many(&logger, &block, triggers, hosts_filter) .await; let mut res = Ok(block_state); From a10ac6dac7f115a1794663ee183d578cdc9d5b47 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 15 Apr 2025 14:55:19 -0700 Subject: [PATCH 132/160] graph, store: 'graphman info': Do not get confused by copies Because the code in primary::queries::fill_assignments used the deployment hash to reference a deployment, it would get confused by copies since for those several deployments have the same hash --- graph/src/components/store/mod.rs | 33 ++++++++++++++++++++++++++++++- store/postgres/src/primary.rs | 15 +++++++------- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/graph/src/components/store/mod.rs b/graph/src/components/store/mod.rs index efe16c90ee6..a4ed96b0ba1 100644 --- a/graph/src/components/store/mod.rs +++ b/graph/src/components/store/mod.rs @@ -3,6 +3,11 @@ mod err; mod traits; pub mod write; +use diesel::deserialize::FromSql; +use diesel::pg::Pg; +use diesel::serialize::{Output, ToSql}; +use diesel::sql_types::Integer; +use diesel_derives::{AsExpression, FromSqlRow}; pub use entity_cache::{EntityCache, EntityLfuCache, GetScope, ModificationsAndCache}; use slog::Logger; @@ -691,7 +696,20 @@ pub struct StoredDynamicDataSource { /// identifier only has meaning in the context of a specific instance of /// graph-node. Only store code should ever construct or consume it; all /// other code passes it around as an opaque token. 
-#[derive(Copy, Clone, CheapClone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[derive( + Copy, + Clone, + CheapClone, + Debug, + Serialize, + Deserialize, + PartialEq, + Eq, + Hash, + AsExpression, + FromSqlRow, +)] +#[diesel(sql_type = Integer)] pub struct DeploymentId(pub i32); impl Display for DeploymentId { @@ -706,6 +724,19 @@ impl DeploymentId { } } +impl FromSql for DeploymentId { + fn from_sql(bytes: diesel::pg::PgValue) -> diesel::deserialize::Result { + let id = >::from_sql(bytes)?; + Ok(DeploymentId(id)) + } +} + +impl ToSql for DeploymentId { + fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result { + >::to_sql(&self.0, out) + } +} + /// A unique identifier for a deployment that specifies both its external /// identifier (`hash`) and its unique internal identifier (`id`) which /// ensures we are talking about a unique location for the deployment's data diff --git a/store/postgres/src/primary.rs b/store/postgres/src/primary.rs index 8d49153d214..6b22b8c8e35 100644 --- a/store/postgres/src/primary.rs +++ b/store/postgres/src/primary.rs @@ -450,6 +450,7 @@ mod queries { use diesel::sql_types::Text; use graph::prelude::NodeId; use graph::{ + components::store::DeploymentId as GraphDeploymentId, data::subgraph::status, internal_error, prelude::{DeploymentHash, StoreError, SubgraphName}, @@ -646,18 +647,18 @@ mod queries { conn: &mut PgConnection, infos: &mut [status::Info], ) -> Result<(), StoreError> { - let ids: Vec<_> = infos.iter().map(|info| &info.subgraph).collect(); + let ids: Vec<_> = infos.iter().map(|info| &info.id).collect(); let nodes: HashMap<_, _> = a::table .inner_join(ds::table.on(ds::id.eq(a::id))) - .filter(ds::subgraph.eq_any(ids)) - .select((ds::subgraph, a::node_id, a::paused_at.is_not_null())) - .load::<(String, String, bool)>(conn)? + .filter(ds::id.eq_any(ids)) + .select((ds::id, a::node_id, a::paused_at.is_not_null())) + .load::<(GraphDeploymentId, String, bool)>(conn)? 
.into_iter() - .map(|(subgraph, node, paused)| (subgraph, (node, paused))) + .map(|(id, node, paused)| (id, (node, paused))) .collect(); for info in infos { - info.node = nodes.get(&info.subgraph).map(|(node, _)| node.clone()); - info.paused = nodes.get(&info.subgraph).map(|(_, paused)| *paused); + info.node = nodes.get(&info.id).map(|(node, _)| node.clone()); + info.paused = nodes.get(&info.id).map(|(_, paused)| *paused); } Ok(()) } From 2985a10084a03398c2460a1ce5f5b3ad7fb21a31 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 8 Apr 2025 15:48:17 -0700 Subject: [PATCH 133/160] graph: Introduce a StoreResult --- graph/src/components/store/err.rs | 2 ++ graph/src/components/store/mod.rs | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/graph/src/components/store/err.rs b/graph/src/components/store/err.rs index 264c1b80df2..b8743658030 100644 --- a/graph/src/components/store/err.rs +++ b/graph/src/components/store/err.rs @@ -7,6 +7,8 @@ use diesel::result::Error as DieselError; use thiserror::Error; use tokio::task::JoinError; +pub type StoreResult = Result; + #[derive(Error, Debug)] pub enum StoreError { #[error("store error: {0:#}")] diff --git a/graph/src/components/store/mod.rs b/graph/src/components/store/mod.rs index a4ed96b0ba1..88cc49d024a 100644 --- a/graph/src/components/store/mod.rs +++ b/graph/src/components/store/mod.rs @@ -12,7 +12,7 @@ pub use entity_cache::{EntityCache, EntityLfuCache, GetScope, ModificationsAndCa use slog::Logger; pub use super::subgraph::Entity; -pub use err::StoreError; +pub use err::{StoreError, StoreResult}; use itertools::Itertools; use strum_macros::Display; pub use traits::*; From ddf9042babbd7d5cdffff6878e320e6302c2a235 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 16 Apr 2025 10:54:04 -0700 Subject: [PATCH 134/160] node: Extract some formatting helpers into a module --- node/src/manager/commands/copy.rs | 28 ++--------- node/src/manager/commands/prune.rs | 7 +-- node/src/manager/commands/stats.rs | 16 +----- node/src/manager/fmt.rs | 78 ++++++++++++++++++++++++++++++ node/src/manager/mod.rs | 1 + 5 files changed, 87 insertions(+), 43 deletions(-) create mode 100644 node/src/manager/fmt.rs diff --git a/node/src/manager/commands/copy.rs b/node/src/manager/commands/copy.rs index a7857476c58..c09630ae261 100644 --- a/node/src/manager/commands/copy.rs +++ b/node/src/manager/commands/copy.rs @@ -1,5 +1,5 @@ use diesel::{ExpressionMethods, JoinOnDsl, OptionalExtension, QueryDsl, RunQueryDsl}; -use std::{collections::HashMap, sync::Arc, time::SystemTime}; +use std::{collections::HashMap, sync::Arc}; use graph::{ components::store::{BlockStore as _, DeploymentId, DeploymentLocator}, @@ -19,8 +19,8 @@ use graph_store_postgres::{ }; use graph_store_postgres::{ConnectionPool, Shard, Store, SubgraphStore}; -use crate::manager::deployment::DeploymentSearch; use crate::manager::display::List; +use crate::manager::{deployment::DeploymentSearch, fmt}; type UtcDateTime = DateTime; @@ -260,26 +260,6 @@ pub fn status(pools: HashMap, dst: &DeploymentSearch) -> use catalog::active_copies as ac; use catalog::deployment_schemas as ds; - fn duration(start: &UtcDateTime, end: &Option) -> String { - let start = *start; - let end = *end; - - let end = end.unwrap_or(UtcDateTime::from(SystemTime::now())); - let duration = end - start; - - human_duration(duration) - } - - fn human_duration(duration: Duration) -> String { - if duration.num_seconds() < 5 { - format!("{}ms", duration.num_milliseconds()) - } else if duration.num_minutes() < 5 { 
- format!("{}s", duration.num_seconds()) - } else { - format!("{}m", duration.num_minutes()) - } - } - let primary = pools .get(&*PRIMARY_SHARD) .ok_or_else(|| anyhow!("can not find deployment with id {}", dst))?; @@ -336,7 +316,7 @@ pub fn status(pools: HashMap, dst: &DeploymentSearch) -> state.dst.to_string(), state.target_block_number.to_string(), on_sync.to_str().to_string(), - duration(&state.started_at, &state.finished_at), + fmt::duration(&state.started_at, &state.finished_at), progress, ]; match (cancelled_at, state.cancelled_at) { @@ -378,7 +358,7 @@ pub fn status(pools: HashMap, dst: &DeploymentSearch) -> table.next_vid, table.target_vid, table.batch_size, - human_duration(Duration::milliseconds(table.duration_ms)), + fmt::human_duration(Duration::milliseconds(table.duration_ms)), ); } diff --git a/node/src/manager/commands/prune.rs b/node/src/manager/commands/prune.rs index dbf114453e8..4ca058525e2 100644 --- a/node/src/manager/commands/prune.rs +++ b/node/src/manager/commands/prune.rs @@ -16,10 +16,7 @@ use graph::{ }; use graph_store_postgres::{ConnectionPool, Store}; -use crate::manager::{ - commands::stats::{abbreviate_table_name, show_stats}, - deployment::DeploymentSearch, -}; +use crate::manager::{commands::stats::show_stats, deployment::DeploymentSearch, fmt}; struct Progress { start: Instant, @@ -66,7 +63,7 @@ fn print_batch( }; print!( "\r{:<30} | {:>10} | {:>9}s {phase}", - abbreviate_table_name(table, 30), + fmt::abbreviate(table, 30), total_rows, elapsed.as_secs() ); diff --git a/node/src/manager/commands/stats.rs b/node/src/manager/commands/stats.rs index bb3a928b1ad..abb02fdb77c 100644 --- a/node/src/manager/commands/stats.rs +++ b/node/src/manager/commands/stats.rs @@ -3,6 +3,7 @@ use std::collections::HashSet; use std::sync::Arc; use crate::manager::deployment::DeploymentSearch; +use crate::manager::fmt; use diesel::r2d2::ConnectionManager; use diesel::r2d2::PooledConnection; use diesel::PgConnection; @@ -51,19 +52,6 @@ pub async fn account_like( Ok(()) } -pub fn abbreviate_table_name(table: &str, size: usize) -> String { - if table.len() > size { - let fragment = size / 2 - 2; - let last = table.len() - fragment; - let mut table = table.to_string(); - table.replace_range(fragment..last, ".."); - let table = table.trim().to_string(); - table - } else { - table.to_string() - } -} - pub fn show_stats( stats: &[VersionStats], account_like: HashSet, @@ -83,7 +71,7 @@ pub fn show_stats( fn print_stats(s: &VersionStats, account_like: bool) { println!( "{:<26} {:3} | {:>10} | {:>10} | {:>5.1}%", - abbreviate_table_name(&s.tablename, 26), + fmt::abbreviate(&s.tablename, 26), if account_like { "(a)" } else { " " }, s.entities, s.versions, diff --git a/node/src/manager/fmt.rs b/node/src/manager/fmt.rs new file mode 100644 index 00000000000..549d173ca1a --- /dev/null +++ b/node/src/manager/fmt.rs @@ -0,0 +1,78 @@ +use std::time::SystemTime; + +use graph::prelude::chrono::{DateTime, Duration, Local, Utc}; + +pub const NULL: &str = "ø"; +const CHECK: &str = "✓"; + +pub fn null() -> String { + NULL.to_string() +} + +pub fn check() -> String { + CHECK.to_string() +} + +pub trait MapOrNull { + fn map_or_null(&self, f: F) -> String + where + F: FnOnce(&T) -> String; +} + +impl MapOrNull for Option { + fn map_or_null(&self, f: F) -> String + where + F: FnOnce(&T) -> String, + { + self.as_ref() + .map(|value| f(value)) + .unwrap_or_else(|| NULL.to_string()) + } +} + +/// Return the duration from `start` to `end` formatted using +/// `human_duration`. 
Use now if `end` is `None` +pub fn duration(start: &DateTime, end: &Option>) -> String { + let start = *start; + let end = *end; + + let end = end.unwrap_or(DateTime::::from(SystemTime::now())); + let duration = end - start; + + human_duration(duration) +} + +/// Format a duration using ms/s/m as units depending on how long the +/// duration was +pub fn human_duration(duration: Duration) -> String { + if duration.num_seconds() < 5 { + format!("{}ms", duration.num_milliseconds()) + } else if duration.num_minutes() < 5 { + format!("{}s", duration.num_seconds()) + } else { + format!("{}m", duration.num_minutes()) + } +} + +/// Abbreviate a long name to fit into `size` characters. The abbreviation +/// is done by replacing the middle of the name with `..`. For example, if +/// `name` is `foo_bar_baz` and `size` is 10, the result will be +/// `foo.._baz`. If the name is shorter than `size`, it is returned +/// unchanged. +pub fn abbreviate(name: &str, size: usize) -> String { + if name.len() > size { + let fragment = size / 2 - 2; + let last = name.len() - fragment; + let mut name = name.to_string(); + name.replace_range(fragment..last, ".."); + let table = name.trim().to_string(); + table + } else { + name.to_string() + } +} + +pub fn date_time(date: &DateTime) -> String { + let date = DateTime::::from(*date); + date.format("%Y-%m-%d %H:%M:%S%Z").to_string() +} diff --git a/node/src/manager/mod.rs b/node/src/manager/mod.rs index 6a332653ca8..d95e5fbadc1 100644 --- a/node/src/manager/mod.rs +++ b/node/src/manager/mod.rs @@ -8,6 +8,7 @@ pub mod color; pub mod commands; pub mod deployment; mod display; +pub mod fmt; pub mod prompt; /// A dummy subscription manager that always panics From 4a15ec43a4b037b60e0841d72cb00259cc8ab138 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 8 Apr 2025 15:48:35 -0700 Subject: [PATCH 135/160] store: Track pruning progress in the database Also adds graphman commands to view the progress --- graph/src/components/store/mod.rs | 4 + node/src/bin/manager.rs | 154 ++++-- node/src/manager/commands/prune.rs | 242 ++++++++- .../down.sql | 2 + .../2025-04-08-224710_add_prune_state/up.sql | 60 +++ store/postgres/src/deployment_store.rs | 15 +- store/postgres/src/lib.rs | 1 + store/postgres/src/relational.rs | 2 +- store/postgres/src/relational/prune.rs | 486 +++++++++++++++++- store/postgres/src/subgraph_store.rs | 11 + 10 files changed, 903 insertions(+), 74 deletions(-) create mode 100644 store/postgres/migrations/2025-04-08-224710_add_prune_state/down.sql create mode 100644 store/postgres/migrations/2025-04-08-224710_add_prune_state/up.sql diff --git a/graph/src/components/store/mod.rs b/graph/src/components/store/mod.rs index 88cc49d024a..ab30caeda75 100644 --- a/graph/src/components/store/mod.rs +++ b/graph/src/components/store/mod.rs @@ -1003,6 +1003,9 @@ pub struct PruneRequest { pub earliest_block: BlockNumber, /// The last block that contains final entities not subject to a reorg pub final_block: BlockNumber, + /// The first block for which the deployment contained entities when the + /// request was made + pub first_block: BlockNumber, /// The latest block, i.e., the subgraph head pub latest_block: BlockNumber, /// Use the rebuild strategy when removing more than this fraction of @@ -1066,6 +1069,7 @@ impl PruneRequest { earliest_block, final_block, latest_block, + first_block, rebuild_threshold, delete_threshold, }) diff --git a/node/src/bin/manager.rs b/node/src/bin/manager.rs index 803625a6021..81c794485d4 100644 --- a/node/src/bin/manager.rs +++ 
b/node/src/bin/manager.rs @@ -297,35 +297,13 @@ pub enum Command { #[clap(subcommand)] Index(IndexCommand), - /// Prune a deployment + /// Prune subgraphs by removing old entity versions /// /// Keep only entity versions that are needed to respond to queries at /// block heights that are within `history` blocks of the subgraph head; /// all other entity versions are removed. - /// - /// Unless `--once` is given, this setting is permanent and the subgraph - /// will periodically be pruned to remove history as the subgraph head - /// moves forward. - Prune { - /// The deployment to prune (see `help info`) - deployment: DeploymentSearch, - /// Prune by rebuilding tables when removing more than this fraction - /// of history. Defaults to GRAPH_STORE_HISTORY_REBUILD_THRESHOLD - #[clap(long, short)] - rebuild_threshold: Option, - /// Prune by deleting when removing more than this fraction of - /// history but less than rebuild_threshold. Defaults to - /// GRAPH_STORE_HISTORY_DELETE_THRESHOLD - #[clap(long, short)] - delete_threshold: Option, - /// How much history to keep in blocks. Defaults to - /// GRAPH_MIN_HISTORY_BLOCKS - #[clap(long, short = 'y')] - history: Option, - /// Prune only this once - #[clap(long, short)] - once: bool, - }, + #[clap(subcommand)] + Prune(PruneCommand), /// General database management #[clap(subcommand)] @@ -694,6 +672,67 @@ pub enum StatsCommand { }, } +#[derive(Clone, Debug, Subcommand)] +pub enum PruneCommand { + /// Prune a deployment in the foreground + /// + /// Unless `--once` is given, this setting is permanent and the subgraph + /// will periodically be pruned to remove history as the subgraph head + /// moves forward. + Run { + /// The deployment to prune (see `help info`) + deployment: DeploymentSearch, + /// Prune by rebuilding tables when removing more than this fraction + /// of history. Defaults to GRAPH_STORE_HISTORY_REBUILD_THRESHOLD + #[clap(long, short)] + rebuild_threshold: Option, + /// Prune by deleting when removing more than this fraction of + /// history but less than rebuild_threshold. Defaults to + /// GRAPH_STORE_HISTORY_DELETE_THRESHOLD + #[clap(long, short)] + delete_threshold: Option, + /// How much history to keep in blocks. Defaults to + /// GRAPH_MIN_HISTORY_BLOCKS + #[clap(long, short = 'y')] + history: Option, + /// Prune only this once + #[clap(long, short)] + once: bool, + }, + /// Prune a deployment in the background + /// + /// Set the amount of history the subgraph should retain. The actual + /// data removal happens in the background and can be monitored with + /// `prune status`. It can take several minutes of the first pruning to + /// start, during which time `prune status` will not return any + /// information + Set { + /// The deployment to prune (see `help info`) + deployment: DeploymentSearch, + /// Prune by rebuilding tables when removing more than this fraction + /// of history. Defaults to GRAPH_STORE_HISTORY_REBUILD_THRESHOLD + #[clap(long, short)] + rebuild_threshold: Option, + /// Prune by deleting when removing more than this fraction of + /// history but less than rebuild_threshold. Defaults to + /// GRAPH_STORE_HISTORY_DELETE_THRESHOLD + #[clap(long, short)] + delete_threshold: Option, + /// How much history to keep in blocks. 
Defaults to + /// GRAPH_MIN_HISTORY_BLOCKS + #[clap(long, short = 'y')] + history: Option, + }, + /// Show the status of a pruning operation + Status { + /// The number of the pruning run + #[clap(long, short)] + run: Option, + /// The deployment to check (see `help info`) + deployment: DeploymentSearch, + }, +} + #[derive(Clone, Debug, Subcommand)] pub enum IndexCommand { /// Creates a new database index. @@ -1613,25 +1652,52 @@ async fn main() -> anyhow::Result<()> { } } } - Prune { - deployment, - history, - rebuild_threshold, - delete_threshold, - once, - } => { - let (store, primary_pool) = ctx.store_and_primary(); - let history = history.unwrap_or(ENV_VARS.min_history_blocks.try_into()?); - commands::prune::run( - store, - primary_pool, - deployment, - history, - rebuild_threshold, - delete_threshold, - once, - ) - .await + Prune(cmd) => { + use PruneCommand::*; + match cmd { + Run { + deployment, + history, + rebuild_threshold, + delete_threshold, + once, + } => { + let (store, primary_pool) = ctx.store_and_primary(); + let history = history.unwrap_or(ENV_VARS.min_history_blocks.try_into()?); + commands::prune::run( + store, + primary_pool, + deployment, + history, + rebuild_threshold, + delete_threshold, + once, + ) + .await + } + Set { + deployment, + rebuild_threshold, + delete_threshold, + history, + } => { + let (store, primary_pool) = ctx.store_and_primary(); + let history = history.unwrap_or(ENV_VARS.min_history_blocks.try_into()?); + commands::prune::set( + store, + primary_pool, + deployment, + history, + rebuild_threshold, + delete_threshold, + ) + .await + } + Status { run, deployment } => { + let (store, primary_pool) = ctx.store_and_primary(); + commands::prune::status(store, primary_pool, deployment, run).await + } + } } Drop { deployment, diff --git a/node/src/manager/commands/prune.rs b/node/src/manager/commands/prune.rs index 4ca058525e2..0fc5538fc71 100644 --- a/node/src/manager/commands/prune.rs +++ b/node/src/manager/commands/prune.rs @@ -6,7 +6,7 @@ use std::{ }; use graph::{ - components::store::{PrunePhase, PruneRequest}, + components::store::{DeploymentLocator, PrunePhase, PruneRequest}, env::ENV_VARS, }; use graph::{ @@ -14,9 +14,16 @@ use graph::{ data::subgraph::status, prelude::{anyhow, BlockNumber}, }; -use graph_store_postgres::{ConnectionPool, Store}; +use graph_store_postgres::{ + command_support::{Phase, PruneTableState}, + ConnectionPool, Store, +}; -use crate::manager::{commands::stats::show_stats, deployment::DeploymentSearch, fmt}; +use crate::manager::{ + commands::stats::show_stats, + deployment::DeploymentSearch, + fmt::{self, MapOrNull as _}, +}; struct Progress { start: Instant, @@ -153,15 +160,19 @@ impl PruneReporter for Progress { } } -pub async fn run( - store: Arc, +struct Args { + history: BlockNumber, + deployment: DeploymentLocator, + earliest_block: BlockNumber, + latest_block: BlockNumber, +} + +fn check_args( + store: &Arc, primary_pool: ConnectionPool, search: DeploymentSearch, history: usize, - rebuild_threshold: Option, - delete_threshold: Option, - once: bool, -) -> Result<(), anyhow::Error> { +) -> Result { let history = history as BlockNumber; let deployment = search.locate_unique(&primary_pool)?; let mut info = store @@ -178,22 +189,38 @@ pub async fn run( .chains .pop() .ok_or_else(|| anyhow!("deployment {} does not index any chain", deployment))?; - let latest = status.latest_block.map(|ptr| ptr.number()).unwrap_or(0); - if latest <= history { - return Err(anyhow!("deployment {deployment} has only indexed up to block 
{latest} and we can't preserve {history} blocks of history")); + let latest_block = status.latest_block.map(|ptr| ptr.number()).unwrap_or(0); + if latest_block <= history { + return Err(anyhow!("deployment {deployment} has only indexed up to block {latest_block} and we can't preserve {history} blocks of history")); } + Ok(Args { + history, + deployment, + earliest_block: status.earliest_block_number, + latest_block, + }) +} - println!("prune {deployment}"); - println!(" latest: {latest}"); - println!(" final: {}", latest - ENV_VARS.reorg_threshold()); - println!(" earliest: {}\n", latest - history); +async fn first_prune( + store: &Arc, + args: &Args, + rebuild_threshold: Option, + delete_threshold: Option, +) -> Result<(), anyhow::Error> { + println!("prune {}", args.deployment); + println!( + " range: {} - {} ({} blocks)", + args.earliest_block, + args.latest_block, + args.latest_block - args.earliest_block + ); let mut req = PruneRequest::new( - &deployment, - history, + &args.deployment, + args.history, ENV_VARS.reorg_threshold(), - status.earliest_block_number, - latest, + args.earliest_block, + args.latest_block, )?; if let Some(rebuild_threshold) = rebuild_threshold { req.rebuild_threshold = rebuild_threshold; @@ -206,17 +233,186 @@ pub async fn run( store .subgraph_store() - .prune(reporter, &deployment, req) + .prune(reporter, &args.deployment, req) .await?; + Ok(()) +} + +async fn run_inner( + store: Arc, + primary_pool: ConnectionPool, + search: DeploymentSearch, + history: usize, + rebuild_threshold: Option, + delete_threshold: Option, + once: bool, + do_first_prune: bool, +) -> Result<(), anyhow::Error> { + let args = check_args(&store, primary_pool, search, history)?; + + if do_first_prune { + first_prune(&store, &args, rebuild_threshold, delete_threshold).await?; + } // Only after everything worked out, make the history setting permanent if !once { store.subgraph_store().set_history_blocks( - &deployment, - history, + &args.deployment, + args.history, ENV_VARS.reorg_threshold(), )?; } Ok(()) } + +pub async fn run( + store: Arc, + primary_pool: ConnectionPool, + search: DeploymentSearch, + history: usize, + rebuild_threshold: Option, + delete_threshold: Option, + once: bool, +) -> Result<(), anyhow::Error> { + run_inner( + store, + primary_pool, + search, + history, + rebuild_threshold, + delete_threshold, + once, + true, + ) + .await +} + +pub async fn set( + store: Arc, + primary_pool: ConnectionPool, + search: DeploymentSearch, + history: usize, + rebuild_threshold: Option, + delete_threshold: Option, +) -> Result<(), anyhow::Error> { + run_inner( + store, + primary_pool, + search, + history, + rebuild_threshold, + delete_threshold, + false, + false, + ) + .await +} + +pub async fn status( + store: Arc, + primary_pool: ConnectionPool, + search: DeploymentSearch, + run: Option, +) -> Result<(), anyhow::Error> { + fn percentage(left: Option, x: Option, right: Option) -> String { + match (left, x, right) { + (Some(left), Some(x), Some(right)) => { + let range = right - left; + if range == 0 { + return fmt::null(); + } + let percent = (x - left) as f64 / range as f64 * 100.0; + format!("{:.0}%", percent.min(100.0)) + } + _ => fmt::null(), + } + } + + let deployment = search.locate_unique(&primary_pool)?; + + let viewer = store.subgraph_store().prune_viewer(&deployment).await?; + let runs = viewer.runs()?; + if runs.is_empty() { + return Err(anyhow!("No prune runs found for deployment {deployment}")); + } + let run = run.unwrap_or(*runs.last().unwrap()); + let Some((state, 
table_states)) = viewer.state(run)? else { + let runs = match runs.len() { + 0 => unreachable!("we checked that runs is not empty"), + 1 => format!("There is only one prune run #{}", runs[0]), + _ => format!( + "Only prune runs #{} up to #{} exist", + runs[0], + runs.last().unwrap() + ), + }; + return Err(anyhow!( + "No information about prune run #{run} found for deployment {deployment}. {runs}" + )); + }; + println!("prune {deployment} (run #{run})"); + println!( + " range: {} - {} ({} blocks, should keep {} blocks)", + state.first_block, + state.latest_block, + state.latest_block - state.first_block, + state.history_blocks + ); + println!(" started: {}", fmt::date_time(&state.started_at)); + match &state.finished_at { + Some(finished_at) => println!(" finished: {}", fmt::date_time(finished_at)), + None => println!(" finished: still running"), + } + println!( + " duration: {}", + fmt::duration(&state.started_at, &state.finished_at) + ); + + println!( + "\n{:^30} | {:^22} | {:^8} | {:^11} | {:^8}", + "table", "status", "rows", "batch_size", "duration" + ); + println!( + "{:-^30}-+-{:-^22}-+-{:-^8}-+-{:-^11}-+-{:-^8}", + "", "", "", "", "" + ); + for ts in table_states { + #[allow(unused_variables)] + let PruneTableState { + vid: _, + id: _, + run: _, + table_name, + strategy, + phase, + start_vid, + final_vid, + nonfinal_vid, + rows, + next_vid, + batch_size, + started_at, + finished_at, + } = ts; + + let complete = match phase { + Phase::Queued | Phase::Started => "0%".to_string(), + Phase::CopyFinal => percentage(start_vid, next_vid, final_vid), + Phase::CopyNonfinal | Phase::Delete => percentage(start_vid, next_vid, nonfinal_vid), + Phase::Done => fmt::check(), + Phase::Unknown => fmt::null(), + }; + + let table_name = fmt::abbreviate(&table_name, 30); + let rows = rows.map_or_null(|rows| rows.to_string()); + let batch_size = batch_size.map_or_null(|b| b.to_string()); + let duration = started_at.map_or_null(|s| fmt::duration(&s, &finished_at)); + let phase = phase.as_str(); + println!( + "{table_name:<30} | {:<15} {complete:>6} | {rows:>8} | {batch_size:>11} | {duration:>8}", + format!("{strategy}/{phase}") + ); + } + Ok(()) +} diff --git a/store/postgres/migrations/2025-04-08-224710_add_prune_state/down.sql b/store/postgres/migrations/2025-04-08-224710_add_prune_state/down.sql new file mode 100644 index 00000000000..324bc18f154 --- /dev/null +++ b/store/postgres/migrations/2025-04-08-224710_add_prune_state/down.sql @@ -0,0 +1,2 @@ +drop table subgraphs.prune_table_state; +drop table subgraphs.prune_state; diff --git a/store/postgres/migrations/2025-04-08-224710_add_prune_state/up.sql b/store/postgres/migrations/2025-04-08-224710_add_prune_state/up.sql new file mode 100644 index 00000000000..8c767ed7384 --- /dev/null +++ b/store/postgres/migrations/2025-04-08-224710_add_prune_state/up.sql @@ -0,0 +1,60 @@ +create table subgraphs.prune_state( + -- diesel can't deal with composite primary keys + vid int primary key + generated always as identity, + + -- id of the deployment + id int not null, + -- how many times the deployment has been pruned + run int not null, + + -- from PruneRequest + first_block int not null, + final_block int not null, + latest_block int not null, + history_blocks int not null, + + started_at timestamptz not null, + finished_at timestamptz, + + constraint prune_state_id_run_uq unique(id, run) +); + +create table subgraphs.prune_table_state( + -- diesel can't deal with composite primary keys + vid int primary key + generated always as identity, + + id int not null, 
+ run int not null, + table_name text not null, + -- 'r' (rebuild) or 'd' (delete) + strategy char not null, + phase text not null, + + start_vid int8, + final_vid int8, + nonfinal_vid int8, + rows int8, + + next_vid int8, + batch_size int8, + + started_at timestamptz, + finished_at timestamptz, + + constraint prune_table_state_id_run_table_name_uq + unique(id, run, table_name), + + constraint prune_table_state_strategy_ck + check(strategy in ('r', 'd')), + + constraint prune_table_state_phase_ck + check(phase in ('queued', 'started', 'copy_final', + 'copy_nonfinal', 'delete', 'done')), + + constraint prune_table_state_id_run_fk + foreign key(id, run) + references subgraphs.prune_state(id, run) + on delete cascade +); diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index bb7f842843b..1cb569730a0 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -53,7 +53,7 @@ use crate::detail::ErrorDetail; use crate::dynds::DataSourcesTable; use crate::primary::{DeploymentId, Primary}; use crate::relational::index::{CreateIndex, IndexList, Method}; -use crate::relational::{Layout, LayoutCache, SqlName, Table}; +use crate::relational::{self, Layout, LayoutCache, SqlName, Table}; use crate::relational_queries::FromEntityData; use crate::{advisory_lock, catalog, retry}; use crate::{detail, ConnectionPool}; @@ -876,6 +876,19 @@ impl DeploymentStore { }) .await } + + pub(crate) async fn prune_viewer( + self: &Arc, + site: Arc, + ) -> Result { + let store = self.cheap_clone(); + let layout = self + .pool + .with_conn(move |conn, _| store.layout(conn, site.clone()).map_err(|e| e.into())) + .await?; + + Ok(relational::prune::Viewer::new(self.pool.clone(), layout)) + } } /// Methods that back the trait `WritableStore`, but have small variations in their signatures diff --git a/store/postgres/src/lib.rs b/store/postgres/src/lib.rs index 25c8c285910..baf4d523ed5 100644 --- a/store/postgres/src/lib.rs +++ b/store/postgres/src/lib.rs @@ -85,5 +85,6 @@ pub mod command_support { } pub use crate::deployment::{on_sync, OnSync}; pub use crate::primary::Namespace; + pub use crate::relational::prune::{Phase, PruneState, PruneTableState, Viewer}; pub use crate::relational::{Catalog, Column, ColumnType, Layout, SqlName}; } diff --git a/store/postgres/src/relational.rs b/store/postgres/src/relational.rs index 27cee515265..35e35a35746 100644 --- a/store/postgres/src/relational.rs +++ b/store/postgres/src/relational.rs @@ -16,7 +16,7 @@ mod query_tests; pub(crate) mod dsl; pub(crate) mod index; -mod prune; +pub(crate) mod prune; mod rollup; pub(crate) mod value; diff --git a/store/postgres/src/relational/prune.rs b/store/postgres/src/relational/prune.rs index 5c3035ce172..37a053f6b2b 100644 --- a/store/postgres/src/relational/prune.rs +++ b/store/postgres/src/relational/prune.rs @@ -28,6 +28,8 @@ use super::{ Catalog, Layout, Namespace, }; +pub use status::{Phase, PruneState, PruneTableState, Viewer}; + /// Utility to copy relevant data out of a source table and into a new /// destination table and replace the source table with the destination /// table @@ -90,6 +92,7 @@ impl TablePair { &self, conn: &mut PgConnection, reporter: &mut dyn PruneReporter, + tracker: &status::Tracker, earliest_block: BlockNumber, final_block: BlockNumber, cancel: &CancelHandle, @@ -99,6 +102,7 @@ impl TablePair { // Determine the last vid that we need to copy let range = VidRange::for_prune(conn, &self.src, earliest_block, final_block)?; let mut batcher = 
VidBatcher::load(conn, &self.src_nsp, &self.src, range)?; + tracker.start_copy_final(conn, &self.src, range)?; while !batcher.finished() { let (_, rows) = batcher.step(|start, end| { @@ -132,11 +136,13 @@ impl TablePair { .map_err(StoreError::from) }) })?; + let rows = rows.unwrap_or(0); + tracker.copy_final_batch(conn, &self.src, rows, &batcher)?; cancel.check_cancel()?; reporter.prune_batch( self.src.name.as_str(), - rows.unwrap_or(0), + rows, PrunePhase::CopyFinal, batcher.finished(), ); @@ -151,6 +157,7 @@ impl TablePair { &self, conn: &mut PgConnection, reporter: &mut dyn PruneReporter, + tracker: &status::Tracker, final_block: BlockNumber, ) -> Result<(), StoreError> { let column_list = self.column_list(); @@ -158,6 +165,7 @@ impl TablePair { // Determine the last vid that we need to copy let range = VidRange::for_prune(conn, &self.src, final_block + 1, BLOCK_NUMBER_MAX)?; let mut batcher = VidBatcher::load(conn, &self.src.nsp, &self.src, range)?; + tracker.start_copy_nonfinal(conn, &self.src, range)?; while !batcher.finished() { let (_, rows) = batcher.step(|start, end| { @@ -186,10 +194,13 @@ impl TablePair { .map_err(StoreError::from) }) })?; + let rows = rows.unwrap_or(0); + + tracker.copy_nonfinal_batch(conn, &self.src, rows as i64, &batcher)?; reporter.prune_batch( self.src.name.as_str(), - rows.unwrap_or(0), + rows, PrunePhase::CopyNonfinal, batcher.finished(), ); @@ -352,18 +363,21 @@ impl Layout { /// time. The rebuild strategy never blocks reads, it only ever blocks /// writes. pub fn prune( - &self, + self: Arc, logger: &Logger, reporter: &mut dyn PruneReporter, conn: &mut PgConnection, req: &PruneRequest, cancel: &CancelHandle, ) -> Result<(), CancelableError> { + let tracker = status::Tracker::new(conn, self.clone())?; + reporter.start(req); let stats = self.version_stats(conn, reporter, true, cancel)?; let prunable_tables: Vec<_> = self.prunable_tables(&stats, req).into_iter().collect(); + tracker.start(conn, req, &prunable_tables)?; // create a shadow namespace where we will put the copies of our // tables, but only create it in the database if we really need it @@ -382,6 +396,7 @@ impl Layout { // is the definition of 'final' for (table, strat) in &prunable_tables { reporter.start_table(table.name.as_str()); + tracker.start_table(conn, table)?; match strat { PruningStrategy::Rebuild => { if recreate_dst_nsp { @@ -401,6 +416,7 @@ impl Layout { pair.copy_final_entities( conn, reporter, + &tracker, req.earliest_block, req.final_block, cancel, @@ -410,7 +426,7 @@ impl Layout { // see also: deployment-lock-for-update reporter.start_switch(); deployment::with_lock(conn, &self.site, |conn| -> Result<_, StoreError> { - pair.copy_nonfinal_entities(conn, reporter, req.final_block)?; + pair.copy_nonfinal_entities(conn, reporter, &tracker, req.final_block)?; cancel.check_cancel().map_err(CancelableError::from)?; conn.transaction(|conn| pair.switch(logger, conn))?; @@ -426,6 +442,7 @@ impl Layout { let range = VidRange::for_prune(conn, &table, 0, req.earliest_block)?; let mut batcher = VidBatcher::load(conn, &self.site.namespace, &table, range)?; + tracker.start_delete(conn, table, range, &batcher)?; while !batcher.finished() { let (_, rows) = batcher.step(|start, end| {sql_query(format!( "/* controller=prune,phase=delete,start_vid={start},batch_size={batch_size} */ \ @@ -439,10 +456,13 @@ impl Layout { .bind::(start) .bind::(end) .execute(conn).map_err(StoreError::from)})?; + let rows = rows.unwrap_or(0); + + tracker.delete_batch(conn, table, rows, &batcher)?; 
reporter.prune_batch( table.name.as_str(), - rows.unwrap_or(0), + rows, PrunePhase::Delete, batcher.finished(), ); @@ -450,6 +470,7 @@ impl Layout { } } reporter.finish_table(table.name.as_str()); + tracker.finish_table(conn, table)?; } // Get rid of the temporary prune schema if we actually created it if !recreate_dst_nsp { @@ -465,7 +486,462 @@ impl Layout { self.analyze_tables(conn, reporter, tables, cancel)?; reporter.finish(); + tracker.finish(conn)?; Ok(()) } } + +mod status { + use std::sync::Arc; + + use chrono::{DateTime, Utc}; + use diesel::{ + deserialize::FromSql, + dsl::insert_into, + pg::{Pg, PgValue}, + query_builder::QueryFragment, + serialize::{Output, ToSql}, + sql_types::Text, + table, update, AsChangeset, Connection, ExpressionMethods as _, OptionalExtension, + PgConnection, QueryDsl as _, RunQueryDsl as _, + }; + use graph::{ + components::store::{PruneRequest, PruningStrategy, StoreResult}, + prelude::StoreError, + }; + + use crate::{ + relational::{Layout, Table}, + vid_batcher::{VidBatcher, VidRange}, + ConnectionPool, + }; + + table! { + subgraphs.prune_state(vid) { + vid -> Integer, + // Deployment id (sgd) + id -> Integer, + run -> Integer, + // The first block in the subgraph when the prune started + first_block -> Integer, + final_block -> Integer, + latest_block -> Integer, + // The amount of history configured + history_blocks -> Integer, + + started_at -> Timestamptz, + finished_at -> Nullable, + } + } + + table! { + subgraphs.prune_table_state(vid) { + vid -> Integer, + // Deployment id (sgd) + id -> Integer, + run -> Integer, + table_name -> Text, + + strategy -> Char, + // see enum Phase + phase -> Text, + + start_vid -> Nullable, + final_vid -> Nullable, + nonfinal_vid -> Nullable, + rows -> Nullable, + + next_vid -> Nullable, + batch_size -> Nullable, + + started_at -> Nullable, + finished_at -> Nullable, + } + } + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow)] + #[diesel(sql_type = Text)] + pub enum Phase { + Queued, + Started, + /// Only used when strategy is Rebuild + CopyFinal, + /// Only used when strategy is Rebuild + CopyNonfinal, + /// Only used when strategy is Delete + Delete, + Done, + /// Not a real phase, indicates that the database has an invalid + /// value + Unknown, + } + + impl Phase { + pub fn from_str(phase: &str) -> Self { + use Phase::*; + match phase { + "queued" => Queued, + "started" => Started, + "copy_final" => CopyFinal, + "copy_nonfinal" => CopyNonfinal, + "delete" => Delete, + "done" => Done, + _ => Unknown, + } + } + + pub fn as_str(&self) -> &str { + use Phase::*; + match self { + Queued => "queued", + Started => "started", + CopyFinal => "copy_final", + CopyNonfinal => "copy_nonfinal", + Delete => "delete", + Done => "done", + Unknown => "*unknown*", + } + } + } + + impl ToSql for Phase { + fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result { + let phase = self.as_str(); + >::to_sql(phase, &mut out.reborrow()) + } + } + + impl FromSql for Phase { + fn from_sql(bytes: PgValue) -> diesel::deserialize::Result { + Ok(Phase::from_str(std::str::from_utf8(bytes.as_bytes())?)) + } + } + + /// Information about one pruning run for a deployment + #[derive(Queryable)] + pub struct PruneState { + pub vid: i32, + pub id: i32, + pub run: i32, + pub first_block: i32, + pub final_block: i32, + pub latest_block: i32, + pub history_blocks: i32, + + pub started_at: DateTime, + pub finished_at: Option>, + } + + /// Per-table information about the pruning run for a deployment + #[derive(Queryable)] 
+ pub struct PruneTableState { + pub vid: i32, + pub id: i32, + pub run: i32, + pub table_name: String, + + // 'r' for rebuild or 'd' for delete + pub strategy: String, + pub phase: Phase, + + pub start_vid: Option, + pub final_vid: Option, + pub nonfinal_vid: Option, + pub rows: Option, + + pub next_vid: Option, + pub batch_size: Option, + + pub started_at: Option>, + pub finished_at: Option>, + } + + /// A helper to persist pruning progress in the database + pub(super) struct Tracker { + layout: Arc, + run: i32, + } + + impl Tracker { + pub(super) fn new(conn: &mut PgConnection, layout: Arc) -> StoreResult { + use prune_state as ps; + let run = ps::table + .filter(ps::id.eq(layout.site.id)) + .order(ps::run.desc()) + .select(ps::run) + .get_result::(conn) + .optional() + .map_err(StoreError::from)? + .unwrap_or(0) + + 1; + + Ok(Tracker { layout, run }) + } + + pub(super) fn start( + &self, + conn: &mut PgConnection, + req: &PruneRequest, + prunable_tables: &[(&Arc
, PruningStrategy)], + ) -> StoreResult<()> { + use prune_state as ps; + use prune_table_state as pts; + + conn.transaction(|conn| { + insert_into(ps::table) + .values(( + ps::id.eq(self.layout.site.id), + ps::run.eq(self.run), + ps::first_block.eq(req.first_block), + ps::final_block.eq(req.final_block), + ps::latest_block.eq(req.latest_block), + ps::history_blocks.eq(req.history_blocks), + ps::started_at.eq(diesel::dsl::now), + )) + .execute(conn)?; + + for (table, strat) in prunable_tables { + let strat = match strat { + PruningStrategy::Rebuild => "r", + PruningStrategy::Delete => "d", + }; + insert_into(pts::table) + .values(( + pts::id.eq(self.layout.site.id), + pts::run.eq(self.run), + pts::table_name.eq(table.name.as_str()), + pts::strategy.eq(strat), + pts::phase.eq(Phase::Queued), + )) + .execute(conn)?; + } + Ok(()) + }) + } + + pub(crate) fn start_table( + &self, + conn: &mut PgConnection, + table: &Table, + ) -> StoreResult<()> { + use prune_table_state as pts; + + self.update_table_state( + conn, + table, + ( + pts::started_at.eq(diesel::dsl::now), + pts::phase.eq(Phase::Started), + ), + )?; + + Ok(()) + } + + pub(crate) fn start_copy_final( + &self, + conn: &mut PgConnection, + table: &Table, + range: VidRange, + ) -> StoreResult<()> { + use prune_table_state as pts; + + let values = ( + pts::phase.eq(Phase::CopyFinal), + pts::start_vid.eq(range.min), + pts::next_vid.eq(range.min), + pts::final_vid.eq(range.max), + pts::rows.eq(0), + ); + + self.update_table_state(conn, table, values) + } + + pub(crate) fn copy_final_batch( + &self, + conn: &mut PgConnection, + table: &Table, + rows: usize, + batcher: &VidBatcher, + ) -> StoreResult<()> { + use prune_table_state as pts; + + let values = ( + pts::next_vid.eq(batcher.next_vid()), + pts::batch_size.eq(batcher.batch_size() as i64), + pts::rows.eq(pts::rows + (rows as i64)), + ); + + self.update_table_state(conn, table, values) + } + + pub(crate) fn start_copy_nonfinal( + &self, + conn: &mut PgConnection, + table: &Table, + range: VidRange, + ) -> StoreResult<()> { + use prune_table_state as pts; + + let values = ( + pts::phase.eq(Phase::CopyNonfinal), + pts::nonfinal_vid.eq(range.max), + ); + self.update_table_state(conn, table, values) + } + + pub(crate) fn copy_nonfinal_batch( + &self, + conn: &mut PgConnection, + src: &Table, + rows: i64, + batcher: &VidBatcher, + ) -> StoreResult<()> { + use prune_table_state as pts; + + let values = ( + pts::next_vid.eq(batcher.next_vid()), + pts::batch_size.eq(batcher.batch_size() as i64), + pts::rows.eq(pts::rows + rows), + ); + + self.update_table_state(conn, src, values) + } + + pub(crate) fn finish_table( + &self, + conn: &mut PgConnection, + table: &Table, + ) -> StoreResult<()> { + use prune_table_state as pts; + + let values = ( + pts::finished_at.eq(diesel::dsl::now), + pts::phase.eq(Phase::Done), + ); + + self.update_table_state(conn, table, values) + } + + pub(crate) fn start_delete( + &self, + conn: &mut PgConnection, + table: &Table, + range: VidRange, + batcher: &VidBatcher, + ) -> StoreResult<()> { + use prune_table_state as pts; + + let values = ( + pts::phase.eq(Phase::Delete), + pts::start_vid.eq(range.min), + pts::final_vid.eq(range.max), + pts::nonfinal_vid.eq(range.max), + pts::rows.eq(0), + pts::next_vid.eq(range.min), + pts::batch_size.eq(batcher.batch_size() as i64), + ); + + self.update_table_state(conn, table, values) + } + + pub(crate) fn delete_batch( + &self, + conn: &mut PgConnection, + table: &Table, + rows: usize, + batcher: &VidBatcher, + ) -> StoreResult<()> 
{ + use prune_table_state as pts; + + let values = ( + pts::next_vid.eq(batcher.next_vid()), + pts::batch_size.eq(batcher.batch_size() as i64), + pts::rows.eq(pts::rows - (rows as i64)), + ); + + self.update_table_state(conn, table, values) + } + + fn update_table_state( + &self, + conn: &mut PgConnection, + table: &Table, + values: V, + ) -> StoreResult<()> + where + V: AsChangeset, + C: QueryFragment, + { + use prune_table_state as pts; + + update(pts::table) + .filter(pts::id.eq(self.layout.site.id)) + .filter(pts::run.eq(self.run)) + .filter(pts::table_name.eq(table.name.as_str())) + .set(values) + .execute(conn)?; + Ok(()) + } + + pub(crate) fn finish(&self, conn: &mut PgConnection) -> StoreResult<()> { + use prune_state as ps; + + update(ps::table) + .filter(ps::id.eq(self.layout.site.id)) + .filter(ps::run.eq(self.run)) + .set((ps::finished_at.eq(diesel::dsl::now),)) + .execute(conn)?; + Ok(()) + } + } + + /// A helper to read pruning progress from the database + pub struct Viewer { + pool: ConnectionPool, + layout: Arc, + } + + impl Viewer { + pub fn new(pool: ConnectionPool, layout: Arc) -> Self { + Self { pool, layout } + } + + pub fn runs(&self) -> StoreResult> { + use prune_state as ps; + + let mut conn = self.pool.get()?; + let runs = ps::table + .filter(ps::id.eq(self.layout.site.id)) + .select(ps::run) + .order(ps::run.asc()) + .load::(&mut conn) + .map_err(StoreError::from)?; + let runs = runs.into_iter().map(|run| run as usize).collect::>(); + Ok(runs) + } + + pub fn state(&self, run: usize) -> StoreResult)>> { + use prune_state as ps; + use prune_table_state as pts; + + let mut conn = self.pool.get()?; + + let ptss = pts::table + .filter(pts::id.eq(self.layout.site.id)) + .filter(pts::run.eq(run as i32)) + .order(pts::table_name.asc()) + .load::(&mut conn) + .map_err(StoreError::from)?; + + ps::table + .filter(ps::id.eq(self.layout.site.id)) + .filter(ps::run.eq(run as i32)) + .first::(&mut conn) + .optional() + .map_err(StoreError::from) + .map(|state| state.map(|state| (state, ptss))) + } + } +} diff --git a/store/postgres/src/subgraph_store.rs b/store/postgres/src/subgraph_store.rs index a4e7ffda659..d19cc68f44a 100644 --- a/store/postgres/src/subgraph_store.rs +++ b/store/postgres/src/subgraph_store.rs @@ -40,6 +40,7 @@ use crate::{ deployment::{OnSync, SubgraphHealth}, primary::{self, DeploymentId, Mirror as PrimaryMirror, Primary, Site}, relational::{ + self, index::{IndexList, Method}, Layout, }, @@ -1249,6 +1250,16 @@ impl SubgraphStoreInner { store.prune(reporter, site, req).await } + pub async fn prune_viewer( + &self, + deployment: &DeploymentLocator, + ) -> Result { + let site = self.find_site(deployment.id.into())?; + let store = self.for_site(&site)?; + + store.prune_viewer(site).await + } + pub fn set_history_blocks( &self, deployment: &DeploymentLocator, From b3d423e8aca4fef2ab42484f3741c867bba38db8 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 16 Apr 2025 12:56:38 -0700 Subject: [PATCH 136/160] store: Limit for how many prune runs we keep status info --- graph/src/env/store.rs | 6 ++++++ node/src/manager/commands/prune.rs | 6 ++++-- store/postgres/src/relational/prune.rs | 10 ++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/graph/src/env/store.rs b/graph/src/env/store.rs index 8197d07b6bc..1c768f45bed 100644 --- a/graph/src/env/store.rs +++ b/graph/src/env/store.rs @@ -111,6 +111,9 @@ pub struct EnvVarsStore { /// blocks) than its history limit. 
The default value is 1.2 and the /// value must be at least 1.01 pub history_slack_factor: f64, + /// For how many prune runs per deployment to keep status information. + /// Set by `GRAPH_STORE_HISTORY_KEEP_STATUS`. The default is 5 + pub prune_keep_history: usize, /// How long to accumulate changes into a batch before a write has to /// happen. Set by the environment variable /// `GRAPH_STORE_WRITE_BATCH_DURATION` in seconds. The default is 300s. @@ -184,6 +187,7 @@ impl TryFrom for EnvVarsStore { rebuild_threshold: x.rebuild_threshold.0, delete_threshold: x.delete_threshold.0, history_slack_factor: x.history_slack_factor.0, + prune_keep_history: x.prune_keep_status, write_batch_duration: Duration::from_secs(x.write_batch_duration_in_secs), write_batch_size: x.write_batch_size * 1_000, create_gin_indexes: x.create_gin_indexes, @@ -257,6 +261,8 @@ pub struct InnerStore { delete_threshold: ZeroToOneF64, #[envconfig(from = "GRAPH_STORE_HISTORY_SLACK_FACTOR", default = "1.2")] history_slack_factor: HistorySlackF64, + #[envconfig(from = "GRAPH_STORE_HISTORY_KEEP_STATUS", default = "5")] + prune_keep_status: usize, #[envconfig(from = "GRAPH_STORE_WRITE_BATCH_DURATION", default = "300")] write_batch_duration_in_secs: u64, #[envconfig(from = "GRAPH_STORE_WRITE_BATCH_SIZE", default = "10000")] diff --git a/node/src/manager/commands/prune.rs b/node/src/manager/commands/prune.rs index 0fc5538fc71..05b1730806d 100644 --- a/node/src/manager/commands/prune.rs +++ b/node/src/manager/commands/prune.rs @@ -341,14 +341,16 @@ pub async fn status( let runs = match runs.len() { 0 => unreachable!("we checked that runs is not empty"), 1 => format!("There is only one prune run #{}", runs[0]), + 2 => format!("Only prune runs #{} and #{} exist", runs[0], runs[1]), _ => format!( - "Only prune runs #{} up to #{} exist", + "Only prune runs #{} and #{} up to #{} exist", runs[0], + runs[1], runs.last().unwrap() ), }; return Err(anyhow!( - "No information about prune run #{run} found for deployment {deployment}. {runs}" + "No information about prune run #{run} found for deployment {deployment}.\n {runs}" )); }; println!("prune {deployment} (run #{run})"); diff --git a/store/postgres/src/relational/prune.rs b/store/postgres/src/relational/prune.rs index 37a053f6b2b..3748dde587c 100644 --- a/store/postgres/src/relational/prune.rs +++ b/store/postgres/src/relational/prune.rs @@ -508,6 +508,7 @@ mod status { }; use graph::{ components::store::{PruneRequest, PruningStrategy, StoreResult}, + env::ENV_VARS, prelude::StoreError, }; @@ -676,6 +677,15 @@ mod status { .unwrap_or(0) + 1; + // Delete old prune state. 
Keep the initial run and the last + // `prune_keep_history` runs (including this one) + diesel::delete(ps::table) + .filter(ps::id.eq(layout.site.id)) + .filter(ps::run.gt(1)) + .filter(ps::run.lt(run - (ENV_VARS.store.prune_keep_history - 1) as i32)) + .execute(conn) + .map_err(StoreError::from)?; + Ok(Tracker { layout, run }) } From 29ac576259a74f339c33b660afa19993d1f8d1f5 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 16 Apr 2025 13:04:18 -0700 Subject: [PATCH 137/160] store: Map the pruning status tables in the sharded schema in the primary --- store/postgres/src/pool/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/store/postgres/src/pool/mod.rs b/store/postgres/src/pool/mod.rs index 5fcc7b0cd1c..628a977ff9b 100644 --- a/store/postgres/src/pool/mod.rs +++ b/store/postgres/src/pool/mod.rs @@ -66,6 +66,8 @@ const SHARDED_TABLES: [(&str, &[&str]); 2] = [ "subgraph", "subgraph_version", "subgraph_deployment_assignment", + "prune_state", + "prune_table_state", ], ), ]; From 5e565a72eb7ea4246c5ae3324d33a85a5787ca5d Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 16 Apr 2025 14:41:49 -0700 Subject: [PATCH 138/160] node: Format larger durations as days/hours/minutes --- node/src/manager/fmt.rs | 47 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/node/src/manager/fmt.rs b/node/src/manager/fmt.rs index 549d173ca1a..6aaa12192a7 100644 --- a/node/src/manager/fmt.rs +++ b/node/src/manager/fmt.rs @@ -50,7 +50,20 @@ pub fn human_duration(duration: Duration) -> String { } else if duration.num_minutes() < 5 { format!("{}s", duration.num_seconds()) } else { - format!("{}m", duration.num_minutes()) + let minutes = duration.num_minutes(); + if minutes < 90 { + format!("{}m", duration.num_minutes()) + } else { + let hours = minutes / 60; + let minutes = minutes % 60; + if hours < 24 { + format!("{}h {}m", hours, minutes) + } else { + let days = hours / 24; + let hours = hours % 24; + format!("{}d {}h {}m", days, hours, minutes) + } + } } } @@ -76,3 +89,35 @@ pub fn date_time(date: &DateTime) -> String { let date = DateTime::::from(*date); date.format("%Y-%m-%d %H:%M:%S%Z").to_string() } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_human_duration() { + let duration = Duration::seconds(1); + assert_eq!(human_duration(duration), "1000ms"); + + let duration = Duration::seconds(10); + assert_eq!(human_duration(duration), "10s"); + + let duration = Duration::minutes(5); + assert_eq!(human_duration(duration), "5m"); + + let duration = Duration::hours(1); + assert_eq!(human_duration(duration), "60m"); + + let duration = Duration::minutes(100); + assert_eq!(human_duration(duration), "1h 40m"); + + let duration = Duration::days(1); + assert_eq!(human_duration(duration), "1d 0h 0m"); + + let duration = Duration::days(1) + Duration::minutes(35); + assert_eq!(human_duration(duration), "1d 0h 35m"); + + let duration = Duration::days(1) + Duration::minutes(95); + assert_eq!(human_duration(duration), "1d 1h 35m"); + } +} From 0912ff58da3b9a2de3f428b0238fd2e8a90c20e3 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 16 Apr 2025 16:04:39 -0700 Subject: [PATCH 139/160] Cargo.toml: Enable line wrapping for help text in clap --- Cargo.lock | 13 ++++++++++++- Cargo.toml | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec8c31e1233..abf9368d3e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -724,6 +724,7 @@ dependencies = [ "anstyle", "clap_lex", "strsim", + 
"terminal_size 0.3.0", ] [[package]] @@ -773,7 +774,7 @@ dependencies = [ "lazy_static", "libc", "regex", - "terminal_size", + "terminal_size 0.1.17", "unicode-width", "winapi", "winapi-util", @@ -5235,6 +5236,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "terminal_size" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +dependencies = [ + "rustix", + "windows-sys 0.48.0", +] + [[package]] name = "test-store" version = "0.36.0" diff --git a/Cargo.toml b/Cargo.toml index ffc3961d405..e258a84082a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,7 @@ async-graphql-axum = "7.0.15" axum = "0.8.1" chrono = "0.4.38" bs58 = "0.5.1" -clap = { version = "4.5.4", features = ["derive", "env"] } +clap = { version = "4.5.4", features = ["derive", "env", "wrap_help"] } derivative = "2.2.0" diesel = { version = "2.2.7", features = [ "postgres", From 28fa4445d0ca42e0ea234826ea553533c8e9a4da Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Thu, 17 Apr 2025 18:58:52 -0700 Subject: [PATCH 140/160] store: Address review comments for tracking pruning status --- store/postgres/src/relational/prune.rs | 46 +++----------------------- 1 file changed, 5 insertions(+), 41 deletions(-) diff --git a/store/postgres/src/relational/prune.rs b/store/postgres/src/relational/prune.rs index 3748dde587c..1a1236e2aaf 100644 --- a/store/postgres/src/relational/prune.rs +++ b/store/postgres/src/relational/prune.rs @@ -137,7 +137,7 @@ impl TablePair { }) })?; let rows = rows.unwrap_or(0); - tracker.copy_final_batch(conn, &self.src, rows, &batcher)?; + tracker.finish_batch(conn, &self.src, rows as i64, &batcher)?; cancel.check_cancel()?; reporter.prune_batch( @@ -196,7 +196,7 @@ impl TablePair { })?; let rows = rows.unwrap_or(0); - tracker.copy_nonfinal_batch(conn, &self.src, rows as i64, &batcher)?; + tracker.finish_batch(conn, &self.src, rows as i64, &batcher)?; reporter.prune_batch( self.src.name.as_str(), @@ -458,7 +458,7 @@ impl Layout { .execute(conn).map_err(StoreError::from)})?; let rows = rows.unwrap_or(0); - tracker.delete_batch(conn, table, rows, &batcher)?; + tracker.finish_batch(conn, table, -(rows as i64), &batcher)?; reporter.prune_batch( table.name.as_str(), @@ -682,7 +682,7 @@ mod status { diesel::delete(ps::table) .filter(ps::id.eq(layout.site.id)) .filter(ps::run.gt(1)) - .filter(ps::run.lt(run - (ENV_VARS.store.prune_keep_history - 1) as i32)) + .filter(ps::run.lt(run - (ENV_VARS.store.prune_keep_history as i32 - 1))) .execute(conn) .map_err(StoreError::from)?; @@ -768,24 +768,6 @@ mod status { self.update_table_state(conn, table, values) } - pub(crate) fn copy_final_batch( - &self, - conn: &mut PgConnection, - table: &Table, - rows: usize, - batcher: &VidBatcher, - ) -> StoreResult<()> { - use prune_table_state as pts; - - let values = ( - pts::next_vid.eq(batcher.next_vid()), - pts::batch_size.eq(batcher.batch_size() as i64), - pts::rows.eq(pts::rows + (rows as i64)), - ); - - self.update_table_state(conn, table, values) - } - pub(crate) fn start_copy_nonfinal( &self, conn: &mut PgConnection, @@ -801,7 +783,7 @@ mod status { self.update_table_state(conn, table, values) } - pub(crate) fn copy_nonfinal_batch( + pub(crate) fn finish_batch( &self, conn: &mut PgConnection, src: &Table, @@ -856,24 +838,6 @@ mod status { self.update_table_state(conn, table, values) } - pub(crate) fn delete_batch( - &self, - conn: &mut PgConnection, - table: &Table, - rows: usize, 
- batcher: &VidBatcher, - ) -> StoreResult<()> { - use prune_table_state as pts; - - let values = ( - pts::next_vid.eq(batcher.next_vid()), - pts::batch_size.eq(batcher.batch_size() as i64), - pts::rows.eq(pts::rows - (rows as i64)), - ); - - self.update_table_state(conn, table, values) - } - fn update_table_state( &self, conn: &mut PgConnection, From eaf6f0d3d6939044c190f2edfe4ffe5f85c57152 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Fri, 18 Apr 2025 14:39:38 -0700 Subject: [PATCH 141/160] node: Accept a bare number as a deployment id in graphman --- node/src/manager/deployment.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/node/src/manager/deployment.rs b/node/src/manager/deployment.rs index f4f2a3b2533..a7cedbd33f2 100644 --- a/node/src/manager/deployment.rs +++ b/node/src/manager/deployment.rs @@ -18,7 +18,7 @@ lazy_static! { // `Qm...` optionally follow by `:$shard` static ref HASH_RE: Regex = Regex::new("\\A(?PQm[^:]+)(:(?P[a-z0-9_]+))?\\z").unwrap(); // `sgdNNN` - static ref DEPLOYMENT_RE: Regex = Regex::new("\\A(?Psgd[0-9]+)\\z").unwrap(); + static ref DEPLOYMENT_RE: Regex = Regex::new("\\A(?P(sgd)?[0-9]+)\\z").unwrap(); } /// A search for one or multiple deployments to make it possible to search @@ -58,7 +58,12 @@ impl FromStr for DeploymentSearch { Ok(DeploymentSearch::Hash { hash, shard }) } else if let Some(caps) = DEPLOYMENT_RE.captures(s) { let namespace = caps.name("nsp").unwrap().as_str().to_string(); - Ok(DeploymentSearch::Deployment { namespace }) + if namespace.starts_with("sgd") { + Ok(DeploymentSearch::Deployment { namespace }) + } else { + let namespace = format!("sgd{namespace}"); + Ok(DeploymentSearch::Deployment { namespace }) + } } else { Ok(DeploymentSearch::Name { name: s.to_string(), From 2f47ed52dc1a34d5c7630267952716820c1d6ee9 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 22 Apr 2025 14:52:28 -0700 Subject: [PATCH 142/160] store: Drop the vid sequence with 'cascade' Since it is used for the default value, we get an error sequence sgdNNN.
_vid_seq membership in replication set default depends on sequence sgdNNN.
_vid_seq That makes all pruning by rebuilding tables fail --- store/postgres/src/relational/prune.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/store/postgres/src/relational/prune.rs b/store/postgres/src/relational/prune.rs index 1a1236e2aaf..6d5295e5535 100644 --- a/store/postgres/src/relational/prune.rs +++ b/store/postgres/src/relational/prune.rs @@ -233,6 +233,7 @@ impl TablePair { query, "select setval('{dst_nsp}.{vid_seq}', nextval('{src_nsp}.{vid_seq}'));" )?; + writeln!(query, "drop sequence {src_nsp}.{vid_seq} cascade;")?; } writeln!(query, "drop table {src_qname};")?; From 03d4611de11aa4cd8b11981645bf4667b47eb17d Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sat, 19 Apr 2025 22:24:49 -0700 Subject: [PATCH 143/160] chain: Simplify EthereumAdapter.load_blocks --- chain/ethereum/src/adapter.rs | 3 +- chain/ethereum/src/chain.rs | 5 --- chain/ethereum/src/ethereum_adapter.rs | 53 ++++++++++++-------------- 3 files changed, 26 insertions(+), 35 deletions(-) diff --git a/chain/ethereum/src/adapter.rs b/chain/ethereum/src/adapter.rs index 469e8932b5e..8421bb0422e 100644 --- a/chain/ethereum/src/adapter.rs +++ b/chain/ethereum/src/adapter.rs @@ -26,7 +26,6 @@ use graph::prelude::*; use graph::{ blockchain as bc, components::metrics::{CounterVec, GaugeVec, HistogramVec}, - futures01::Stream, petgraph::{self, graphmap::GraphMap}, }; @@ -1107,7 +1106,7 @@ pub trait EthereumAdapter: Send + Sync + 'static { logger: Logger, chain_store: Arc, block_hashes: HashSet, - ) -> Box, Error = Error> + Send>; + ) -> Result>, Error>; /// Find a block by its hash. fn block_by_hash( diff --git a/chain/ethereum/src/chain.rs b/chain/ethereum/src/chain.rs index 911d4d3ebfe..0408771f23e 100644 --- a/chain/ethereum/src/chain.rs +++ b/chain/ethereum/src/chain.rs @@ -10,7 +10,6 @@ use graph::components::network_provider::ChainName; use graph::components::store::{DeploymentCursorTracker, SourceableStore}; use graph::data::subgraph::UnifiedMappingApiVersion; use graph::firehose::{FirehoseEndpoint, ForkStep}; -use graph::futures03::compat::Future01CompatExt; use graph::futures03::TryStreamExt; use graph::prelude::{ retry, BlockHash, ComponentLoggerConfig, ElasticComponentLoggerConfig, EthereumBlock, @@ -1060,7 +1059,6 @@ impl TriggersAdapterTrait for TriggersAdapter { } async fn parent_ptr(&self, block: &BlockPtr) -> Result, Error> { - use graph::futures01::stream::Stream; use graph::prelude::LightEthereumBlockExt; let block = match self.chain_client.as_ref() { @@ -1111,9 +1109,6 @@ impl TriggersAdapterTrait for TriggersAdapter { self.chain_store.cheap_clone(), HashSet::from_iter(Some(block.hash_as_h256())), ) - .await - .collect() - .compat() .await?; assert_eq!(blocks.len(), 1); diff --git a/chain/ethereum/src/ethereum_adapter.rs b/chain/ethereum/src/ethereum_adapter.rs index e0714c24f02..713177abfef 100644 --- a/chain/ethereum/src/ethereum_adapter.rs +++ b/chain/ethereum/src/ethereum_adapter.rs @@ -1734,7 +1734,7 @@ impl EthereumAdapterTrait for EthereumAdapter { logger: Logger, chain_store: Arc, block_hashes: HashSet, - ) -> Box, Error = Error> + Send> { + ) -> Result>, Error> { let block_hashes: Vec<_> = block_hashes.iter().cloned().collect(); // Search for the block in the store first then use json-rpc as a backup. let mut blocks: Vec> = chain_store @@ -1756,27 +1756,25 @@ impl EthereumAdapterTrait for EthereumAdapter { // Return a stream that lazily loads batches of blocks. 
debug!(logger, "Requesting {} block(s)", missing_blocks.len()); - Box::new( - self.load_blocks_rpc(logger.clone(), missing_blocks) - .collect() - .map(move |new_blocks| { - let upsert_blocks: Vec<_> = new_blocks - .iter() - .map(|block| BlockFinality::Final(block.clone())) - .collect(); - let block_refs: Vec<_> = upsert_blocks - .iter() - .map(|block| block as &dyn graph::blockchain::Block) - .collect(); - if let Err(e) = chain_store.upsert_light_blocks(block_refs.as_slice()) { - error!(logger, "Error writing to block cache {}", e); - } - blocks.extend(new_blocks); - blocks.sort_by_key(|block| block.number); - stream::iter_ok(blocks) - }) - .flatten_stream(), - ) + let new_blocks = self + .load_blocks_rpc(logger.clone(), missing_blocks) + .collect() + .compat() + .await?; + let upsert_blocks: Vec<_> = new_blocks + .iter() + .map(|block| BlockFinality::Final(block.clone())) + .collect(); + let block_refs: Vec<_> = upsert_blocks + .iter() + .map(|block| block as &dyn graph::blockchain::Block) + .collect(); + if let Err(e) = chain_store.upsert_light_blocks(block_refs.as_slice()) { + error!(logger, "Error writing to block cache {}", e); + } + blocks.extend(new_blocks); + blocks.sort_by_key(|block| block.number); + Ok(blocks) } } @@ -1911,10 +1909,11 @@ pub(crate) async fn blocks_with_triggers( let logger2 = logger.cheap_clone(); - let blocks = eth + let blocks: Vec<_> = eth .load_blocks(logger.cheap_clone(), chain_store.clone(), block_hashes) - .await - .and_then( + .await? + .into_iter() + .map( move |block| match triggers_by_block.remove(&(block.number() as BlockNumber)) { Some(triggers) => Ok(BlockWithTriggers::new( BlockFinality::Final(block), @@ -1927,9 +1926,7 @@ pub(crate) async fn blocks_with_triggers( )), }, ) - .collect() - .compat() - .await?; + .collect::>()?; // Filter out call triggers that come from unsuccessful transactions let futures = blocks.into_iter().map(|block| { From 7dd404d685edde8c77b28e0f1d9e1461c8dcae93 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sat, 19 Apr 2025 22:27:28 -0700 Subject: [PATCH 144/160] chain: Modernize EthereumAdapter.latest_block_header --- chain/ethereum/src/adapter.rs | 4 +- chain/ethereum/src/ethereum_adapter.rs | 51 ++++++++++++-------------- chain/ethereum/src/ingestor.rs | 1 - 3 files changed, 25 insertions(+), 31 deletions(-) diff --git a/chain/ethereum/src/adapter.rs b/chain/ethereum/src/adapter.rs index 8421bb0422e..4e0ccfe32fb 100644 --- a/chain/ethereum/src/adapter.rs +++ b/chain/ethereum/src/adapter.rs @@ -1088,10 +1088,10 @@ pub trait EthereumAdapter: Send + Sync + 'static { ) -> Box + Send + Unpin>; /// Get the latest block, with only the header and transaction hashes. 
- fn latest_block_header( + async fn latest_block_header( &self, logger: &Logger, - ) -> Box, Error = bc::IngestorError> + Send>; + ) -> Result, bc::IngestorError>; fn load_block( &self, diff --git a/chain/ethereum/src/ethereum_adapter.rs b/chain/ethereum/src/ethereum_adapter.rs index 713177abfef..3ac0f4bd77b 100644 --- a/chain/ethereum/src/ethereum_adapter.rs +++ b/chain/ethereum/src/ethereum_adapter.rs @@ -1258,39 +1258,34 @@ impl EthereumAdapterTrait for EthereumAdapter { Ok(ident) } - fn latest_block_header( + async fn latest_block_header( &self, logger: &Logger, - ) -> Box, Error = IngestorError> + Send> { + ) -> Result, IngestorError> { let web3 = self.web3.clone(); - Box::new( - retry("eth_getBlockByNumber(latest) no txs RPC call", logger) - .redact_log_urls(true) - .no_limit() - .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) - .run(move || { - let web3 = web3.cheap_clone(); - async move { - let block_opt = web3 - .eth() - .block(Web3BlockNumber::Latest.into()) - .await - .map_err(|e| { - anyhow!("could not get latest block from Ethereum: {}", e) - })?; + retry("eth_getBlockByNumber(latest) no txs RPC call", logger) + .redact_log_urls(true) + .no_limit() + .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) + .run(move || { + let web3 = web3.cheap_clone(); + async move { + let block_opt = web3 + .eth() + .block(Web3BlockNumber::Latest.into()) + .await + .map_err(|e| anyhow!("could not get latest block from Ethereum: {}", e))?; - block_opt - .ok_or_else(|| anyhow!("no latest block returned from Ethereum").into()) - } - }) - .map_err(move |e| { - e.into_inner().unwrap_or_else(move || { - anyhow!("Ethereum node took too long to return latest block").into() - }) + block_opt + .ok_or_else(|| anyhow!("no latest block returned from Ethereum").into()) + } + }) + .map_err(move |e| { + e.into_inner().unwrap_or_else(move || { + anyhow!("Ethereum node took too long to return latest block").into() }) - .boxed() - .compat(), - ) + }) + .await } fn latest_block( diff --git a/chain/ethereum/src/ingestor.rs b/chain/ethereum/src/ingestor.rs index e0fc8c5becd..fdbbac0d3a7 100644 --- a/chain/ethereum/src/ingestor.rs +++ b/chain/ethereum/src/ingestor.rs @@ -210,7 +210,6 @@ impl PollingBlockIngestor { ) -> Result { eth_adapter .latest_block_header(&logger) - .compat() .await .map(|block| block.into()) } From 20139ab43cac22f029a3078ced3bfba82c970f0b Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sat, 19 Apr 2025 22:29:49 -0700 Subject: [PATCH 145/160] chain: Modernize EthereumAdapter.latest_block --- chain/ethereum/src/adapter.rs | 6 +-- chain/ethereum/src/ethereum_adapter.rs | 52 +++++++++++--------------- 2 files changed, 23 insertions(+), 35 deletions(-) diff --git a/chain/ethereum/src/adapter.rs b/chain/ethereum/src/adapter.rs index 4e0ccfe32fb..55d1e41d3f2 100644 --- a/chain/ethereum/src/adapter.rs +++ b/chain/ethereum/src/adapter.rs @@ -17,7 +17,6 @@ use prost_types::Any; use std::cmp; use std::collections::{HashMap, HashSet}; use std::fmt; -use std::marker::Unpin; use thiserror::Error; use tiny_keccak::keccak256; use web3::types::{Address, Log, H256}; @@ -1082,10 +1081,7 @@ pub trait EthereumAdapter: Send + Sync + 'static { async fn net_identifiers(&self) -> Result; /// Get the latest block, including full transactions. - fn latest_block( - &self, - logger: &Logger, - ) -> Box + Send + Unpin>; + async fn latest_block(&self, logger: &Logger) -> Result; /// Get the latest block, with only the header and transaction hashes. 
async fn latest_block_header( diff --git a/chain/ethereum/src/ethereum_adapter.rs b/chain/ethereum/src/ethereum_adapter.rs index 3ac0f4bd77b..29ced61b7fb 100644 --- a/chain/ethereum/src/ethereum_adapter.rs +++ b/chain/ethereum/src/ethereum_adapter.rs @@ -1288,38 +1288,30 @@ impl EthereumAdapterTrait for EthereumAdapter { .await } - fn latest_block( - &self, - logger: &Logger, - ) -> Box + Send + Unpin> { + async fn latest_block(&self, logger: &Logger) -> Result { let web3 = self.web3.clone(); - Box::new( - retry("eth_getBlockByNumber(latest) with txs RPC call", logger) - .redact_log_urls(true) - .no_limit() - .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) - .run(move || { - let web3 = web3.cheap_clone(); - async move { - let block_opt = web3 - .eth() - .block_with_txs(Web3BlockNumber::Latest.into()) - .await - .map_err(|e| { - anyhow!("could not get latest block from Ethereum: {}", e) - })?; - block_opt - .ok_or_else(|| anyhow!("no latest block returned from Ethereum").into()) - } - }) - .map_err(move |e| { - e.into_inner().unwrap_or_else(move || { - anyhow!("Ethereum node took too long to return latest block").into() - }) + retry("eth_getBlockByNumber(latest) with txs RPC call", logger) + .redact_log_urls(true) + .no_limit() + .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) + .run(move || { + let web3 = web3.cheap_clone(); + async move { + let block_opt = web3 + .eth() + .block_with_txs(Web3BlockNumber::Latest.into()) + .await + .map_err(|e| anyhow!("could not get latest block from Ethereum: {}", e))?; + block_opt + .ok_or_else(|| anyhow!("no latest block returned from Ethereum").into()) + } + }) + .map_err(move |e| { + e.into_inner().unwrap_or_else(move || { + anyhow!("Ethereum node took too long to return latest block").into() }) - .boxed() - .compat(), - ) + }) + .await } fn load_block( From 6d6b754b7a4d2e50c2993a7f935c3276d6595bb8 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sat, 19 Apr 2025 22:33:42 -0700 Subject: [PATCH 146/160] chain: Modernize EthereumAdapter.load_block, block_by_hash and by_number --- chain/ethereum/src/adapter.rs | 12 +-- chain/ethereum/src/ethereum_adapter.rs | 113 +++++++++++----------- chain/ethereum/src/ingestor.rs | 2 - node/src/manager/commands/chain.rs | 2 - node/src/manager/commands/check_blocks.rs | 2 - 5 files changed, 60 insertions(+), 71 deletions(-) diff --git a/chain/ethereum/src/adapter.rs b/chain/ethereum/src/adapter.rs index 55d1e41d3f2..bc489334ba6 100644 --- a/chain/ethereum/src/adapter.rs +++ b/chain/ethereum/src/adapter.rs @@ -1089,11 +1089,11 @@ pub trait EthereumAdapter: Send + Sync + 'static { logger: &Logger, ) -> Result, bc::IngestorError>; - fn load_block( + async fn load_block( &self, logger: &Logger, block_hash: H256, - ) -> Box + Send>; + ) -> Result; /// Load Ethereum blocks in bulk, returning results as they come back as a Stream. /// May use the `chain_store` as a cache. @@ -1105,17 +1105,17 @@ pub trait EthereumAdapter: Send + Sync + 'static { ) -> Result>, Error>; /// Find a block by its hash. - fn block_by_hash( + async fn block_by_hash( &self, logger: &Logger, block_hash: H256, - ) -> Box, Error = Error> + Send>; + ) -> Result, Error>; - fn block_by_number( + async fn block_by_number( &self, logger: &Logger, block_number: BlockNumber, - ) -> Box, Error = Error> + Send>; + ) -> Result, Error>; /// Load full information for the specified `block` (in particular, transaction receipts). 
fn load_full_block( diff --git a/chain/ethereum/src/ethereum_adapter.rs b/chain/ethereum/src/ethereum_adapter.rs index 29ced61b7fb..2c2ce691c51 100644 --- a/chain/ethereum/src/ethereum_adapter.rs +++ b/chain/ethereum/src/ethereum_adapter.rs @@ -1314,92 +1314,87 @@ impl EthereumAdapterTrait for EthereumAdapter { .await } - fn load_block( + async fn load_block( &self, logger: &Logger, block_hash: H256, - ) -> Box + Send> { - Box::new( - self.block_by_hash(logger, block_hash) - .and_then(move |block_opt| { - block_opt.ok_or_else(move || { - anyhow!( - "Ethereum node could not find block with hash {}", - block_hash - ) - }) - }), - ) + ) -> Result { + self.block_by_hash(logger, block_hash) + .await? + .ok_or_else(move || { + anyhow!( + "Ethereum node could not find block with hash {}", + block_hash + ) + }) } - fn block_by_hash( + async fn block_by_hash( &self, logger: &Logger, block_hash: H256, - ) -> Box, Error = Error> + Send> { + ) -> Result, Error> { let web3 = self.web3.clone(); let logger = logger.clone(); let retry_log_message = format!( "eth_getBlockByHash RPC call for block hash {:?}", block_hash ); - Box::new( - retry(retry_log_message, &logger) - .redact_log_urls(true) - .limit(ENV_VARS.request_retries) - .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) - .run(move || { - Box::pin(web3.eth().block_with_txs(BlockId::Hash(block_hash))) - .compat() - .from_err() - .compat() - }) - .map_err(move |e| { - e.into_inner().unwrap_or_else(move || { - anyhow!("Ethereum node took too long to return block {}", block_hash) - }) + + retry(retry_log_message, &logger) + .redact_log_urls(true) + .limit(ENV_VARS.request_retries) + .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) + .run(move || { + let web3 = web3.cheap_clone(); + async move { + web3.eth() + .block_with_txs(BlockId::Hash(block_hash)) + .await + .map_err(Error::from) + } + }) + .map_err(move |e| { + e.into_inner().unwrap_or_else(move || { + anyhow!("Ethereum node took too long to return block {}", block_hash) }) - .boxed() - .compat(), - ) + }) + .await } - fn block_by_number( + async fn block_by_number( &self, logger: &Logger, block_number: BlockNumber, - ) -> Box, Error = Error> + Send> { + ) -> Result, Error> { let web3 = self.web3.clone(); let logger = logger.clone(); let retry_log_message = format!( "eth_getBlockByNumber RPC call for block number {}", block_number ); - Box::new( - retry(retry_log_message, &logger) - .redact_log_urls(true) - .no_limit() - .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) - .run(move || { - let web3 = web3.cheap_clone(); - async move { - web3.eth() - .block_with_txs(BlockId::Number(block_number.into())) - .await - .map_err(Error::from) - } - }) - .map_err(move |e| { - e.into_inner().unwrap_or_else(move || { - anyhow!( - "Ethereum node took too long to return block {}", - block_number - ) - }) + retry(retry_log_message, &logger) + .redact_log_urls(true) + .no_limit() + .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) + .run(move || { + let web3 = web3.cheap_clone(); + async move { + web3.eth() + .block_with_txs(BlockId::Number(block_number.into())) + .await + .map_err(Error::from) + } + }) + .map_err(move |e| { + e.into_inner().unwrap_or_else(move || { + anyhow!( + "Ethereum node took too long to return block {}", + block_number + ) }) - .boxed() - .compat(), - ) + }) + .await } fn load_full_block( diff --git a/chain/ethereum/src/ingestor.rs b/chain/ethereum/src/ingestor.rs index fdbbac0d3a7..935cb525936 100644 --- a/chain/ethereum/src/ingestor.rs +++ b/chain/ethereum/src/ingestor.rs @@ -3,7 
+3,6 @@ use crate::{EthereumAdapter, EthereumAdapterTrait as _}; use graph::blockchain::client::ChainClient; use graph::blockchain::BlockchainKind; use graph::components::network_provider::ChainName; -use graph::futures03::compat::Future01CompatExt as _; use graph::slog::o; use graph::util::backoff::ExponentialBackoff; use graph::{ @@ -175,7 +174,6 @@ impl PollingBlockIngestor { // Get the fully populated block let block = eth_adapter .block_by_hash(logger, block_hash) - .compat() .await? .ok_or(IngestorError::BlockUnavailable(block_hash))?; let ethereum_block = eth_adapter.load_full_block(&logger, block).await?; diff --git a/node/src/manager/commands/chain.rs b/node/src/manager/commands/chain.rs index 2c07c3d37b8..e1f460a7581 100644 --- a/node/src/manager/commands/chain.rs +++ b/node/src/manager/commands/chain.rs @@ -10,7 +10,6 @@ use graph::cheap_clone::CheapClone; use graph::components::network_provider::ChainIdentifierStore; use graph::components::network_provider::ChainName; use graph::components::store::StoreError; -use graph::futures03::compat::Future01CompatExt as _; use graph::prelude::BlockNumber; use graph::prelude::ChainStore as _; use graph::prelude::LightEthereumBlockExt; @@ -273,7 +272,6 @@ pub async fn ingest( ) -> Result<(), Error> { let Some(block) = ethereum_adapter .block_by_number(logger, number) - .compat() .await .map_err(|e| anyhow!("error getting block number {number}: {}", e))? else { diff --git a/node/src/manager/commands/check_blocks.rs b/node/src/manager/commands/check_blocks.rs index 6a82c67c3e6..0afa54bd7d3 100644 --- a/node/src/manager/commands/check_blocks.rs +++ b/node/src/manager/commands/check_blocks.rs @@ -153,7 +153,6 @@ async fn handle_multiple_block_hashes( mod steps { use super::*; - use graph::futures03::compat::Future01CompatExt; use graph::{ anyhow::bail, prelude::serde_json::{self, Value}, @@ -204,7 +203,6 @@ mod steps { ) -> anyhow::Result { let provider_block = ethereum_adapter .block_by_hash(logger, *block_hash) - .compat() .await .with_context(|| format!("failed to fetch block {block_hash}"))? .ok_or_else(|| anyhow!("JRPC provider found no block with hash {block_hash:?}"))?; From 470244400613179bfadd6be1d7622f9dc13c5304 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sat, 19 Apr 2025 22:39:43 -0700 Subject: [PATCH 147/160] chain: Modernize EthereumAdapter.load_full_block --- chain/ethereum/src/adapter.rs | 6 ++-- chain/ethereum/src/ethereum_adapter.rs | 44 +++++++++++--------------- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/chain/ethereum/src/adapter.rs b/chain/ethereum/src/adapter.rs index bc489334ba6..cd174926066 100644 --- a/chain/ethereum/src/adapter.rs +++ b/chain/ethereum/src/adapter.rs @@ -1118,13 +1118,11 @@ pub trait EthereumAdapter: Send + Sync + 'static { ) -> Result, Error>; /// Load full information for the specified `block` (in particular, transaction receipts). - fn load_full_block( + async fn load_full_block( &self, logger: &Logger, block: LightEthereumBlock, - ) -> Pin< - Box> + Send + '_>, - >; + ) -> Result; /// Find a block by its number, according to the Ethereum node. 
/// diff --git a/chain/ethereum/src/ethereum_adapter.rs b/chain/ethereum/src/ethereum_adapter.rs index 2c2ce691c51..716879c83c9 100644 --- a/chain/ethereum/src/ethereum_adapter.rs +++ b/chain/ethereum/src/ethereum_adapter.rs @@ -1397,12 +1397,11 @@ impl EthereumAdapterTrait for EthereumAdapter { .await } - fn load_full_block( + async fn load_full_block( &self, logger: &Logger, block: LightEthereumBlock, - ) -> Pin> + Send + '_>> - { + ) -> Result { let web3 = Arc::clone(&self.web3); let logger = logger.clone(); let block_hash = block.hash.expect("block is missing block hash"); @@ -1411,36 +1410,29 @@ impl EthereumAdapterTrait for EthereumAdapter { // request an empty batch which is not valid in JSON-RPC. if block.transactions.is_empty() { trace!(logger, "Block {} contains no transactions", block_hash); - return Box::pin(std::future::ready(Ok(EthereumBlock { + return Ok(EthereumBlock { block: Arc::new(block), transaction_receipts: Vec::new(), - }))); + }); } let hashes: Vec<_> = block.transactions.iter().map(|txn| txn.hash).collect(); - let supports_block_receipts_future = self.check_block_receipt_support_and_update_cache( - web3.clone(), - block_hash, - self.supports_eip_1898, - self.call_only, - logger.clone(), - ); + let supports_block_receipts = self + .check_block_receipt_support_and_update_cache( + web3.clone(), + block_hash, + self.supports_eip_1898, + self.call_only, + logger.clone(), + ) + .await; - let receipts_future = supports_block_receipts_future - .then(move |supports_block_receipts| { - fetch_receipts_with_retry(web3, hashes, block_hash, logger, supports_block_receipts) + fetch_receipts_with_retry(web3, hashes, block_hash, logger, supports_block_receipts) + .await + .map(|transaction_receipts| EthereumBlock { + block: Arc::new(block), + transaction_receipts, }) - .boxed(); - - let block_future = - futures03::TryFutureExt::map_ok(receipts_future, move |transaction_receipts| { - EthereumBlock { - block: Arc::new(block), - transaction_receipts, - } - }); - - Box::pin(block_future) } fn block_hash_by_block_number( From 15601633c8792b244cbc944cb23684dc0073f873 Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sat, 19 Apr 2025 22:41:09 -0700 Subject: [PATCH 148/160] chain: Modernize EthereumAdapter.block_hash_by_block_number --- chain/ethereum/src/adapter.rs | 4 +- chain/ethereum/src/ethereum_adapter.rs | 51 ++++++++++++-------------- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/chain/ethereum/src/adapter.rs b/chain/ethereum/src/adapter.rs index cd174926066..cab6682ce40 100644 --- a/chain/ethereum/src/adapter.rs +++ b/chain/ethereum/src/adapter.rs @@ -1133,11 +1133,11 @@ pub trait EthereumAdapter: Send + Sync + 'static { /// those confirmations. /// If the Ethereum node is far behind in processing blocks, even old blocks can be subject to /// reorgs. - fn block_hash_by_block_number( + async fn block_hash_by_block_number( &self, logger: &Logger, block_number: BlockNumber, - ) -> Box, Error = Error> + Send>; + ) -> Result, Error>; /// Finds the hash and number of the lowest non-null block with height greater than or equal to /// the given number. 
diff --git a/chain/ethereum/src/ethereum_adapter.rs b/chain/ethereum/src/ethereum_adapter.rs index 716879c83c9..02b7efe7f11 100644 --- a/chain/ethereum/src/ethereum_adapter.rs +++ b/chain/ethereum/src/ethereum_adapter.rs @@ -1435,42 +1435,39 @@ impl EthereumAdapterTrait for EthereumAdapter { }) } - fn block_hash_by_block_number( + async fn block_hash_by_block_number( &self, logger: &Logger, block_number: BlockNumber, - ) -> Box, Error = Error> + Send> { + ) -> Result, Error> { let web3 = self.web3.clone(); let retry_log_message = format!( "eth_getBlockByNumber RPC call for block number {}", block_number ); - Box::new( - retry(retry_log_message, logger) - .redact_log_urls(true) - .no_limit() - .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) - .run(move || { - let web3 = web3.cheap_clone(); - async move { - web3.eth() - .block(BlockId::Number(block_number.into())) - .await - .map(|block_opt| block_opt.and_then(|block| block.hash)) - .map_err(Error::from) - } + retry(retry_log_message, logger) + .redact_log_urls(true) + .no_limit() + .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) + .run(move || { + let web3 = web3.cheap_clone(); + async move { + web3.eth() + .block(BlockId::Number(block_number.into())) + .await + .map(|block_opt| block_opt.and_then(|block| block.hash)) + .map_err(Error::from) + } + }) + .await + .map_err(move |e| { + e.into_inner().unwrap_or_else(move || { + anyhow!( + "Ethereum node took too long to return data for block #{}", + block_number + ) }) - .boxed() - .compat() - .map_err(move |e| { - e.into_inner().unwrap_or_else(move || { - anyhow!( - "Ethereum node took too long to return data for block #{}", - block_number - ) - }) - }), - ) + }) } fn get_balance( From 45dda04d4ae18231439a995252ca3e8b926c750d Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Sat, 19 Apr 2025 22:44:53 -0700 Subject: [PATCH 149/160] chain: Modernize EthereumAdapter.get_balance and get_code --- chain/ethereum/src/adapter.rs | 9 +++---- chain/ethereum/src/ethereum_adapter.rs | 26 +++++++++---------- chain/ethereum/src/runtime/runtime_adapter.rs | 15 +++-------- 3 files changed, 19 insertions(+), 31 deletions(-) diff --git a/chain/ethereum/src/adapter.rs b/chain/ethereum/src/adapter.rs index cab6682ce40..93a7fc60781 100644 --- a/chain/ethereum/src/adapter.rs +++ b/chain/ethereum/src/adapter.rs @@ -7,7 +7,6 @@ use graph::data_source::common::ContractCall; use graph::firehose::CallToFilter; use graph::firehose::CombinedFilter; use graph::firehose::LogFilter; -use graph::futures01::Future; use graph::prelude::web3::types::Bytes; use graph::prelude::web3::types::H160; use graph::prelude::web3::types::U256; @@ -1170,20 +1169,20 @@ pub trait EthereumAdapter: Send + Sync + 'static { cache: Arc, ) -> Result>, call::Source)>, ContractCallError>; - fn get_balance( + async fn get_balance( &self, logger: &Logger, address: H160, block_ptr: BlockPtr, - ) -> Box + Send>; + ) -> Result; // Returns the compiled bytecode of a smart contract - fn get_code( + async fn get_code( &self, logger: &Logger, address: H160, block_ptr: BlockPtr, - ) -> Box + Send>; + ) -> Result; } #[cfg(test)] diff --git a/chain/ethereum/src/ethereum_adapter.rs b/chain/ethereum/src/ethereum_adapter.rs index 02b7efe7f11..1c1d214f6a5 100644 --- a/chain/ethereum/src/ethereum_adapter.rs +++ b/chain/ethereum/src/ethereum_adapter.rs @@ -500,12 +500,12 @@ impl EthereumAdapter { } } - fn code( + async fn code( &self, logger: &Logger, address: Address, block_ptr: BlockPtr, - ) -> impl Future + Send { + ) -> Result { let web3 = 
self.web3.clone(); let logger = Logger::new(&logger, o!("provider" => self.provider.clone())); @@ -531,17 +531,16 @@ impl EthereumAdapter { } } }) + .await .map_err(|e| e.into_inner().unwrap_or(EthereumRpcError::Timeout)) - .boxed() - .compat() } - fn balance( + async fn balance( &self, logger: &Logger, address: Address, block_ptr: BlockPtr, - ) -> impl Future + Send { + ) -> Result { let web3 = self.web3.clone(); let logger = Logger::new(&logger, o!("provider" => self.provider.clone())); @@ -567,9 +566,8 @@ impl EthereumAdapter { } } }) + .await .map_err(|e| e.into_inner().unwrap_or(EthereumRpcError::Timeout)) - .boxed() - .compat() } async fn call( @@ -1470,32 +1468,32 @@ impl EthereumAdapterTrait for EthereumAdapter { }) } - fn get_balance( + async fn get_balance( &self, logger: &Logger, address: H160, block_ptr: BlockPtr, - ) -> Box + Send> { + ) -> Result { debug!( logger, "eth_getBalance"; "address" => format!("{}", address), "block" => format!("{}", block_ptr) ); - Box::new(self.balance(logger, address, block_ptr)) + self.balance(logger, address, block_ptr).await } - fn get_code( + async fn get_code( &self, logger: &Logger, address: H160, block_ptr: BlockPtr, - ) -> Box + Send> { + ) -> Result { debug!( logger, "eth_getCode"; "address" => format!("{}", address), "block" => format!("{}", block_ptr) ); - Box::new(self.code(logger, address, block_ptr)) + self.code(logger, address, block_ptr).await } async fn next_existing_ptr_to_number( diff --git a/chain/ethereum/src/runtime/runtime_adapter.rs b/chain/ethereum/src/runtime/runtime_adapter.rs index 01f148bdd4c..951958d786b 100644 --- a/chain/ethereum/src/runtime/runtime_adapter.rs +++ b/chain/ethereum/src/runtime/runtime_adapter.rs @@ -14,7 +14,6 @@ use graph::data::store::scalar::BigInt; use graph::data::subgraph::API_VERSION_0_0_9; use graph::data_source; use graph::data_source::common::{ContractCall, MappingABI}; -use graph::futures03::compat::Future01CompatExt; use graph::prelude::web3::types::H160; use graph::runtime::gas::Gas; use graph::runtime::{AscIndexId, IndexForAscTypeId}; @@ -227,11 +226,7 @@ fn eth_get_balance( let address: H160 = asc_get(ctx.heap, wasm_ptr.into(), &ctx.gas, 0)?; - let result = graph::block_on( - eth_adapter - .get_balance(logger, address, block_ptr.clone()) - .compat(), - ); + let result = graph::block_on(eth_adapter.get_balance(logger, address, block_ptr.clone())); match result { Ok(v) => { @@ -265,12 +260,8 @@ fn eth_has_code( let address: H160 = asc_get(ctx.heap, wasm_ptr.into(), &ctx.gas, 0)?; - let result = graph::block_on( - eth_adapter - .get_code(logger, address, block_ptr.clone()) - .compat(), - ) - .map(|v| !v.0.is_empty()); + let result = graph::block_on(eth_adapter.get_code(logger, address, block_ptr.clone())) + .map(|v| !v.0.is_empty()); match result { Ok(v) => Ok(asc_new(ctx.heap, &AscWrapped { inner: v }, &ctx.gas)?), From 72834fdfb1519d794025788ff2235102d2c6a40d Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Wed, 23 Apr 2025 11:01:14 -0700 Subject: [PATCH 150/160] graph, store: Make sure vid batching works with large vids Changing to the new vid scheme of `block_num << 32 + sequence_num` revealed some numerical problems in the batching code. 
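As a rough illustration of the roundoff issue (a sketch added to this message only, not part of the diff): vids produced by the new scheme lie well above 2^53, the largest magnitude at which every integer is still exactly representable as an f64, so converting a point itself to f64 can shift it, while converting the small difference between two neighboring points stays exact:

    // Rust sketch; both vids are taken from the new vid_batcher test data
    fn main() {
        let vid: i64 = 186155521970012263;     // ~1.86e17, far beyond 2^53
        assert_ne!(vid, (vid as f64) as i64);  // round-tripping through f64 drifts
        let next: i64 = 186155521970012265;
        assert_eq!((next - vid) as f64, 2.0);  // small differences convert exactly
    }

The changes below therefore avoid converting points directly and only convert differences between points to f64.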
--- graph/src/util/ogive.rs | 50 +++++++++++---- store/postgres/src/vid_batcher.rs | 101 +++++++++++++++++++++++++++++- 2 files changed, 137 insertions(+), 14 deletions(-) diff --git a/graph/src/util/ogive.rs b/graph/src/util/ogive.rs index 38300e088e6..29938b03b17 100644 --- a/graph/src/util/ogive.rs +++ b/graph/src/util/ogive.rs @@ -19,7 +19,7 @@ use crate::{internal_error, prelude::StoreError}; /// more fun to say. pub struct Ogive { /// The breakpoints of the piecewise linear function - points: Vec, + points: Vec, /// The size of each bin; the linear piece from `points[i]` to /// `points[i+1]` rises by this much bin_size: f64, @@ -46,7 +46,6 @@ impl Ogive { let bins = points.len() - 1; let bin_size = total as f64 / bins as f64; let range = points[0]..=points[bins]; - let points = points.into_iter().map(|p| p as f64).collect(); Ok(Self { points, bin_size, @@ -90,7 +89,6 @@ impl Ogive { fn interval_start(&self, point: i64) -> Result { self.check_in_range(point)?; - let point = point as f64; let idx = self .points .iter() @@ -102,16 +100,22 @@ impl Ogive { /// Return the value of the ogive at `point`, i.e., `f(point)`. It is an /// error if `point` is outside the range of points of this ogive. + /// + /// If `i` is such that + /// `points[i] <= point < points[i+1]`, then + /// ```text + /// f(point) = i * bin_size + (point - points[i]) / (points[i+1] - points[i]) * bin_size + /// ``` + // See the comment on `inverse` for numerical considerations fn value(&self, point: i64) -> Result { if self.points.len() == 1 { return Ok(*self.range.end()); } let idx = self.interval_start(point)?; - let bin_size = self.bin_size as f64; let (a, b) = (self.points[idx], self.points[idx + 1]); - let point = point as f64; - let value = (idx as f64 + (point - a) / (b - a)) * bin_size; + let offset = (point - a) as f64 / (b - a) as f64; + let value = (idx as f64 + offset) * self.bin_size; Ok(value as i64) } @@ -119,18 +123,38 @@ impl Ogive { /// It is an error if `value` is negative. If `value` is greater than /// the total count of the ogive, the maximum point of the ogive is /// returned. + /// + /// For `points[j] <= v < points[j+1]`, the value of `g(v)` is + /// ```text + /// g(v) = (1-lambda)*points[j] + lambda * points[j+1] + /// ``` + /// where `lambda = (v - j * bin_size) / bin_size` + /// + // Note that in the definition of `lambda`, the numerator is + // `v.rem_euclid(bin_size)` + // + // Numerical consideration: in these calculations, we need to be careful + // to never convert one of the points directly to f64 since they can be + // so large that the conversion from i64 to f64 loses precision. That + // loss of precision can cause the convex combination of `points[j]` and + // `points[j+1]` above to lie outside of that interval when `(points[j] + // as f64) as i64 < points[j]` + // + // We therefore try to only convert differences between points to f64 + // which are much smaller. 
fn inverse(&self, value: i64) -> Result { - let value = value as f64; - if value < 0.0 { + if value < 0 { return Err(internal_error!("value {} can not be negative", value)); } - let idx = (value / self.bin_size) as usize; - if idx >= self.points.len() - 1 { + let j = (value / self.bin_size as i64) as usize; + if j >= self.points.len() - 1 { return Ok(*self.range.end()); } - let (a, b) = (self.points[idx] as f64, self.points[idx + 1] as f64); - let lambda = (value - idx as f64 * self.bin_size) / self.bin_size; - let x = (1.0 - lambda) * a + lambda * b; + let (a, b) = (self.points[j], self.points[j + 1]); + // This is the same calculation as in the comment above, but + // rewritten to be more friendly to lossy calculations with f64 + let offset = (value as f64).rem_euclid(self.bin_size) * (b - a) as f64; + let x = a + (offset / self.bin_size) as i64; Ok(x as i64) } diff --git a/store/postgres/src/vid_batcher.rs b/store/postgres/src/vid_batcher.rs index 93197b5a85d..c1e69ebe017 100644 --- a/store/postgres/src/vid_batcher.rs +++ b/store/postgres/src/vid_batcher.rs @@ -244,7 +244,7 @@ impl VidBatcher { } } -#[derive(Copy, Clone, QueryableByName)] +#[derive(Debug, Copy, Clone, QueryableByName)] pub(crate) struct VidRange { #[diesel(sql_type = BigInt, column_name = "min_vid")] pub min: i64, @@ -470,4 +470,103 @@ mod tests { assert_eq!(1, ogive.start()); assert_eq!(100_000, ogive.end()); } + + #[test] + fn vid_batcher_handles_large_vid() { + // An example with very large `vid` values which come from the new + // schema of setting the `vid` to `block_num << 32 + sequence_num`. + // These values are taken from an actual example subgraph and cuased + // errors because of numerical roundoff issues + const MIN: i64 = 186155521970012263; + const MAX: i64 = 187989601854423140; + const BOUNDS: &[i64] = &[ + 186155521970012263, + 186155552034783334, + 186166744719556711, + 187571594162339943, + 187571628522078310, + 187576619274076263, + 187576649338847334, + 187580570643988583, + 187590242910339175, + 187590268680142950, + 187963647367053415, + 187970828552372324, + 187986749996138596, + 187989601854423140, + ]; + + // The start, end, and batch size we expect when we run through the + // `vid_batcher` we set up below with `MIN`, `MAX` and `BOUNDS` + const STEPS: &[(i64, i64, i64)] = &[ + (186155521970012263, 186155521970012265, 2), + (186155521970012266, 186155521970012269, 3), + (186155521970012270, 186155521970012276, 6), + (186155521970012277, 186155521970012289, 12), + (186155521970012290, 186155521970012312, 22), + (186155521970012313, 186155521970012353, 40), + (186155521970012354, 186155521970012426, 72), + (186155521970012427, 186155521970012557, 130), + (186155521970012558, 186155521970012792, 234), + (186155521970012793, 186155521970013215, 422), + (186155521970013216, 186155521970013976, 760), + (186155521970013977, 186155521970015346, 1369), + (186155521970015347, 186155521970017812, 2465), + (186155521970017813, 186155521970022250, 4437), + (186155521970022251, 186155521970030238, 7987), + (186155521970030239, 186155521970044616, 14377), + (186155521970044617, 186155521970070495, 25878), + (186155521970070496, 186155521970117077, 46581), + (186155521970117078, 186155521970200925, 83847), + (186155521970200926, 186155521970351851, 150925), + (186155521970351852, 186155521970623517, 271665), + (186155521970623518, 186155521971112515, 488997), + (186155521971112516, 186155521971992710, 880194), + (186155521971992711, 186155521973577061, 1584350), + (186155521973577062, 186155521976428893, 
2851831), + (186155521976428894, 186155521981562190, 5133296), + (186155521981562191, 186155521990802124, 9239933), + (186155521990802125, 186155522007434004, 16631879), + (186155522007434005, 186155522037371388, 29937383), + (186155522037371389, 186155522091258678, 53887289), + (186155522091258679, 186155522188255800, 96997121), + (186155522188255801, 186155522362850619, 174594818), + (186155522362850620, 186155522677121292, 314270672), + (186155522677121293, 186155523242808503, 565687210), + (186155523242808504, 186155524261045483, 1018236979), + (186155524261045484, 186155526093872046, 1832826562), + (186155526093872047, 186155529392959859, 3299087812), + (186155529392959860, 186155535331317922, 5938358062), + (186155535331317923, 186155546020362436, 10689044513), + (186155546020362437, 186160475833232786, 4929812870349), + (186160475833232787, 186998193536485260, 837717703252473), + (186998193536485261, 187574948946679478, 576755410194217), + (187574948946679479, 187590253155585376, 15304208905897), + (187590253155585377, 187989601854423140, 399348698837763), + ]; + + let vid_range = VidRange::new(MIN, MAX); + let batch_size = AdaptiveBatchSize { + size: 10000, + target: Duration::from_secs(180), + }; + + let mut vid_batcher = VidBatcher::new(BOUNDS.to_vec(), vid_range, batch_size).unwrap(); + vid_batcher.step_timer.set(Duration::from_secs(100)); + + // Run through the entire `vid_batcher`, collecting start and end in + // `steps` + let steps = std::iter::from_fn(|| { + vid_batcher + .step(|start, end| Ok((start, end, end - start))) + .unwrap() + .1 + }) + .fold(Vec::new(), |mut steps, (start, end, step)| { + steps.push((start, end, step)); + steps + }); + + assert_eq!(STEPS, &steps); + } } From 08c4130d3eff98e048f6f4998b3faf1914a73dfe Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Mon, 21 Apr 2025 11:08:22 -0700 Subject: [PATCH 151/160] node: 'graphman copy status': Use bigger columns for next and target The vids for huge subgraphs can go into the billions; this change makes the columns big enough for that Also expand the duration column a little for very long running durations --- node/src/manager/commands/copy.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/node/src/manager/commands/copy.rs b/node/src/manager/commands/copy.rs index c09630ae261..57f207b5b98 100644 --- a/node/src/manager/commands/copy.rs +++ b/node/src/manager/commands/copy.rs @@ -336,10 +336,10 @@ pub fn status(pools: HashMap, dst: &DeploymentSearch) -> println!(); println!( - "{:^30} | {:^8} | {:^8} | {:^8} | {:^8}", + "{:^30} | {:^10} | {:^10} | {:^8} | {:^10}", "entity type", "next", "target", "batch", "duration" ); - println!("{:-<74}", "-"); + println!("{:-<80}", "-"); for table in tables { let status = match &table.finished_at { // table finished @@ -352,7 +352,7 @@ pub fn status(pools: HashMap, dst: &DeploymentSearch) -> None => ".", }; println!( - "{} {:<28} | {:>8} | {:>8} | {:>8} | {:>8}", + "{} {:<28} | {:>10} | {:>10} | {:>8} | {:>10}", status, table.entity_type, table.next_vid, From 87933b295dca02384bd531fb3d17ef3b54c476d4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 05:03:52 +0100 Subject: [PATCH 152/160] build(deps): bump anyhow from 1.0.86 to 1.0.98 (#5966) Bumps [anyhow](https://github.com/dtolnay/anyhow) from 1.0.86 to 1.0.98. 
- [Release notes](https://github.com/dtolnay/anyhow/releases) - [Commits](https://github.com/dtolnay/anyhow/compare/1.0.86...1.0.98) --- updated-dependencies: - dependency-name: anyhow dependency-version: 1.0.98 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- store/postgres/Cargo.toml | 2 +- tests/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index abf9368d3e4..cd30024ca1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -129,9 +129,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "arbitrary" diff --git a/store/postgres/Cargo.toml b/store/postgres/Cargo.toml index c95b3cb83a4..00bcc5f0e25 100644 --- a/store/postgres/Cargo.toml +++ b/store/postgres/Cargo.toml @@ -27,7 +27,7 @@ rand = "0.8.4" serde = { workspace = true } serde_json = { workspace = true } stable-hash_legacy = { git = "https://github.com/graphprotocol/stable-hash", branch = "old", package = "stable-hash" } -anyhow = "1.0.86" +anyhow = "1.0.98" git-testament = "0.2.6" itertools = "0.13.0" hex = "0.4.3" diff --git a/tests/Cargo.toml b/tests/Cargo.toml index 6f5e317fa8b..42d3aaf7d48 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -26,5 +26,5 @@ tokio = { version = "1.44.2", features = ["rt", "macros", "process"] } secp256k1 = { version = "0.21", features = ["recovery"] } [dev-dependencies] -anyhow = "1.0.86" +anyhow = "1.0.98" tokio-stream = "0.1" From e717caf168987d43c5d7f461447d02edbf56dfd8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 05:04:13 +0100 Subject: [PATCH 153/160] build(deps): bump tower-test from `39adf5c` to `abb375d` (#5965) Bumps [tower-test](https://github.com/tower-rs/tower) from `39adf5c` to `abb375d`. - [Release notes](https://github.com/tower-rs/tower/releases) - [Commits](https://github.com/tower-rs/tower/compare/39adf5c509a1b2141f679654d8317524ca96b58b...abb375d08cf0ba34c1fe76f66f1aba3dc4341013) --- updated-dependencies: - dependency-name: tower-test dependency-version: abb375d08cf0ba34c1fe76f66f1aba3dc4341013 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 102 ++++++++++++++++++++++++++--------------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cd30024ca1b..be707f464de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -233,7 +233,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util 0.7.11", - "tower-service 0.3.3", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -361,9 +361,9 @@ dependencies = [ "rustversion", "serde", "sync_wrapper 1.0.1", - "tower 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)", - "tower-layer 0.3.3", - "tower-service 0.3.3", + "tower 0.4.13", + "tower-layer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -397,9 +397,9 @@ dependencies = [ "sync_wrapper 1.0.1", "tokio", "tokio-tungstenite", - "tower 0.5.2", - "tower-layer 0.3.3", - "tower-service 0.3.3", + "tower 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "tower-layer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "tracing", ] @@ -419,8 +419,8 @@ dependencies = [ "pin-project-lite", "rustversion", "sync_wrapper 0.1.2", - "tower-layer 0.3.3", - "tower-service 0.3.3", + "tower-layer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -438,8 +438,8 @@ dependencies = [ "pin-project-lite", "rustversion", "sync_wrapper 1.0.1", - "tower-layer 0.3.3", - "tower-service 0.3.3", + "tower-layer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "tracing", ] @@ -2036,7 +2036,7 @@ dependencies = [ "graph-chain-substreams", "graph-runtime-wasm", "serde_yaml", - "tower 0.4.13 (git+https://github.com/tower-rs/tower.git)", + "tower 0.5.2 (git+https://github.com/tower-rs/tower.git)", "tower-test", "wiremock", ] @@ -2583,7 +2583,7 @@ dependencies = [ "pin-project-lite", "socket2", "tokio", - "tower-service 0.3.3", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "tracing", "want", ] @@ -2624,7 +2624,7 @@ dependencies = [ "rustls-pki-types", "tokio", "tokio-rustls", - "tower-service 0.3.3", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2637,7 +2637,7 @@ dependencies = [ "hyper-util", "pin-project-lite", "tokio", - "tower-service 0.3.3", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2653,7 +2653,7 @@ dependencies = [ "native-tls", "tokio", "tokio-native-tls", - "tower-service 0.3.3", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2671,7 +2671,7 @@ dependencies = [ "pin-project-lite", "socket2", "tokio", - "tower-service 0.3.3", + "tower-service 0.3.3 
(registry+https://github.com/rust-lang/crates.io-index)", "tracing", ] @@ -4317,8 +4317,8 @@ dependencies = [ "tokio-native-tls", "tokio-rustls", "tokio-util 0.7.11", - "tower 0.5.2", - "tower-service 0.3.3", + "tower 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "url", "wasm-bindgen", "wasm-bindgen-futures", @@ -5634,9 +5634,9 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-stream", - "tower 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)", - "tower-layer 0.3.3", - "tower-service 0.3.3", + "tower 0.4.13", + "tower-layer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "tracing", ] @@ -5669,43 +5669,43 @@ dependencies = [ "slab", "tokio", "tokio-util 0.7.11", - "tower-layer 0.3.3", - "tower-service 0.3.3", + "tower-layer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "tracing", ] [[package]] name = "tower" -version = "0.4.13" -source = "git+https://github.com/tower-rs/tower.git#39adf5c509a1b2141f679654d8317524ca96b58b" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", - "hdrhistogram", - "indexmap 1.9.3", "pin-project-lite", - "slab", - "sync_wrapper 0.1.2", + "sync_wrapper 1.0.1", "tokio", - "tokio-util 0.7.11", - "tower-layer 0.3.2", - "tower-service 0.3.2", + "tower-layer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "tracing", ] [[package]] name = "tower" version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +source = "git+https://github.com/tower-rs/tower.git#abb375d08cf0ba34c1fe76f66f1aba3dc4341013" dependencies = [ "futures-core", "futures-util", + "hdrhistogram", + "indexmap 2.9.0", "pin-project-lite", + "slab", "sync_wrapper 1.0.1", "tokio", - "tower-layer 0.3.3", - "tower-service 0.3.3", + "tokio-util 0.7.11", + "tower-layer 0.3.3 (git+https://github.com/tower-rs/tower.git)", + "tower-service 0.3.3 (git+https://github.com/tower-rs/tower.git)", "tracing", ] @@ -5721,15 +5721,10 @@ dependencies = [ "http-body 1.0.0", "http-body-util", "pin-project-lite", - "tower-layer 0.3.3", - "tower-service 0.3.3", + "tower-layer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "tower-layer" -version = "0.3.2" -source = "git+https://github.com/tower-rs/tower.git#39adf5c509a1b2141f679654d8317524ca96b58b" - [[package]] name = "tower-layer" version = "0.3.3" @@ -5737,9 +5732,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] -name = "tower-service" -version = "0.3.2" -source = "git+https://github.com/tower-rs/tower.git#39adf5c509a1b2141f679654d8317524ca96b58b" +name = "tower-layer" +version = "0.3.3" +source = "git+https://github.com/tower-rs/tower.git#abb375d08cf0ba34c1fe76f66f1aba3dc4341013" [[package]] name = "tower-service" @@ -5747,17 +5742,22 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" +[[package]] +name = "tower-service" +version = "0.3.3" +source = "git+https://github.com/tower-rs/tower.git#abb375d08cf0ba34c1fe76f66f1aba3dc4341013" + [[package]] name = "tower-test" -version = "0.4.0" -source = "git+https://github.com/tower-rs/tower.git#39adf5c509a1b2141f679654d8317524ca96b58b" +version = "0.4.1" +source = "git+https://github.com/tower-rs/tower.git#abb375d08cf0ba34c1fe76f66f1aba3dc4341013" dependencies = [ "futures-util", "pin-project-lite", "tokio", "tokio-test", - "tower-layer 0.3.2", - "tower-service 0.3.2", + "tower-layer 0.3.3 (git+https://github.com/tower-rs/tower.git)", + "tower-service 0.3.3 (git+https://github.com/tower-rs/tower.git)", ] [[package]] From 65cf0d97c54b60e08b4abf44ef501a145a64c6cd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 05:32:09 +0100 Subject: [PATCH 154/160] build(deps): bump blake3 from 1.6.1 to 1.8.2 (#5963) Bumps [blake3](https://github.com/BLAKE3-team/BLAKE3) from 1.6.1 to 1.8.2. - [Release notes](https://github.com/BLAKE3-team/BLAKE3/releases) - [Commits](https://github.com/BLAKE3-team/BLAKE3/compare/1.6.1...1.8.2) --- updated-dependencies: - dependency-name: blake3 dependency-version: 1.8.2 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 8 ++++---- server/index-node/Cargo.toml | 2 +- store/postgres/Cargo.toml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index be707f464de..87984502768 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -573,9 +573,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.6.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "675f87afced0413c9bb02843499dbbd3882a237645883f71a2b59644a6d2f753" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" dependencies = [ "arrayref", "arrayvec 0.7.4", @@ -2146,7 +2146,7 @@ dependencies = [ name = "graph-server-index-node" version = "0.36.0" dependencies = [ - "blake3 1.6.1", + "blake3 1.8.2", "git-testament", "graph", "graph-chain-arweave", @@ -2179,7 +2179,7 @@ dependencies = [ "Inflector", "anyhow", "async-trait", - "blake3 1.6.1", + "blake3 1.8.2", "chrono", "clap", "derive_more 2.0.1", diff --git a/server/index-node/Cargo.toml b/server/index-node/Cargo.toml index 63c68a311a8..f5274c75f40 100644 --- a/server/index-node/Cargo.toml +++ b/server/index-node/Cargo.toml @@ -4,7 +4,7 @@ version.workspace = true edition.workspace = true [dependencies] -blake3 = "1.6" +blake3 = "1.8" graph = { path = "../../graph" } graph-graphql = { path = "../../graphql" } graph-chain-arweave = { path = "../../chain/arweave" } diff --git a/store/postgres/Cargo.toml b/store/postgres/Cargo.toml index 00bcc5f0e25..13fbf14ba14 100644 --- a/store/postgres/Cargo.toml +++ b/store/postgres/Cargo.toml @@ -5,7 +5,7 @@ edition.workspace = true [dependencies] async-trait = "0.1.50" -blake3 = "1.6" +blake3 = "1.8" chrono = { workspace = true } derive_more = { version = "2.0.1", features = ["full"] } diesel = { workspace = true } From 8afd4607952e107fbe18c8b531972e74c34435b5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 05:32:37 +0100 Subject: [PATCH 155/160] build(deps): bump proc-macro2 from 1.0.94 to 1.0.95 (#5962) Bumps [proc-macro2](https://github.com/dtolnay/proc-macro2) from 1.0.94 to 1.0.95. - [Release notes](https://github.com/dtolnay/proc-macro2/releases) - [Commits](https://github.com/dtolnay/proc-macro2/compare/1.0.94...1.0.95) --- updated-dependencies: - dependency-name: proc-macro2 dependency-version: 1.0.95 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- graph/derive/Cargo.toml | 2 +- runtime/derive/Cargo.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 87984502768..139867f62a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3907,9 +3907,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.94" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] diff --git a/graph/derive/Cargo.toml b/graph/derive/Cargo.toml index f43691ba463..773fd059d43 100644 --- a/graph/derive/Cargo.toml +++ b/graph/derive/Cargo.toml @@ -14,7 +14,7 @@ proc-macro = true [dependencies] syn = { workspace = true } quote = "1.0" -proc-macro2 = "1.0.94" +proc-macro2 = "1.0.95" heck = "0.5" [dev-dependencies] diff --git a/runtime/derive/Cargo.toml b/runtime/derive/Cargo.toml index bc3f74ec9f6..d9b3a282995 100644 --- a/runtime/derive/Cargo.toml +++ b/runtime/derive/Cargo.toml @@ -9,5 +9,5 @@ proc-macro = true [dependencies] syn = { workspace = true } quote = "1.0" -proc-macro2 = "1.0.94" +proc-macro2 = "1.0.95" heck = "0.5" From 948a1ec39994495f4c7bfc7e9a50ba3818270d33 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 05:32:56 +0100 Subject: [PATCH 156/160] build(deps): bump postgres-openssl from 0.5.0 to 0.5.1 (#5961) Bumps [postgres-openssl](https://github.com/sfackler/rust-postgres) from 0.5.0 to 0.5.1. - [Release notes](https://github.com/sfackler/rust-postgres/releases) - [Commits](https://github.com/sfackler/rust-postgres/compare/postgres-openssl-v0.5.0...postgres-openssl-v0.5.1) --- updated-dependencies: - dependency-name: postgres-openssl dependency-version: 0.5.1 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 23 +++++++++++------------ store/postgres/Cargo.toml | 2 +- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 139867f62a5..a3fa9422cc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3780,11 +3780,10 @@ dependencies = [ [[package]] name = "postgres-openssl" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1de0ea6504e07ca78355a6fb88ad0f36cafe9e696cbc6717f16a207f3a60be72" +checksum = "fb14e4bbc2c0b3d165bf30b79c7a9c10412dff9d98491ffdd64ed810ab891d21" dependencies = [ - "futures 0.3.30", "openssl", "tokio", "tokio-openssl", @@ -3793,27 +3792,27 @@ dependencies = [ [[package]] name = "postgres-protocol" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b6c5ef183cd3ab4ba005f1ca64c21e8bd97ce4699cfea9e8d9a2c4958ca520" +checksum = "76ff0abab4a9b844b93ef7b81f1efc0a366062aaef2cd702c76256b5dc075c54" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "byteorder", "bytes", "fallible-iterator 0.2.0", "hmac", "md-5", "memchr", - "rand 0.8.5", + "rand 0.9.0", "sha2", "stringprep", ] [[package]] name = "postgres-types" -version = "0.2.6" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d2234cdee9408b523530a9b6d2d6b373d1db34f6a8e51dc03ded1828d7fb67c" +checksum = "613283563cd90e1dfc3518d548caee47e0e725455ed619881f5cf21f36de4b48" dependencies = [ "bytes", "fallible-iterator 0.2.0", @@ -5440,9 +5439,9 @@ dependencies = [ [[package]] name = "tokio-postgres" -version = "0.7.10" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d340244b32d920260ae7448cb72b6e238bddc3d4f7603394e7dd46ed8e48f5b8" +checksum = "6c95d533c83082bb6490e0189acaa0bbeef9084e60471b696ca6988cd0541fb0" dependencies = [ "async-trait", "byteorder", @@ -5457,7 +5456,7 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "postgres-types", - "rand 0.8.5", + "rand 0.9.0", "socket2", "tokio", "tokio-util 0.7.11", diff --git a/store/postgres/Cargo.toml b/store/postgres/Cargo.toml index 13fbf14ba14..160826ee601 100644 --- a/store/postgres/Cargo.toml +++ b/store/postgres/Cargo.toml @@ -22,7 +22,7 @@ lru_time_cache = "0.11" maybe-owned = "0.3.4" postgres = "0.19.1" openssl = "0.10.72" -postgres-openssl = "0.5.0" +postgres-openssl = "0.5.1" rand = "0.8.4" serde = { workspace = true } serde_json = { workspace = true } From 8f05617350f4dbb722fa083b70a322f1b6f956b1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 05:33:15 +0100 Subject: [PATCH 157/160] build(deps): bump axum from 0.8.1 to 0.8.3 (#5959) Bumps [axum](https://github.com/tokio-rs/axum) from 0.8.1 to 0.8.3. - [Release notes](https://github.com/tokio-rs/axum/releases) - [Changelog](https://github.com/tokio-rs/axum/blob/main/CHANGELOG.md) - [Commits](https://github.com/tokio-rs/axum/commits/axum-v0.8.3) --- updated-dependencies: - dependency-name: axum dependency-version: 0.8.3 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 16 ++++++++-------- Cargo.toml | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a3fa9422cc2..431f996931f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -226,7 +226,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6bf2882c816094fef6e39d381b8e9b710e5943e7bdef5198496441d5083164fa" dependencies = [ "async-graphql", - "axum 0.8.1", + "axum 0.8.3", "bytes", "futures-util", "serde_json", @@ -368,11 +368,11 @@ dependencies = [ [[package]] name = "axum" -version = "0.8.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8" +checksum = "de45108900e1f9b9242f7f2e254aa3e2c029c921c258fe9e6b4217eeebd54288" dependencies = [ - "axum-core 0.5.0", + "axum-core 0.5.2", "base64 0.22.1", "bytes", "form_urlencoded", @@ -425,12 +425,12 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.5.0" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1362f362fd16024ae199c1970ce98f9661bf5ef94b9808fee734bc3698b733" +checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6" dependencies = [ "bytes", - "futures-util", + "futures-core", "http 1.1.0", "http-body 1.0.0", "http-body-util", @@ -2264,7 +2264,7 @@ dependencies = [ "anyhow", "async-graphql", "async-graphql-axum", - "axum 0.8.1", + "axum 0.8.3", "chrono", "diesel", "graph", diff --git a/Cargo.toml b/Cargo.toml index e258a84082a..f180a44e9f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,7 +42,7 @@ license = "MIT OR Apache-2.0" anyhow = "1.0" async-graphql = { version = "7.0.15", features = ["chrono"] } async-graphql-axum = "7.0.15" -axum = "0.8.1" +axum = "0.8.3" chrono = "0.4.38" bs58 = "0.5.1" clap = { version = "4.5.4", features = ["derive", "env", "wrap_help"] } From 0f0808ed708d652aa578c1ee2cbc333c861ec771 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 05:33:48 +0100 Subject: [PATCH 158/160] build(deps): bump quote from 1.0.36 to 1.0.40 (#5958) Bumps [quote](https://github.com/dtolnay/quote) from 1.0.36 to 1.0.40. - [Release notes](https://github.com/dtolnay/quote/releases) - [Commits](https://github.com/dtolnay/quote/compare/1.0.36...1.0.40) --- updated-dependencies: - dependency-name: quote dependency-version: 1.0.40 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 431f996931f..3ef66a807f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4092,9 +4092,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.36" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] From fc08039a9c96feb23dfb58fc70b211cb3f5466b2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 05:34:11 +0100 Subject: [PATCH 159/160] build(deps): bump json-structural-diff from 0.1.0 to 0.2.0 (#5957) Bumps [json-structural-diff](https://github.com/Luni-4/json-structural-diff) from 0.1.0 to 0.2.0. - [Release notes](https://github.com/Luni-4/json-structural-diff/releases) - [Commits](https://github.com/Luni-4/json-structural-diff/compare/v0.1.0...v0.2.0) --- updated-dependencies: - dependency-name: json-structural-diff dependency-version: 0.2.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 52 +++++++++++++++++++++++++------------------------ node/Cargo.toml | 2 +- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3ef66a807f8..c47e24263ef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -724,7 +724,7 @@ dependencies = [ "anstyle", "clap_lex", "strsim", - "terminal_size 0.3.0", + "terminal_size", ] [[package]] @@ -766,18 +766,15 @@ dependencies = [ [[package]] name = "console" -version = "0.13.0" +version = "0.15.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a50aab2529019abfabfa93f1e6c41ef392f91fbf179b347a7e96abb524884a08" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" dependencies = [ "encode_unicode", - "lazy_static", "libc", - "regex", - "terminal_size 0.1.17", - "unicode-width", - "winapi", - "winapi-util", + "once_cell", + "unicode-width 0.2.0", + "windows-sys 0.59.0", ] [[package]] @@ -1422,9 +1419,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "encode_unicode" -version = "0.3.6" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "encoding_rs" @@ -3032,9 +3029,9 @@ dependencies = [ [[package]] name = "json-structural-diff" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25c7940d3c84d2079306c176c7b2b37622b6bc5e43fbd1541b1e4a4e1fd02045" +checksum = "e878e36a8a44c158505c2c818abdc1350413ad83dcb774a0459f6a7ef2b65cbf" dependencies = [ "console", "difflib", @@ -3571,7 +3568,7 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d2ad9b889f1b12e0b9ee24db044b5129150d5eada288edc800f789928dc8c0e3" dependencies = [ - "unicode-width", + "unicode-width 0.1.13", ] [[package]] @@ -5225,16 +5222,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "terminal_size" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "terminal_size" version = "0.3.0" @@ -5915,6 +5902,12 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "unicode-xid" version = "0.2.4" @@ -6476,7 +6469,7 @@ dependencies = [ "bumpalo", "leb128", "memchr", - "unicode-width", + "unicode-width 0.1.13", "wasm-encoder 0.212.0", ] @@ -6662,6 +6655,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-targets" version = "0.48.5" diff --git a/node/Cargo.toml b/node/Cargo.toml index 444b18784fc..4e2f4ddbbfb 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -39,4 +39,4 @@ shellexpand = "3.1.0" termcolor = "1.4.1" diesel = { workspace = true } prometheus = { version = "0.13.4", features = ["push"] } -json-structural-diff = { version = "0.1", features = ["colorize"] } +json-structural-diff = { version = "0.2", features = ["colorize"] } From f02dfa2c2f251c7a8ea62916fbbd044e6cd83ca4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Apr 2025 08:00:09 +0100 Subject: [PATCH 160/160] build(deps): bump rand from 0.8.5 to 0.9.0 (#5964) * build(deps): bump rand from 0.8.5 to 0.9.0 Bumps [rand](https://github.com/rust-random/rand) from 0.8.5 to 0.9.0. - [Release notes](https://github.com/rust-random/rand/releases) - [Changelog](https://github.com/rust-random/rand/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-random/rand/compare/0.8.5...0.9.0) --- updated-dependencies: - dependency-name: rand dependency-version: 0.9.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * fix api changes * fix example changes --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Filipe Azevedo --- Cargo.lock | 41 ++++++------------------- Cargo.toml | 1 + chain/ethereum/src/network.rs | 10 +++--- graph/Cargo.toml | 9 ++++-- graph/examples/stress.rs | 42 +++++++++++++++++--------- graph/src/data/graphql/load_manager.rs | 4 +-- graph/src/data/subgraph/schema.rs | 8 ++--- graph/src/util/backoff.rs | 2 +- runtime/test/Cargo.toml | 2 +- runtime/test/src/test_padding.rs | 4 +-- store/postgres/Cargo.toml | 2 +- store/postgres/src/deployment_store.rs | 4 +-- 12 files changed, 61 insertions(+), 68 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c47e24263ef..4515f57f7b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -45,7 +45,7 @@ dependencies = [ "cfg-if 1.0.0", "once_cell", "version_check", - "zerocopy 0.7.35", + "zerocopy", ] [[package]] @@ -1903,7 +1903,7 @@ dependencies = [ "prometheus", "prost", "prost-types", - "rand 0.8.5", + "rand 0.9.1", "regex", "reqwest", "semver", @@ -2104,7 +2104,7 @@ dependencies = [ "graph-chain-ethereum", "graph-runtime-derive", "graph-runtime-wasm", - "rand 0.8.5", + "rand 0.9.1", "semver", "test-store", "wasmtime", @@ -2199,7 +2199,7 @@ dependencies = [ "postgres", "postgres-openssl", "pretty_assertions", - "rand 0.8.5", + "rand 0.9.1", "serde", "serde_json", "stable-hash 0.3.4", @@ -3800,7 +3800,7 @@ dependencies = [ "hmac", "md-5", "memchr", - "rand 0.9.0", + "rand 0.9.1", "sha2", "stringprep", ] @@ -4126,13 +4126,12 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", - "zerocopy 0.8.21", ] [[package]] @@ -5443,7 +5442,7 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "postgres-types", - "rand 0.9.0", + "rand 0.9.1", "socket2", "tokio", "tokio-util 0.7.11", @@ -5812,7 +5811,7 @@ dependencies = [ "http 1.1.0", "httparse", "log", - "rand 0.9.0", + "rand 0.9.1", "sha1", "thiserror 2.0.12", "utf-8", @@ -6980,16 +6979,7 @@ version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ - "zerocopy-derive 0.7.35", -] - -[[package]] -name = "zerocopy" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf01143b2dd5d134f11f545cf9f1431b13b749695cb33bcce051e7568f99478" -dependencies = [ - "zerocopy-derive 0.8.21", + "zerocopy-derive", ] [[package]] @@ -7003,17 +6993,6 @@ dependencies = [ "syn 2.0.87", ] -[[package]] -name = "zerocopy-derive" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712c8386f4f4299382c9abee219bee7084f78fb939d88b6840fcc1320d5f6da2" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.87", -] - [[package]] name = "zerofrom" version = "0.1.6" diff --git a/Cargo.toml b/Cargo.toml index f180a44e9f0..78694e06d1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -94,6 +94,7 @@ wasmtime = "15.0.1" substreams = "=0.6.0" substreams-entity-change = "2" substreams-near-core = "=0.10.2" +rand = { version = 
"0.9.1", features = ["os_rng"] } # Incremental compilation on Rust 1.58 causes an ICE on build. As soon as graph node builds again, these can be removed. [profile.test] diff --git a/chain/ethereum/src/network.rs b/chain/ethereum/src/network.rs index d654db71276..59a698ab20b 100644 --- a/chain/ethereum/src/network.rs +++ b/chain/ethereum/src/network.rs @@ -196,11 +196,9 @@ impl EthereumNetworkAdapters { required_capabilities: &NodeCapabilities, retest_percent: f64, ) -> Result, Error> { - let retest_rng: f64 = (&mut rand::thread_rng()).gen(); + let retest_rng: f64 = (&mut rand::rng()).random(); - let cheapest = input - .into_iter() - .choose_multiple(&mut rand::thread_rng(), 3); + let cheapest = input.into_iter().choose_multiple(&mut rand::rng(), 3); let cheapest = cheapest.iter(); // If request falls below the retest threshold, use this request to try and @@ -231,7 +229,7 @@ impl EthereumNetworkAdapters { let cheapest = self.all_unverified_cheapest_with(required_capabilities); Self::cheapest_from( - cheapest.choose_multiple(&mut rand::thread_rng(), 3), + cheapest.choose_multiple(&mut rand::rng(), 3), required_capabilities, self.retest_percent, ) @@ -245,7 +243,7 @@ impl EthereumNetworkAdapters { let cheapest = self .all_cheapest_with(required_capabilities) .await - .choose_multiple(&mut rand::thread_rng(), 3); + .choose_multiple(&mut rand::rng(), 3); Self::cheapest_from(cheapest, required_capabilities, self.retest_percent) } diff --git a/graph/Cargo.toml b/graph/Cargo.toml index 6547d0281c6..631edb29b71 100644 --- a/graph/Cargo.toml +++ b/graph/Cargo.toml @@ -12,7 +12,9 @@ atomic_refcell = "0.1.13" # We require this precise version of bigdecimal. Updating to later versions # has caused PoI differences; if you update this version, you will need to # make sure that it does not cause PoI changes -old_bigdecimal = { version = "=0.1.2", features = ["serde"], package = "bigdecimal" } +old_bigdecimal = { version = "=0.1.2", features = [ + "serde", +], package = "bigdecimal" } bytes = "1.0.1" bs58 = { workspace = true } cid = "0.11.1" @@ -40,7 +42,7 @@ lazy_static = "1.5.0" num-bigint = { version = "=0.2.6", features = ["serde"] } num-integer = { version = "=0.1.46" } num-traits = "=0.2.19" -rand = "0.8.4" +rand.workspace = true regex = "1.5.4" semver = { version = "1.0.23", features = ["serde"] } serde = { workspace = true } @@ -93,7 +95,8 @@ defer = "0.2" # Our fork contains patches to make some fields optional for Celo and Fantom compatibility. # Without the "arbitrary_precision" feature, we get the error `data did not match any variant of untagged enum Response`. 
web3 = { git = "https://github.com/graphprotocol/rust-web3", branch = "graph-patches-onto-0.18", features = [ - "arbitrary_precision", "test" + "arbitrary_precision", + "test", ] } serde_plain = "1.0.2" csv = "1.3.0" diff --git a/graph/examples/stress.rs b/graph/examples/stress.rs index 7e96d914fea..5534f2263b3 100644 --- a/graph/examples/stress.rs +++ b/graph/examples/stress.rs @@ -9,8 +9,8 @@ use clap::Parser; use graph::data::value::{Object, Word}; use graph::object; use graph::prelude::{lazy_static, q, r, BigDecimal, BigInt, QueryResult}; -use rand::SeedableRng; use rand::{rngs::SmallRng, Rng}; +use rand::{RngCore, SeedableRng}; use graph::util::cache_weight::CacheWeight; use graph::util::lfu_cache::LfuCache; @@ -240,8 +240,8 @@ impl Template for BigInt { fn create(size: usize, rng: Option<&mut SmallRng>) -> Self { let f = match rng { Some(rng) => { - let mag = rng.gen_range(1..100); - if rng.gen_bool(0.5) { + let mag = rng.random_range(1..100); + if rng.random_bool(0.5) { mag } else { -mag @@ -261,8 +261,8 @@ impl Template for BigDecimal { fn create(size: usize, mut rng: Option<&mut SmallRng>) -> Self { let f = match rng.as_deref_mut() { Some(rng) => { - let mag = rng.gen_range(1i32..100); - if rng.gen_bool(0.5) { + let mag = rng.random_range(1i32..100); + if rng.random_bool(0.5) { mag } else { -mag @@ -271,7 +271,7 @@ impl Template for BigDecimal { None => 1, }; let exp = match rng { - Some(rng) => rng.gen_range(-100..=100), + Some(rng) => rng.random_range(-100..=100), None => 1, }; let bi = BigInt::from(3u64).pow(size as u8).unwrap() * BigInt::from(f); @@ -307,7 +307,7 @@ fn make_object(size: usize, mut rng: Option<&mut SmallRng>) -> Object { for i in 0..size { let kind = rng .as_deref_mut() - .map(|rng| rng.gen_range(0..modulus)) + .map(|rng| rng.random_range(0..modulus)) .unwrap_or(i % modulus); let value = match kind { @@ -334,7 +334,11 @@ fn make_object(size: usize, mut rng: Option<&mut SmallRng>) -> Object { _ => unreachable!(), }; - let key = rng.as_deref_mut().map(|rng| rng.gen()).unwrap_or(i) % modulus; + let key = rng + .as_deref_mut() + .map(|rng| rng.next_u32() as usize) + .unwrap_or(i) + % modulus; obj.push((Word::from(format!("val{}", key)), value)); } Object::from_iter(obj) @@ -406,7 +410,7 @@ impl ValueMap { for i in 0..size { let kind = rng .as_deref_mut() - .map(|rng| rng.gen_range(0..modulus)) + .map(|rng| rng.random_range(0..modulus)) .unwrap_or(i % modulus); let value = match kind { @@ -431,7 +435,11 @@ impl ValueMap { _ => unreachable!(), }; - let key = rng.as_deref_mut().map(|rng| rng.gen()).unwrap_or(i) % modulus; + let key = rng + .as_deref_mut() + .map(|rng| rng.next_u32() as usize) + .unwrap_or(i) + % modulus; map.insert(format!("val{}", key), value); } MapMeasure(map) @@ -466,7 +474,10 @@ impl UsizeMap { fn make_map(size: usize, mut rng: Option<&mut SmallRng>) -> Self { let mut map = BTreeMap::new(); for i in 0..size { - let key = rng.as_deref_mut().map(|rng| rng.gen()).unwrap_or(2 * i); + let key = rng + .as_deref_mut() + .map(|rng| rng.next_u32() as usize) + .unwrap_or(2 * i); map.insert(key, i * 3); } MapMeasure(map) @@ -563,7 +574,10 @@ fn maybe_rng<'a>(opt: &'a Opt, rng: &'a mut SmallRng) -> Option<&'a mut SmallRng fn stress(opt: &Opt) { let mut rng = match opt.seed { - None => SmallRng::from_entropy(), + None => { + let mut rng = rand::rng(); + SmallRng::from_rng(&mut rng) + } Some(seed) => SmallRng::seed_from_u64(seed), }; @@ -624,7 +638,7 @@ fn stress(opt: &Opt) { let size = if opt.fixed || opt.obj_size == 0 { opt.obj_size } 
else { - rng.gen_range(0..opt.obj_size) + rng.random_range(0..opt.obj_size) }; let before = ALLOCATED.load(SeqCst); let sample = template.sample(size, maybe_rng(opt, &mut rng)); @@ -638,7 +652,7 @@ fn stress(opt: &Opt) { cache.insert(key, Entry::from(*sample)); // Do a few random reads from the cache for _attempt in 0..5 { - let read = rng.gen_range(0..=key); + let read = rng.random_range(0..=key); let _v = cache.get(&read); } } diff --git a/graph/src/data/graphql/load_manager.rs b/graph/src/data/graphql/load_manager.rs index 5e314d1607a..12fa565d321 100644 --- a/graph/src/data/graphql/load_manager.rs +++ b/graph/src/data/graphql/load_manager.rs @@ -1,7 +1,7 @@ //! Utilities to keep moving statistics about queries use prometheus::core::GenericCounter; -use rand::{prelude::Rng, thread_rng}; +use rand::{prelude::Rng, rng}; use std::collections::{HashMap, HashSet}; use std::iter::FromIterator; use std::sync::{Arc, RwLock}; @@ -439,7 +439,7 @@ impl LoadManager { // that cause at least 20% of the effort let kill_rate = self.update_kill_rate(shard, kill_rate, last_update, overloaded, wait_ms); let decline = - thread_rng().gen_bool((kill_rate * query_effort / total_effort).min(1.0).max(0.0)); + rng().random_bool((kill_rate * query_effort / total_effort).min(1.0).max(0.0)); if decline { if ENV_VARS.load_simulate { debug!(self.logger, "Declining query"; diff --git a/graph/src/data/subgraph/schema.rs b/graph/src/data/subgraph/schema.rs index ef2dbc4e47d..75922d810f2 100644 --- a/graph/src/data/subgraph/schema.rs +++ b/graph/src/data/subgraph/schema.rs @@ -4,7 +4,7 @@ use anyhow::{anyhow, bail, Error}; use chrono::{DateTime, Utc}; use hex; use rand::rngs::OsRng; -use rand::Rng; +use rand::TryRngCore as _; use std::collections::BTreeSet; use std::str::FromStr; use std::{fmt, fmt::Display}; @@ -272,11 +272,9 @@ impl_stable_hash!(SubgraphError { }); pub fn generate_entity_id() -> String { - // Fast crypto RNG from operating system - let mut rng = OsRng::default(); - // 128 random bits - let id_bytes: [u8; 16] = rng.gen(); + let mut id_bytes = [0u8; 16]; + OsRng.try_fill_bytes(&mut id_bytes).unwrap(); // 32 hex chars // Comparable to uuidv4, but without the hyphens, diff --git a/graph/src/util/backoff.rs b/graph/src/util/backoff.rs index ffe8d5bb5d3..6e6361e0d67 100644 --- a/graph/src/util/backoff.rs +++ b/graph/src/util/backoff.rs @@ -51,7 +51,7 @@ impl ExponentialBackoff { if delay > self.ceiling { delay = self.ceiling; } - let jitter = rand::Rng::gen_range(&mut rand::thread_rng(), -self.jitter..=self.jitter); + let jitter = rand::Rng::random_range(&mut rand::rng(), -self.jitter..=self.jitter); delay.mul_f64(1.0 + jitter) } diff --git a/runtime/test/Cargo.toml b/runtime/test/Cargo.toml index 57002d98c41..be03619a7a9 100644 --- a/runtime/test/Cargo.toml +++ b/runtime/test/Cargo.toml @@ -10,7 +10,7 @@ graph = { path = "../../graph" } graph-chain-ethereum = { path = "../../chain/ethereum" } graph-runtime-derive = { path = "../derive" } graph-runtime-wasm = { path = "../wasm" } -rand = "0.8.5" +rand.workspace = true [dev-dependencies] diff --git a/runtime/test/src/test_padding.rs b/runtime/test/src/test_padding.rs index a68f27f8c61..fc6e922692f 100644 --- a/runtime/test/src/test_padding.rs +++ b/runtime/test/src/test_padding.rs @@ -8,8 +8,8 @@ const WASM_FILE_NAME: &str = "test_padding.wasm"; //for tests, to run in parallel, sub graph name has be unique fn rnd_sub_graph_name(size: usize) -> String { - use rand::{distributions::Alphanumeric, Rng}; - rand::thread_rng() + use rand::{distr::Alphanumeric, Rng}; 
+ rand::rng() .sample_iter(&Alphanumeric) .take(size) .map(char::from) diff --git a/store/postgres/Cargo.toml b/store/postgres/Cargo.toml index 160826ee601..027a46414d9 100644 --- a/store/postgres/Cargo.toml +++ b/store/postgres/Cargo.toml @@ -23,7 +23,7 @@ maybe-owned = "0.3.4" postgres = "0.19.1" openssl = "0.10.72" postgres-openssl = "0.5.1" -rand = "0.8.4" +rand.workspace = true serde = { workspace = true } serde_json = { workspace = true } stable-hash_legacy = { git = "https://github.com/graphprotocol/stable-hash", branch = "old", package = "stable-hash" } diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index 1cb569730a0..b02d076fcb0 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -26,7 +26,7 @@ use graph::semver::Version; use graph::tokio::task::JoinHandle; use itertools::Itertools; use lru_time_cache::LruCache; -use rand::{seq::SliceRandom, thread_rng}; +use rand::{rng, seq::SliceRandom}; use std::collections::{BTreeMap, HashMap}; use std::convert::Into; use std::ops::{Bound, DerefMut}; @@ -156,7 +156,7 @@ impl DeploymentStore { vec![replica; *weight] }) .collect(); - let mut rng = thread_rng(); + let mut rng = rng(); replica_order.shuffle(&mut rng); debug!(logger, "Using postgres host order {:?}", replica_order);
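
Note on the rand 0.9 upgrade applied above: the changes follow a mostly mechanical mapping from the 0.8 API. Below is a minimal, self-contained sketch of the old-to-new calls used throughout this patch, assuming a crate that depends on rand 0.9 with the `small_rng` and `os_rng` features available; the function name `rand_09_migration_sketch` is purely illustrative and is not part of graph-node.

    use rand::distr::Alphanumeric;
    use rand::rngs::{OsRng, SmallRng};
    use rand::{Rng, RngCore, SeedableRng, TryRngCore};

    fn rand_09_migration_sketch() {
        // rand 0.8: rand::thread_rng()  ->  rand 0.9: rand::rng()
        let mut rng = rand::rng();

        // gen(), gen_range() and gen_bool() were renamed to random*()
        let coin: bool = rng.random_bool(0.5);
        let mag: i32 = rng.random_range(1..100);
        let raw: u32 = rng.next_u32(); // RngCore itself is unchanged

        // rand::distributions::Alphanumeric moved to rand::distr::Alphanumeric
        let name: String = rand::rng()
            .sample_iter(&Alphanumeric)
            .take(8)
            .map(char::from)
            .collect();

        // SmallRng::from_entropy() is gone; seed from another RNG instead
        let mut small = SmallRng::from_rng(&mut rand::rng());
        let _sample = small.random_range(0..10);

        // OsRng is now a fallible TryRngCore; fill a buffer explicitly
        let mut id_bytes = [0u8; 16];
        OsRng.try_fill_bytes(&mut id_bytes).unwrap();

        let _ = (coin, mag, raw, name, id_bytes);
    }

The same renames explain every hunk in this commit: thread_rng() becomes rng(), the gen_* family becomes random_*, distributions becomes distr, and the OsRng-based entity-id generation switches to the TryRngCore fill API instead of gen().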