diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0fa6d58bbb7..24993639945 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -153,7 +153,7 @@ jobs: - name: Install Foundry uses: foundry-rs/foundry-toolchain@v1 - name: Start anvil - run: anvil --gas-limit 100000000000 --base-fee 1 --block-time 2 --port 3021 & + run: anvil --gas-limit 100000000000 --base-fee 1 --block-time 2 --timestamp 1743944919 --port 3021 & - name: Install graph CLI run: curl -sSL http://cli.thegraph.com/install.sh | sudo bash diff --git a/Cargo.lock b/Cargo.lock index c511d00601f..abf9368d3e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,6 +57,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -199,7 +205,7 @@ dependencies = [ "futures-util", "handlebars", "http 1.1.0", - "indexmap 2.2.6", + "indexmap 2.9.0", "mime", "multer", "num-traits", @@ -266,7 +272,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "741110dda927420a28fbc1c310543d3416f789a6ba96859c2c265843a0a96887" dependencies = [ "bytes", - "indexmap 2.2.6", + "indexmap 2.9.0", "serde", "serde_json", ] @@ -718,6 +724,7 @@ dependencies = [ "anstyle", "clap_lex", "strsim", + "terminal_size 0.3.0", ] [[package]] @@ -767,7 +774,7 @@ dependencies = [ "lazy_static", "libc", "regex", - "terminal_size", + "terminal_size 0.1.17", "unicode-width", "winapi", "winapi-util", @@ -791,6 +798,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "convert_case" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb402b8d4c85569410425650ce3eddc7d698ed96d39a73f941b08fb63082f1e7" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -977,9 +993,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" dependencies = [ "crossbeam-utils", ] @@ -1182,17 +1198,39 @@ dependencies = [ [[package]] name = "derive_more" -version = "0.99.18" +version = "0.99.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" +checksum = "3da29a38df43d6f156149c9b43ded5e018ddff2a855cf2cfd62e8cd7d079c69f" dependencies = [ - "convert_case", + "convert_case 0.4.0", "proc-macro2", "quote", "rustc_version", "syn 2.0.87", ] +[[package]] +name = "derive_more" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +dependencies = [ + "convert_case 0.7.1", + "proc-macro2", + "quote", + "syn 2.0.87", + "unicode-xid", +] + [[package]] name = "diesel" version = "2.2.7" @@ -1351,6 +1389,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "dsl_auto_type" version = "0.1.1" @@ -1411,22 +1460,22 @@ dependencies = [ [[package]] name = "envconfig" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea81cc7e21f55a9d9b1efb6816904978d0bfbe31a50347cb24b2e75564bcac9b" +checksum = "3c1d02ec9fdd0a585580bdc8fb7ad01675eee5e3b7336cedbabe3aab4a026dbc" dependencies = [ "envconfig_derive", ] [[package]] name = "envconfig_derive" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dfca278e5f84b45519acaaff758ebfa01f18e96998bc24b8f1b722dd804b9bf" +checksum = "d4291f0c7220b67ad15e9d5300ba2f215cee504f0924d60e77c9d1c77e7a69b1" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.87", ] [[package]] @@ -1542,9 +1591,9 @@ dependencies = [ [[package]] name = "fixedbitset" -version = "0.4.2" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flate2" @@ -1562,6 +1611,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "foreign-types" version = "0.3.2" @@ -1756,7 +1811,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" dependencies = [ "fallible-iterator 0.3.0", - "indexmap 2.2.6", + "indexmap 2.9.0", "stable_deref_trait", ] @@ -1768,18 +1823,18 @@ checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" [[package]] name = "git-testament" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "710c78d2b68e46e62f5ba63ba0a7a2986640f37f9ecc07903b9ad4e7b2dbfc8e" +checksum = "5a74999c921479f919c87a9d2e6922a79a18683f18105344df8e067149232e51" dependencies = [ "git-testament-derive", ] [[package]] name = "git-testament-derive" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b31494efbbe1a6730f6943759c21b92c8dc431cb4df177e6f2a6429c3c96842" +checksum = "bbeac967e71eb3dc1656742fc7521ec7cd3b6b88738face65bf1fddf702bc4c0" dependencies = [ "log", "proc-macro2", @@ -1846,7 +1901,7 @@ dependencies = [ "num-traits", "object_store", "parking_lot", - "petgraph", + "petgraph 0.8.1", "priority-queue", "prometheus", 
"prost", @@ -1861,6 +1916,7 @@ dependencies = [ "serde_plain", "serde_regex", "serde_yaml", + "sha2", "slog", "slog-async", "slog-envlogger", @@ -1868,7 +1924,7 @@ dependencies = [ "sqlparser", "stable-hash 0.3.4", "stable-hash 0.4.4", - "strum_macros", + "strum_macros 0.27.1", "thiserror 1.0.61", "tiny-keccak 1.5.0", "tokio", @@ -1905,7 +1961,7 @@ version = "0.36.0" dependencies = [ "anyhow", "heck 0.5.0", - "protobuf 3.7.1", + "protobuf 3.7.2", "protobuf-parse", ] @@ -2126,7 +2182,7 @@ dependencies = [ "blake3 1.6.1", "chrono", "clap", - "derive_more", + "derive_more 2.0.1", "diesel", "diesel-derive-enum", "diesel-dynamic-schema", @@ -2271,7 +2327,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.2.6", + "indexmap 2.9.0", "slab", "tokio", "tokio-util 0.7.11", @@ -2290,7 +2346,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap 2.2.6", + "indexmap 2.9.0", "slab", "tokio", "tokio-util 0.7.11", @@ -2335,6 +2391,17 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + [[package]] name = "hdrhistogram" version = "7.5.4" @@ -2643,6 +2710,124 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + 
"icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "id-arena" version = "2.2.1" @@ -2668,12 +2853,23 @@ dependencies = [ [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", ] [[package]] @@ -2727,12 +2923,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.14.5", + "hashbrown 0.15.2", "serde", ] @@ -2826,10 +3022,11 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -2954,9 +3151,9 @@ checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "libredox" @@ -2974,6 +3171,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856" + 
[[package]] name = "lock_api" version = "0.4.12" @@ -3117,13 +3320,13 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.11" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -3257,7 +3460,7 @@ checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "crc32fast", "hashbrown 0.14.5", - "indexmap 2.2.6", + "indexmap 2.9.0", "memchr", ] @@ -3314,9 +3517,9 @@ checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" [[package]] name = "openssl" -version = "0.10.71" +version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd" +checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ "bitflags 2.6.0", "cfg-if 1.0.0", @@ -3346,9 +3549,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.106" +version = "0.9.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb61ea9811cc39e3c2069f40b8b8e2e70d8569b361f879786cc7ed48b777cdd" +checksum = "8288979acd84749c744a9014b4382d42b8f7b2592847b5afb2ed29e5d16ede07" dependencies = [ "cc", "libc", @@ -3485,12 +3688,24 @@ dependencies = [ [[package]] name = "petgraph" -version = "0.6.5" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", - "indexmap 2.2.6", + "indexmap 2.9.0", +] + +[[package]] +name = "petgraph" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a98c6720655620a521dcc722d0ad66cd8afd5d86e34a89ef691c50b7b24de06" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.2", + "indexmap 2.9.0", + "serde", ] [[package]] @@ -3628,9 +3843,9 @@ dependencies = [ [[package]] name = "pretty_assertions" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" dependencies = [ "diff", "yansi", @@ -3661,13 +3876,13 @@ dependencies = [ [[package]] name = "priority-queue" -version = "2.0.3" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70c501afe3a2e25c9bd219aa56ec1e04cdb3fcdd763055be268778c13fa82c1f" +checksum = "ef08705fa1589a1a59aa924ad77d14722cb0cd97b67dd5004ed5f4a4873fce8d" dependencies = [ "autocfg", "equivalent", - "indexmap 2.2.6", + "indexmap 2.9.0", ] [[package]] @@ -3692,9 +3907,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = 
"a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" dependencies = [ "unicode-ident", ] @@ -3737,7 +3952,7 @@ dependencies = [ "log", "multimap", "once_cell", - "petgraph", + "petgraph 0.7.1", "prettyplease", "prost", "prost-types", @@ -3776,9 +3991,9 @@ checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" [[package]] name = "protobuf" -version = "3.7.1" +version = "3.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3a7c64d9bf75b1b8d981124c14c179074e8caa7dfe7b6a12e6222ddcd0c8f72" +checksum = "d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4" dependencies = [ "once_cell", "protobuf-support", @@ -3787,14 +4002,14 @@ dependencies = [ [[package]] name = "protobuf-parse" -version = "3.7.1" +version = "3.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322330e133eab455718444b4e033ebfac7c6528972c784fcde28d2cc783c6257" +checksum = "b4aeaa1f2460f1d348eeaeed86aea999ce98c1bded6f089ff8514c9d9dbdc973" dependencies = [ "anyhow", - "indexmap 2.2.6", + "indexmap 2.9.0", "log", - "protobuf 3.7.1", + "protobuf 3.7.2", "protobuf-support", "tempfile", "thiserror 1.0.61", @@ -3803,9 +4018,9 @@ dependencies = [ [[package]] name = "protobuf-support" -version = "3.7.1" +version = "3.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b088fd20b938a875ea00843b6faf48579462630015c3788d397ad6a786663252" +checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6" dependencies = [ "thiserror 1.0.61", ] @@ -4061,9 +4276,9 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" -version = "0.12.5" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +checksum = "d19c46a6fdd48bc4dab94b6103fccc55d34c67cc0ad04653aad4ea2a07cd7bbb" dependencies = [ "base64 0.22.1", "bytes", @@ -4090,7 +4305,7 @@ dependencies = [ "pin-project-lite", "quinn", "rustls", - "rustls-native-certs 0.7.1", + "rustls-native-certs 0.8.1", "rustls-pemfile", "rustls-pki-types", "serde", @@ -4102,26 +4317,26 @@ dependencies = [ "tokio-native-tls", "tokio-rustls", "tokio-util 0.7.11", + "tower 0.5.2", "tower-service 0.3.3", "url", "wasm-bindgen", "wasm-bindgen-futures", "wasm-streams", "web-sys", - "winreg", + "windows-registry", ] [[package]] name = "ring" -version = "0.17.8" +version = "0.17.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee" dependencies = [ "cc", "cfg-if 1.0.0", "getrandom 0.2.15", "libc", - "spin", "untrusted", "windows-sys 0.52.0", ] @@ -4472,7 +4687,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.2.6", + "indexmap 2.9.0", "itoa", "ryu", "serde", @@ -4779,7 +4994,7 @@ version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" dependencies = [ - "strum_macros", + "strum_macros 0.26.4", ] [[package]] @@ -4795,6 +5010,19 
@@ dependencies = [ "syn 2.0.87", ] +[[package]] +name = "strum_macros" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.87", +] + [[package]] name = "substreams" version = "0.6.0" @@ -4912,23 +5140,37 @@ name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] [[package]] name = "system-configuration" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", "core-foundation 0.9.4", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" dependencies = [ "core-foundation-sys", "libc", @@ -4994,6 +5236,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "terminal_size" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +dependencies = [ + "rustix", + "windows-sys 0.48.0", +] + [[package]] name = "test-store" version = "0.36.0" @@ -5110,6 +5362,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinyvec" version = "1.7.0" @@ -5127,28 +5389,27 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.38.0" +version = "1.44.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", @@ -5327,7 +5588,7 @@ version = "0.21.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" dependencies = [ - "indexmap 2.2.6", + "indexmap 2.9.0", "toml_datetime", "winnow 0.5.40", ] @@ -5338,7 +5599,7 @@ version = "0.22.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f21c7aaf97f1bd9ca9d4f9e73b0a6c74bd5afef56f2bc931943a6e1c37e04e38" dependencies = [ - "indexmap 2.2.6", + "indexmap 2.9.0", "serde", "serde_spanned", "toml_datetime", @@ -5643,6 +5904,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4259d9d4425d9f0661581b804cb85fe66a4c631cadd8f490d1c13a35d5d9291" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-width" version = "0.1.13" @@ -5690,12 +5957,12 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", - "idna 0.5.0", + "idna 1.0.3", "percent-encoding", ] @@ -5705,6 +5972,18 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -5777,23 +6056,24 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if 1.0.0", + "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", "syn 2.0.87", @@ -5814,9 +6094,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" 
dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5824,9 +6104,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", @@ -5837,9 +6117,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "wasm-encoder" @@ -5887,7 +6170,7 @@ version = "0.116.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a58e28b80dd8340cb07b8242ae654756161f6fc8d0038123d679b7b99964fa50" dependencies = [ - "indexmap 2.2.6", + "indexmap 2.9.0", "semver", ] @@ -5897,7 +6180,7 @@ version = "0.118.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77f1154f1ab868e2a01d9834a805faca7bf8b50d041b4ca714d005d0dab1c50c" dependencies = [ - "indexmap 2.2.6", + "indexmap 2.9.0", "semver", ] @@ -5913,7 +6196,7 @@ dependencies = [ "bumpalo", "cfg-if 1.0.0", "fxprof-processed-profile", - "indexmap 2.2.6", + "indexmap 2.9.0", "libc", "log", "object 0.32.2", @@ -6038,7 +6321,7 @@ dependencies = [ "anyhow", "cranelift-entity", "gimli 0.28.1", - "indexmap 2.2.6", + "indexmap 2.9.0", "log", "object 0.32.2", "serde", @@ -6123,7 +6406,7 @@ dependencies = [ "anyhow", "cc", "cfg-if 1.0.0", - "indexmap 2.2.6", + "indexmap 2.9.0", "libc", "log", "mach", @@ -6175,7 +6458,7 @@ checksum = "4b804dfd3d0c0d6d37aa21026fe7772ba1a769c89ee4f5c4f13b82d91d75216f" dependencies = [ "anyhow", "heck 0.4.1", - "indexmap 2.2.6", + "indexmap 2.9.0", "wit-parser", ] @@ -6225,7 +6508,7 @@ dependencies = [ "arrayvec 0.7.4", "base64 0.13.1", "bytes", - "derive_more", + "derive_more 0.99.19", "ethabi", "ethereum-types", "futures 0.3.30", @@ -6327,6 +6610,41 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + +[[package]] +name = "windows-registry" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.53.0", +] + +[[package]] +name = "windows-result" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -6369,13 +6687,29 @@ dependencies = [ "windows_aarch64_gnullvm 
0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -6388,6 +6722,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -6400,6 +6740,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -6412,12 +6758,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -6430,6 +6788,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -6442,6 +6806,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -6454,6 +6824,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -6466,6 +6842,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "winnow" version = "0.5.40" @@ -6484,16 +6866,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if 1.0.0", - "windows-sys 0.48.0", -] - [[package]] name = "wiremock" version = "0.6.2" @@ -6535,7 +6907,7 @@ checksum = "316b36a9f0005f5aa4b03c39bc3728d045df136f8c13a73b7db4510dec725e08" dependencies = [ "anyhow", "id-arena", - "indexmap 2.2.6", + "indexmap 2.9.0", "log", "semver", "serde", @@ -6544,6 +6916,18 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "wyz" version = "0.5.1" @@ -6561,9 +6945,33 @@ checksum = "63658493314859b4dfdf3fb8c1defd61587839def09582db50b8a4e93afca6bb" [[package]] name = "yansi" -version = "0.5.1" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] [[package]] name = "zerocopy" @@ -6605,12 +7013,55 @@ dependencies = [ "syn 2.0.87", ] +[[package]] +name = "zerofrom" +version = "0.1.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "zstd" version = "0.11.2+zstd.1.5.2" diff --git a/Cargo.toml b/Cargo.toml index a8193c3f0ed..e258a84082a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,9 +45,16 @@ async-graphql-axum = "7.0.15" axum = "0.8.1" chrono = "0.4.38" bs58 = "0.5.1" -clap = { version = "4.5.4", features = ["derive", "env"] } +clap = { version = "4.5.4", features = ["derive", "env", "wrap_help"] } derivative = "2.2.0" -diesel = { version = "2.2.7", features = ["postgres", "serde_json", "numeric", "r2d2", "chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes"] } +diesel = { version = "2.2.7", features = [ + "postgres", + "serde_json", + "numeric", + "r2d2", + "chrono", + "i-implement-a-third-party-backend-and-opt-into-breaking-changes", +] } diesel-derive-enum = { version = "2.1.0", features = ["postgres"] } diesel-dynamic-schema = { version = "0.2.3", features = ["postgres"] } diesel_derives = "2.2.3" @@ -63,7 +70,7 @@ lazy_static = "1.5.0" prost = "0.13" prost-types = "0.13" regex = "1.5.4" -reqwest = "0.12.5" +reqwest = "0.12.15" serde = { version = "1.0.126", features = ["rc"] } serde_derive = "1.0.125" serde_json = { version = "1.0", features = ["arbitrary_precision"] } @@ -78,7 +85,7 @@ strum = { version = "0.26", features = ["derive"] } syn = { version = "2.0.87", features = ["full"] } test-store = { path = "./store/test-store" } thiserror = "1.0.25" -tokio = { version = "1.38.0", features = ["full"] } +tokio = { version = "1.44.2", features = ["full"] } tonic = { version = "0.12.3", features = ["tls-roots", "gzip"] } tonic-build = { version = "0.12.3", features = ["prost"] } tower-http = { version = "0.5.2", features = ["cors"] } diff --git a/chain/common/Cargo.toml b/chain/common/Cargo.toml index 6c1cfd9dc03..eef11ed85a3 100644 --- a/chain/common/Cargo.toml +++ b/chain/common/Cargo.toml @@ -7,6 +7,6 @@ edition.workspace = true [dependencies] protobuf = "3.0.2" -protobuf-parse = "3.7.1" +protobuf-parse = "3.7.2" anyhow = "1" heck = "0.5" diff --git a/chain/ethereum/Cargo.toml b/chain/ethereum/Cargo.toml index 43d1afb9bd3..c72772aaa95 100644 --- a/chain/ethereum/Cargo.toml +++ b/chain/ethereum/Cargo.toml @@ -4,7 +4,7 @@ version.workspace = true edition.workspace = true [dependencies] -envconfig 
= "0.10.0" +envconfig = "0.11.0" jsonrpc-core = "18.0.0" graph = { path = "../../graph" } serde = { workspace = true } diff --git a/chain/ethereum/src/adapter.rs b/chain/ethereum/src/adapter.rs index 469e8932b5e..93a7fc60781 100644 --- a/chain/ethereum/src/adapter.rs +++ b/chain/ethereum/src/adapter.rs @@ -7,7 +7,6 @@ use graph::data_source::common::ContractCall; use graph::firehose::CallToFilter; use graph::firehose::CombinedFilter; use graph::firehose::LogFilter; -use graph::futures01::Future; use graph::prelude::web3::types::Bytes; use graph::prelude::web3::types::H160; use graph::prelude::web3::types::U256; @@ -17,7 +16,6 @@ use prost_types::Any; use std::cmp; use std::collections::{HashMap, HashSet}; use std::fmt; -use std::marker::Unpin; use thiserror::Error; use tiny_keccak::keccak256; use web3::types::{Address, Log, H256}; @@ -26,7 +24,6 @@ use graph::prelude::*; use graph::{ blockchain as bc, components::metrics::{CounterVec, GaugeVec, HistogramVec}, - futures01::Stream, petgraph::{self, graphmap::GraphMap}, }; @@ -1083,22 +1080,19 @@ pub trait EthereumAdapter: Send + Sync + 'static { async fn net_identifiers(&self) -> Result; /// Get the latest block, including full transactions. - fn latest_block( - &self, - logger: &Logger, - ) -> Box + Send + Unpin>; + async fn latest_block(&self, logger: &Logger) -> Result; /// Get the latest block, with only the header and transaction hashes. - fn latest_block_header( + async fn latest_block_header( &self, logger: &Logger, - ) -> Box, Error = bc::IngestorError> + Send>; + ) -> Result, bc::IngestorError>; - fn load_block( + async fn load_block( &self, logger: &Logger, block_hash: H256, - ) -> Box + Send>; + ) -> Result; /// Load Ethereum blocks in bulk, returning results as they come back as a Stream. /// May use the `chain_store` as a cache. @@ -1107,29 +1101,27 @@ pub trait EthereumAdapter: Send + Sync + 'static { logger: Logger, chain_store: Arc, block_hashes: HashSet, - ) -> Box, Error = Error> + Send>; + ) -> Result>, Error>; /// Find a block by its hash. - fn block_by_hash( + async fn block_by_hash( &self, logger: &Logger, block_hash: H256, - ) -> Box, Error = Error> + Send>; + ) -> Result, Error>; - fn block_by_number( + async fn block_by_number( &self, logger: &Logger, block_number: BlockNumber, - ) -> Box, Error = Error> + Send>; + ) -> Result, Error>; /// Load full information for the specified `block` (in particular, transaction receipts). - fn load_full_block( + async fn load_full_block( &self, logger: &Logger, block: LightEthereumBlock, - ) -> Pin< - Box> + Send + '_>, - >; + ) -> Result; /// Find a block by its number, according to the Ethereum node. /// @@ -1140,11 +1132,11 @@ pub trait EthereumAdapter: Send + Sync + 'static { /// those confirmations. /// If the Ethereum node is far behind in processing blocks, even old blocks can be subject to /// reorgs. - fn block_hash_by_block_number( + async fn block_hash_by_block_number( &self, logger: &Logger, block_number: BlockNumber, - ) -> Box, Error = Error> + Send>; + ) -> Result, Error>; /// Finds the hash and number of the lowest non-null block with height greater than or equal to /// the given number. 
@@ -1177,20 +1169,20 @@ pub trait EthereumAdapter: Send + Sync + 'static {
         cache: Arc,
     ) -> Result>, call::Source)>, ContractCallError>;
 
-    fn get_balance(
+    async fn get_balance(
         &self,
         logger: &Logger,
         address: H160,
         block_ptr: BlockPtr,
-    ) -> Box + Send>;
+    ) -> Result;
 
     // Returns the compiled bytecode of a smart contract
-    fn get_code(
+    async fn get_code(
         &self,
         logger: &Logger,
         address: H160,
         block_ptr: BlockPtr,
-    ) -> Box + Send>;
+    ) -> Result;
 }
 
 #[cfg(test)]
diff --git a/chain/ethereum/src/chain.rs b/chain/ethereum/src/chain.rs
index f632ee36d93..0408771f23e 100644
--- a/chain/ethereum/src/chain.rs
+++ b/chain/ethereum/src/chain.rs
@@ -10,14 +10,13 @@ use graph::components::network_provider::ChainName;
 use graph::components::store::{DeploymentCursorTracker, SourceableStore};
 use graph::data::subgraph::UnifiedMappingApiVersion;
 use graph::firehose::{FirehoseEndpoint, ForkStep};
-use graph::futures03::compat::Future01CompatExt;
 use graph::futures03::TryStreamExt;
 use graph::prelude::{
     retry, BlockHash, ComponentLoggerConfig, ElasticComponentLoggerConfig, EthereumBlock,
     EthereumCallCache, LightEthereumBlock, LightEthereumBlockExt, MetricsRegistry,
 };
 use graph::schema::InputSchema;
-use graph::slog::{debug, error, trace};
+use graph::slog::{debug, error, trace, warn};
 use graph::substreams::Clock;
 use graph::{
     blockchain::{
@@ -257,6 +256,7 @@ pub struct EthereumAdapterSelector {
     client: Arc>,
     registry: Arc,
     chain_store: Arc,
+    eth_adapters: Arc,
 }
 
 impl EthereumAdapterSelector {
@@ -265,12 +265,14 @@ impl EthereumAdapterSelector {
         client: Arc>,
         registry: Arc,
         chain_store: Arc,
+        eth_adapters: Arc,
     ) -> Self {
         Self {
             logger_factory,
             client,
             registry,
             chain_store,
+            eth_adapters,
         }
     }
 }
@@ -296,6 +298,7 @@ impl TriggersAdapterSelector for EthereumAdapterSelector {
             chain_store: self.chain_store.cheap_clone(),
             unified_api_version,
             capabilities: *capabilities,
+            eth_adapters: self.eth_adapters.cheap_clone(),
         };
         Ok(Arc::new(adapter))
     }
@@ -610,7 +613,7 @@ impl Blockchain for Chain {
         // present in the DB.
         Box::new(PollingBlockIngestor::new(
             logger,
-            graph::env::ENV_VARS.reorg_threshold,
+            graph::env::ENV_VARS.reorg_threshold(),
             self.chain_client(),
             self.chain_store().cheap_clone(),
             self.polling_ingestor_interval,
@@ -739,6 +742,7 @@ pub struct TriggersAdapter {
     chain_client: Arc>,
     capabilities: NodeCapabilities,
     unified_api_version: UnifiedMappingApiVersion,
+    eth_adapters: Arc,
 }
 
 /// Fetches blocks from the cache based on block numbers, excluding duplicates
@@ -784,12 +788,34 @@ async fn fetch_unique_blocks_from_cache(
             "Loading {} block(s) not in the block cache",
             missing_blocks.len()
         );
-        debug!(logger, "Missing blocks {:?}", missing_blocks);
+        trace!(logger, "Missing blocks {:?}", missing_blocks.len());
     }
 
     (blocks, missing_blocks)
 }
 
+// Loads the given block numbers via RPC, reading from the block cache first and fetching only the missing blocks.
+async fn load_blocks_with_rpc(
+    logger: &Logger,
+    adapter: Arc,
+    chain_store: Arc,
+    block_numbers: BTreeSet,
+) -> Result> {
+    let logger_clone = logger.clone();
+    load_blocks(
+        logger,
+        chain_store,
+        block_numbers,
+        |missing_numbers| async move {
+            adapter
+                .load_block_ptrs_by_numbers_rpc(logger_clone, missing_numbers)
+                .try_collect()
+                .await
+        },
+    )
+    .await
+}
+
 /// Fetches blocks by their numbers, first attempting to load from cache.
 /// Missing blocks are retrieved from an external source, with all blocks sorted and converted to `BlockFinality` format.
 async fn load_blocks(
@@ -847,6 +873,37 @@ impl TriggersAdapterTrait for TriggersAdapter {
     ) -> Result> {
         match &*self.chain_client {
             ChainClient::Firehose(endpoints) => {
+                // If the force_rpc_for_block_ptrs flag is set, load the blocks via RPC even
+                // when Firehose is available. If no adapter can be obtained, or the RPC load
+                // fails, log a warning and fall back to Firehose.
+                if ENV_VARS.force_rpc_for_block_ptrs {
+                    trace!(
+                        logger,
+                        "Loading blocks from RPC (force_rpc_for_block_ptrs is set)";
+                        "block_numbers" => format!("{:?}", block_numbers)
+                    );
+                    match self.eth_adapters.cheapest_with(&self.capabilities).await {
+                        Ok(adapter) => {
+                            match load_blocks_with_rpc(
+                                &logger,
+                                adapter,
+                                self.chain_store.clone(),
+                                block_numbers.clone(),
+                            )
+                            .await
+                            {
+                                Ok(blocks) => return Ok(blocks),
+                                Err(e) => {
+                                    warn!(logger, "Error loading blocks from RPC: {}", e);
+                                }
+                            }
+                        }
+                        Err(e) => {
+                            warn!(logger, "Error getting cheapest adapter: {}", e);
+                        }
+                    }
+                }
+
                 trace!(
                     logger,
                     "Loading blocks from firehose";
@@ -884,29 +941,16 @@
                 .await
             }
 
-            ChainClient::Rpc(client) => {
+            ChainClient::Rpc(eth_adapters) => {
                 trace!(
                     logger,
                     "Loading blocks from RPC";
                     "block_numbers" => format!("{:?}", block_numbers)
                 );
 
-                let adapter = client.cheapest_with(&self.capabilities).await?;
-                let chain_store = self.chain_store.clone();
-                let logger_clone = logger.clone();
-
-                load_blocks(
-                    &logger,
-                    chain_store,
-                    block_numbers,
-                    |missing_numbers| async move {
-                        adapter
-                            .load_block_ptrs_by_numbers_rpc(logger_clone, missing_numbers)
-                            .try_collect()
-                            .await
-                    },
-                )
-                .await
+                let adapter = eth_adapters.cheapest_with(&self.capabilities).await?;
+                load_blocks_with_rpc(&logger, adapter, self.chain_store.clone(), block_numbers)
+                    .await
             }
         }
     }
@@ -973,10 +1017,12 @@ impl TriggersAdapterTrait for TriggersAdapter {
             ChainClient::Firehose(endpoints) => {
                 let endpoint = endpoints.endpoint().await?;
                 let block = endpoint
-                    .get_block_by_number::(ptr.number as u64, &self.logger)
+                    .get_block_by_number_with_retry::(ptr.number as u64, &self.logger)
                     .await
-                    .map_err(|e| anyhow!("Failed to fetch block from firehose: {}", e))?;
-
+                    .context(format!(
+                        "Failed to fetch block {} from firehose",
+                        ptr.number
+                    ))?;
                 Ok(block.hash() == ptr.hash)
             }
             ChainClient::Rpc(adapter) => {
@@ -1013,7 +1059,6 @@ impl TriggersAdapterTrait for TriggersAdapter {
     }
 
     async fn parent_ptr(&self, block: &BlockPtr) -> Result, Error> {
-        use graph::futures01::stream::Stream;
         use graph::prelude::LightEthereumBlockExt;
 
         let block = match self.chain_client.as_ref() {
@@ -1064,9 +1109,6 @@ impl TriggersAdapterTrait for TriggersAdapter {
             self.chain_store.cheap_clone(),
             HashSet::from_iter(Some(block.hash_as_h256())),
         )
-        .await
-        .collect()
-        .compat()
         .await?;
 
         assert_eq!(blocks.len(), 1);
diff --git a/chain/ethereum/src/env.rs b/chain/ethereum/src/env.rs
index bc7223dbc07..027a26b623f 100644
--- a/chain/ethereum/src/env.rs
+++ b/chain/ethereum/src/env.rs
@@ -91,6 +91,10 @@ pub struct EnvVars {
     /// This is a comma separated list of chain ids for which the gas field will not be set
    /// when calling `eth_call`.
     pub eth_call_no_gas: Vec,
+    /// Set by the flag `GRAPH_ETHEREUM_FORCE_RPC_FOR_BLOCK_PTRS`. On by default.
+    /// When enabled, forces the use of RPC instead of Firehose for loading block pointers by
+    /// numbers, which composable subgraphs rely on; Firehose can be slow for these lookups.
+ pub force_rpc_for_block_ptrs: bool, } // This does not print any values avoid accidentally leaking any sensitive env vars @@ -141,6 +145,7 @@ impl From for EnvVars { .filter(|s| !s.is_empty()) .map(str::to_string) .collect(), + force_rpc_for_block_ptrs: x.force_rpc_for_block_ptrs.0, } } } @@ -192,4 +197,6 @@ struct Inner { genesis_block_number: u64, #[envconfig(from = "GRAPH_ETH_CALL_NO_GAS", default = "421613,421614")] eth_call_no_gas: String, + #[envconfig(from = "GRAPH_ETHEREUM_FORCE_RPC_FOR_BLOCK_PTRS", default = "true")] + force_rpc_for_block_ptrs: EnvVarBoolean, } diff --git a/chain/ethereum/src/ethereum_adapter.rs b/chain/ethereum/src/ethereum_adapter.rs index 7173c069c65..1c1d214f6a5 100644 --- a/chain/ethereum/src/ethereum_adapter.rs +++ b/chain/ethereum/src/ethereum_adapter.rs @@ -147,6 +147,7 @@ impl EthereumAdapter { let retry_log_message = format!("trace_filter RPC call for block range: [{}..{}]", from, to); retry(retry_log_message, &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -295,6 +296,7 @@ impl EthereumAdapter { let eth_adapter = self.clone(); let retry_log_message = format!("eth_getLogs RPC call for block range: [{}..{}]", from, to); retry(retry_log_message, &logger) + .redact_log_urls(true) .when(move |res: &Result<_, web3::error::Error>| match res { Ok(_) => false, Err(e) => !too_many_logs_fingerprints @@ -498,12 +500,12 @@ impl EthereumAdapter { } } - fn code( + async fn code( &self, logger: &Logger, address: Address, block_ptr: BlockPtr, - ) -> impl Future + Send { + ) -> Result { let web3 = self.web3.clone(); let logger = Logger::new(&logger, o!("provider" => self.provider.clone())); @@ -511,6 +513,7 @@ impl EthereumAdapter { let retry_log_message = format!("eth_getCode RPC call for block {}", block_ptr); retry(retry_log_message, &logger) + .redact_log_urls(true) .when(|result| match result { Ok(_) => false, Err(_) => true, @@ -528,17 +531,16 @@ impl EthereumAdapter { } } }) + .await .map_err(|e| e.into_inner().unwrap_or(EthereumRpcError::Timeout)) - .boxed() - .compat() } - fn balance( + async fn balance( &self, logger: &Logger, address: Address, block_ptr: BlockPtr, - ) -> impl Future + Send { + ) -> Result { let web3 = self.web3.clone(); let logger = Logger::new(&logger, o!("provider" => self.provider.clone())); @@ -546,6 +548,7 @@ impl EthereumAdapter { let retry_log_message = format!("eth_getBalance RPC call for block {}", block_ptr); retry(retry_log_message, &logger) + .redact_log_urls(true) .when(|result| match result { Ok(_) => false, Err(_) => true, @@ -563,9 +566,8 @@ impl EthereumAdapter { } } }) + .await .map_err(|e| e.into_inner().unwrap_or(EthereumRpcError::Timeout)) - .boxed() - .compat() } async fn call( @@ -586,6 +588,7 @@ impl EthereumAdapter { let block_id = self.block_ptr_to_id(&block_ptr); let retry_log_message = format!("eth_call RPC call for block {}", block_ptr); retry(retry_log_message, &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -765,6 +768,7 @@ impl EthereumAdapter { stream::iter_ok::<_, Error>(ids.into_iter().map(move |hash| { let web3 = web3.clone(); retry(format!("load block {}", hash), &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -799,6 +803,7 @@ impl EthereumAdapter { async move { retry(format!("load block {}", number), &logger) + .redact_log_urls(true) 
.limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -856,6 +861,7 @@ impl EthereumAdapter { stream::iter_ok::<_, Error>(block_nums.into_iter().map(move |block_num| { let web3 = web3.clone(); retry(format!("load block ptr {}", block_num), &logger) + .redact_log_urls(true) .when(|res| !res.is_ok() && !detect_null_block(res)) .no_limit() .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) @@ -1140,6 +1146,7 @@ impl EthereumAdapter { let web3 = self.web3.clone(); u64::try_from( retry("chain_id RPC call", &logger) + .redact_log_urls(true) .no_limit() .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || { @@ -1175,6 +1182,7 @@ impl EthereumAdapterTrait for EthereumAdapter { let metrics = self.metrics.clone(); let provider = self.provider().to_string(); let net_version_future = retry("net_version RPC call", &logger) + .redact_log_urls(true) .no_limit() .timeout_secs(20) .run(move || { @@ -1203,6 +1211,7 @@ impl EthereumAdapterTrait for EthereumAdapter { ENV_VARS.genesis_block_number ); let gen_block_hash_future = retry(retry_log_message, &logger) + .redact_log_urls(true) .no_limit() .timeout_secs(30) .run(move || { @@ -1247,165 +1256,150 @@ impl EthereumAdapterTrait for EthereumAdapter { Ok(ident) } - fn latest_block_header( + async fn latest_block_header( &self, logger: &Logger, - ) -> Box, Error = IngestorError> + Send> { + ) -> Result, IngestorError> { let web3 = self.web3.clone(); - Box::new( - retry("eth_getBlockByNumber(latest) no txs RPC call", logger) - .no_limit() - .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) - .run(move || { - let web3 = web3.cheap_clone(); - async move { - let block_opt = web3 - .eth() - .block(Web3BlockNumber::Latest.into()) - .await - .map_err(|e| { - anyhow!("could not get latest block from Ethereum: {}", e) - })?; + retry("eth_getBlockByNumber(latest) no txs RPC call", logger) + .redact_log_urls(true) + .no_limit() + .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) + .run(move || { + let web3 = web3.cheap_clone(); + async move { + let block_opt = web3 + .eth() + .block(Web3BlockNumber::Latest.into()) + .await + .map_err(|e| anyhow!("could not get latest block from Ethereum: {}", e))?; - block_opt - .ok_or_else(|| anyhow!("no latest block returned from Ethereum").into()) - } - }) - .map_err(move |e| { - e.into_inner().unwrap_or_else(move || { - anyhow!("Ethereum node took too long to return latest block").into() - }) + block_opt + .ok_or_else(|| anyhow!("no latest block returned from Ethereum").into()) + } + }) + .map_err(move |e| { + e.into_inner().unwrap_or_else(move || { + anyhow!("Ethereum node took too long to return latest block").into() }) - .boxed() - .compat(), - ) + }) + .await } - fn latest_block( - &self, - logger: &Logger, - ) -> Box + Send + Unpin> { + async fn latest_block(&self, logger: &Logger) -> Result { let web3 = self.web3.clone(); - Box::new( - retry("eth_getBlockByNumber(latest) with txs RPC call", logger) - .no_limit() - .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) - .run(move || { - let web3 = web3.cheap_clone(); - async move { - let block_opt = web3 - .eth() - .block_with_txs(Web3BlockNumber::Latest.into()) - .await - .map_err(|e| { - anyhow!("could not get latest block from Ethereum: {}", e) - })?; - block_opt - .ok_or_else(|| anyhow!("no latest block returned from Ethereum").into()) - } - }) - .map_err(move |e| { - e.into_inner().unwrap_or_else(move || { - anyhow!("Ethereum node took too long to return latest block").into() - }) + 
retry("eth_getBlockByNumber(latest) with txs RPC call", logger) + .redact_log_urls(true) + .no_limit() + .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) + .run(move || { + let web3 = web3.cheap_clone(); + async move { + let block_opt = web3 + .eth() + .block_with_txs(Web3BlockNumber::Latest.into()) + .await + .map_err(|e| anyhow!("could not get latest block from Ethereum: {}", e))?; + block_opt + .ok_or_else(|| anyhow!("no latest block returned from Ethereum").into()) + } + }) + .map_err(move |e| { + e.into_inner().unwrap_or_else(move || { + anyhow!("Ethereum node took too long to return latest block").into() }) - .boxed() - .compat(), - ) + }) + .await } - fn load_block( + async fn load_block( &self, logger: &Logger, block_hash: H256, - ) -> Box + Send> { - Box::new( - self.block_by_hash(logger, block_hash) - .and_then(move |block_opt| { - block_opt.ok_or_else(move || { - anyhow!( - "Ethereum node could not find block with hash {}", - block_hash - ) - }) - }), - ) + ) -> Result { + self.block_by_hash(logger, block_hash) + .await? + .ok_or_else(move || { + anyhow!( + "Ethereum node could not find block with hash {}", + block_hash + ) + }) } - fn block_by_hash( + async fn block_by_hash( &self, logger: &Logger, block_hash: H256, - ) -> Box, Error = Error> + Send> { + ) -> Result, Error> { let web3 = self.web3.clone(); let logger = logger.clone(); let retry_log_message = format!( "eth_getBlockByHash RPC call for block hash {:?}", block_hash ); - Box::new( - retry(retry_log_message, &logger) - .limit(ENV_VARS.request_retries) - .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) - .run(move || { - Box::pin(web3.eth().block_with_txs(BlockId::Hash(block_hash))) - .compat() - .from_err() - .compat() - }) - .map_err(move |e| { - e.into_inner().unwrap_or_else(move || { - anyhow!("Ethereum node took too long to return block {}", block_hash) - }) + + retry(retry_log_message, &logger) + .redact_log_urls(true) + .limit(ENV_VARS.request_retries) + .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) + .run(move || { + let web3 = web3.cheap_clone(); + async move { + web3.eth() + .block_with_txs(BlockId::Hash(block_hash)) + .await + .map_err(Error::from) + } + }) + .map_err(move |e| { + e.into_inner().unwrap_or_else(move || { + anyhow!("Ethereum node took too long to return block {}", block_hash) }) - .boxed() - .compat(), - ) + }) + .await } - fn block_by_number( + async fn block_by_number( &self, logger: &Logger, block_number: BlockNumber, - ) -> Box, Error = Error> + Send> { + ) -> Result, Error> { let web3 = self.web3.clone(); let logger = logger.clone(); let retry_log_message = format!( "eth_getBlockByNumber RPC call for block number {}", block_number ); - Box::new( - retry(retry_log_message, &logger) - .no_limit() - .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) - .run(move || { - let web3 = web3.cheap_clone(); - async move { - web3.eth() - .block_with_txs(BlockId::Number(block_number.into())) - .await - .map_err(Error::from) - } - }) - .map_err(move |e| { - e.into_inner().unwrap_or_else(move || { - anyhow!( - "Ethereum node took too long to return block {}", - block_number - ) - }) + retry(retry_log_message, &logger) + .redact_log_urls(true) + .no_limit() + .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) + .run(move || { + let web3 = web3.cheap_clone(); + async move { + web3.eth() + .block_with_txs(BlockId::Number(block_number.into())) + .await + .map_err(Error::from) + } + }) + .map_err(move |e| { + e.into_inner().unwrap_or_else(move || { + anyhow!( + "Ethereum node took too long to return 
block {}", + block_number + ) }) - .boxed() - .compat(), - ) + }) + .await } - fn load_full_block( + async fn load_full_block( &self, logger: &Logger, block: LightEthereumBlock, - ) -> Pin> + Send + '_>> - { + ) -> Result { let web3 = Arc::clone(&self.web3); let logger = logger.clone(); let block_hash = block.hash.expect("block is missing block hash"); @@ -1414,101 +1408,92 @@ impl EthereumAdapterTrait for EthereumAdapter { // request an empty batch which is not valid in JSON-RPC. if block.transactions.is_empty() { trace!(logger, "Block {} contains no transactions", block_hash); - return Box::pin(std::future::ready(Ok(EthereumBlock { + return Ok(EthereumBlock { block: Arc::new(block), transaction_receipts: Vec::new(), - }))); + }); } let hashes: Vec<_> = block.transactions.iter().map(|txn| txn.hash).collect(); - let supports_block_receipts_future = self.check_block_receipt_support_and_update_cache( - web3.clone(), - block_hash, - self.supports_eip_1898, - self.call_only, - logger.clone(), - ); + let supports_block_receipts = self + .check_block_receipt_support_and_update_cache( + web3.clone(), + block_hash, + self.supports_eip_1898, + self.call_only, + logger.clone(), + ) + .await; - let receipts_future = supports_block_receipts_future - .then(move |supports_block_receipts| { - fetch_receipts_with_retry(web3, hashes, block_hash, logger, supports_block_receipts) + fetch_receipts_with_retry(web3, hashes, block_hash, logger, supports_block_receipts) + .await + .map(|transaction_receipts| EthereumBlock { + block: Arc::new(block), + transaction_receipts, }) - .boxed(); - - let block_future = - futures03::TryFutureExt::map_ok(receipts_future, move |transaction_receipts| { - EthereumBlock { - block: Arc::new(block), - transaction_receipts, - } - }); - - Box::pin(block_future) } - fn block_hash_by_block_number( + async fn block_hash_by_block_number( &self, logger: &Logger, block_number: BlockNumber, - ) -> Box, Error = Error> + Send> { + ) -> Result, Error> { let web3 = self.web3.clone(); let retry_log_message = format!( "eth_getBlockByNumber RPC call for block number {}", block_number ); - Box::new( - retry(retry_log_message, logger) - .no_limit() - .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) - .run(move || { - let web3 = web3.cheap_clone(); - async move { - web3.eth() - .block(BlockId::Number(block_number.into())) - .await - .map(|block_opt| block_opt.and_then(|block| block.hash)) - .map_err(Error::from) - } + retry(retry_log_message, logger) + .redact_log_urls(true) + .no_limit() + .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) + .run(move || { + let web3 = web3.cheap_clone(); + async move { + web3.eth() + .block(BlockId::Number(block_number.into())) + .await + .map(|block_opt| block_opt.and_then(|block| block.hash)) + .map_err(Error::from) + } + }) + .await + .map_err(move |e| { + e.into_inner().unwrap_or_else(move || { + anyhow!( + "Ethereum node took too long to return data for block #{}", + block_number + ) }) - .boxed() - .compat() - .map_err(move |e| { - e.into_inner().unwrap_or_else(move || { - anyhow!( - "Ethereum node took too long to return data for block #{}", - block_number - ) - }) - }), - ) + }) } - fn get_balance( + async fn get_balance( &self, logger: &Logger, address: H160, block_ptr: BlockPtr, - ) -> Box + Send> { + ) -> Result { debug!( logger, "eth_getBalance"; "address" => format!("{}", address), "block" => format!("{}", block_ptr) ); - Box::new(self.balance(logger, address, block_ptr)) + self.balance(logger, address, block_ptr).await } - fn get_code( + async fn 
get_code( &self, logger: &Logger, address: H160, block_ptr: BlockPtr, - ) -> Box + Send> { + ) -> Result { debug!( logger, "eth_getCode"; "address" => format!("{}", address), "block" => format!("{}", block_ptr) ); - Box::new(self.code(logger, address, block_ptr)) + self.code(logger, address, block_ptr).await } async fn next_existing_ptr_to_number( @@ -1525,6 +1510,7 @@ impl EthereumAdapterTrait for EthereumAdapter { let web3 = self.web3.clone(); let logger = logger.clone(); let res = retry(retry_log_message, &logger) + .redact_log_urls(true) .when(|res| !res.is_ok() && !detect_null_block(res)) .no_limit() .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) @@ -1717,7 +1703,7 @@ impl EthereumAdapterTrait for EthereumAdapter { logger: Logger, chain_store: Arc, block_hashes: HashSet, - ) -> Box, Error = Error> + Send> { + ) -> Result>, Error> { let block_hashes: Vec<_> = block_hashes.iter().cloned().collect(); // Search for the block in the store first then use json-rpc as a backup. let mut blocks: Vec> = chain_store @@ -1739,27 +1725,25 @@ impl EthereumAdapterTrait for EthereumAdapter { // Return a stream that lazily loads batches of blocks. debug!(logger, "Requesting {} block(s)", missing_blocks.len()); - Box::new( - self.load_blocks_rpc(logger.clone(), missing_blocks) - .collect() - .map(move |new_blocks| { - let upsert_blocks: Vec<_> = new_blocks - .iter() - .map(|block| BlockFinality::Final(block.clone())) - .collect(); - let block_refs: Vec<_> = upsert_blocks - .iter() - .map(|block| block as &dyn graph::blockchain::Block) - .collect(); - if let Err(e) = chain_store.upsert_light_blocks(block_refs.as_slice()) { - error!(logger, "Error writing to block cache {}", e); - } - blocks.extend(new_blocks); - blocks.sort_by_key(|block| block.number); - stream::iter_ok(blocks) - }) - .flatten_stream(), - ) + let new_blocks = self + .load_blocks_rpc(logger.clone(), missing_blocks) + .collect() + .compat() + .await?; + let upsert_blocks: Vec<_> = new_blocks + .iter() + .map(|block| BlockFinality::Final(block.clone())) + .collect(); + let block_refs: Vec<_> = upsert_blocks + .iter() + .map(|block| block as &dyn graph::blockchain::Block) + .collect(); + if let Err(e) = chain_store.upsert_light_blocks(block_refs.as_slice()) { + error!(logger, "Error writing to block cache {}", e); + } + blocks.extend(new_blocks); + blocks.sort_by_key(|block| block.number); + Ok(blocks) } } @@ -1894,10 +1878,11 @@ pub(crate) async fn blocks_with_triggers( let logger2 = logger.cheap_clone(); - let blocks = eth + let blocks: Vec<_> = eth .load_blocks(logger.cheap_clone(), chain_store.clone(), block_hashes) - .await - .and_then( + .await? 
+ .into_iter() + .map( move |block| match triggers_by_block.remove(&(block.number() as BlockNumber)) { Some(triggers) => Ok(BlockWithTriggers::new( BlockFinality::Final(block), @@ -1910,9 +1895,7 @@ pub(crate) async fn blocks_with_triggers( )), }, ) - .collect() - .compat() - .await?; + .collect::>()?; // Filter out call triggers that come from unsuccessful transactions let futures = blocks.into_iter().map(|block| { @@ -2279,6 +2262,7 @@ async fn fetch_transaction_receipts_in_batch_with_retry( block_hash ); retry(retry_log_message, &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .no_logging() .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) @@ -2406,6 +2390,7 @@ async fn fetch_block_receipts_with_retry( // Perform the retry operation let receipts_option = retry(retry_log_message, &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || web3.eth().block_receipts(BlockId::Hash(block_hash)).boxed()) @@ -2450,6 +2435,7 @@ async fn fetch_transaction_receipt_with_retry( transaction_hash ); retry(retry_log_message, &logger) + .redact_log_urls(true) .limit(ENV_VARS.request_retries) .timeout_secs(ENV_VARS.json_rpc_timeout.as_secs()) .run(move || web3.eth().transaction_receipt(transaction_hash).boxed()) diff --git a/chain/ethereum/src/ingestor.rs b/chain/ethereum/src/ingestor.rs index e0fc8c5becd..935cb525936 100644 --- a/chain/ethereum/src/ingestor.rs +++ b/chain/ethereum/src/ingestor.rs @@ -3,7 +3,6 @@ use crate::{EthereumAdapter, EthereumAdapterTrait as _}; use graph::blockchain::client::ChainClient; use graph::blockchain::BlockchainKind; use graph::components::network_provider::ChainName; -use graph::futures03::compat::Future01CompatExt as _; use graph::slog::o; use graph::util::backoff::ExponentialBackoff; use graph::{ @@ -175,7 +174,6 @@ impl PollingBlockIngestor { // Get the fully populated block let block = eth_adapter .block_by_hash(logger, block_hash) - .compat() .await? 
.ok_or(IngestorError::BlockUnavailable(block_hash))?; let ethereum_block = eth_adapter.load_full_block(&logger, block).await?; @@ -210,7 +208,6 @@ impl PollingBlockIngestor { ) -> Result { eth_adapter .latest_block_header(&logger) - .compat() .await .map(|block| block.into()) } diff --git a/chain/ethereum/src/runtime/runtime_adapter.rs b/chain/ethereum/src/runtime/runtime_adapter.rs index 01f148bdd4c..951958d786b 100644 --- a/chain/ethereum/src/runtime/runtime_adapter.rs +++ b/chain/ethereum/src/runtime/runtime_adapter.rs @@ -14,7 +14,6 @@ use graph::data::store::scalar::BigInt; use graph::data::subgraph::API_VERSION_0_0_9; use graph::data_source; use graph::data_source::common::{ContractCall, MappingABI}; -use graph::futures03::compat::Future01CompatExt; use graph::prelude::web3::types::H160; use graph::runtime::gas::Gas; use graph::runtime::{AscIndexId, IndexForAscTypeId}; @@ -227,11 +226,7 @@ fn eth_get_balance( let address: H160 = asc_get(ctx.heap, wasm_ptr.into(), &ctx.gas, 0)?; - let result = graph::block_on( - eth_adapter - .get_balance(logger, address, block_ptr.clone()) - .compat(), - ); + let result = graph::block_on(eth_adapter.get_balance(logger, address, block_ptr.clone())); match result { Ok(v) => { @@ -265,12 +260,8 @@ fn eth_has_code( let address: H160 = asc_get(ctx.heap, wasm_ptr.into(), &ctx.gas, 0)?; - let result = graph::block_on( - eth_adapter - .get_code(logger, address, block_ptr.clone()) - .compat(), - ) - .map(|v| !v.0.is_empty()); + let result = graph::block_on(eth_adapter.get_code(logger, address, block_ptr.clone())) + .map(|v| !v.0.is_empty()); match result { Ok(v) => Ok(asc_new(ctx.heap, &AscWrapped { inner: v }, &ctx.gas)?), diff --git a/core/graphman/src/commands/deployment/info.rs b/core/graphman/src/commands/deployment/info.rs index 2d3f58d5dc9..f4087b3a5e0 100644 --- a/core/graphman/src/commands/deployment/info.rs +++ b/core/graphman/src/commands/deployment/info.rs @@ -7,7 +7,7 @@ use graph::components::store::BlockNumber; use graph::components::store::DeploymentId; use graph::components::store::StatusStore; use graph::data::subgraph::schema::SubgraphHealth; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::Store; use itertools::Itertools; diff --git a/core/graphman/src/commands/deployment/pause.rs b/core/graphman/src/commands/deployment/pause.rs index 2fe4d136613..d7197d42fb3 100644 --- a/core/graphman/src/commands/deployment/pause.rs +++ b/core/graphman/src/commands/deployment/pause.rs @@ -5,7 +5,7 @@ use graph::components::store::DeploymentLocator; use graph::components::store::StoreEvent; use graph_store_postgres::command_support::catalog; use graph_store_postgres::command_support::catalog::Site; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use thiserror::Error; diff --git a/core/graphman/src/commands/deployment/reassign.rs b/core/graphman/src/commands/deployment/reassign.rs index 5d3d633e082..f2b7f9f6479 100644 --- a/core/graphman/src/commands/deployment/reassign.rs +++ b/core/graphman/src/commands/deployment/reassign.rs @@ -7,7 +7,7 @@ use graph::prelude::AssignmentChange; use graph::prelude::NodeId; use graph_store_postgres::command_support::catalog; use graph_store_postgres::command_support::catalog::Site; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use 
thiserror::Error; @@ -24,6 +24,18 @@ impl Deployment { pub fn locator(&self) -> &DeploymentLocator { &self.locator } + + pub fn assigned_node( + &self, + primary_pool: ConnectionPool, + ) -> Result, GraphmanError> { + let primary_conn = primary_pool.get().map_err(GraphmanError::from)?; + let mut catalog_conn = catalog::Connection::new(primary_conn); + let node = catalog_conn + .assigned_node(&self.site) + .map_err(GraphmanError::from)?; + Ok(node) + } } #[derive(Debug, Error)] @@ -70,16 +82,13 @@ pub fn reassign_deployment( notification_sender: Arc, deployment: &Deployment, node: &NodeId, + curr_node: Option, ) -> Result { let primary_conn = primary_pool.get().map_err(GraphmanError::from)?; let mut catalog_conn = catalog::Connection::new(primary_conn); - - let changes: Vec = match catalog_conn - .assigned_node(&deployment.site) - .map_err(GraphmanError::from)? - { + let changes: Vec = match &curr_node { Some(curr) => { - if &curr == node { + if &curr == &node { vec![] } else { catalog_conn diff --git a/core/graphman/src/commands/deployment/resume.rs b/core/graphman/src/commands/deployment/resume.rs index 7eb0ff6e235..ab394ef4791 100644 --- a/core/graphman/src/commands/deployment/resume.rs +++ b/core/graphman/src/commands/deployment/resume.rs @@ -5,7 +5,7 @@ use graph::components::store::DeploymentLocator; use graph::prelude::StoreEvent; use graph_store_postgres::command_support::catalog; use graph_store_postgres::command_support::catalog::Site; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use thiserror::Error; diff --git a/core/graphman/src/commands/deployment/unassign.rs b/core/graphman/src/commands/deployment/unassign.rs index 5233e61ada1..0061fac49b6 100644 --- a/core/graphman/src/commands/deployment/unassign.rs +++ b/core/graphman/src/commands/deployment/unassign.rs @@ -5,7 +5,7 @@ use graph::components::store::DeploymentLocator; use graph::components::store::StoreEvent; use graph_store_postgres::command_support::catalog; use graph_store_postgres::command_support::catalog::Site; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use thiserror::Error; diff --git a/core/src/subgraph/error.rs b/core/src/subgraph/error.rs index b3131255aed..c50712c08db 100644 --- a/core/src/subgraph/error.rs +++ b/core/src/subgraph/error.rs @@ -1,28 +1,100 @@ use graph::data::subgraph::schema::SubgraphError; -use graph::prelude::{thiserror, Error, StoreError}; +use graph::env::ENV_VARS; +use graph::prelude::{anyhow, thiserror, Error, StoreError}; +pub trait DeterministicError: std::fmt::Debug + std::fmt::Display + Send + Sync + 'static {} + +impl DeterministicError for SubgraphError {} + +impl DeterministicError for StoreError {} + +impl DeterministicError for anyhow::Error {} + +/// An error happened during processing and we need to classify errors into +/// deterministic and non-deterministic errors. This struct holds the result +/// of that classification #[derive(thiserror::Error, Debug)] -pub enum BlockProcessingError { +pub enum ProcessingError { #[error("{0:#}")] - Unknown(#[from] Error), + Unknown(Error), // The error had a deterministic cause but, for a possibly non-deterministic reason, we chose to // halt processing due to the error. 
#[error("{0}")] - Deterministic(SubgraphError), + Deterministic(Box), #[error("subgraph stopped while processing triggers")] Canceled, } -impl BlockProcessingError { +impl ProcessingError { pub fn is_deterministic(&self) -> bool { - matches!(self, BlockProcessingError::Deterministic(_)) + matches!(self, ProcessingError::Deterministic(_)) + } + + pub fn detail(self, ctx: &str) -> ProcessingError { + match self { + ProcessingError::Unknown(e) => { + let x = e.context(ctx.to_string()); + ProcessingError::Unknown(x) + } + ProcessingError::Deterministic(e) => { + ProcessingError::Deterministic(Box::new(anyhow!("{e}").context(ctx.to_string()))) + } + ProcessingError::Canceled => ProcessingError::Canceled, + } + } +} + +/// Similar to `anyhow::Context`, but for `Result`. We +/// call the method `detail` to avoid ambiguity with anyhow's `context` +/// method +pub trait DetailHelper { + fn detail(self: Self, ctx: &str) -> Result; +} + +impl DetailHelper for Result { + fn detail(self, ctx: &str) -> Result { + self.map_err(|e| e.detail(ctx)) } } -impl From for BlockProcessingError { - fn from(e: StoreError) -> Self { - BlockProcessingError::Unknown(e.into()) +/// Implement this for errors that are always non-deterministic. +pub(crate) trait NonDeterministicErrorHelper { + fn non_deterministic(self: Self) -> Result; +} + +impl NonDeterministicErrorHelper for Result { + fn non_deterministic(self) -> Result { + self.map_err(|e| ProcessingError::Unknown(e)) + } +} + +impl NonDeterministicErrorHelper for Result { + fn non_deterministic(self) -> Result { + self.map_err(|e| ProcessingError::Unknown(Error::from(e))) + } +} + +/// Implement this for errors where it depends on the details whether they +/// are deterministic or not. +pub(crate) trait ClassifyErrorHelper { + fn classify(self: Self) -> Result; +} + +impl ClassifyErrorHelper for Result { + fn classify(self) -> Result { + self.map_err(|e| { + if ENV_VARS.mappings.store_errors_are_nondeterministic { + // Old behavior, just in case the new behavior causes issues + ProcessingError::Unknown(Error::from(e)) + } else { + if e.is_deterministic() { + ProcessingError::Deterministic(Box::new(e)) + } else { + ProcessingError::Unknown(Error::from(e)) + } + } + }) } } diff --git a/core/src/subgraph/inputs.rs b/core/src/subgraph/inputs.rs index ca52073ab06..91bbdd131f4 100644 --- a/core/src/subgraph/inputs.rs +++ b/core/src/subgraph/inputs.rs @@ -75,4 +75,12 @@ impl IndexingInputs { instrument: *instrument, } } + + pub fn errors_are_non_fatal(&self) -> bool { + self.features.contains(&SubgraphFeature::NonFatalErrors) + } + + pub fn errors_are_fatal(&self) -> bool { + !self.features.contains(&SubgraphFeature::NonFatalErrors) + } } diff --git a/core/src/subgraph/registrar.rs b/core/src/subgraph/registrar.rs index 3a712b6daa9..6f7ae17425f 100644 --- a/core/src/subgraph/registrar.rs +++ b/core/src/subgraph/registrar.rs @@ -119,10 +119,7 @@ where assignment_event_stream .compat() .map_err(SubgraphAssignmentProviderError::Unknown) - .map_err(CancelableError::Error) - .cancelable(&assignment_event_stream_cancel_handle, || { - Err(CancelableError::Cancel) - }) + .cancelable(&assignment_event_stream_cancel_handle) .compat() .for_each(move |assignment_event| { assert_eq!(assignment_event.node_id(), &node_id); diff --git a/core/src/subgraph/runner.rs b/core/src/subgraph/runner.rs index 922c7a4003c..fcd8fa30fbb 100644 --- a/core/src/subgraph/runner.rs +++ b/core/src/subgraph/runner.rs @@ -1,10 +1,14 @@ use crate::subgraph::context::IndexingContext; -use 
crate::subgraph::error::BlockProcessingError; +use crate::subgraph::error::{ + ClassifyErrorHelper as _, DetailHelper as _, NonDeterministicErrorHelper as _, ProcessingError, +}; use crate::subgraph::inputs::IndexingInputs; use crate::subgraph::state::IndexingState; use crate::subgraph::stream::new_block_stream; +use anyhow::Context as _; +use async_trait::async_trait; use graph::blockchain::block_stream::{ - BlockStreamError, BlockStreamEvent, BlockWithTriggers, FirehoseCursor, + BlockStream, BlockStreamError, BlockStreamEvent, BlockWithTriggers, FirehoseCursor, }; use graph::blockchain::{ Block, BlockTime, Blockchain, DataSource as _, SubgraphFilter, Trigger, TriggerFilter as _, @@ -12,23 +16,28 @@ use graph::blockchain::{ }; use graph::components::store::{EmptyStore, GetScope, ReadStore, StoredDynamicDataSource}; use graph::components::subgraph::InstanceDSTemplate; +use graph::components::trigger_processor::RunnableTriggers; use graph::components::{ store::ModificationsAndCache, subgraph::{MappingError, PoICausalityRegion, ProofOfIndexing, SharedProofOfIndexing}, }; use graph::data::store::scalar::Bytes; -use graph::data::subgraph::{ - schema::{SubgraphError, SubgraphHealth}, - SubgraphFeature, -}; +use graph::data::subgraph::schema::{SubgraphError, SubgraphHealth}; use graph::data_source::{ offchain, CausalityRegion, DataSource, DataSourceCreationError, TriggerData, }; use graph::env::EnvVars; +use graph::ext::futures::Cancelable; use graph::futures03::stream::StreamExt; -use graph::futures03::TryStreamExt; -use graph::prelude::*; +use graph::prelude::{ + anyhow, hex, retry, thiserror, BlockNumber, BlockPtr, BlockState, CancelGuard, CancelHandle, + CancelToken as _, CancelableError, CheapClone as _, EntityCache, EntityModification, Error, + InstanceDSTemplateInfo, LogCode, RunnerMetrics, RuntimeHostBuilder, StopwatchMetrics, + StoreError, StreamExtension, UnfailOutcome, Value, ENV_VARS, +}; use graph::schema::EntityKey; +use graph::slog::{debug, error, info, o, trace, warn, Logger}; +use graph::util::lfu_cache::EvictStats; use graph::util::{backoff::ExponentialBackoff, lfu_cache::LfuCache}; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -53,6 +62,7 @@ where inputs: Arc>, logger: Logger, pub metrics: RunnerMetrics, + cancel_handle: Option, } #[derive(Debug, thiserror::Error)] @@ -92,6 +102,7 @@ where }, logger, metrics, + cancel_handle: None, } } @@ -199,6 +210,39 @@ where self.build_filter() } + async fn start_block_stream(&mut self) -> Result>>, Error> { + let block_stream_canceler = CancelGuard::new(); + let block_stream_cancel_handle = block_stream_canceler.handle(); + // TriggerFilter needs to be rebuilt eveytime the blockstream is restarted + self.ctx.filter = Some(self.build_filter()); + + let block_stream = new_block_stream( + &self.inputs, + self.ctx.filter.clone().unwrap(), // Safe to unwrap as we just called `build_filter` in the previous line + &self.metrics.subgraph, + ) + .await? 
+ .cancelable(&block_stream_canceler); + + self.cancel_handle = Some(block_stream_cancel_handle); + + // Keep the stream's cancel guard around to be able to shut it down when the subgraph + // deployment is unassigned + self.ctx + .instances + .insert(self.inputs.deployment.id, block_stream_canceler); + + Ok(block_stream) + } + + fn is_canceled(&self) -> bool { + if let Some(ref cancel_handle) = self.cancel_handle { + cancel_handle.is_canceled() + } else { + false + } + } + pub async fn run(self) -> Result<(), SubgraphRunnerError> { self.run_inner(false).await.map(|_| ()) } @@ -248,27 +292,9 @@ where loop { debug!(self.logger, "Starting or restarting subgraph"); - let block_stream_canceler = CancelGuard::new(); - let block_stream_cancel_handle = block_stream_canceler.handle(); - // TriggerFilter needs to be rebuilt eveytime the blockstream is restarted - self.ctx.filter = Some(self.build_filter()); - - let mut block_stream = new_block_stream( - &self.inputs, - self.ctx.filter.clone().unwrap(), // Safe to unwrap as we just called `build_filter` in the previous line - &self.metrics.subgraph, - ) - .await? - .map_err(CancelableError::from) - .cancelable(&block_stream_canceler, || Err(CancelableError::Cancel)); + let mut block_stream = self.start_block_stream().await?; - // Keep the stream's cancel guard around to be able to shut it down when the subgraph - // deployment is unassigned - self.ctx - .instances - .insert(self.inputs.deployment.id, block_stream_canceler); - - debug!(self.logger, "Starting block stream"); + debug!(self.logger, "Started block stream"); self.metrics.subgraph.deployment_status.running(); @@ -284,21 +310,18 @@ where // This will require some code refactor in how the BlockStream is created let block_start = Instant::now(); - let action = self - .handle_stream_event(event, &block_stream_cancel_handle) - .await - .map(|res| { - self.metrics - .subgraph - .observe_block_processed(block_start.elapsed(), res.block_finished()); - res - })?; + let action = self.handle_stream_event(event).await.map(|res| { + self.metrics + .subgraph + .observe_block_processed(block_start.elapsed(), res.block_finished()); + res + })?; self.update_deployment_synced_metric(); // It is possible that the subgraph was unassigned, but the runner was in // a retry delay state and did not observe the cancel signal. - if block_stream_cancel_handle.is_canceled() { + if self.is_canceled() { // It is also possible that the runner was in a retry delay state while // the subgraph was reassigned and a new runner was started. 
if self.ctx.instances.contains(&self.inputs.deployment.id) { @@ -346,14 +369,217 @@ where } } + async fn transact_block_state( + &mut self, + logger: &Logger, + block_ptr: BlockPtr, + firehose_cursor: FirehoseCursor, + block_time: BlockTime, + block_state: BlockState, + proof_of_indexing: SharedProofOfIndexing, + offchain_mods: Vec, + processed_offchain_data_sources: Vec, + ) -> Result<(), ProcessingError> { + fn log_evict_stats(logger: &Logger, evict_stats: &EvictStats) { + trace!(logger, "Entity cache statistics"; + "weight" => evict_stats.new_weight, + "evicted_weight" => evict_stats.evicted_weight, + "count" => evict_stats.new_count, + "evicted_count" => evict_stats.evicted_count, + "stale_update" => evict_stats.stale_update, + "hit_rate" => format!("{:.0}%", evict_stats.hit_rate_pct()), + "accesses" => evict_stats.accesses, + "evict_time_ms" => evict_stats.evict_time.as_millis()); + } + + let BlockState { + deterministic_errors, + persisted_data_sources, + metrics: block_state_metrics, + mut entity_cache, + .. + } = block_state; + let first_error = deterministic_errors.first().cloned(); + let has_errors = first_error.is_some(); + + // Avoid writing to store if block stream has been canceled + if self.is_canceled() { + return Err(ProcessingError::Canceled); + } + + if let Some(proof_of_indexing) = proof_of_indexing.into_inner() { + update_proof_of_indexing( + proof_of_indexing, + block_time, + &self.metrics.host.stopwatch, + &mut entity_cache, + ) + .await + .non_deterministic()?; + } + + let section = self + .metrics + .host + .stopwatch + .start_section("as_modifications"); + let ModificationsAndCache { + modifications: mut mods, + entity_lfu_cache: cache, + evict_stats, + } = entity_cache.as_modifications(block_ptr.number).classify()?; + section.end(); + + log_evict_stats(&self.logger, &evict_stats); + + mods.extend(offchain_mods); + + // Put the cache back in the state, asserting that the placeholder cache was not used. + assert!(self.state.entity_lfu_cache.is_empty()); + self.state.entity_lfu_cache = cache; + + if !mods.is_empty() { + info!(&logger, "Applying {} entity operation(s)", mods.len()); + } + + let err_count = deterministic_errors.len(); + for (i, e) in deterministic_errors.iter().enumerate() { + let message = format!("{:#}", e).replace('\n', "\t"); + error!(&logger, "Subgraph error {}/{}", i + 1, err_count; + "error" => message, + "code" => LogCode::SubgraphSyncingFailure + ); + } + + // Transact entity operations into the store and update the + // subgraph's block stream pointer + let _section = self.metrics.host.stopwatch.start_section("transact_block"); + let start = Instant::now(); + + // If a deterministic error has happened, make the PoI to be the only entity that'll be stored. 
+ if has_errors && self.inputs.errors_are_fatal() { + let is_poi_entity = + |entity_mod: &EntityModification| entity_mod.key().entity_type.is_poi(); + mods.retain(is_poi_entity); + // Confidence check + assert!( + mods.len() == 1, + "There should be only one PoI EntityModification" + ); + } + + let is_caught_up = self.is_caught_up(&block_ptr).await.non_deterministic()?; + + self.inputs + .store + .transact_block_operations( + block_ptr.clone(), + block_time, + firehose_cursor, + mods, + &self.metrics.host.stopwatch, + persisted_data_sources, + deterministic_errors, + processed_offchain_data_sources, + self.inputs.errors_are_non_fatal(), + is_caught_up, + ) + .await + .classify() + .detail("Failed to transact block operations")?; + + // For subgraphs with `nonFatalErrors` feature disabled, we consider + // any error as fatal. + // + // So we do an early return to make the subgraph stop processing blocks. + // + // In this scenario the only entity that is stored/transacted is the PoI, + // all of the others are discarded. + if has_errors && self.inputs.errors_are_fatal() { + // Only the first error is reported. + return Err(ProcessingError::Deterministic(Box::new( + first_error.unwrap(), + ))); + } + + let elapsed = start.elapsed().as_secs_f64(); + self.metrics + .subgraph + .block_ops_transaction_duration + .observe(elapsed); + + block_state_metrics + .flush_metrics_to_store(&logger, block_ptr, self.inputs.deployment.id) + .non_deterministic()?; + + if has_errors { + self.maybe_cancel()?; + } + + Ok(()) + } + + /// Cancel the subgraph if `disable_fail_fast` is not set and it is not + /// synced + fn maybe_cancel(&self) -> Result<(), ProcessingError> { + // To prevent a buggy pending version from replacing a current version, if errors are + // present the subgraph will be unassigned. + let store = &self.inputs.store; + if !ENV_VARS.disable_fail_fast && !store.is_deployment_synced() { + store + .unassign_subgraph() + .map_err(|e| ProcessingError::Unknown(e.into()))?; + + // Use `Canceled` to avoiding setting the subgraph health to failed, an error was + // just transacted so it will be already be set to unhealthy. + Err(ProcessingError::Canceled.into()) + } else { + Ok(()) + } + } + + async fn match_and_decode_many<'a, F>( + &'a self, + logger: &Logger, + block: &Arc, + triggers: Vec>, + hosts_filter: F, + ) -> Result>, MappingError> + where + F: Fn(&TriggerData) -> Box + Send + 'a>, + { + let triggers = triggers.into_iter().map(|t| match t { + Trigger::Chain(t) => TriggerData::Onchain(t), + Trigger::Subgraph(t) => TriggerData::Subgraph(t), + }); + + self.ctx + .decoder + .match_and_decode_many( + &logger, + &block, + triggers, + hosts_filter, + &self.metrics.subgraph, + ) + .await + } + /// Processes a block and returns the updated context and a boolean flag indicating /// whether new dynamic data sources have been added to the subgraph. 
async fn process_block( &mut self, - block_stream_cancel_handle: &CancelHandle, block: BlockWithTriggers, firehose_cursor: FirehoseCursor, - ) -> Result { + ) -> Result { + fn log_triggers_found(logger: &Logger, triggers: &[Trigger]) { + if triggers.len() == 1 { + info!(logger, "1 trigger found in this block"); + } else if triggers.len() > 1 { + info!(logger, "{} triggers found in this block", triggers.len()); + } + } + let triggers = block.trigger_data; let block = Arc::new(block.block); let block_ptr = block.ptr(); @@ -386,18 +612,7 @@ where // Match and decode all triggers in the block let hosts_filter = |trigger: &TriggerData| self.ctx.instance.hosts_for_trigger(trigger); let match_res = self - .ctx - .decoder - .match_and_decode_many( - &logger, - &block, - triggers.into_iter().map(|t| match t { - Trigger::Chain(t) => TriggerData::Onchain(t), - Trigger::Subgraph(t) => TriggerData::Subgraph(t), - }), - hosts_filter, - &self.metrics.subgraph, - ) + .match_and_decode_many(&logger, &block, triggers, hosts_filter) .await; // Process events one after the other, passing in entity operations @@ -441,7 +656,7 @@ where Ok(state) => block_state = state, // Some form of unknown or non-deterministic error ocurred. - Err(MappingError::Unknown(e)) => return Err(BlockProcessingError::Unknown(e)), + Err(MappingError::Unknown(e)) => return Err(ProcessingError::Unknown(e)), Err(MappingError::PossibleReorg(e)) => { info!(logger, "Possible reorg detected, retrying"; @@ -499,50 +714,29 @@ where vec![], )); - let block: Arc = if self.inputs.chain.is_refetch_block_required() { - let cur = firehose_cursor.clone(); - let log = logger.cheap_clone(); - let chain = self.inputs.chain.cheap_clone(); - Arc::new( - retry( - "refetch firehose block after dynamic datasource was added", - &logger, - ) - .limit(5) - .no_timeout() - .run(move || { - let cur = cur.clone(); - let log = log.cheap_clone(); - let chain = chain.cheap_clone(); - async move { chain.refetch_firehose_block(&log, cur).await } - }) - .await?, - ) - } else { - block.cheap_clone() - }; + // TODO: We have to pass a reference to `block` to + // `refetch_block`, otherwise the call to + // handle_offchain_triggers below gets an error that `block` + // has moved. That is extremely fishy since it means that + // `handle_offchain_triggers` uses the non-refetched block + // + // It's also not clear why refetching needs to happen inside + // the loop; will firehose really return something diffrent + // each time even though the cursor doesn't change? + let block = self + .refetch_block(&logger, &block, &firehose_cursor) + .await?; // Reprocess the triggers from this block that match the new data sources let block_with_triggers = self .inputs .triggers_adapter .triggers_in_block(&logger, block.as_ref().clone(), filter) - .await?; + .await + .non_deterministic()?; let triggers = block_with_triggers.trigger_data; - - if triggers.len() == 1 { - info!( - &logger, - "1 trigger found in this block for the new data sources" - ); - } else if triggers.len() > 1 { - info!( - &logger, - "{} triggers found in this block for the new data sources", - triggers.len() - ); - } + log_triggers_found(&logger, &triggers); // Add entity operations for the new data sources to the block state // and add runtimes for the data sources to the subgraph instance. @@ -550,19 +744,11 @@ where // Process the triggers in each host in the same order the // corresponding data sources have been created. 
+ let hosts_filter = |_: &'_ TriggerData| -> Box + Send> { + Box::new(runtime_hosts.iter().map(Arc::as_ref)) + }; let match_res: Result, _> = self - .ctx - .decoder - .match_and_decode_many( - &logger, - &block, - triggers.into_iter().map(|t| match t { - Trigger::Chain(t) => TriggerData::Onchain(t), - Trigger::Subgraph(_) => unreachable!(), // TODO(krishna): Re-evaulate this - }), - |_| Box::new(runtime_hosts.iter().map(Arc::as_ref)), - &self.metrics.subgraph, - ) + .match_and_decode_many(&logger, &block, triggers, hosts_filter) .await; let mut res = Ok(block_state); @@ -606,168 +792,39 @@ where // clean context as in b21fa73b-6453-4340-99fb-1a78ec62efb1. match e { MappingError::PossibleReorg(e) | MappingError::Unknown(e) => { - BlockProcessingError::Unknown(e) + ProcessingError::Unknown(e) } } })?; } } - let has_errors = block_state.has_errors(); - let is_non_fatal_errors_active = self - .inputs - .features - .contains(&SubgraphFeature::NonFatalErrors); - - // Apply entity operations and advance the stream - - // Avoid writing to store if block stream has been canceled - if block_stream_cancel_handle.is_canceled() { - return Err(BlockProcessingError::Canceled); - } - - if let Some(proof_of_indexing) = proof_of_indexing.into_inner() { - update_proof_of_indexing( - proof_of_indexing, - block.timestamp(), - &self.metrics.host.stopwatch, - &mut block_state.entity_cache, - ) - .await?; - } - - let section = self - .metrics - .host - .stopwatch - .start_section("as_modifications"); - let ModificationsAndCache { - modifications: mut mods, - entity_lfu_cache: cache, - evict_stats, - } = block_state - .entity_cache - .as_modifications(block.number()) - .map_err(|e| BlockProcessingError::Unknown(e.into()))?; - section.end(); - - trace!(self.logger, "Entity cache statistics"; - "weight" => evict_stats.new_weight, - "evicted_weight" => evict_stats.evicted_weight, - "count" => evict_stats.new_count, - "evicted_count" => evict_stats.evicted_count, - "stale_update" => evict_stats.stale_update, - "hit_rate" => format!("{:.0}%", evict_stats.hit_rate_pct()), - "accesses" => evict_stats.accesses, - "evict_time_ms" => evict_stats.evict_time.as_millis()); - // Check for offchain events and process them, including their entity modifications in the // set to be transacted. - let offchain_events = self.ctx.offchain_monitor.ready_offchain_events()?; + let offchain_events = self + .ctx + .offchain_monitor + .ready_offchain_events() + .non_deterministic()?; let (offchain_mods, processed_offchain_data_sources, persisted_off_chain_data_sources) = self.handle_offchain_triggers(offchain_events, &block) - .await?; - mods.extend(offchain_mods); - - // Put the cache back in the state, asserting that the placeholder cache was not used. 
- assert!(self.state.entity_lfu_cache.is_empty()); - self.state.entity_lfu_cache = cache; - - if !mods.is_empty() { - info!(&logger, "Applying {} entity operation(s)", mods.len()); - } - - let err_count = block_state.deterministic_errors.len(); - for (i, e) in block_state.deterministic_errors.iter().enumerate() { - let message = format!("{:#}", e).replace('\n', "\t"); - error!(&logger, "Subgraph error {}/{}", i + 1, err_count; - "error" => message, - "code" => LogCode::SubgraphSyncingFailure - ); - } - - // Transact entity operations into the store and update the - // subgraph's block stream pointer - let _section = self.metrics.host.stopwatch.start_section("transact_block"); - let start = Instant::now(); - - // If a deterministic error has happened, make the PoI to be the only entity that'll be stored. - if has_errors && !is_non_fatal_errors_active { - let is_poi_entity = - |entity_mod: &EntityModification| entity_mod.key().entity_type.is_poi(); - mods.retain(is_poi_entity); - // Confidence check - assert!( - mods.len() == 1, - "There should be only one PoI EntityModification" - ); - } - - let BlockState { - deterministic_errors, - mut persisted_data_sources, - metrics: block_state_metrics, - .. - } = block_state; - - let first_error = deterministic_errors.first().cloned(); - - let is_caught_up = self.is_caught_up(&block_ptr).await?; - - persisted_data_sources.extend(persisted_off_chain_data_sources); - self.inputs - .store - .transact_block_operations( - block_ptr.clone(), - block.timestamp(), - firehose_cursor, - mods, - &self.metrics.host.stopwatch, - persisted_data_sources, - deterministic_errors, - processed_offchain_data_sources, - is_non_fatal_errors_active, - is_caught_up, - ) - .await - .context("Failed to transact block operations")?; - - // For subgraphs with `nonFatalErrors` feature disabled, we consider - // any error as fatal. - // - // So we do an early return to make the subgraph stop processing blocks. - // - // In this scenario the only entity that is stored/transacted is the PoI, - // all of the others are discarded. - if has_errors && !is_non_fatal_errors_active { - // Only the first error is reported. - return Err(BlockProcessingError::Deterministic(first_error.unwrap())); - } - - let elapsed = start.elapsed().as_secs_f64(); - self.metrics - .subgraph - .block_ops_transaction_duration - .observe(elapsed); + .await + .non_deterministic()?; + block_state + .persisted_data_sources + .extend(persisted_off_chain_data_sources); - block_state_metrics.flush_metrics_to_store( + self.transact_block_state( &logger, - block_ptr, - self.inputs.deployment.id, - )?; - - // To prevent a buggy pending version from replacing a current version, if errors are - // present the subgraph will be unassigned. - let store = &self.inputs.store; - if has_errors && !ENV_VARS.disable_fail_fast && !store.is_deployment_synced() { - store - .unassign_subgraph() - .map_err(|e| BlockProcessingError::Unknown(e.into()))?; - - // Use `Canceled` to avoiding setting the subgraph health to failed, an error was - // just transacted so it will be already be set to unhealthy. - return Err(BlockProcessingError::Canceled); - } + block_ptr.clone(), + firehose_cursor.clone(), + block.timestamp(), + block_state, + proof_of_indexing, + offchain_mods, + processed_offchain_data_sources, + ) + .await?; match needs_restart { true => Ok(Action::Restart), @@ -775,6 +832,37 @@ where } } + /// Refetch the block if it that is needed. Otherwise return the block as is. 
+ async fn refetch_block( + &mut self, + logger: &Logger, + block: &Arc, + firehose_cursor: &FirehoseCursor, + ) -> Result, ProcessingError> { + if !self.inputs.chain.is_refetch_block_required() { + return Ok(block.cheap_clone()); + } + + let cur = firehose_cursor.clone(); + let log = logger.cheap_clone(); + let chain = self.inputs.chain.cheap_clone(); + let block = retry( + "refetch firehose block after dynamic datasource was added", + logger, + ) + .limit(5) + .no_timeout() + .run(move || { + let cur = cur.clone(); + let log = log.cheap_clone(); + let chain = chain.cheap_clone(); + async move { chain.refetch_firehose_block(&log, cur).await } + }) + .await + .non_deterministic()?; + Ok(Arc::new(block)) + } + async fn process_wasm_block( &mut self, proof_of_indexing: &SharedProofOfIndexing, @@ -809,7 +897,7 @@ where fn create_dynamic_data_sources( &mut self, created_data_sources: Vec, - ) -> Result<(Vec>, Vec>), Error> { + ) -> Result<(Vec>, Vec>), ProcessingError> { let mut data_sources = vec![]; let mut runtime_hosts = vec![]; @@ -817,15 +905,15 @@ where let manifest_idx = info .template .manifest_idx() - .ok_or_else(|| anyhow!("Expected template to have an idx"))?; + .ok_or_else(|| anyhow!("Expected template to have an idx")) + .non_deterministic()?; let created_ds_template = self .inputs .templates .iter() .find(|t| t.manifest_idx() == manifest_idx) - .ok_or_else(|| { - anyhow!("Expected to find a template for this dynamic data source") - })?; + .ok_or_else(|| anyhow!("Expected to find a template for this dynamic data source")) + .non_deterministic()?; // Try to instantiate a data source from the template let data_source = { @@ -847,14 +935,15 @@ where warn!(self.logger, "{}", e.to_string()); continue; } - Err(DataSourceCreationError::Unknown(e)) => return Err(e), + Err(DataSourceCreationError::Unknown(e)) => return Err(e).non_deterministic(), } }; // Try to create a runtime host for the data source let host = self .ctx - .add_dynamic_data_source(&self.logger, data_source.clone())?; + .add_dynamic_data_source(&self.logger, data_source.clone()) + .non_deterministic()?; match host { Some(host) => { @@ -882,7 +971,7 @@ where &mut self, start: Instant, block_ptr: BlockPtr, - action: Result, + action: Result, ) -> Result { self.state.skip_ptr_updates_timer = Instant::now(); @@ -934,7 +1023,7 @@ where return Ok(action); } - Err(BlockProcessingError::Canceled) => { + Err(ProcessingError::Canceled) => { debug!(self.logger, "Subgraph block stream shut down cleanly"); return Ok(Action::Stop); } @@ -1072,7 +1161,6 @@ where async fn handle_stream_event( &mut self, event: Option, CancelableError>>, - cancel_handle: &CancelHandle, ) -> Result { let action = match event { Some(Ok(BlockStreamEvent::ProcessWasmBlock( @@ -1088,14 +1176,7 @@ where .stopwatch .start_section(PROCESS_WASM_BLOCK_SECTION_NAME); let res = self - .handle_process_wasm_block( - block_ptr.clone(), - block_time, - data, - handler, - cursor, - cancel_handle, - ) + .handle_process_wasm_block(block_ptr.clone(), block_time, data, handler, cursor) .await; let start = Instant::now(); self.handle_action(start, block_ptr, res).await? @@ -1106,8 +1187,7 @@ where .stream .stopwatch .start_section(PROCESS_BLOCK_SECTION_NAME); - self.handle_process_block(block, cursor, cancel_handle) - .await? + self.handle_process_block(block, cursor).await? 
} Some(Ok(BlockStreamEvent::Revert(revert_to_ptr, cursor))) => { let _section = self @@ -1119,7 +1199,7 @@ where } // Log and drop the errors from the block_stream // The block stream will continue attempting to produce blocks - Some(Err(e)) => self.handle_err(e, cancel_handle).await?, + Some(Err(e)) => self.handle_err(e).await?, // If the block stream ends, that means that there is no more indexing to do. // Typically block streams produce indefinitely, but tests are an example of finite block streams. None => Action::Stop, @@ -1262,24 +1342,19 @@ trait StreamEventHandler { block_data: Box<[u8]>, handler: String, cursor: FirehoseCursor, - cancel_handle: &CancelHandle, - ) -> Result; + ) -> Result; async fn handle_process_block( &mut self, block: BlockWithTriggers, cursor: FirehoseCursor, - cancel_handle: &CancelHandle, ) -> Result; async fn handle_revert( &mut self, revert_to_ptr: BlockPtr, cursor: FirehoseCursor, ) -> Result; - async fn handle_err( - &mut self, - err: CancelableError, - cancel_handle: &CancelHandle, - ) -> Result; + async fn handle_err(&mut self, err: CancelableError) + -> Result; fn needs_restart(&self, revert_to_ptr: BlockPtr, subgraph_ptr: BlockPtr) -> bool; } @@ -1296,8 +1371,7 @@ where block_data: Box<[u8]>, handler: String, cursor: FirehoseCursor, - cancel_handle: &CancelHandle, - ) -> Result { + ) -> Result { let logger = self.logger.new(o!( "block_number" => format!("{:?}", block_ptr.number), "block_hash" => format!("{}", block_ptr.hash) @@ -1316,7 +1390,7 @@ where // Causality region for onchain triggers. let causality_region = PoICausalityRegion::from_network(&self.inputs.network); - let mut block_state = { + let block_state = { match self .process_wasm_block( &proof_of_indexing, @@ -1332,9 +1406,7 @@ where Ok(block_state) => block_state, // Some form of unknown or non-deterministic error ocurred. 
- Err(MappingError::Unknown(e)) => { - return Err(BlockProcessingError::Unknown(e).into()) - } + Err(MappingError::Unknown(e)) => return Err(ProcessingError::Unknown(e).into()), Err(MappingError::PossibleReorg(e)) => { info!(logger, "Possible reorg detected, retrying"; @@ -1353,145 +1425,17 @@ where } }; - let has_errors = block_state.has_errors(); - let is_non_fatal_errors_active = self - .inputs - .features - .contains(&SubgraphFeature::NonFatalErrors); - - // Apply entity operations and advance the stream - - // Avoid writing to store if block stream has been canceled - if cancel_handle.is_canceled() { - return Err(BlockProcessingError::Canceled.into()); - } - - if let Some(proof_of_indexing) = proof_of_indexing.into_inner() { - update_proof_of_indexing( - proof_of_indexing, - block_time, - &self.metrics.host.stopwatch, - &mut block_state.entity_cache, - ) - .await?; - } - - let section = self - .metrics - .host - .stopwatch - .start_section("as_modifications"); - let ModificationsAndCache { - modifications: mut mods, - entity_lfu_cache: cache, - evict_stats, - } = block_state - .entity_cache - .as_modifications(block_ptr.number) - .map_err(|e| BlockProcessingError::Unknown(e.into()))?; - section.end(); - - trace!(self.logger, "Entity cache statistics"; - "weight" => evict_stats.new_weight, - "evicted_weight" => evict_stats.evicted_weight, - "count" => evict_stats.new_count, - "evicted_count" => evict_stats.evicted_count, - "stale_update" => evict_stats.stale_update, - "hit_rate" => format!("{:.0}%", evict_stats.hit_rate_pct()), - "accesses" => evict_stats.accesses, - "evict_time_ms" => evict_stats.evict_time.as_millis()); - - // Put the cache back in the state, asserting that the placeholder cache was not used. - assert!(self.state.entity_lfu_cache.is_empty()); - self.state.entity_lfu_cache = cache; - - if !mods.is_empty() { - info!(&logger, "Applying {} entity operation(s)", mods.len()); - } - - let err_count = block_state.deterministic_errors.len(); - for (i, e) in block_state.deterministic_errors.iter().enumerate() { - let message = format!("{:#}", e).replace('\n', "\t"); - error!(&logger, "Subgraph error {}/{}", i + 1, err_count; - "error" => message, - "code" => LogCode::SubgraphSyncingFailure - ); - } - - // Transact entity operations into the store and update the - // subgraph's block stream pointer - let _section = self.metrics.host.stopwatch.start_section("transact_block"); - let start = Instant::now(); - - // If a deterministic error has happened, make the PoI to be the only entity that'll be stored. - if has_errors && !is_non_fatal_errors_active { - let is_poi_entity = - |entity_mod: &EntityModification| entity_mod.key().entity_type.is_poi(); - mods.retain(is_poi_entity); - // Confidence check - assert!( - mods.len() == 1, - "There should be only one PoI EntityModification" - ); - } - - let BlockState { - deterministic_errors, - .. - } = block_state; - - let first_error = deterministic_errors.first().cloned(); - - // We consider a subgraph caught up when it's at most 1 blocks behind the chain head. - let is_caught_up = self.is_caught_up(&block_ptr).await?; - - self.inputs - .store - .transact_block_operations( - block_ptr, - block_time, - cursor, - mods, - &self.metrics.host.stopwatch, - vec![], - deterministic_errors, - vec![], - is_non_fatal_errors_active, - is_caught_up, - ) - .await - .context("Failed to transact block operations")?; - - // For subgraphs with `nonFatalErrors` feature disabled, we consider - // any error as fatal. 
-        //
-        // So we do an early return to make the subgraph stop processing blocks.
-        //
-        // In this scenario the only entity that is stored/transacted is the PoI,
-        // all of the others are discarded.
-        if has_errors && !is_non_fatal_errors_active {
-            // Only the first error is reported.
-            return Err(BlockProcessingError::Deterministic(first_error.unwrap()).into());
-        }
-
-        let elapsed = start.elapsed().as_secs_f64();
-        self.metrics
-            .subgraph
-            .block_ops_transaction_duration
-            .observe(elapsed);
-
-        // To prevent a buggy pending version from replacing a current version, if errors are
-        // present the subgraph will be unassigned.
-        let store = &self.inputs.store;
-        if has_errors && !ENV_VARS.disable_fail_fast && !store.is_deployment_synced() {
-            store
-                .unassign_subgraph()
-                .map_err(|e| BlockProcessingError::Unknown(e.into()))?;
-
-            // Use `Canceled` to avoiding setting the subgraph health to failed, an error was
-            // just transacted so it will be already be set to unhealthy.
-            return Err(BlockProcessingError::Canceled.into());
-        };
+        self.transact_block_state(
+            &logger,
+            block_ptr.clone(),
+            cursor.clone(),
+            block_time,
+            block_state,
+            proof_of_indexing,
+            vec![],
+            vec![],
+        )
+        .await?;
         Ok(Action::Continue)
     }
@@ -1500,7 +1444,6 @@ where
         &mut self,
         block: BlockWithTriggers,
         cursor: FirehoseCursor,
-        cancel_handle: &CancelHandle,
     ) -> Result {
         let block_ptr = block.ptr();
         self.metrics
@@ -1533,7 +1476,7 @@ where
         let start = Instant::now();
-        let res = self.process_block(cancel_handle, block, cursor).await;
+        let res = self.process_block(block, cursor).await;
         self.handle_action(start, block_ptr, res).await
     }
@@ -1592,9 +1535,8 @@ where
     async fn handle_err(
         &mut self,
         err: CancelableError,
-        cancel_handle: &CancelHandle,
     ) -> Result {
-        if cancel_handle.is_canceled() {
+        if self.is_canceled() {
            debug!(&self.logger, "Subgraph block stream shut down cleanly");
            return Ok(Action::Stop);
        }
diff --git a/docs/environment-variables.md b/docs/environment-variables.md
index 8b395680e6a..46903185ccf 100644
--- a/docs/environment-variables.md
+++ b/docs/environment-variables.md
@@ -223,6 +223,18 @@ those.
   copying or grafting should take. This limits how long transactions for
   such long running operations will be, and therefore helps control bloat
   in other tables. Value is in seconds and defaults to 180s.
+- `GRAPH_STORE_BATCH_TIMEOUT`: How long a batch operation during copying or
+  grafting is allowed to take at most. This is meant to guard against
+  batches that are catastrophically big and should be set to a small
+  multiple of `GRAPH_STORE_BATCH_TARGET_DURATION`, like 10 times that
+  value, and needs to be at least 2 times that value when set. If this
+  timeout is hit, the batch size is reset to 1 so we can be sure that
+  batches stay below `GRAPH_STORE_BATCH_TARGET_DURATION` and the smaller
+  batch is retried. Value is in seconds and defaults to unlimited.
+- `GRAPH_STORE_BATCH_WORKERS`: The number of workers to use for batch
+  operations. If there are idle connections, each subgraph copy operation
+  will use up to this many workers to copy tables in parallel. Defaults
+  to 1 and must be at least 1.
 - `GRAPH_START_BLOCK`: block hash:block number where the forked subgraph will
   start indexing at.
 - `GRAPH_FORK_BASE`: api url for where the graph node will fork from, use
   `https://api.thegraph.com/subgraphs/id/` for the hosted service.
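The constraint between the two batch settings documented above can be expressed in a few lines. The sketch below is a standalone illustration only, not graph-node's own env handling; the function name `batch_durations` and the direct use of `std::env` are assumptions made for the example.

use std::env;
use std::time::Duration;

// Read the two batch-related settings and enforce the documented invariant:
// the timeout is unlimited unless set, and when set it must be at least
// twice GRAPH_STORE_BATCH_TARGET_DURATION.
fn batch_durations() -> Result<(Duration, Option<Duration>), String> {
    // Parse an optional whole number of seconds from the environment.
    let secs = |var: &str| -> Result<Option<u64>, String> {
        match env::var(var) {
            Ok(s) => s.parse().map(Some).map_err(|e| format!("{var}: {e}")),
            Err(_) => Ok(None),
        }
    };
    // Target duration defaults to 180s, as documented above.
    let target = Duration::from_secs(secs("GRAPH_STORE_BATCH_TARGET_DURATION")?.unwrap_or(180));
    let timeout = secs("GRAPH_STORE_BATCH_TIMEOUT")?.map(Duration::from_secs);
    if let Some(t) = timeout {
        if t < target * 2 {
            return Err(
                "GRAPH_STORE_BATCH_TIMEOUT must be at least twice GRAPH_STORE_BATCH_TARGET_DURATION"
                    .to_string(),
            );
        }
    }
    Ok((target, timeout))
}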
diff --git a/graph/Cargo.toml b/graph/Cargo.toml index 3ea0c0bf349..6547d0281c6 100644 --- a/graph/Cargo.toml +++ b/graph/Cargo.toml @@ -21,10 +21,10 @@ graph_derive = { path = "./derive" } diesel = { workspace = true } diesel_derives = { workspace = true } chrono = "0.4.38" -envconfig = "0.10.0" +envconfig = "0.11.0" Inflector = "0.11.3" isatty = "0.1.9" -reqwest = { version = "0.12.5", features = ["json", "stream", "multipart"] } +reqwest = { version = "0.12.15", features = ["json", "stream", "multipart"] } ethabi = "17.2" hex = "0.4.3" http0 = { version = "0", package = "http" } @@ -48,6 +48,7 @@ serde_derive = { workspace = true } serde_json = { workspace = true } serde_regex = { workspace = true } serde_yaml = { workspace = true } +sha2 = "0.10.8" slog = { version = "2.7.0", features = [ "release_max_level_trace", "max_level_trace", @@ -58,13 +59,13 @@ sqlparser = { workspace = true } # stable-hash = { version = "0.4.2" } stable-hash = { git = "https://github.com/graphprotocol/stable-hash", branch = "main" } stable-hash_legacy = { git = "https://github.com/graphprotocol/stable-hash", branch = "old", package = "stable-hash", doc = false } -strum_macros = "0.26.4" +strum_macros = "0.27.1" slog-async = "2.5.0" slog-envlogger = "2.1.0" slog-term = "2.7.0" -petgraph = "0.6.5" +petgraph = "0.8.1" tiny-keccak = "1.5.0" -tokio = { version = "1.38.0", features = [ +tokio = { version = "1.44.2", features = [ "time", "sync", "macros", @@ -75,9 +76,9 @@ tokio = { version = "1.38.0", features = [ tokio-stream = { version = "0.1.15", features = ["sync"] } tokio-retry = "0.3.0" toml = "0.8.8" -url = "2.5.2" +url = "2.5.4" prometheus = "0.13.4" -priority-queue = "2.0.3" +priority-queue = "2.3.1" tonic = { workspace = true } prost = { workspace = true } prost-types = { workspace = true } diff --git a/graph/derive/Cargo.toml b/graph/derive/Cargo.toml index 3598e9022a6..f43691ba463 100644 --- a/graph/derive/Cargo.toml +++ b/graph/derive/Cargo.toml @@ -14,7 +14,7 @@ proc-macro = true [dependencies] syn = { workspace = true } quote = "1.0" -proc-macro2 = "1.0.85" +proc-macro2 = "1.0.94" heck = "0.5" [dev-dependencies] diff --git a/graph/src/blockchain/block_stream.rs b/graph/src/blockchain/block_stream.rs index b9f602d802c..99f2dabd1ac 100644 --- a/graph/src/blockchain/block_stream.rs +++ b/graph/src/blockchain/block_stream.rs @@ -1024,7 +1024,7 @@ mod test { let mut stream = BufferedBlockStream::spawn_from_stream(buffer_size, stream) .map_err(CancelableError::Error) - .cancelable(&guard, || Err(CancelableError::Cancel)); + .cancelable(&guard); let mut blocks = HashSet::::new(); let mut count = 0; diff --git a/graph/src/components/metrics/registry.rs b/graph/src/components/metrics/registry.rs index e010d3a89fa..93cf51b3bd1 100644 --- a/graph/src/components/metrics/registry.rs +++ b/graph/src/components/metrics/registry.rs @@ -3,7 +3,7 @@ use std::sync::{Arc, RwLock}; use prometheus::IntGauge; use prometheus::{labels, Histogram, IntCounterVec}; -use slog::info; +use slog::debug; use crate::components::metrics::{counter_with_labels, gauge_with_labels}; use crate::prelude::Collector; @@ -133,7 +133,7 @@ impl MetricsRegistry { let mut result = self.registry.register(collector.clone()); if matches!(result, Err(PrometheusError::AlreadyReg)) { - info!(logger, "Resolving duplicate metric registration"); + debug!(logger, "Resolving duplicate metric registration"); // Since the current metric is a duplicate, // we can use it to unregister the previous registration. 
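The duplicate-registration handling above (unregister the earlier collector when `register` reports `AlreadyReg`, then register again) can be shown in isolation. This is a sketch against the prometheus crate only, not graph-node's `MetricsRegistry`; the helper name `register_or_replace` and the choice of an `IntGauge` are assumptions for the example.

use prometheus::{Error as PrometheusError, IntGauge, Registry};

// Register a collector, replacing any earlier registration of the same
// metric family instead of failing with AlreadyReg.
fn register_or_replace(registry: &Registry, gauge: IntGauge) -> Result<(), PrometheusError> {
    match registry.register(Box::new(gauge.clone())) {
        Err(PrometheusError::AlreadyReg) => {
            // The duplicate collector describes the same metric family, so it
            // can be handed to `unregister` to drop the previous registration
            // before retrying.
            registry.unregister(Box::new(gauge.clone()))?;
            registry.register(Box::new(gauge))
        }
        other => other,
    }
}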
@@ -144,7 +144,6 @@ impl MetricsRegistry { match result { Ok(()) => { - info!(logger, "Successfully registered a new metric"); self.registered_metrics.inc(); } Err(err) => { diff --git a/graph/src/components/server/query.rs b/graph/src/components/server/query.rs index 6bf83ffbf76..4a9fe1557c2 100644 --- a/graph/src/components/server/query.rs +++ b/graph/src/components/server/query.rs @@ -28,7 +28,7 @@ impl From for ServerError { impl From for ServerError { fn from(e: StoreError) -> Self { match e { - StoreError::ConstraintViolation(s) => ServerError::InternalError(s), + StoreError::InternalError(s) => ServerError::InternalError(s), _ => ServerError::ClientError(e.to_string()), } } diff --git a/graph/src/components/store/err.rs b/graph/src/components/store/err.rs index 3aa65c3ecb2..b8743658030 100644 --- a/graph/src/components/store/err.rs +++ b/graph/src/components/store/err.rs @@ -1,18 +1,18 @@ use super::{BlockNumber, DeploymentSchemaVersion}; +use crate::prelude::DeploymentHash; use crate::prelude::QueryExecutionError; -use crate::{data::store::EntityValidationError, prelude::DeploymentHash}; use anyhow::{anyhow, Error}; use diesel::result::Error as DieselError; use thiserror::Error; use tokio::task::JoinError; +pub type StoreResult = Result; + #[derive(Error, Debug)] pub enum StoreError { #[error("store error: {0:#}")] Unknown(Error), - #[error("Entity validation failed: {0}")] - EntityValidationError(EntityValidationError), #[error( "tried to set entity of type `{0}` with ID \"{1}\" but an entity of type `{2}`, \ which has an interface in common with `{0}`, exists with the same ID" @@ -24,8 +24,6 @@ pub enum StoreError { UnknownTable(String), #[error("entity type '{0}' does not have an attribute '{0}'")] UnknownAttribute(String, String), - #[error("malformed directive '{0}'")] - MalformedDirective(String), #[error("query execution failed: {0}")] QueryExecutionError(String), #[error("Child filter nesting not supported by value `{0}`: `{1}`")] @@ -40,8 +38,8 @@ pub enum StoreError { /// An internal error where we expected the application logic to enforce /// some constraint, e.g., that subgraph names are unique, but found that /// constraint to not hold - #[error("internal constraint violated: {0}")] - ConstraintViolation(String), + #[error("internal error: {0}")] + InternalError(String), #[error("deployment not found: {0}")] DeploymentNotFound(String), #[error("shard not found: {0} (this usually indicates a misconfiguration)")] @@ -54,8 +52,6 @@ pub enum StoreError { Canceled, #[error("database unavailable")] DatabaseUnavailable, - #[error("database disabled")] - DatabaseDisabled, #[error("subgraph forking failed: {0}")] ForkFailure(String), #[error("subgraph writer poisoned by previous error")] @@ -74,16 +70,20 @@ pub enum StoreError { UnsupportedFilter(String, String), #[error("writing {0} entities at block {1} failed: {2} Query: {3}")] WriteFailure(String, BlockNumber, String, String), + #[error("database query timed out")] + StatementTimeout, + #[error("database constraint violated: {0}")] + ConstraintViolation(String), } -// Convenience to report a constraint violation +// Convenience to report an internal error #[macro_export] -macro_rules! constraint_violation { +macro_rules! 
internal_error { ($msg:expr) => {{ - $crate::prelude::StoreError::ConstraintViolation(format!("{}", $msg)) + $crate::prelude::StoreError::InternalError(format!("{}", $msg)) }}; ($fmt:expr, $($arg:tt)*) => {{ - $crate::prelude::StoreError::ConstraintViolation(format!($fmt, $($arg)*)) + $crate::prelude::StoreError::InternalError(format!($fmt, $($arg)*)) }} } @@ -94,7 +94,6 @@ impl Clone for StoreError { fn clone(&self) -> Self { match self { Self::Unknown(arg0) => Self::Unknown(anyhow!("{}", arg0)), - Self::EntityValidationError(arg0) => Self::EntityValidationError(arg0.clone()), Self::ConflictingId(arg0, arg1, arg2) => { Self::ConflictingId(arg0.clone(), arg1.clone(), arg2.clone()) } @@ -103,7 +102,6 @@ impl Clone for StoreError { Self::UnknownAttribute(arg0, arg1) => { Self::UnknownAttribute(arg0.clone(), arg1.clone()) } - Self::MalformedDirective(arg0) => Self::MalformedDirective(arg0.clone()), Self::QueryExecutionError(arg0) => Self::QueryExecutionError(arg0.clone()), Self::ChildFilterNestingNotSupportedError(arg0, arg1) => { Self::ChildFilterNestingNotSupportedError(arg0.clone(), arg1.clone()) @@ -112,14 +110,13 @@ impl Clone for StoreError { Self::DuplicateBlockProcessing(arg0, arg1) => { Self::DuplicateBlockProcessing(arg0.clone(), arg1.clone()) } - Self::ConstraintViolation(arg0) => Self::ConstraintViolation(arg0.clone()), + Self::InternalError(arg0) => Self::InternalError(arg0.clone()), Self::DeploymentNotFound(arg0) => Self::DeploymentNotFound(arg0.clone()), Self::UnknownShard(arg0) => Self::UnknownShard(arg0.clone()), Self::FulltextSearchNonDeterministic => Self::FulltextSearchNonDeterministic, Self::FulltextColumnMissingConfig => Self::FulltextColumnMissingConfig, Self::Canceled => Self::Canceled, Self::DatabaseUnavailable => Self::DatabaseUnavailable, - Self::DatabaseDisabled => Self::DatabaseDisabled, Self::ForkFailure(arg0) => Self::ForkFailure(arg0.clone()), Self::Poisoned => Self::Poisoned, Self::WriterPanic(arg0) => Self::Unknown(anyhow!("writer panic: {}", arg0)), @@ -133,25 +130,37 @@ impl Clone for StoreError { Self::WriteFailure(arg0, arg1, arg2, arg3) => { Self::WriteFailure(arg0.clone(), arg1.clone(), arg2.clone(), arg3.clone()) } + Self::StatementTimeout => Self::StatementTimeout, + Self::ConstraintViolation(arg0) => Self::ConstraintViolation(arg0.clone()), } } } impl StoreError { - fn database_unavailable(e: &DieselError) -> Option { - // When the error is caused by a closed connection, treat the error - // as 'database unavailable'. When this happens during indexing, the - // indexing machinery will retry in that case rather than fail the - // subgraph - if let DieselError::DatabaseError(_, info) = e { - if info - .message() - .contains("server closed the connection unexpectedly") - { - return Some(Self::DatabaseUnavailable); - } + pub fn from_diesel_error(e: &DieselError) -> Option { + const CONN_CLOSE: &str = "server closed the connection unexpectedly"; + const STMT_TIMEOUT: &str = "canceling statement due to statement timeout"; + const UNIQUE_CONSTR: &str = "duplicate key value violates unique constraint"; + let DieselError::DatabaseError(_, info) = e else { + return None; + }; + if info.message().contains(CONN_CLOSE) { + // When the error is caused by a closed connection, treat the error + // as 'database unavailable'. 
When this happens during indexing, the + // indexing machinery will retry in that case rather than fail the + // subgraph + Some(StoreError::DatabaseUnavailable) + } else if info.message().contains(STMT_TIMEOUT) { + Some(StoreError::StatementTimeout) + } else if info.message().contains(UNIQUE_CONSTR) { + let msg = match info.details() { + Some(details) => format!("{}: {}", info.message(), details.replace('\n', " ")), + None => info.message().to_string(), + }; + Some(StoreError::ConstraintViolation(msg)) + } else { + None } - None } pub fn write_failure( @@ -160,19 +169,52 @@ impl StoreError { block: BlockNumber, query: String, ) -> Self { - match Self::database_unavailable(&error) { - Some(e) => return e, - None => StoreError::WriteFailure(entity.to_string(), block, error.to_string(), query), + Self::from_diesel_error(&error).unwrap_or_else(|| { + StoreError::WriteFailure(entity.to_string(), block, error.to_string(), query) + }) + } + + pub fn is_deterministic(&self) -> bool { + use StoreError::*; + + // This classification tries to err on the side of caution. If in doubt, + // assume the error is non-deterministic. + match self { + // deterministic errors + ConflictingId(_, _, _) + | UnknownField(_, _) + | UnknownTable(_) + | UnknownAttribute(_, _) + | InvalidIdentifier(_) + | UnsupportedFilter(_, _) + | ConstraintViolation(_) => true, + + // non-deterministic errors + Unknown(_) + | QueryExecutionError(_) + | ChildFilterNestingNotSupportedError(_, _) + | DuplicateBlockProcessing(_, _) + | InternalError(_) + | DeploymentNotFound(_) + | UnknownShard(_) + | FulltextSearchNonDeterministic + | FulltextColumnMissingConfig + | Canceled + | DatabaseUnavailable + | ForkFailure(_) + | Poisoned + | WriterPanic(_) + | UnsupportedDeploymentSchemaVersion(_) + | PruneFailure(_) + | WriteFailure(_, _, _, _) + | StatementTimeout => false, } } } impl From for StoreError { fn from(e: DieselError) -> Self { - match Self::database_unavailable(&e) { - Some(e) => return e, - None => StoreError::Unknown(e.into()), - } + Self::from_diesel_error(&e).unwrap_or_else(|| StoreError::Unknown(e.into())) } } diff --git a/graph/src/components/store/mod.rs b/graph/src/components/store/mod.rs index b64f8b35964..ab30caeda75 100644 --- a/graph/src/components/store/mod.rs +++ b/graph/src/components/store/mod.rs @@ -3,11 +3,16 @@ mod err; mod traits; pub mod write; +use diesel::deserialize::FromSql; +use diesel::pg::Pg; +use diesel::serialize::{Output, ToSql}; +use diesel::sql_types::Integer; +use diesel_derives::{AsExpression, FromSqlRow}; pub use entity_cache::{EntityCache, EntityLfuCache, GetScope, ModificationsAndCache}; use slog::Logger; pub use super::subgraph::Entity; -pub use err::StoreError; +pub use err::{StoreError, StoreResult}; use itertools::Itertools; use strum_macros::Display; pub use traits::*; @@ -26,13 +31,13 @@ use std::time::Duration; use crate::blockchain::{Block, BlockHash, BlockPtr}; use crate::cheap_clone::CheapClone; use crate::components::store::write::EntityModification; -use crate::constraint_violation; use crate::data::store::scalar::Bytes; use crate::data::store::{Id, IdList, Value}; use crate::data::value::Word; use crate::data_source::CausalityRegion; use crate::derive::CheapClone; use crate::env::ENV_VARS; +use crate::internal_error; use crate::prelude::{s, Attribute, DeploymentHash, ValueType}; use crate::schema::{ast as sast, EntityKey, EntityType, InputSchema}; use crate::util::stats::MovingStats; @@ -691,7 +696,20 @@ pub struct StoredDynamicDataSource { /// identifier only has meaning in 
the context of a specific instance of /// graph-node. Only store code should ever construct or consume it; all /// other code passes it around as an opaque token. -#[derive(Copy, Clone, CheapClone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[derive( + Copy, + Clone, + CheapClone, + Debug, + Serialize, + Deserialize, + PartialEq, + Eq, + Hash, + AsExpression, + FromSqlRow, +)] +#[diesel(sql_type = Integer)] pub struct DeploymentId(pub i32); impl Display for DeploymentId { @@ -706,6 +724,19 @@ impl DeploymentId { } } +impl FromSql for DeploymentId { + fn from_sql(bytes: diesel::pg::PgValue) -> diesel::deserialize::Result { + let id = >::from_sql(bytes)?; + Ok(DeploymentId(id)) + } +} + +impl ToSql for DeploymentId { + fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result { + >::to_sql(&self.0, out) + } +} + /// A unique identifier for a deployment that specifies both its external /// identifier (`hash`) and its unique internal identifier (`id`) which /// ensures we are talking about a unique location for the deployment's data @@ -972,6 +1003,9 @@ pub struct PruneRequest { pub earliest_block: BlockNumber, /// The last block that contains final entities not subject to a reorg pub final_block: BlockNumber, + /// The first block for which the deployment contained entities when the + /// request was made + pub first_block: BlockNumber, /// The latest block, i.e., the subgraph head pub latest_block: BlockNumber, /// Use the rebuild strategy when removing more than this fraction of @@ -1000,17 +1034,17 @@ impl PruneRequest { let rebuild_threshold = ENV_VARS.store.rebuild_threshold; let delete_threshold = ENV_VARS.store.delete_threshold; if rebuild_threshold < 0.0 || rebuild_threshold > 1.0 { - return Err(constraint_violation!( + return Err(internal_error!( "the copy threshold must be between 0 and 1 but is {rebuild_threshold}" )); } if delete_threshold < 0.0 || delete_threshold > 1.0 { - return Err(constraint_violation!( + return Err(internal_error!( "the delete threshold must be between 0 and 1 but is {delete_threshold}" )); } if history_blocks <= reorg_threshold { - return Err(constraint_violation!( + return Err(internal_error!( "the deployment {} needs to keep at least {} blocks \ of history and can't be pruned to only {} blocks of history", deployment, @@ -1019,7 +1053,7 @@ impl PruneRequest { )); } if first_block >= latest_block { - return Err(constraint_violation!( + return Err(internal_error!( "the earliest block {} must be before the latest block {}", first_block, latest_block @@ -1035,6 +1069,7 @@ impl PruneRequest { earliest_block, final_block, latest_block, + first_block, rebuild_threshold, delete_threshold, }) diff --git a/graph/src/components/store/traits.rs b/graph/src/components/store/traits.rs index 27cb3768e2c..73cb22269fe 100644 --- a/graph/src/components/store/traits.rs +++ b/graph/src/components/store/traits.rs @@ -655,7 +655,7 @@ pub trait QueryStore: Send + Sync { block_hash: &BlockHash, ) -> Result, Option)>, StoreError>; - fn wait_stats(&self) -> Result; + fn wait_stats(&self) -> PoolWaitStats; /// Find the current state for the subgraph deployment `id` and /// return details about it needed for executing queries @@ -668,7 +668,7 @@ pub trait QueryStore: Send + Sync { fn network_name(&self) -> &str; /// A permit should be acquired before starting query execution. - async fn query_permit(&self) -> Result; + async fn query_permit(&self) -> QueryPermit; /// Report the name of the shard in which the subgraph is stored. 
This /// should only be used for reporting and monitoring @@ -683,7 +683,7 @@ pub trait QueryStore: Send + Sync { #[async_trait] pub trait StatusStore: Send + Sync + 'static { /// A permit should be acquired before starting query execution. - async fn query_permit(&self) -> Result; + async fn query_permit(&self) -> QueryPermit; fn status(&self, filter: status::Filter) -> Result, StoreError>; diff --git a/graph/src/components/store/write.rs b/graph/src/components/store/write.rs index aa56fdcc910..2c470fd32be 100644 --- a/graph/src/components/store/write.rs +++ b/graph/src/components/store/write.rs @@ -5,10 +5,10 @@ use crate::{ blockchain::{block_stream::FirehoseCursor, BlockPtr, BlockTime}, cheap_clone::CheapClone, components::subgraph::Entity, - constraint_violation, data::{store::Id, subgraph::schema::SubgraphError}, data_source::CausalityRegion, derive::CacheWeight, + internal_error, util::cache_weight::CacheWeight, }; @@ -182,7 +182,7 @@ impl EntityModification { match self { Insert { end, .. } | Overwrite { end, .. } => { if end.is_some() { - return Err(constraint_violation!( + return Err(internal_error!( "can not clamp {:?} to block {}", self, block @@ -191,7 +191,7 @@ impl EntityModification { *end = Some(block); } Remove { .. } => { - return Err(constraint_violation!( + return Err(internal_error!( "can not clamp block range for removal of {:?} to {}", self, block @@ -219,7 +219,7 @@ impl EntityModification { end, }), Remove { key, .. } => { - return Err(constraint_violation!( + return Err(internal_error!( "a remove for {}[{}] can not be converted into an insert", entity_type, key.entity_id @@ -330,7 +330,7 @@ impl RowGroup { if !is_forward { // unwrap: we only get here when `last()` is `Some` let last_block = self.rows.last().map(|emod| emod.block()).unwrap(); - return Err(constraint_violation!( + return Err(internal_error!( "we already have a modification for block {}, can not append {:?}", last_block, emod @@ -412,7 +412,7 @@ impl RowGroup { self.rows.push(row); } EntityModification::Overwrite { .. } | EntityModification::Remove { .. } => { - return Err(constraint_violation!( + return Err(internal_error!( "immutable entity type {} only allows inserts, not {:?}", self.entity_type, row @@ -426,7 +426,7 @@ impl RowGroup { use EntityModification::*; if row.block() <= prev_row.block() { - return Err(constraint_violation!( + return Err(internal_error!( "can not append operations that go backwards from {:?} to {:?}", prev_row, row @@ -439,17 +439,22 @@ impl RowGroup { // clamping an old version match (&*prev_row, &row) { (Insert { end: None, .. } | Overwrite { end: None, .. }, Insert { .. }) - | (Remove { .. }, Overwrite { .. } | Remove { .. }) + | (Remove { .. }, Overwrite { .. }) | ( Insert { end: Some(_), .. } | Overwrite { end: Some(_), .. }, Overwrite { .. } | Remove { .. }, ) => { - return Err(constraint_violation!( + return Err(internal_error!( "impossible combination of entity operations: {:?} and then {:?}", prev_row, row )) } + (Remove { .. }, Remove { .. }) => { + // Ignore the new row, since prev_row is already a + // delete. This can happen when subgraphs delete + // entities without checking if they even exist + } ( Insert { end: Some(_), .. } | Overwrite { end: Some(_), .. } | Remove { .. }, Insert { .. 
}, @@ -476,7 +481,7 @@ impl RowGroup { fn append(&mut self, group: RowGroup) -> Result<(), StoreError> { if self.entity_type != group.entity_type { - return Err(constraint_violation!( + return Err(internal_error!( "Can not append a row group for {} to a row group for {}", group.entity_type, self.entity_type @@ -705,7 +710,7 @@ impl Batch { fn append_inner(&mut self, mut batch: Batch) -> Result<(), StoreError> { if batch.block_ptr.number <= self.block_ptr.number { - return Err(constraint_violation!("Batches must go forward. Can't append a batch with block pointer {} to one with block pointer {}", batch.block_ptr, self.block_ptr)); + return Err(internal_error!("Batches must go forward. Can't append a batch with block pointer {} to one with block pointer {}", batch.block_ptr, self.block_ptr)); } self.block_ptr = batch.block_ptr; diff --git a/graph/src/components/subgraph/instance.rs b/graph/src/components/subgraph/instance.rs index 11b473a878d..c6d3f0c7e85 100644 --- a/graph/src/components/subgraph/instance.rs +++ b/graph/src/components/subgraph/instance.rs @@ -131,10 +131,6 @@ impl BlockState { write_capacity_remaining.saturating_sub(other.write_capacity_remaining); } - pub fn has_errors(&self) -> bool { - !self.deterministic_errors.is_empty() - } - pub fn has_created_data_sources(&self) -> bool { assert!(!self.in_handler); !self.created_data_sources.is_empty() diff --git a/graph/src/components/subgraph/proof_of_indexing/online.rs b/graph/src/components/subgraph/proof_of_indexing/online.rs index d47f08b0a8f..ebf7a65e2f9 100644 --- a/graph/src/components/subgraph/proof_of_indexing/online.rs +++ b/graph/src/components/subgraph/proof_of_indexing/online.rs @@ -9,6 +9,7 @@ use crate::{ prelude::{debug, BlockNumber, DeploymentHash, Logger, ENV_VARS}, util::stable_hash_glue::AsBytes, }; +use sha2::{Digest, Sha256}; use stable_hash::{fast::FastStableHasher, FieldAddress, StableHash, StableHasher}; use stable_hash_legacy::crypto::{Blake3SeqNo, SetHasher}; use stable_hash_legacy::prelude::{ @@ -31,6 +32,8 @@ enum Hashers { Legacy(SetHasher), } +const STABLE_HASH_LEN: usize = 32; + impl Hashers { fn new(version: ProofOfIndexingVersion) -> Self { match version { @@ -132,9 +135,14 @@ impl BlockEventStream { } Hashers::Fast(mut digest) => { if let Some(prev) = prev { - let prev = prev - .try_into() - .expect("Expected valid fast stable hash representation"); + let prev = if prev.len() == STABLE_HASH_LEN { + prev.try_into() + .expect("Expected valid fast stable hash representation") + } else { + let mut hasher = Sha256::new(); + hasher.update(prev); + hasher.finalize().into() + }; let prev = FastStableHasher::from_bytes(prev); digest.mixin(&prev); } diff --git a/graph/src/data/query/error.rs b/graph/src/data/query/error.rs index 65fc1bcd259..d02b1c9c4bd 100644 --- a/graph/src/data/query/error.rs +++ b/graph/src/data/query/error.rs @@ -74,7 +74,7 @@ pub enum QueryExecutionError { DeploymentNotFound(String), IdMissing, IdNotString, - ConstraintViolation(String), + InternalError(String), } impl QueryExecutionError { @@ -132,7 +132,7 @@ impl QueryExecutionError { | DeploymentNotFound(_) | IdMissing | IdNotString - | ConstraintViolation(_) => false, + | InternalError(_) => false, } } } @@ -274,7 +274,7 @@ impl fmt::Display for QueryExecutionError { DeploymentNotFound(id_or_name) => write!(f, "deployment `{}` does not exist", id_or_name), IdMissing => write!(f, "entity is missing an `id` attribute"), IdNotString => write!(f, "entity `id` attribute is not a string"), - ConstraintViolation(msg) => write!(f, 
"internal constraint violated: {}", msg), + InternalError(msg) => write!(f, "internal error: {}", msg), } } } @@ -306,7 +306,7 @@ impl From for QueryExecutionError { StoreError::ChildFilterNestingNotSupportedError(attr, filter) => { QueryExecutionError::ChildFilterNestingNotSupportedError(attr, filter) } - StoreError::ConstraintViolation(msg) => QueryExecutionError::ConstraintViolation(msg), + StoreError::InternalError(msg) => QueryExecutionError::InternalError(msg), _ => QueryExecutionError::StoreError(CloneableAnyhowError(Arc::new(e.into()))), } } diff --git a/graph/src/data/query/trace.rs b/graph/src/data/query/trace.rs index cf2d153dca4..256c9cdeaf6 100644 --- a/graph/src/data/query/trace.rs +++ b/graph/src/data/query/trace.rs @@ -118,11 +118,8 @@ impl Trace { } } - pub fn query_done(&mut self, dur: Duration, permit: &Result) { - let permit_dur = match permit { - Ok(permit) => permit.wait, - Err(_) => Duration::from_millis(0), - }; + pub fn query_done(&mut self, dur: Duration, permit: &QueryPermit) { + let permit_dur = permit.wait; match self { Trace::None => { /* nothing to do */ } Trace::Root { .. } => { diff --git a/graph/src/data/store/id.rs b/graph/src/data/store/id.rs index 64be7545621..9726141e2d6 100644 --- a/graph/src/data/store/id.rs +++ b/graph/src/data/store/id.rs @@ -20,9 +20,9 @@ use crate::{ use crate::{ components::store::StoreError, - constraint_violation, data::value::Word, derive::CacheWeight, + internal_error, prelude::QueryExecutionError, runtime::gas::{Gas, GasSizeOf}, }; @@ -367,7 +367,7 @@ impl IdList { ids.push(id); Ok(ids) } - _ => Err(constraint_violation!( + _ => Err(internal_error!( "expected string id, got {}: {}", id.id_type(), id, @@ -381,7 +381,7 @@ impl IdList { ids.push(id); Ok(ids) } - _ => Err(constraint_violation!( + _ => Err(internal_error!( "expected bytes id, got {}: {}", id.id_type(), id, @@ -395,7 +395,7 @@ impl IdList { ids.push(id); Ok(ids) } - _ => Err(constraint_violation!( + _ => Err(internal_error!( "expected int8 id, got {}: {}", id.id_type(), id, @@ -423,7 +423,7 @@ impl IdList { ids.push(Word::from(id)); Ok(ids) } - _ => Err(constraint_violation!( + _ => Err(internal_error!( "expected string id, got {}: 0x{}", id.id_type(), id, @@ -438,7 +438,7 @@ impl IdList { ids.push(scalar::Bytes::from(id)); Ok(ids) } - _ => Err(constraint_violation!( + _ => Err(internal_error!( "expected bytes id, got {}: {}", id.id_type(), id, @@ -452,7 +452,7 @@ impl IdList { ids.push(id); Ok(ids) } - _ => Err(constraint_violation!( + _ => Err(internal_error!( "expected int8 id, got {}: {}", id.id_type(), id, @@ -533,7 +533,7 @@ impl IdList { ids.push(id); Ok(()) } - (list, id) => Err(constraint_violation!( + (list, id) => Err(internal_error!( "expected id of type {}, but got {}[{}]", list.id_type(), id.id_type(), diff --git a/graph/src/data/subgraph/mod.rs b/graph/src/data/subgraph/mod.rs index 3e7bc7061ab..77c8ba67d36 100644 --- a/graph/src/data/subgraph/mod.rs +++ b/graph/src/data/subgraph/mod.rs @@ -504,9 +504,9 @@ impl Graft { // The graft point must be at least `reorg_threshold` blocks // behind the subgraph head so that a reorg can not affect the // data that we copy for grafting - (Some(ptr), true) if self.block + ENV_VARS.reorg_threshold > ptr.number => Err(GraftBaseInvalid(format!( + (Some(ptr), true) if self.block + ENV_VARS.reorg_threshold() > ptr.number => Err(GraftBaseInvalid(format!( "failed to graft onto `{}` at block {} since it's only at block {} which is within the reorg threshold of {} blocks", - self.base, self.block, ptr.number, 
ENV_VARS.reorg_threshold + self.base, self.block, ptr.number, ENV_VARS.reorg_threshold() ))), // If the base deployment is failed *and* the `graft.block` is not // less than the `base.block`, the graft shouldn't be permitted. diff --git a/graph/src/data_source/subgraph.rs b/graph/src/data_source/subgraph.rs index 9e120a4c82c..87b44e66174 100644 --- a/graph/src/data_source/subgraph.rs +++ b/graph/src/data_source/subgraph.rs @@ -239,7 +239,16 @@ impl UnresolvedDataSource { None => { return Err(anyhow!("Entity {} not found in source manifest", entity)); } - Some(TypeKind::Object) => {} + Some(TypeKind::Object) => { + // Check if the entity is immutable + let entity_type = source_manifest.schema.entity_type(entity)?; + if !entity_type.is_immutable() { + return Err(anyhow!( + "Entity {} is not immutable and cannot be used as a mapping entity", + entity + )); + } + } } } Ok(()) @@ -253,23 +262,84 @@ impl UnresolvedDataSource { let source_raw = resolver .cat(logger, &self.source.address.to_ipfs_link()) .await - .context("Failed to resolve source subgraph manifest")?; + .context(format!( + "Failed to resolve source subgraph [{}] manifest", + self.source.address, + ))?; - let source_raw: serde_yaml::Mapping = serde_yaml::from_slice(&source_raw) - .context("Failed to parse source subgraph manifest as YAML")?; + let source_raw: serde_yaml::Mapping = + serde_yaml::from_slice(&source_raw).context(format!( + "Failed to parse source subgraph [{}] manifest as YAML", + self.source.address + ))?; let deployment_hash = self.source.address.clone(); let source_manifest = UnresolvedSubgraphManifest::::parse(deployment_hash, source_raw) - .context("Failed to parse source subgraph manifest")?; + .context(format!( + "Failed to parse source subgraph [{}] manifest", + self.source.address + ))?; source_manifest .resolve(resolver, logger, LATEST_VERSION.clone()) .await - .context("Failed to resolve source subgraph manifest") + .context(format!( + "Failed to resolve source subgraph [{}] manifest", + self.source.address + )) .map(Arc::new) } + /// Recursively verifies that all grafts in the chain meet the minimum spec version requirement for a subgraph source + async fn verify_graft_chain_sourcable( + manifest: Arc>, + resolver: &Arc, + logger: &Logger, + graft_chain: &mut Vec, + ) -> Result<(), Error> { + // Add current manifest to graft chain + graft_chain.push(manifest.id.to_string()); + + // Check if current manifest meets spec version requirement + if manifest.spec_version < SPEC_VERSION_1_3_0 { + return Err(anyhow!( + "Subgraph with a spec version {} is not supported for a subgraph source, minimum supported version is {}. Graft chain: {}", + manifest.spec_version, + SPEC_VERSION_1_3_0, + graft_chain.join(" -> ") + )); + } + + // If there's a graft, recursively verify it + if let Some(graft) = &manifest.graft { + let graft_raw = resolver + .cat(logger, &graft.base.to_ipfs_link()) + .await + .context("Failed to resolve graft base manifest")?; + + let graft_raw: serde_yaml::Mapping = serde_yaml::from_slice(&graft_raw) + .context("Failed to parse graft base manifest as YAML")?; + + let graft_manifest = + UnresolvedSubgraphManifest::::parse(graft.base.clone(), graft_raw) + .context("Failed to parse graft base manifest")? 
+ .resolve(resolver, logger, LATEST_VERSION.clone()) + .await + .context("Failed to resolve graft base manifest")?; + + Box::pin(Self::verify_graft_chain_sourcable( + Arc::new(graft_manifest), + resolver, + logger, + graft_chain, + )) + .await?; + } + + Ok(()) + } + #[allow(dead_code)] pub(super) async fn resolve( self, @@ -286,31 +356,33 @@ impl UnresolvedDataSource { let kind = self.kind.clone(); let source_manifest = self.resolve_source_manifest::(resolver, logger).await?; let source_spec_version = &source_manifest.spec_version; - - if source_manifest - .data_sources - .iter() - .any(|ds| matches!(ds, crate::data_source::DataSource::Subgraph(_))) - { - return Err(anyhow!("Nested subgraph data sources are not supported.")); - } - if source_spec_version < &SPEC_VERSION_1_3_0 { return Err(anyhow!( - "Source subgraph manifest spec version {} is not supported, minimum supported version is {}", + "Source subgraph [{}] manifest spec version {} is not supported, minimum supported version is {}", + self.source.address, source_spec_version, SPEC_VERSION_1_3_0 )); } - let pruning_enabled = match source_manifest.indexer_hints.as_ref() { - None => false, - Some(hints) => hints.prune.is_some(), - }; + // Verify the entire graft chain meets spec version requirements + let mut graft_chain = Vec::new(); + Self::verify_graft_chain_sourcable( + source_manifest.clone(), + resolver, + logger, + &mut graft_chain, + ) + .await?; - if pruning_enabled { + if source_manifest + .data_sources + .iter() + .any(|ds| matches!(ds, crate::data_source::DataSource::Subgraph(_))) + { return Err(anyhow!( - "Pruning is enabled for source subgraph, which is not supported" + "Nested subgraph data sources [{}] are not supported.", + self.name )); } diff --git a/graph/src/env/mappings.rs b/graph/src/env/mappings.rs index 41499056b5b..c1bbb8565e5 100644 --- a/graph/src/env/mappings.rs +++ b/graph/src/env/mappings.rs @@ -62,6 +62,13 @@ pub struct EnvVarsMapping { /// eth calls before running triggers; instead eth calls happen when /// mappings call `ethereum.call`. Off by default. pub disable_declared_calls: bool, + + /// Set by the flag `GRAPH_STORE_ERRORS_ARE_NON_DETERMINISTIC`. Off by + /// default. Setting this to `true` will revert to the old behavior of + /// treating all store errors as nondeterministic. This is a temporary + /// measure and can be removed after 2025-07-01, once we are sure the + /// new behavior works as intended. + pub store_errors_are_nondeterministic: bool, } // This does not print any values avoid accidentally leaking any sensitive env vars @@ -89,6 +96,7 @@ impl From for EnvVarsMapping { ipfs_request_limit: x.ipfs_request_limit, allow_non_deterministic_ipfs: x.allow_non_deterministic_ipfs.0, disable_declared_calls: x.disable_declared_calls.0, + store_errors_are_nondeterministic: x.store_errors_are_nondeterministic.0, } } } @@ -123,4 +131,6 @@ pub struct InnerMappingHandlers { allow_non_deterministic_ipfs: EnvVarBoolean, #[envconfig(from = "GRAPH_DISABLE_DECLARED_CALLS", default = "false")] disable_declared_calls: EnvVarBoolean, + #[envconfig(from = "GRAPH_STORE_ERRORS_ARE_NON_DETERMINISTIC", default = "false")] + store_errors_are_nondeterministic: EnvVarBoolean, } diff --git a/graph/src/env/mod.rs b/graph/src/env/mod.rs index 4383ce17b5c..eff0ebea16e 100644 --- a/graph/src/env/mod.rs +++ b/graph/src/env/mod.rs @@ -15,9 +15,16 @@ use crate::{ runtime::gas::CONST_MAX_GAS_PER_HANDLER, }; +#[cfg(debug_assertions)] +use std::sync::Mutex; + lazy_static! 
{ pub static ref ENV_VARS: EnvVars = EnvVars::from_env().unwrap(); } +#[cfg(debug_assertions)] +lazy_static! { + pub static ref TEST_WITH_NO_REORG: Mutex = Mutex::new(false); +} /// Panics if: /// - The value is not UTF8. @@ -181,7 +188,7 @@ pub struct EnvVars { pub static_filters_threshold: usize, /// Set by the environment variable `ETHEREUM_REORG_THRESHOLD`. The default /// value is 250 blocks. - pub reorg_threshold: BlockNumber, + reorg_threshold: BlockNumber, /// The time to wait between polls when using polling block ingestor. /// The value is set by `ETHERUM_POLLING_INTERVAL` in millis and the /// default is 1000. @@ -247,24 +254,17 @@ pub struct EnvVars { /// Set by the environment variable `GRAPH_FIREHOSE_FETCH_BLOCK_TIMEOUT_SECS`. /// The default value is 60 seconds. pub firehose_block_fetch_timeout: u64, + /// Set by the environment variable `GRAPH_FIREHOSE_BLOCK_BATCH_SIZE`. + /// The default value is 10. + pub firehose_block_batch_size: usize, } impl EnvVars { - pub fn from_env() -> Result { + pub fn from_env() -> Result { let inner = Inner::init_from_env()?; let graphql = InnerGraphQl::init_from_env()?.into(); let mapping_handlers = InnerMappingHandlers::init_from_env()?.into(); - let store = InnerStore::init_from_env()?.into(); - - // The default reorganization (reorg) threshold is set to 250. - // For testing purposes, we need to set this threshold to 0 because: - // 1. Many tests involve reverting blocks. - // 2. Blocks cannot be reverted below the reorg threshold. - // Therefore, during tests, we want to set the reorg threshold to 0. - let reorg_threshold = - inner - .reorg_threshold - .unwrap_or_else(|| if cfg!(debug_assertions) { 0 } else { 250 }); + let store = InnerStore::init_from_env()?.try_into()?; Ok(Self { graphql, @@ -319,13 +319,15 @@ impl EnvVars { external_http_base_url: inner.external_http_base_url, external_ws_base_url: inner.external_ws_base_url, static_filters_threshold: inner.static_filters_threshold, - reorg_threshold, + reorg_threshold: inner.reorg_threshold, ingestor_polling_interval: Duration::from_millis(inner.ingestor_polling_interval), subgraph_settings: inner.subgraph_settings, prefer_substreams_block_streams: inner.prefer_substreams_block_streams, enable_dips_metrics: inner.enable_dips_metrics.0, history_blocks_override: inner.history_blocks_override, - min_history_blocks: inner.min_history_blocks.unwrap_or(2 * reorg_threshold), + min_history_blocks: inner + .min_history_blocks + .unwrap_or(2 * inner.reorg_threshold), dips_metrics_object_store_url: inner.dips_metrics_object_store_url, section_map: inner.section_map, firehose_grpc_max_decode_size_mb: inner.firehose_grpc_max_decode_size_mb, @@ -339,6 +341,7 @@ impl EnvVars { block_write_capacity: inner.block_write_capacity.0, firehose_block_fetch_retry_limit: inner.firehose_block_fetch_retry_limit, firehose_block_fetch_timeout: inner.firehose_block_fetch_timeout, + firehose_block_batch_size: inner.firehose_block_fetch_batch_size, }) } @@ -371,6 +374,23 @@ impl EnvVars { .filter(|x| !x.is_empty()) .collect() } + #[cfg(debug_assertions)] + pub fn reorg_threshold(&self) -> i32 { + // The default reorganization (reorg) threshold is set to 250. + // For testing purposes, we need to set this threshold to 0 because: + // 1. Many tests involve reverting blocks. + // 2. Blocks cannot be reverted below the reorg threshold. + // Therefore, during tests, we want to set the reorg threshold to 0. 
+ if *TEST_WITH_NO_REORG.lock().unwrap() { + 0 + } else { + self.reorg_threshold + } + } + #[cfg(not(debug_assertions))] + pub fn reorg_threshold(&self) -> i32 { + self.reorg_threshold + } } impl Default for EnvVars { @@ -469,8 +489,8 @@ struct Inner { #[envconfig(from = "GRAPH_STATIC_FILTERS_THRESHOLD", default = "10000")] static_filters_threshold: usize, // JSON-RPC specific. - #[envconfig(from = "ETHEREUM_REORG_THRESHOLD")] - reorg_threshold: Option, + #[envconfig(from = "ETHEREUM_REORG_THRESHOLD", default = "250")] + reorg_threshold: BlockNumber, #[envconfig(from = "ETHEREUM_POLLING_INTERVAL", default = "1000")] ingestor_polling_interval: u64, #[envconfig(from = "GRAPH_EXPERIMENTAL_SUBGRAPH_SETTINGS")] @@ -506,6 +526,8 @@ struct Inner { firehose_block_fetch_retry_limit: usize, #[envconfig(from = "GRAPH_FIREHOSE_FETCH_BLOCK_TIMEOUT_SECS", default = "60")] firehose_block_fetch_timeout: u64, + #[envconfig(from = "GRAPH_FIREHOSE_FETCH_BLOCK_BATCH_SIZE", default = "10")] + firehose_block_fetch_batch_size: usize, } #[derive(Clone, Debug)] diff --git a/graph/src/env/store.rs b/graph/src/env/store.rs index 3b4e50ec87d..1c768f45bed 100644 --- a/graph/src/env/store.rs +++ b/graph/src/env/store.rs @@ -81,6 +81,22 @@ pub struct EnvVarsStore { /// The default is 180s. pub batch_target_duration: Duration, + /// Cancel and reset a batch copy operation if it takes longer than + /// this. Set by `GRAPH_STORE_BATCH_TIMEOUT`. Unlimited by default + pub batch_timeout: Option, + + /// The number of workers to use for batch operations. If there are idle + /// connections, each subgraph copy operation will use up to this many + /// workers to copy tables in parallel. Defaults to 1 and must be at + /// least 1 + pub batch_workers: usize, + + /// How long to wait to get an additional connection for a batch worker. + /// This should just be big enough to allow the connection pool to + /// establish a connection. Set by `GRAPH_STORE_BATCH_WORKER_WAIT`. + /// Value is in ms and defaults to 2000ms + pub batch_worker_wait: Duration, + /// Prune tables where we will remove at least this fraction of entity /// versions by rebuilding the table. Set by /// `GRAPH_STORE_HISTORY_REBUILD_THRESHOLD`. The default is 0.5 @@ -95,6 +111,9 @@ pub struct EnvVarsStore { /// blocks) than its history limit. The default value is 1.2 and the /// value must be at least 1.01 pub history_slack_factor: f64, + /// For how many prune runs per deployment to keep status information. + /// Set by `GRAPH_STORE_HISTORY_KEEP_STATUS`. The default is 5 + pub prune_keep_history: usize, /// How long to accumulate changes into a batch before a write has to /// happen. Set by the environment variable /// `GRAPH_STORE_WRITE_BATCH_DURATION` in seconds. The default is 300s. @@ -113,14 +132,6 @@ pub struct EnvVarsStore { pub use_brin_for_all_query_types: bool, /// Temporary env var to disable certain lookups in the chain store pub disable_block_cache_for_lookup: bool, - /// Temporary env var to fall back to the old broken way of determining - /// the time of the last rollup from the POI table instead of the new - /// way that fixes - /// https://github.com/graphprotocol/graph-node/issues/5530 Remove this - /// and all code that is dead as a consequence once this has been vetted - /// sufficiently, probably after 2024-12-01 - /// Defaults to `false`, i.e. 
using the new fixed behavior - pub last_rollup_from_poi: bool, /// Safety switch to increase the number of columns used when /// calculating the chunk size in `InsertQuery::chunk_size`. This can be /// used to work around Postgres errors complaining 'number of @@ -138,9 +149,11 @@ impl fmt::Debug for EnvVarsStore { } } -impl From for EnvVarsStore { - fn from(x: InnerStore) -> Self { - Self { +impl TryFrom for EnvVarsStore { + type Error = anyhow::Error; + + fn try_from(x: InnerStore) -> Result { + let vars = Self { chain_head_watcher_timeout: Duration::from_secs(x.chain_head_watcher_timeout_in_secs), query_stats_refresh_interval: Duration::from_secs( x.query_stats_refresh_interval_in_secs, @@ -168,18 +181,32 @@ impl From for EnvVarsStore { connection_idle_timeout: Duration::from_secs(x.connection_idle_timeout_in_secs), write_queue_size: x.write_queue_size, batch_target_duration: Duration::from_secs(x.batch_target_duration_in_secs), + batch_timeout: x.batch_timeout_in_secs.map(Duration::from_secs), + batch_workers: x.batch_workers, + batch_worker_wait: Duration::from_millis(x.batch_worker_wait), rebuild_threshold: x.rebuild_threshold.0, delete_threshold: x.delete_threshold.0, history_slack_factor: x.history_slack_factor.0, + prune_keep_history: x.prune_keep_status, write_batch_duration: Duration::from_secs(x.write_batch_duration_in_secs), write_batch_size: x.write_batch_size * 1_000, create_gin_indexes: x.create_gin_indexes, use_brin_for_all_query_types: x.use_brin_for_all_query_types, disable_block_cache_for_lookup: x.disable_block_cache_for_lookup, - last_rollup_from_poi: x.last_rollup_from_poi, insert_extra_cols: x.insert_extra_cols, fdw_fetch_size: x.fdw_fetch_size, + }; + if let Some(timeout) = vars.batch_timeout { + if timeout < 2 * vars.batch_target_duration { + bail!( + "GRAPH_STORE_BATCH_TIMEOUT must be greater than 2*GRAPH_STORE_BATCH_TARGET_DURATION" + ); + } + } + if vars.batch_workers < 1 { + bail!("GRAPH_STORE_BATCH_WORKERS must be at least 1"); } + Ok(vars) } } @@ -222,12 +249,20 @@ pub struct InnerStore { write_queue_size: usize, #[envconfig(from = "GRAPH_STORE_BATCH_TARGET_DURATION", default = "180")] batch_target_duration_in_secs: u64, + #[envconfig(from = "GRAPH_STORE_BATCH_TIMEOUT")] + batch_timeout_in_secs: Option, + #[envconfig(from = "GRAPH_STORE_BATCH_WORKERS", default = "1")] + batch_workers: usize, + #[envconfig(from = "GRAPH_STORE_BATCH_WORKER_WAIT", default = "2000")] + batch_worker_wait: u64, #[envconfig(from = "GRAPH_STORE_HISTORY_REBUILD_THRESHOLD", default = "0.5")] rebuild_threshold: ZeroToOneF64, #[envconfig(from = "GRAPH_STORE_HISTORY_DELETE_THRESHOLD", default = "0.05")] delete_threshold: ZeroToOneF64, #[envconfig(from = "GRAPH_STORE_HISTORY_SLACK_FACTOR", default = "1.2")] history_slack_factor: HistorySlackF64, + #[envconfig(from = "GRAPH_STORE_HISTORY_KEEP_STATUS", default = "5")] + prune_keep_status: usize, #[envconfig(from = "GRAPH_STORE_WRITE_BATCH_DURATION", default = "300")] write_batch_duration_in_secs: u64, #[envconfig(from = "GRAPH_STORE_WRITE_BATCH_SIZE", default = "10000")] @@ -238,11 +273,9 @@ pub struct InnerStore { use_brin_for_all_query_types: bool, #[envconfig(from = "GRAPH_STORE_DISABLE_BLOCK_CACHE_FOR_LOOKUP", default = "false")] disable_block_cache_for_lookup: bool, - #[envconfig(from = "GRAPH_STORE_LAST_ROLLUP_FROM_POI", default = "false")] - last_rollup_from_poi: bool, #[envconfig(from = "GRAPH_STORE_INSERT_EXTRA_COLS", default = "0")] insert_extra_cols: usize, - #[envconfig(from = "GRAPH_STORE_FDW_FETCH_SIZE", default = "10000")] + 
#[envconfig(from = "GRAPH_STORE_FDW_FETCH_SIZE", default = "1000")] fdw_fetch_size: usize, } diff --git a/graph/src/ext/futures.rs b/graph/src/ext/futures.rs index c25550a426f..7c5eb0fc96e 100644 --- a/graph/src/ext/futures.rs +++ b/graph/src/ext/futures.rs @@ -12,42 +12,45 @@ use std::time::Duration; /// /// Created by calling `cancelable` extension method. /// Can be canceled through the corresponding `CancelGuard`. -pub struct Cancelable { +pub struct Cancelable { inner: T, cancel_receiver: Fuse>, - on_cancel: C, } -impl Cancelable { +impl Cancelable { pub fn get_mut(&mut self) -> &mut T { &mut self.inner } } /// It's not viable to use `select` directly, so we do a custom implementation. -impl S::Item + Unpin> Stream for Cancelable { - type Item = S::Item; +impl> + Unpin, R, E: Display + Debug> Stream for Cancelable { + type Item = Result>; fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { // Error if the stream was canceled by dropping the sender. match self.cancel_receiver.poll_unpin(cx) { Poll::Ready(Ok(_)) => unreachable!(), - Poll::Ready(Err(_)) => Poll::Ready(Some((self.on_cancel)())), - Poll::Pending => Pin::new(&mut self.inner).poll_next(cx), + Poll::Ready(Err(_)) => Poll::Ready(Some(Err(CancelableError::Cancel))), + Poll::Pending => Pin::new(&mut self.inner) + .poll_next(cx) + .map_err(|x| CancelableError::Error(x)), } } } -impl F::Output + Unpin> Future for Cancelable { - type Output = F::Output; +impl> + Unpin, R, E: Display + Debug> Future for Cancelable { + type Output = Result>; fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { // Error if the future was canceled by dropping the sender. // `canceled` is fused so we may ignore `Ok`s. match self.cancel_receiver.poll_unpin(cx) { Poll::Ready(Ok(_)) => unreachable!(), - Poll::Ready(Err(_)) => Poll::Ready((self.on_cancel)()), - Poll::Pending => Pin::new(&mut self.inner).poll(cx), + Poll::Ready(Err(_)) => Poll::Ready(Err(CancelableError::Cancel)), + Poll::Pending => Pin::new(&mut self.inner) + .poll(cx) + .map_err(|x| CancelableError::Error(x)), } } } @@ -209,25 +212,16 @@ pub trait StreamExtension: Stream + Sized { /// When `cancel` is called on a `CancelGuard` or it is dropped, /// `Cancelable` receives an error. /// - fn cancelable Self::Item>( - self, - guard: &impl Canceler, - on_cancel: C, - ) -> Cancelable; + fn cancelable(self, guard: &impl Canceler) -> Cancelable; } impl StreamExtension for S { - fn cancelable S::Item>( - self, - guard: &impl Canceler, - on_cancel: C, - ) -> Cancelable { + fn cancelable(self, guard: &impl Canceler) -> Cancelable { let (canceler, cancel_receiver) = oneshot::channel(); guard.add_cancel_sender(canceler); Cancelable { inner: self, cancel_receiver: cancel_receiver.fuse(), - on_cancel, } } } @@ -237,27 +231,18 @@ pub trait FutureExtension: Future + Sized { /// `Cancelable` receives an error. /// /// `on_cancel` is called to make an error value upon cancelation. 
- fn cancelable Self::Output>( - self, - guard: &impl Canceler, - on_cancel: C, - ) -> Cancelable; + fn cancelable(self, guard: &impl Canceler) -> Cancelable; fn timeout(self, dur: Duration) -> tokio::time::Timeout; } impl FutureExtension for F { - fn cancelable F::Output>( - self, - guard: &impl Canceler, - on_cancel: C, - ) -> Cancelable { + fn cancelable(self, guard: &impl Canceler) -> Cancelable { let (canceler, cancel_receiver) = oneshot::channel(); guard.add_cancel_sender(canceler); Cancelable { inner: self, cancel_receiver: cancel_receiver.fuse(), - on_cancel, } } diff --git a/graph/src/firehose/endpoints.rs b/graph/src/firehose/endpoints.rs index 825f3ddbd20..448eb845496 100644 --- a/graph/src/firehose/endpoints.rs +++ b/graph/src/firehose/endpoints.rs @@ -13,8 +13,9 @@ use crate::{ prelude::{anyhow, debug, DeploymentHash}, substreams_rpc, }; +use anyhow::Context; use async_trait::async_trait; -use futures03::StreamExt; +use futures03::{StreamExt, TryStreamExt}; use http::uri::{Scheme, Uri}; use itertools::Itertools; use slog::{error, info, trace, Logger}; @@ -193,9 +194,14 @@ impl FirehoseEndpoint { let endpoint_builder = match uri.scheme().unwrap_or(&Scheme::HTTP).as_str() { "http" => Channel::builder(uri), - "https" => Channel::builder(uri) - .tls_config(ClientTlsConfig::new()) - .expect("TLS config on this host is invalid"), + "https" => { + let mut tls = ClientTlsConfig::new(); + tls = tls.with_native_roots(); + + Channel::builder(uri) + .tls_config(tls) + .expect("TLS config on this host is invalid") + } _ => panic!("invalid uri scheme for firehose endpoint"), }; @@ -443,15 +449,47 @@ impl FirehoseEndpoint { } } - pub async fn get_block_by_number( - &self, - number: u64, + pub async fn get_block_by_ptr_with_retry( + self: Arc, + ptr: &BlockPtr, logger: &Logger, ) -> Result where M: prost::Message + BlockchainBlock + Default + 'static, { - debug!( + let retry_log_message = format!("get_block_by_ptr for block {}", ptr); + let endpoint = self.cheap_clone(); + let logger = logger.cheap_clone(); + let ptr_for_retry = ptr.clone(); + + retry(retry_log_message, &logger) + .limit(ENV_VARS.firehose_block_fetch_retry_limit) + .timeout_secs(ENV_VARS.firehose_block_fetch_timeout) + .run(move || { + let endpoint = endpoint.cheap_clone(); + let logger = logger.cheap_clone(); + let ptr = ptr_for_retry.clone(); + async move { + endpoint + .get_block_by_ptr::(&ptr, &logger) + .await + .context(format!( + "Failed to fetch block by ptr {} from firehose", + ptr + )) + } + }) + .await + .map_err(move |e| { + anyhow::anyhow!("Failed to fetch block by ptr {} from firehose: {}", ptr, e) + }) + } + + async fn get_block_by_number(&self, number: u64, logger: &Logger) -> Result + where + M: prost::Message + BlockchainBlock + Default + 'static, + { + trace!( logger, "Connecting to firehose to retrieve block for number {}", number; "provider" => self.provider.as_str(), @@ -473,6 +511,44 @@ impl FirehoseEndpoint { } } + pub async fn get_block_by_number_with_retry( + self: Arc, + number: u64, + logger: &Logger, + ) -> Result + where + M: prost::Message + BlockchainBlock + Default + 'static, + { + let retry_log_message = format!("get_block_by_number for block {}", number); + let endpoint = self.cheap_clone(); + let logger = logger.cheap_clone(); + + retry(retry_log_message, &logger) + .limit(ENV_VARS.firehose_block_fetch_retry_limit) + .timeout_secs(ENV_VARS.firehose_block_fetch_timeout) + .run(move || { + let endpoint = endpoint.cheap_clone(); + let logger = logger.cheap_clone(); + async move { + endpoint + 
.get_block_by_number::(number, &logger) + .await + .context(format!( + "Failed to fetch block by number {} from firehose", + number + )) + } + }) + .await + .map_err(|e| { + anyhow::anyhow!( + "Failed to fetch block by number {} from firehose: {}", + number, + e + ) + }) + } + pub async fn load_blocks_by_numbers( self: Arc, numbers: Vec, @@ -481,51 +557,24 @@ impl FirehoseEndpoint { where M: prost::Message + BlockchainBlock + Default + 'static, { - let mut blocks = Vec::with_capacity(numbers.len()); - - for number in numbers { - let provider_name = self.provider.as_str(); + let logger = logger.clone(); + let logger_for_error = logger.clone(); + + let blocks_stream = futures03::stream::iter(numbers) + .map(move |number| { + let e = self.cheap_clone(); + let l = logger.clone(); + async move { e.get_block_by_number_with_retry::(number, &l).await } + }) + .buffered(ENV_VARS.firehose_block_batch_size); - trace!( - logger, - "Loading block for block number {}", number; - "provider" => provider_name, + let blocks = blocks_stream.try_collect::>().await.map_err(|e| { + error!( + logger_for_error, + "Failed to load blocks from firehose: {}", e; ); - - let retry_log_message = format!("get_block_by_number for block {}", number); - let endpoint_for_retry = self.cheap_clone(); - - let logger_for_retry = logger.clone(); - let logger_for_error = logger.clone(); - - let block = retry(retry_log_message, &logger_for_retry) - .limit(ENV_VARS.firehose_block_fetch_retry_limit) - .timeout_secs(ENV_VARS.firehose_block_fetch_timeout) - .run(move || { - let e = endpoint_for_retry.cheap_clone(); - let l = logger_for_retry.clone(); - async move { e.get_block_by_number::(number, &l).await } - }) - .await; - - match block { - Ok(block) => { - blocks.push(block); - } - Err(e) => { - error!( - logger_for_error, - "Failed to load block number {}: {}", number, e; - "provider" => provider_name, - ); - return Err(anyhow::format_err!( - "failed to load block number {}: {}", - number, - e - )); - } - } - } + anyhow::format_err!("failed to load blocks from firehose: {}", e) + })?; Ok(blocks) } diff --git a/graph/src/task_spawn.rs b/graph/src/task_spawn.rs index 09055ad5381..dd1477bb1c8 100644 --- a/graph/src/task_spawn.rs +++ b/graph/src/task_spawn.rs @@ -57,10 +57,11 @@ pub fn block_on(f: impl Future03) -> T { } /// Spawns a thread with access to the tokio runtime. Panics if the thread cannot be spawned. 
-pub fn spawn_thread( - name: impl Into, - f: impl 'static + FnOnce() + Send, -) -> std::thread::JoinHandle<()> { +pub fn spawn_thread(name: impl Into, f: F) -> std::thread::JoinHandle +where + F: 'static + FnOnce() -> R + Send, + R: 'static + Send, +{ let conf = std::thread::Builder::new().name(name.into()); let runtime = tokio::runtime::Handle::current(); conf.spawn(move || { diff --git a/graph/src/util/futures.rs b/graph/src/util/futures.rs index d742457dcd1..7c49806c53a 100644 --- a/graph/src/util/futures.rs +++ b/graph/src/util/futures.rs @@ -1,5 +1,7 @@ use crate::ext::futures::FutureExtension; use futures03::{Future, FutureExt, TryFutureExt}; +use lazy_static::lazy_static; +use regex::Regex; use slog::{debug, trace, warn, Logger}; use std::fmt::Debug; use std::marker::PhantomData; @@ -61,6 +63,7 @@ pub fn retry(operation_name: impl ToString, logger: &Logger) -> RetryConfi log_after: 1, warn_after: 10, limit: RetryConfigProperty::Unknown, + redact_log_urls: false, phantom_item: PhantomData, phantom_error: PhantomData, } @@ -75,6 +78,7 @@ pub struct RetryConfig { limit: RetryConfigProperty, phantom_item: PhantomData, phantom_error: PhantomData, + redact_log_urls: bool, } impl RetryConfig @@ -125,6 +129,12 @@ where self } + /// Redact alphanumeric URLs from log messages. + pub fn redact_log_urls(mut self, redact_log_urls: bool) -> Self { + self.redact_log_urls = redact_log_urls; + self + } + /// Set how long (in seconds) to wait for an attempt to complete before giving up on that /// attempt. pub fn timeout_secs(self, timeout_secs: u64) -> RetryConfigWithTimeout { @@ -173,6 +183,7 @@ where let log_after = self.inner.log_after; let warn_after = self.inner.warn_after; let limit_opt = self.inner.limit.unwrap(&operation_name, "limit"); + let redact_log_urls = self.inner.redact_log_urls; let timeout = self.timeout; trace!(logger, "Run with retry: {}", operation_name); @@ -184,6 +195,7 @@ where log_after, warn_after, limit_opt, + redact_log_urls, move || { try_it() .timeout(timeout) @@ -214,6 +226,7 @@ impl RetryConfigNoTimeout { let log_after = self.inner.log_after; let warn_after = self.inner.warn_after; let limit_opt = self.inner.limit.unwrap(&operation_name, "limit"); + let redact_log_urls = self.inner.redact_log_urls; trace!(logger, "Run with retry: {}", operation_name); @@ -224,6 +237,7 @@ impl RetryConfigNoTimeout { log_after, warn_after, limit_opt, + redact_log_urls, // No timeout, so all errors are inner errors move || try_it().map_err(TimeoutError::Inner), ) @@ -265,6 +279,7 @@ fn run_retry( log_after: u64, warn_after: u64, limit_opt: Option, + redact_log_urls: bool, mut try_it_with_timeout: F, ) -> impl Future>> + Send where @@ -311,25 +326,38 @@ where // If needs retry if condition.check(&result) { + let result_str = || { + if redact_log_urls { + lazy_static! { + static ref RE: Regex = + Regex::new(r#"https?://[a-zA-Z0-9\-\._:/\?#&=]+"#).unwrap(); + } + let e = format!("{result:?}"); + RE.replace_all(&e, "[REDACTED]").into_owned() + } else { + format!("{result:?}") + } + }; + if attempt_count >= warn_after { // This looks like it would be nice to de-duplicate, but if we try // to use log! 
slog complains about requiring a const for the log level // See also b05e1594-e408-4047-aefb-71fc60d70e8f warn!( logger, - "Trying again after {} failed (attempt #{}) with result {:?}", + "Trying again after {} failed (attempt #{}) with result {}", &operation_name, attempt_count, - result + result_str(), ); } else if attempt_count >= log_after { // See also b05e1594-e408-4047-aefb-71fc60d70e8f debug!( logger, - "Trying again after {} failed (attempt #{}) with result {:?}", + "Trying again after {} failed (attempt #{}) with result {}", &operation_name, attempt_count, - result + result_str(), ); } diff --git a/graph/src/util/ogive.rs b/graph/src/util/ogive.rs index 476bfd76ce8..29938b03b17 100644 --- a/graph/src/util/ogive.rs +++ b/graph/src/util/ogive.rs @@ -1,6 +1,6 @@ use std::ops::RangeInclusive; -use crate::{constraint_violation, prelude::StoreError}; +use crate::{internal_error, prelude::StoreError}; /// A helper to deal with cumulative histograms, also known as ogives. This /// implementation is restricted to histograms where each bin has the same @@ -19,7 +19,7 @@ use crate::{constraint_violation, prelude::StoreError}; /// more fun to say. pub struct Ogive { /// The breakpoints of the piecewise linear function - points: Vec, + points: Vec, /// The size of each bin; the linear piece from `points[i]` to /// `points[i+1]` rises by this much bin_size: f64, @@ -37,9 +37,7 @@ impl Ogive { /// and deduplicated, i.e., they don't have to be in ascending order. pub fn from_equi_histogram(mut points: Vec, total: usize) -> Result { if points.is_empty() { - return Err(constraint_violation!( - "histogram must have at least one point" - )); + return Err(internal_error!("histogram must have at least one point")); } points.sort_unstable(); @@ -48,7 +46,6 @@ impl Ogive { let bins = points.len() - 1; let bin_size = total as f64 / bins as f64; let range = points[0]..=points[bins]; - let points = points.into_iter().map(|p| p as f64).collect(); Ok(Self { points, bin_size, @@ -92,7 +89,6 @@ impl Ogive { fn interval_start(&self, point: i64) -> Result { self.check_in_range(point)?; - let point = point as f64; let idx = self .points .iter() @@ -104,16 +100,22 @@ impl Ogive { /// Return the value of the ogive at `point`, i.e., `f(point)`. It is an /// error if `point` is outside the range of points of this ogive. + /// + /// If `i` is such that + /// `points[i] <= point < points[i+1]`, then + /// ```text + /// f(point) = i * bin_size + (point - points[i]) / (points[i+1] - points[i]) * bin_size + /// ``` + // See the comment on `inverse` for numerical considerations fn value(&self, point: i64) -> Result { if self.points.len() == 1 { return Ok(*self.range.end()); } let idx = self.interval_start(point)?; - let bin_size = self.bin_size as f64; let (a, b) = (self.points[idx], self.points[idx + 1]); - let point = point as f64; - let value = (idx as f64 + (point - a) / (b - a)) * bin_size; + let offset = (point - a) as f64 / (b - a) as f64; + let value = (idx as f64 + offset) * self.bin_size; Ok(value as i64) } @@ -121,24 +123,44 @@ impl Ogive { /// It is an error if `value` is negative. If `value` is greater than /// the total count of the ogive, the maximum point of the ogive is /// returned. 
+ /// + /// For `points[j] <= v < points[j+1]`, the value of `g(v)` is + /// ```text + /// g(v) = (1-lambda)*points[j] + lambda * points[j+1] + /// ``` + /// where `lambda = (v - j * bin_size) / bin_size` + /// + // Note that in the definition of `lambda`, the numerator is + // `v.rem_euclid(bin_size)` + // + // Numerical consideration: in these calculations, we need to be careful + // to never convert one of the points directly to f64 since they can be + // so large that the conversion from i64 to f64 loses precision. That + // loss of precision can cause the convex combination of `points[j]` and + // `points[j+1]` above to lie outside of that interval when `(points[j] + // as f64) as i64 < points[j]` + // + // We therefore try to only convert differences between points to f64 + // which are much smaller. fn inverse(&self, value: i64) -> Result { - let value = value as f64; - if value < 0.0 { - return Err(constraint_violation!("value {} can not be negative", value)); + if value < 0 { + return Err(internal_error!("value {} can not be negative", value)); } - let idx = (value / self.bin_size) as usize; - if idx >= self.points.len() - 1 { + let j = (value / self.bin_size as i64) as usize; + if j >= self.points.len() - 1 { return Ok(*self.range.end()); } - let (a, b) = (self.points[idx] as f64, self.points[idx + 1] as f64); - let lambda = (value - idx as f64 * self.bin_size) / self.bin_size; - let x = (1.0 - lambda) * a + lambda * b; + let (a, b) = (self.points[j], self.points[j + 1]); + // This is the same calculation as in the comment above, but + // rewritten to be more friendly to lossy calculations with f64 + let offset = (value as f64).rem_euclid(self.bin_size) * (b - a) as f64; + let x = a + (offset / self.bin_size) as i64; Ok(x as i64) } fn check_in_range(&self, point: i64) -> Result<(), StoreError> { if !self.range.contains(&point) { - return Err(constraint_violation!( + return Err(internal_error!( "point {} is outside of the range [{}, {}]", point, self.range.start(), diff --git a/graphql/src/execution/resolver.rs b/graphql/src/execution/resolver.rs index ca59e401dfc..0074eb124d8 100644 --- a/graphql/src/execution/resolver.rs +++ b/graphql/src/execution/resolver.rs @@ -18,7 +18,7 @@ use super::Query; pub trait Resolver: Sized + Send + Sync + 'static { const CACHEABLE: bool; - async fn query_permit(&self) -> Result; + async fn query_permit(&self) -> QueryPermit; /// Prepare for executing a query by prefetching as much data as possible fn prefetch( diff --git a/graphql/src/introspection/resolver.rs b/graphql/src/introspection/resolver.rs index 0f67b717c5a..765b0399695 100644 --- a/graphql/src/introspection/resolver.rs +++ b/graphql/src/introspection/resolver.rs @@ -356,7 +356,7 @@ impl Resolver for IntrospectionResolver { // see `fn as_introspection_context`, so this value is irrelevant. 
const CACHEABLE: bool = false; - async fn query_permit(&self) -> Result { + async fn query_permit(&self) -> QueryPermit { unreachable!() } diff --git a/graphql/src/runner.rs b/graphql/src/runner.rs index 96f30e8bc9d..d2f0bc9c96c 100644 --- a/graphql/src/runner.rs +++ b/graphql/src/runner.rs @@ -143,7 +143,7 @@ where )?; self.load_manager .decide( - &store.wait_stats().map_err(QueryExecutionError::from)?, + &store.wait_stats(), store.shard(), store.deployment_id(), query.shape_hash, diff --git a/graphql/src/store/prefetch.rs b/graphql/src/store/prefetch.rs index 33f0b67452b..95f51d51944 100644 --- a/graphql/src/store/prefetch.rs +++ b/graphql/src/store/prefetch.rs @@ -632,7 +632,7 @@ impl<'a> Loader<'a> { let object_type = input_schema .object_or_aggregation(&object_type.name, parent_interval) .ok_or_else(|| { - vec![QueryExecutionError::ConstraintViolation(format!( + vec![QueryExecutionError::InternalError(format!( "the type `{}`(interval {}) is not an object type", object_type.name, parent_interval diff --git a/graphql/src/store/resolver.rs b/graphql/src/store/resolver.rs index 82c40420fa6..8f5eaaccbd1 100644 --- a/graphql/src/store/resolver.rs +++ b/graphql/src/store/resolver.rs @@ -256,8 +256,8 @@ impl StoreResolver { impl Resolver for StoreResolver { const CACHEABLE: bool = true; - async fn query_permit(&self) -> Result { - self.store.query_permit().await.map_err(Into::into) + async fn query_permit(&self) -> QueryPermit { + self.store.query_permit().await } fn prefetch( @@ -327,7 +327,7 @@ impl Resolver for StoreResolver { None => { let child0_id = child_id(&children[0]); let child1_id = child_id(&children[1]); - QueryExecutionError::ConstraintViolation(format!( + QueryExecutionError::InternalError(format!( "expected only one child for {}.{} but got {}. 
One child has id {}, another has id {}", object_type.name(), field.name, children.len(), child0_id, child1_id diff --git a/node/Cargo.toml b/node/Cargo.toml index ee6411fc87c..444b18784fc 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -19,7 +19,7 @@ clap.workspace = true git-testament = "0.2" itertools = { workspace = true } lazy_static = "1.5.0" -url = "2.5.2" +url = "2.5.4" graph = { path = "../graph" } graph-core = { path = "../core" } graph-chain-arweave = { path = "../chain/arweave" } diff --git a/node/src/bin/manager.rs b/node/src/bin/manager.rs index 20cf93d94df..81c794485d4 100644 --- a/node/src/bin/manager.rs +++ b/node/src/bin/manager.rs @@ -8,7 +8,7 @@ use graph::components::network_provider::ChainName; use graph::endpoint::EndpointMetrics; use graph::env::ENV_VARS; use graph::log::logger_with_levels; -use graph::prelude::{MetricsRegistry, BLOCK_NUMBER_MAX}; +use graph::prelude::{BlockNumber, MetricsRegistry, BLOCK_NUMBER_MAX}; use graph::{data::graphql::load_manager::LoadManager, prelude::chrono, prometheus::Registry}; use graph::{ prelude::{ @@ -26,14 +26,13 @@ use graph_node::network_setup::Networks; use graph_node::{ manager::deployment::DeploymentSearch, store_builder::StoreBuilder, MetricsContext, }; -use graph_store_postgres::connection_pool::PoolCoordinator; -use graph_store_postgres::ChainStore; use graph_store_postgres::{ - connection_pool::ConnectionPool, BlockStore, NotificationSender, Shard, Store, SubgraphStore, - SubscriptionManager, PRIMARY_SHARD, + BlockStore, ChainStore, ConnectionPool, NotificationSender, PoolCoordinator, Shard, Store, + SubgraphStore, SubscriptionManager, PRIMARY_SHARD, }; use itertools::Itertools; use lazy_static::lazy_static; +use std::env; use std::str::FromStr; use std::{collections::HashMap, num::ParseIntError, sync::Arc, time::Duration}; const VERSION_LABEL_KEY: &str = "version"; @@ -298,35 +297,13 @@ pub enum Command { #[clap(subcommand)] Index(IndexCommand), - /// Prune a deployment + /// Prune subgraphs by removing old entity versions /// /// Keep only entity versions that are needed to respond to queries at /// block heights that are within `history` blocks of the subgraph head; /// all other entity versions are removed. - /// - /// Unless `--once` is given, this setting is permanent and the subgraph - /// will periodically be pruned to remove history as the subgraph head - /// moves forward. - Prune { - /// The deployment to prune (see `help info`) - deployment: DeploymentSearch, - /// Prune by rebuilding tables when removing more than this fraction - /// of history. Defaults to GRAPH_STORE_HISTORY_REBUILD_THRESHOLD - #[clap(long, short)] - rebuild_threshold: Option, - /// Prune by deleting when removing more than this fraction of - /// history but less than rebuild_threshold. Defaults to - /// GRAPH_STORE_HISTORY_DELETE_THRESHOLD - #[clap(long, short)] - delete_threshold: Option, - /// How much history to keep in blocks. Defaults to - /// GRAPH_MIN_HISTORY_BLOCKS - #[clap(long, short = 'y')] - history: Option, - /// Prune only this once - #[clap(long, short)] - once: bool, - }, + #[clap(subcommand)] + Prune(PruneCommand), /// General database management #[clap(subcommand)] @@ -586,6 +563,19 @@ pub enum ChainCommand { #[clap(value_parser = clap::builder::NonEmptyStringValueParser::new())] chain_name: String, }, + + /// Ingest a block into the block cache. 
+ /// + /// This will overwrite any blocks we may already have in the block + /// cache, and can therefore be used to get rid of duplicate blocks in + /// the block cache as well as making sure that a certain block is in + /// the cache + Ingest { + /// The name of the chain + name: String, + /// The block number to ingest + number: BlockNumber, + }, } #[derive(Clone, Debug, Subcommand)] @@ -682,6 +672,67 @@ pub enum StatsCommand { }, } +#[derive(Clone, Debug, Subcommand)] +pub enum PruneCommand { + /// Prune a deployment in the foreground + /// + /// Unless `--once` is given, this setting is permanent and the subgraph + /// will periodically be pruned to remove history as the subgraph head + /// moves forward. + Run { + /// The deployment to prune (see `help info`) + deployment: DeploymentSearch, + /// Prune by rebuilding tables when removing more than this fraction + /// of history. Defaults to GRAPH_STORE_HISTORY_REBUILD_THRESHOLD + #[clap(long, short)] + rebuild_threshold: Option, + /// Prune by deleting when removing more than this fraction of + /// history but less than rebuild_threshold. Defaults to + /// GRAPH_STORE_HISTORY_DELETE_THRESHOLD + #[clap(long, short)] + delete_threshold: Option, + /// How much history to keep in blocks. Defaults to + /// GRAPH_MIN_HISTORY_BLOCKS + #[clap(long, short = 'y')] + history: Option, + /// Prune only this once + #[clap(long, short)] + once: bool, + }, + /// Prune a deployment in the background + /// + /// Set the amount of history the subgraph should retain. The actual + /// data removal happens in the background and can be monitored with + /// `prune status`. It can take several minutes of the first pruning to + /// start, during which time `prune status` will not return any + /// information + Set { + /// The deployment to prune (see `help info`) + deployment: DeploymentSearch, + /// Prune by rebuilding tables when removing more than this fraction + /// of history. Defaults to GRAPH_STORE_HISTORY_REBUILD_THRESHOLD + #[clap(long, short)] + rebuild_threshold: Option, + /// Prune by deleting when removing more than this fraction of + /// history but less than rebuild_threshold. Defaults to + /// GRAPH_STORE_HISTORY_DELETE_THRESHOLD + #[clap(long, short)] + delete_threshold: Option, + /// How much history to keep in blocks. Defaults to + /// GRAPH_MIN_HISTORY_BLOCKS + #[clap(long, short = 'y')] + history: Option, + }, + /// Show the status of a pruning operation + Status { + /// The number of the pruning run + #[clap(long, short)] + run: Option, + /// The deployment to check (see `help info`) + deployment: DeploymentSearch, + }, +} + #[derive(Clone, Debug, Subcommand)] pub enum IndexCommand { /// Creates a new database index. 
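To make the `history` semantics of the prune subcommands above concrete: only entity versions needed to answer queries within `history` blocks of the subgraph head are retained, so pruning is refused while the deployment has indexed fewer than `history` blocks (the same condition checked later in this diff in `commands/prune.rs`). A toy helper, not part of graphman, illustrating the arithmetic:

```rust
// Toy illustration (hypothetical helper, not graphman code) of the `history`
// setting: everything older than `latest_block - history` may be pruned.
fn earliest_kept_block(latest_block: i32, history: i32) -> Result<i32, String> {
    if latest_block <= history {
        return Err(format!(
            "only indexed up to block {latest_block}; cannot preserve {history} blocks of history"
        ));
    }
    Ok(latest_block - history)
}

fn main() {
    // with the head at block 20_000 and 10_000 blocks of history,
    // state below block 10_000 is eligible for pruning
    assert_eq!(earliest_kept_block(20_000, 10_000), Ok(10_000));
    assert!(earliest_kept_block(5_000, 10_000).is_err());
}
```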
@@ -897,7 +948,7 @@ impl Context { fn primary_pool(self) -> ConnectionPool { let primary = self.config.primary_store(); - let coord = Arc::new(PoolCoordinator::new(Arc::new(vec![]))); + let coord = Arc::new(PoolCoordinator::new(&self.logger, Arc::new(vec![]))); let pool = StoreBuilder::main_pool( &self.logger, &self.node_id, @@ -1014,7 +1065,19 @@ impl Context { self, chain_name: &str, ) -> anyhow::Result<(Arc, Arc)> { - let networks = self.networks().await?; + let logger = self.logger.clone(); + let registry = self.metrics_registry(); + let metrics = Arc::new(EndpointMetrics::mock()); + let networks = Networks::from_config_for_chain( + logger, + &self.config, + registry, + metrics, + &[], + chain_name, + ) + .await?; + let chain_store = self.chain_store(chain_name)?; let ethereum_adapter = networks .ethereum_rpcs(chain_name.into()) @@ -1030,6 +1093,9 @@ impl Context { #[tokio::main] async fn main() -> anyhow::Result<()> { + // Disable load management for graphman commands + env::set_var("GRAPH_LOAD_THRESHOLD", "0"); + let opt = Opt::parse(); Terminal::set_color_preference(&opt.color); @@ -1448,6 +1514,12 @@ async fn main() -> anyhow::Result<()> { } } } + Ingest { name, number } => { + let logger = ctx.logger.cheap_clone(); + let (chain_store, ethereum_adapter) = + ctx.chain_store_and_adapter(&name).await?; + commands::chain::ingest(&logger, chain_store, ethereum_adapter, number).await + } } } Stats(cmd) => { @@ -1580,25 +1652,52 @@ async fn main() -> anyhow::Result<()> { } } } - Prune { - deployment, - history, - rebuild_threshold, - delete_threshold, - once, - } => { - let (store, primary_pool) = ctx.store_and_primary(); - let history = history.unwrap_or(ENV_VARS.min_history_blocks.try_into()?); - commands::prune::run( - store, - primary_pool, - deployment, - history, - rebuild_threshold, - delete_threshold, - once, - ) - .await + Prune(cmd) => { + use PruneCommand::*; + match cmd { + Run { + deployment, + history, + rebuild_threshold, + delete_threshold, + once, + } => { + let (store, primary_pool) = ctx.store_and_primary(); + let history = history.unwrap_or(ENV_VARS.min_history_blocks.try_into()?); + commands::prune::run( + store, + primary_pool, + deployment, + history, + rebuild_threshold, + delete_threshold, + once, + ) + .await + } + Set { + deployment, + rebuild_threshold, + delete_threshold, + history, + } => { + let (store, primary_pool) = ctx.store_and_primary(); + let history = history.unwrap_or(ENV_VARS.min_history_blocks.try_into()?); + commands::prune::set( + store, + primary_pool, + deployment, + history, + rebuild_threshold, + delete_threshold, + ) + .await + } + Status { run, deployment } => { + let (store, primary_pool) = ctx.store_and_primary(); + commands::prune::status(store, primary_pool, deployment, run).await + } + } } Drop { deployment, diff --git a/node/src/chain.rs b/node/src/chain.rs index 00785d11876..e2325aa6c7a 100644 --- a/node/src/chain.rs +++ b/node/src/chain.rs @@ -48,10 +48,39 @@ pub enum ProviderNetworkStatus { }, } +pub trait ChainFilter: Send + Sync { + fn filter(&self, chain_name: &str) -> bool; +} + +pub struct AnyChainFilter; + +impl ChainFilter for AnyChainFilter { + fn filter(&self, _: &str) -> bool { + true + } +} + +pub struct OneChainFilter { + chain_name: String, +} + +impl OneChainFilter { + pub fn new(chain_name: String) -> Self { + Self { chain_name } + } +} + +impl ChainFilter for OneChainFilter { + fn filter(&self, chain_name: &str) -> bool { + self.chain_name == chain_name + } +} + pub fn create_substreams_networks( logger: Logger, 
config: &Config, endpoint_metrics: Arc, + chain_filter: &dyn ChainFilter, ) -> Vec { debug!( logger, @@ -63,7 +92,13 @@ pub fn create_substreams_networks( let mut networks_by_kind: BTreeMap<(BlockchainKind, ChainName), Vec>> = BTreeMap::new(); - for (name, chain) in &config.chains.chains { + let filtered_chains = config + .chains + .chains + .iter() + .filter(|(name, _)| chain_filter.filter(name)); + + for (name, chain) in filtered_chains { let name: ChainName = name.as_str().into(); for provider in &chain.providers { if let ProviderDetails::Substreams(ref firehose) = provider.details { @@ -113,6 +148,7 @@ pub fn create_firehose_networks( logger: Logger, config: &Config, endpoint_metrics: Arc, + chain_filter: &dyn ChainFilter, ) -> Vec { debug!( logger, @@ -124,7 +160,13 @@ pub fn create_firehose_networks( let mut networks_by_kind: BTreeMap<(BlockchainKind, ChainName), Vec>> = BTreeMap::new(); - for (name, chain) in &config.chains.chains { + let filtered_chains = config + .chains + .chains + .iter() + .filter(|(name, _)| chain_filter.filter(name)); + + for (name, chain) in filtered_chains { let name: ChainName = name.as_str().into(); for provider in &chain.providers { let logger = logger.cheap_clone(); @@ -179,11 +221,12 @@ pub fn create_firehose_networks( /// Parses all Ethereum connection strings and returns their network names and /// `EthereumAdapter`. -pub async fn create_all_ethereum_networks( +pub async fn create_ethereum_networks( logger: Logger, registry: Arc, config: &Config, endpoint_metrics: Arc, + chain_filter: &dyn ChainFilter, ) -> anyhow::Result> { let eth_rpc_metrics = Arc::new(ProviderEthRpcMetrics::new(registry)); let eth_networks_futures = config @@ -191,6 +234,7 @@ pub async fn create_all_ethereum_networks( .chains .iter() .filter(|(_, chain)| chain.protocol == BlockchainKind::Ethereum) + .filter(|(name, _)| chain_filter.filter(name)) .map(|(name, _)| { create_ethereum_networks_for_chain( &logger, @@ -441,6 +485,7 @@ pub async fn networks_as_chains( client.clone(), metrics_registry.clone(), chain_store.clone(), + eth_adapters.clone(), ); let call_cache = chain_store.cheap_clone(); @@ -459,7 +504,7 @@ pub async fn networks_as_chains( Arc::new(adapter_selector), Arc::new(EthereumRuntimeAdapterBuilder {}), eth_adapters, - ENV_VARS.reorg_threshold, + ENV_VARS.reorg_threshold(), polling_interval, true, ); diff --git a/node/src/main.rs b/node/src/main.rs index 9b0e94250dc..6cd892079c1 100644 --- a/node/src/main.rs +++ b/node/src/main.rs @@ -28,9 +28,9 @@ use graph_server_http::GraphQLServer as GraphQLQueryServer; use graph_server_index_node::IndexNodeServer; use graph_server_json_rpc::JsonRpcServer; use graph_server_metrics::PrometheusMetricsServer; -use graph_store_postgres::connection_pool::ConnectionPool; -use graph_store_postgres::Store; -use graph_store_postgres::{register_jobs as register_store_jobs, NotificationSender}; +use graph_store_postgres::{ + register_jobs as register_store_jobs, ConnectionPool, NotificationSender, Store, +}; use graphman_server::GraphmanServer; use graphman_server::GraphmanServerConfig; use std::io::{BufRead, BufReader}; @@ -78,8 +78,21 @@ fn read_expensive_queries( Ok(queries) } -#[tokio::main] -async fn main() { +fn main() { + let max_blocking: usize = std::env::var("GRAPH_MAX_BLOCKING_THREADS") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(512); + + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .max_blocking_threads(max_blocking) + .build() + .unwrap() + .block_on(async { main_inner().await }) +} + +async fn 
main_inner() { env_logger::init(); let env_vars = Arc::new(EnvVars::from_env().unwrap()); diff --git a/node/src/manager/commands/assign.rs b/node/src/manager/commands/assign.rs index 568856f1f9f..01260538a74 100644 --- a/node/src/manager/commands/assign.rs +++ b/node/src/manager/commands/assign.rs @@ -1,8 +1,6 @@ use graph::components::store::DeploymentLocator; use graph::prelude::{anyhow::anyhow, Error, NodeId, StoreEvent}; -use graph_store_postgres::{ - command_support::catalog, connection_pool::ConnectionPool, NotificationSender, -}; +use graph_store_postgres::{command_support::catalog, ConnectionPool, NotificationSender}; use std::thread; use std::time::Duration; diff --git a/node/src/manager/commands/chain.rs b/node/src/manager/commands/chain.rs index f1bdf7d39b9..e1f460a7581 100644 --- a/node/src/manager/commands/chain.rs +++ b/node/src/manager/commands/chain.rs @@ -12,20 +12,22 @@ use graph::components::network_provider::ChainName; use graph::components::store::StoreError; use graph::prelude::BlockNumber; use graph::prelude::ChainStore as _; +use graph::prelude::LightEthereumBlockExt; use graph::prelude::{anyhow, anyhow::bail}; use graph::slog::Logger; use graph::{components::store::BlockStore as _, prelude::anyhow::Error}; +use graph_chain_ethereum::chain::BlockFinality; +use graph_chain_ethereum::EthereumAdapter; +use graph_chain_ethereum::EthereumAdapterTrait as _; use graph_store_postgres::add_chain; -use graph_store_postgres::connection_pool::PoolCoordinator; use graph_store_postgres::find_chain; use graph_store_postgres::update_chain_name; use graph_store_postgres::BlockStore; use graph_store_postgres::ChainStatus; use graph_store_postgres::ChainStore; +use graph_store_postgres::PoolCoordinator; use graph_store_postgres::Shard; -use graph_store_postgres::{ - command_support::catalog::block_store, connection_pool::ConnectionPool, -}; +use graph_store_postgres::{command_support::catalog::block_store, ConnectionPool}; use crate::network_setup::Networks; @@ -261,3 +263,30 @@ pub fn change_block_cache_shard( Ok(()) } + +pub async fn ingest( + logger: &Logger, + chain_store: Arc, + ethereum_adapter: Arc, + number: BlockNumber, +) -> Result<(), Error> { + let Some(block) = ethereum_adapter + .block_by_number(logger, number) + .await + .map_err(|e| anyhow!("error getting block number {number}: {}", e))? + else { + bail!("block number {number} not found"); + }; + let ptr = block.block_ptr(); + // For inserting the block, it doesn't matter whether the block is final or not. + let block = Arc::new(BlockFinality::Final(Arc::new(block))); + chain_store.upsert_block(block).await?; + + let rows = chain_store.confirm_block_hash(ptr.number, &ptr.hash)?; + + println!("Inserted block {}", ptr); + if rows > 0 { + println!(" (also deleted {rows} duplicate row(s) with that number)"); + } + Ok(()) +} diff --git a/node/src/manager/commands/check_blocks.rs b/node/src/manager/commands/check_blocks.rs index 6a82c67c3e6..0afa54bd7d3 100644 --- a/node/src/manager/commands/check_blocks.rs +++ b/node/src/manager/commands/check_blocks.rs @@ -153,7 +153,6 @@ async fn handle_multiple_block_hashes( mod steps { use super::*; - use graph::futures03::compat::Future01CompatExt; use graph::{ anyhow::bail, prelude::serde_json::{self, Value}, @@ -204,7 +203,6 @@ mod steps { ) -> anyhow::Result { let provider_block = ethereum_adapter .block_by_hash(logger, *block_hash) - .compat() .await .with_context(|| format!("failed to fetch block {block_hash}"))? 
.ok_or_else(|| anyhow!("JRPC provider found no block with hash {block_hash:?}"))?; diff --git a/node/src/manager/commands/copy.rs b/node/src/manager/commands/copy.rs index ab007ea319d..57f207b5b98 100644 --- a/node/src/manager/commands/copy.rs +++ b/node/src/manager/commands/copy.rs @@ -1,8 +1,8 @@ use diesel::{ExpressionMethods, JoinOnDsl, OptionalExtension, QueryDsl, RunQueryDsl}; -use std::{collections::HashMap, sync::Arc, time::SystemTime}; +use std::{collections::HashMap, sync::Arc}; use graph::{ - components::store::{BlockStore as _, DeploymentId}, + components::store::{BlockStore as _, DeploymentId, DeploymentLocator}, data::query::QueryTarget, prelude::{ anyhow::{anyhow, bail, Error}, @@ -17,10 +17,10 @@ use graph_store_postgres::{ }, PRIMARY_SHARD, }; -use graph_store_postgres::{connection_pool::ConnectionPool, Shard, Store, SubgraphStore}; +use graph_store_postgres::{ConnectionPool, Shard, Store, SubgraphStore}; -use crate::manager::deployment::DeploymentSearch; use crate::manager::display::List; +use crate::manager::{deployment::DeploymentSearch, fmt}; type UtcDateTime = DateTime; @@ -84,10 +84,9 @@ impl CopyState { } } -pub async fn create( +async fn create_inner( store: Arc, - primary: ConnectionPool, - src: DeploymentSearch, + src: &DeploymentLocator, shard: String, shards: Vec, node: String, @@ -104,7 +103,6 @@ pub async fn create( }; let subgraph_store = store.subgraph_store(); - let src = src.locate_unique(&primary)?; let query_store = store .query_store(QueryTarget::Deployment( src.hash.clone(), @@ -154,6 +152,32 @@ pub async fn create( Ok(()) } +pub async fn create( + store: Arc, + primary: ConnectionPool, + src: DeploymentSearch, + shard: String, + shards: Vec, + node: String, + block_offset: u32, + activate: bool, + replace: bool, +) -> Result<(), Error> { + let src = src.locate_unique(&primary)?; + create_inner( + store, + &src, + shard, + shards, + node, + block_offset, + activate, + replace, + ) + .await + .map_err(|e| anyhow!("cannot copy {src}: {e}")) +} + pub fn activate(store: Arc, deployment: String, shard: String) -> Result<(), Error> { let shard = Shard::new(shard)?; let deployment = @@ -231,33 +255,11 @@ pub fn list(pools: HashMap) -> Result<(), Error> { } pub fn status(pools: HashMap, dst: &DeploymentSearch) -> Result<(), Error> { + const CHECK: &str = "✓"; + use catalog::active_copies as ac; use catalog::deployment_schemas as ds; - fn done(ts: &Option) -> String { - ts.map(|_| "✓").unwrap_or(".").to_string() - } - - fn duration(start: &UtcDateTime, end: &Option) -> String { - let start = *start; - let end = *end; - - let end = end.unwrap_or(UtcDateTime::from(SystemTime::now())); - let duration = end - start; - - human_duration(duration) - } - - fn human_duration(duration: Duration) -> String { - if duration.num_seconds() < 5 { - format!("{}ms", duration.num_milliseconds()) - } else if duration.num_minutes() < 5 { - format!("{}s", duration.num_seconds()) - } else { - format!("{}m", duration.num_minutes()) - } - } - let primary = pools .get(&*PRIMARY_SHARD) .ok_or_else(|| anyhow!("can not find deployment with id {}", dst))?; @@ -290,7 +292,7 @@ pub fn status(pools: HashMap, dst: &DeploymentSearch) -> }; let progress = match &state.finished_at { - Some(_) => done(&state.finished_at), + Some(_) => CHECK.to_string(), None => { let target: i64 = tables.iter().map(|table| table.target_vid).sum(); let next: i64 = tables.iter().map(|table| table.next_vid).sum(); @@ -314,7 +316,7 @@ pub fn status(pools: HashMap, dst: &DeploymentSearch) -> state.dst.to_string(), 
state.target_block_number.to_string(), on_sync.to_str().to_string(), - duration(&state.started_at, &state.finished_at), + fmt::duration(&state.started_at, &state.finished_at), progress, ]; match (cancelled_at, state.cancelled_at) { @@ -334,27 +336,29 @@ pub fn status(pools: HashMap, dst: &DeploymentSearch) -> println!(); println!( - "{:^30} | {:^8} | {:^8} | {:^8} | {:^8}", + "{:^30} | {:^10} | {:^10} | {:^8} | {:^10}", "entity type", "next", "target", "batch", "duration" ); - println!("{:-<74}", "-"); + println!("{:-<80}", "-"); for table in tables { - let status = if table.next_vid > 0 && table.next_vid < table.target_vid { - ">".to_string() - } else if table.target_vid < 0 { + let status = match &table.finished_at { + // table finished + Some(_) => CHECK, // empty source table - "✓".to_string() - } else { - done(&table.finished_at) + None if table.target_vid < 0 => CHECK, + // copying in progress + None if table.duration_ms > 0 => ">", + // not started + None => ".", }; println!( - "{} {:<28} | {:>8} | {:>8} | {:>8} | {:>8}", + "{} {:<28} | {:>10} | {:>10} | {:>8} | {:>10}", status, table.entity_type, table.next_vid, table.target_vid, table.batch_size, - human_duration(Duration::milliseconds(table.duration_ms)), + fmt::human_duration(Duration::milliseconds(table.duration_ms)), ); } diff --git a/node/src/manager/commands/database.rs b/node/src/manager/commands/database.rs index 17d11c041cf..bb1f3b195e3 100644 --- a/node/src/manager/commands/database.rs +++ b/node/src/manager/commands/database.rs @@ -1,7 +1,7 @@ use std::{io::Write, time::Instant}; use graph::prelude::anyhow; -use graph_store_postgres::connection_pool::PoolCoordinator; +use graph_store_postgres::PoolCoordinator; pub async fn remap( coord: &PoolCoordinator, diff --git a/node/src/manager/commands/deployment/info.rs b/node/src/manager/commands/deployment/info.rs index 417092d6e2d..27a69c3841a 100644 --- a/node/src/manager/commands/deployment/info.rs +++ b/node/src/manager/commands/deployment/info.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use anyhow::bail; use anyhow::Result; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::Store; use graphman::commands::deployment::info::load_deployment_statuses; use graphman::commands::deployment::info::load_deployments; diff --git a/node/src/manager/commands/deployment/pause.rs b/node/src/manager/commands/deployment/pause.rs index 2a690ea688a..3e35496113e 100644 --- a/node/src/manager/commands/deployment/pause.rs +++ b/node/src/manager/commands/deployment/pause.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use anyhow::Result; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graphman::commands::deployment::pause::{ load_active_deployment, pause_active_deployment, PauseDeploymentError, diff --git a/node/src/manager/commands/deployment/reassign.rs b/node/src/manager/commands/deployment/reassign.rs index 60528f16206..afe4147a21e 100644 --- a/node/src/manager/commands/deployment/reassign.rs +++ b/node/src/manager/commands/deployment/reassign.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use anyhow::Result; use graph::prelude::NodeId; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graphman::commands::deployment::reassign::{ load_deployment, reassign_deployment, ReassignResult, @@ -16,11 +16,24 @@ pub fn run( node: &NodeId, ) -> 
Result<()> { let deployment = load_deployment(primary_pool.clone(), &deployment)?; + let curr_node = deployment.assigned_node(primary_pool.clone())?; + let reassign_msg = match &curr_node { + Some(curr_node) => format!( + "Reassigning deployment {} (was {})", + deployment.locator(), + curr_node + ), + None => format!("Reassigning deployment {}", deployment.locator()), + }; + println!("{}", reassign_msg); - println!("Reassigning deployment {}", deployment.locator()); - - let reassign_result = - reassign_deployment(primary_pool, notification_sender, &deployment, node)?; + let reassign_result = reassign_deployment( + primary_pool, + notification_sender, + &deployment, + node, + curr_node, + )?; match reassign_result { ReassignResult::EmptyResponse => { diff --git a/node/src/manager/commands/deployment/restart.rs b/node/src/manager/commands/deployment/restart.rs index 4febf81b63c..5f3783b3e92 100644 --- a/node/src/manager/commands/deployment/restart.rs +++ b/node/src/manager/commands/deployment/restart.rs @@ -3,7 +3,7 @@ use std::thread::sleep; use std::time::Duration; use anyhow::Result; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graphman::deployment::DeploymentSelector; diff --git a/node/src/manager/commands/deployment/resume.rs b/node/src/manager/commands/deployment/resume.rs index 7e57d60cd48..01a9924ad51 100644 --- a/node/src/manager/commands/deployment/resume.rs +++ b/node/src/manager/commands/deployment/resume.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use anyhow::Result; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graphman::commands::deployment::resume::load_paused_deployment; use graphman::commands::deployment::resume::resume_paused_deployment; diff --git a/node/src/manager/commands/deployment/unassign.rs b/node/src/manager/commands/deployment/unassign.rs index 45567e81f63..0c27a2f5944 100644 --- a/node/src/manager/commands/deployment/unassign.rs +++ b/node/src/manager/commands/deployment/unassign.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use anyhow::Result; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graphman::commands::deployment::unassign::load_assigned_deployment; use graphman::commands::deployment::unassign::unassign_deployment; diff --git a/node/src/manager/commands/drop.rs b/node/src/manager/commands/drop.rs index 2c86e88e23a..b0d10d0ff63 100644 --- a/node/src/manager/commands/drop.rs +++ b/node/src/manager/commands/drop.rs @@ -4,7 +4,7 @@ use crate::manager::{ prompt::prompt_for_confirmation, }; use graph::anyhow::{self, bail}; -use graph_store_postgres::{connection_pool::ConnectionPool, NotificationSender, SubgraphStore}; +use graph_store_postgres::{ConnectionPool, NotificationSender, SubgraphStore}; use std::sync::Arc; /// Finds, unassigns, record and remove matching deployments. 
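Because the `copy status` hunk above interleaves removed and added lines, the final per-table status logic is easier to read restated on its own. This is a self-contained sketch with invented struct and field types (the real code uses the copy-state rows), mirroring the marker selection added in that hunk:

```rust
// Restatement (hypothetical types, not the graphman code) of the per-table
// status marker that `copy status` now derives from the copy state.
struct TableState {
    finished_at: Option<String>, // a timestamp column in the real code
    target_vid: i64,
    duration_ms: i64,
}

fn status_marker(table: &TableState) -> &'static str {
    match &table.finished_at {
        // table finished
        Some(_) => "✓",
        // empty source table
        None if table.target_vid < 0 => "✓",
        // copying in progress
        None if table.duration_ms > 0 => ">",
        // not started
        None => ".",
    }
}

fn main() {
    let not_started = TableState { finished_at: None, target_vid: 10, duration_ms: 0 };
    let in_progress = TableState { finished_at: None, target_vid: 10, duration_ms: 250 };
    assert_eq!(status_marker(&not_started), ".");
    assert_eq!(status_marker(&in_progress), ">");
}
```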
diff --git a/node/src/manager/commands/index.rs b/node/src/manager/commands/index.rs index a20ce74e9ea..6aa68137ad1 100644 --- a/node/src/manager/commands/index.rs +++ b/node/src/manager/commands/index.rs @@ -6,8 +6,7 @@ use graph::{ }; use graph_store_postgres::{ command_support::index::{CreateIndex, Method}, - connection_pool::ConnectionPool, - SubgraphStore, + ConnectionPool, SubgraphStore, }; use std::io::Write as _; use std::{collections::HashSet, sync::Arc}; diff --git a/node/src/manager/commands/prune.rs b/node/src/manager/commands/prune.rs index c169577ee65..05b1730806d 100644 --- a/node/src/manager/commands/prune.rs +++ b/node/src/manager/commands/prune.rs @@ -6,7 +6,7 @@ use std::{ }; use graph::{ - components::store::{PrunePhase, PruneRequest}, + components::store::{DeploymentLocator, PrunePhase, PruneRequest}, env::ENV_VARS, }; use graph::{ @@ -14,11 +14,15 @@ use graph::{ data::subgraph::status, prelude::{anyhow, BlockNumber}, }; -use graph_store_postgres::{connection_pool::ConnectionPool, Store}; +use graph_store_postgres::{ + command_support::{Phase, PruneTableState}, + ConnectionPool, Store, +}; use crate::manager::{ - commands::stats::{abbreviate_table_name, show_stats}, + commands::stats::show_stats, deployment::DeploymentSearch, + fmt::{self, MapOrNull as _}, }; struct Progress { @@ -66,7 +70,7 @@ fn print_batch( }; print!( "\r{:<30} | {:>10} | {:>9}s {phase}", - abbreviate_table_name(table, 30), + fmt::abbreviate(table, 30), total_rows, elapsed.as_secs() ); @@ -156,15 +160,19 @@ impl PruneReporter for Progress { } } -pub async fn run( - store: Arc, +struct Args { + history: BlockNumber, + deployment: DeploymentLocator, + earliest_block: BlockNumber, + latest_block: BlockNumber, +} + +fn check_args( + store: &Arc, primary_pool: ConnectionPool, search: DeploymentSearch, history: usize, - rebuild_threshold: Option, - delete_threshold: Option, - once: bool, -) -> Result<(), anyhow::Error> { +) -> Result { let history = history as BlockNumber; let deployment = search.locate_unique(&primary_pool)?; let mut info = store @@ -181,22 +189,38 @@ pub async fn run( .chains .pop() .ok_or_else(|| anyhow!("deployment {} does not index any chain", deployment))?; - let latest = status.latest_block.map(|ptr| ptr.number()).unwrap_or(0); - if latest <= history { - return Err(anyhow!("deployment {deployment} has only indexed up to block {latest} and we can't preserve {history} blocks of history")); + let latest_block = status.latest_block.map(|ptr| ptr.number()).unwrap_or(0); + if latest_block <= history { + return Err(anyhow!("deployment {deployment} has only indexed up to block {latest_block} and we can't preserve {history} blocks of history")); } + Ok(Args { + history, + deployment, + earliest_block: status.earliest_block_number, + latest_block, + }) +} - println!("prune {deployment}"); - println!(" latest: {latest}"); - println!(" final: {}", latest - ENV_VARS.reorg_threshold); - println!(" earliest: {}\n", latest - history); +async fn first_prune( + store: &Arc, + args: &Args, + rebuild_threshold: Option, + delete_threshold: Option, +) -> Result<(), anyhow::Error> { + println!("prune {}", args.deployment); + println!( + " range: {} - {} ({} blocks)", + args.earliest_block, + args.latest_block, + args.latest_block - args.earliest_block + ); let mut req = PruneRequest::new( - &deployment, - history, - ENV_VARS.reorg_threshold, - status.earliest_block_number, - latest, + &args.deployment, + args.history, + ENV_VARS.reorg_threshold(), + args.earliest_block, + args.latest_block, )?; if let 
Some(rebuild_threshold) = rebuild_threshold { req.rebuild_threshold = rebuild_threshold; @@ -209,17 +233,188 @@ pub async fn run( store .subgraph_store() - .prune(reporter, &deployment, req) + .prune(reporter, &args.deployment, req) .await?; + Ok(()) +} + +async fn run_inner( + store: Arc, + primary_pool: ConnectionPool, + search: DeploymentSearch, + history: usize, + rebuild_threshold: Option, + delete_threshold: Option, + once: bool, + do_first_prune: bool, +) -> Result<(), anyhow::Error> { + let args = check_args(&store, primary_pool, search, history)?; + + if do_first_prune { + first_prune(&store, &args, rebuild_threshold, delete_threshold).await?; + } // Only after everything worked out, make the history setting permanent if !once { store.subgraph_store().set_history_blocks( - &deployment, - history, - ENV_VARS.reorg_threshold, + &args.deployment, + args.history, + ENV_VARS.reorg_threshold(), )?; } Ok(()) } + +pub async fn run( + store: Arc, + primary_pool: ConnectionPool, + search: DeploymentSearch, + history: usize, + rebuild_threshold: Option, + delete_threshold: Option, + once: bool, +) -> Result<(), anyhow::Error> { + run_inner( + store, + primary_pool, + search, + history, + rebuild_threshold, + delete_threshold, + once, + true, + ) + .await +} + +pub async fn set( + store: Arc, + primary_pool: ConnectionPool, + search: DeploymentSearch, + history: usize, + rebuild_threshold: Option, + delete_threshold: Option, +) -> Result<(), anyhow::Error> { + run_inner( + store, + primary_pool, + search, + history, + rebuild_threshold, + delete_threshold, + false, + false, + ) + .await +} + +pub async fn status( + store: Arc, + primary_pool: ConnectionPool, + search: DeploymentSearch, + run: Option, +) -> Result<(), anyhow::Error> { + fn percentage(left: Option, x: Option, right: Option) -> String { + match (left, x, right) { + (Some(left), Some(x), Some(right)) => { + let range = right - left; + if range == 0 { + return fmt::null(); + } + let percent = (x - left) as f64 / range as f64 * 100.0; + format!("{:.0}%", percent.min(100.0)) + } + _ => fmt::null(), + } + } + + let deployment = search.locate_unique(&primary_pool)?; + + let viewer = store.subgraph_store().prune_viewer(&deployment).await?; + let runs = viewer.runs()?; + if runs.is_empty() { + return Err(anyhow!("No prune runs found for deployment {deployment}")); + } + let run = run.unwrap_or(*runs.last().unwrap()); + let Some((state, table_states)) = viewer.state(run)? 
else { + let runs = match runs.len() { + 0 => unreachable!("we checked that runs is not empty"), + 1 => format!("There is only one prune run #{}", runs[0]), + 2 => format!("Only prune runs #{} and #{} exist", runs[0], runs[1]), + _ => format!( + "Only prune runs #{} and #{} up to #{} exist", + runs[0], + runs[1], + runs.last().unwrap() + ), + }; + return Err(anyhow!( + "No information about prune run #{run} found for deployment {deployment}.\n {runs}" + )); + }; + println!("prune {deployment} (run #{run})"); + println!( + " range: {} - {} ({} blocks, should keep {} blocks)", + state.first_block, + state.latest_block, + state.latest_block - state.first_block, + state.history_blocks + ); + println!(" started: {}", fmt::date_time(&state.started_at)); + match &state.finished_at { + Some(finished_at) => println!(" finished: {}", fmt::date_time(finished_at)), + None => println!(" finished: still running"), + } + println!( + " duration: {}", + fmt::duration(&state.started_at, &state.finished_at) + ); + + println!( + "\n{:^30} | {:^22} | {:^8} | {:^11} | {:^8}", + "table", "status", "rows", "batch_size", "duration" + ); + println!( + "{:-^30}-+-{:-^22}-+-{:-^8}-+-{:-^11}-+-{:-^8}", + "", "", "", "", "" + ); + for ts in table_states { + #[allow(unused_variables)] + let PruneTableState { + vid: _, + id: _, + run: _, + table_name, + strategy, + phase, + start_vid, + final_vid, + nonfinal_vid, + rows, + next_vid, + batch_size, + started_at, + finished_at, + } = ts; + + let complete = match phase { + Phase::Queued | Phase::Started => "0%".to_string(), + Phase::CopyFinal => percentage(start_vid, next_vid, final_vid), + Phase::CopyNonfinal | Phase::Delete => percentage(start_vid, next_vid, nonfinal_vid), + Phase::Done => fmt::check(), + Phase::Unknown => fmt::null(), + }; + + let table_name = fmt::abbreviate(&table_name, 30); + let rows = rows.map_or_null(|rows| rows.to_string()); + let batch_size = batch_size.map_or_null(|b| b.to_string()); + let duration = started_at.map_or_null(|s| fmt::duration(&s, &finished_at)); + let phase = phase.as_str(); + println!( + "{table_name:<30} | {:<15} {complete:>6} | {rows:>8} | {batch_size:>11} | {duration:>8}", + format!("{strategy}/{phase}") + ); + } + Ok(()) +} diff --git a/node/src/manager/commands/rewind.rs b/node/src/manager/commands/rewind.rs index 339f2ec979a..51d432dfd49 100644 --- a/node/src/manager/commands/rewind.rs +++ b/node/src/manager/commands/rewind.rs @@ -10,8 +10,8 @@ use graph::components::store::{BlockStore as _, ChainStore as _, DeploymentLocat use graph::env::ENV_VARS; use graph::prelude::{anyhow, BlockNumber, BlockPtr}; use graph_store_postgres::command_support::catalog::{self as store_catalog}; -use graph_store_postgres::{connection_pool::ConnectionPool, Store}; use graph_store_postgres::{BlockStore, NotificationSender}; +use graph_store_postgres::{ConnectionPool, Store}; async fn block_ptr( store: Arc, @@ -133,13 +133,13 @@ pub async fn run( let deployment_details = deployment_store.deployment_details_for_id(locator)?; let block_number_to = block_ptr_to.as_ref().map(|b| b.number).unwrap_or(0); - if block_number_to < deployment_details.earliest_block_number + ENV_VARS.reorg_threshold { + if block_number_to < deployment_details.earliest_block_number + ENV_VARS.reorg_threshold() { bail!( "The block number {} is not safe to rewind to for deployment {}. The earliest block number of this deployment is {}. 
You can only safely rewind to block number {}", block_ptr_to.as_ref().map(|b| b.number).unwrap_or(0), locator, deployment_details.earliest_block_number, - deployment_details.earliest_block_number + ENV_VARS.reorg_threshold + deployment_details.earliest_block_number + ENV_VARS.reorg_threshold() ); } } diff --git a/node/src/manager/commands/stats.rs b/node/src/manager/commands/stats.rs index d1c2635bf4a..abb02fdb77c 100644 --- a/node/src/manager/commands/stats.rs +++ b/node/src/manager/commands/stats.rs @@ -3,6 +3,7 @@ use std::collections::HashSet; use std::sync::Arc; use crate::manager::deployment::DeploymentSearch; +use crate::manager::fmt; use diesel::r2d2::ConnectionManager; use diesel::r2d2::PooledConnection; use diesel::PgConnection; @@ -11,7 +12,7 @@ use graph::components::store::VersionStats; use graph::prelude::anyhow; use graph_store_postgres::command_support::catalog as store_catalog; use graph_store_postgres::command_support::catalog::Site; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::Shard; use graph_store_postgres::SubgraphStore; use graph_store_postgres::PRIMARY_SHARD; @@ -51,19 +52,6 @@ pub async fn account_like( Ok(()) } -pub fn abbreviate_table_name(table: &str, size: usize) -> String { - if table.len() > size { - let fragment = size / 2 - 2; - let last = table.len() - fragment; - let mut table = table.to_string(); - table.replace_range(fragment..last, ".."); - let table = table.trim().to_string(); - table - } else { - table.to_string() - } -} - pub fn show_stats( stats: &[VersionStats], account_like: HashSet, @@ -83,7 +71,7 @@ pub fn show_stats( fn print_stats(s: &VersionStats, account_like: bool) { println!( "{:<26} {:3} | {:>10} | {:>10} | {:>5.1}%", - abbreviate_table_name(&s.tablename, 26), + fmt::abbreviate(&s.tablename, 26), if account_like { "(a)" } else { " " }, s.entities, s.versions, diff --git a/node/src/manager/commands/txn_speed.rs b/node/src/manager/commands/txn_speed.rs index f36aa2dac41..480d4669a9f 100644 --- a/node/src/manager/commands/txn_speed.rs +++ b/node/src/manager/commands/txn_speed.rs @@ -2,7 +2,7 @@ use diesel::PgConnection; use std::{collections::HashMap, thread::sleep, time::Duration}; use graph::prelude::anyhow; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use crate::manager::catalog; diff --git a/node/src/manager/deployment.rs b/node/src/manager/deployment.rs index fc1a3e0e5a7..a7cedbd33f2 100644 --- a/node/src/manager/deployment.rs +++ b/node/src/manager/deployment.rs @@ -11,14 +11,14 @@ use graph::{ prelude::{anyhow, lazy_static, regex::Regex, DeploymentHash}, }; use graph_store_postgres::command_support::catalog as store_catalog; -use graph_store_postgres::connection_pool::ConnectionPool; use graph_store_postgres::unused; +use graph_store_postgres::ConnectionPool; lazy_static! 
{ // `Qm...` optionally follow by `:$shard` static ref HASH_RE: Regex = Regex::new("\\A(?PQm[^:]+)(:(?P[a-z0-9_]+))?\\z").unwrap(); // `sgdNNN` - static ref DEPLOYMENT_RE: Regex = Regex::new("\\A(?Psgd[0-9]+)\\z").unwrap(); + static ref DEPLOYMENT_RE: Regex = Regex::new("\\A(?P(sgd)?[0-9]+)\\z").unwrap(); } /// A search for one or multiple deployments to make it possible to search @@ -58,7 +58,12 @@ impl FromStr for DeploymentSearch { Ok(DeploymentSearch::Hash { hash, shard }) } else if let Some(caps) = DEPLOYMENT_RE.captures(s) { let namespace = caps.name("nsp").unwrap().as_str().to_string(); - Ok(DeploymentSearch::Deployment { namespace }) + if namespace.starts_with("sgd") { + Ok(DeploymentSearch::Deployment { namespace }) + } else { + let namespace = format!("sgd{namespace}"); + Ok(DeploymentSearch::Deployment { namespace }) + } } else { Ok(DeploymentSearch::Name { name: s.to_string(), diff --git a/node/src/manager/fmt.rs b/node/src/manager/fmt.rs new file mode 100644 index 00000000000..6aaa12192a7 --- /dev/null +++ b/node/src/manager/fmt.rs @@ -0,0 +1,123 @@ +use std::time::SystemTime; + +use graph::prelude::chrono::{DateTime, Duration, Local, Utc}; + +pub const NULL: &str = "ø"; +const CHECK: &str = "✓"; + +pub fn null() -> String { + NULL.to_string() +} + +pub fn check() -> String { + CHECK.to_string() +} + +pub trait MapOrNull { + fn map_or_null(&self, f: F) -> String + where + F: FnOnce(&T) -> String; +} + +impl MapOrNull for Option { + fn map_or_null(&self, f: F) -> String + where + F: FnOnce(&T) -> String, + { + self.as_ref() + .map(|value| f(value)) + .unwrap_or_else(|| NULL.to_string()) + } +} + +/// Return the duration from `start` to `end` formatted using +/// `human_duration`. Use now if `end` is `None` +pub fn duration(start: &DateTime, end: &Option>) -> String { + let start = *start; + let end = *end; + + let end = end.unwrap_or(DateTime::::from(SystemTime::now())); + let duration = end - start; + + human_duration(duration) +} + +/// Format a duration using ms/s/m as units depending on how long the +/// duration was +pub fn human_duration(duration: Duration) -> String { + if duration.num_seconds() < 5 { + format!("{}ms", duration.num_milliseconds()) + } else if duration.num_minutes() < 5 { + format!("{}s", duration.num_seconds()) + } else { + let minutes = duration.num_minutes(); + if minutes < 90 { + format!("{}m", duration.num_minutes()) + } else { + let hours = minutes / 60; + let minutes = minutes % 60; + if hours < 24 { + format!("{}h {}m", hours, minutes) + } else { + let days = hours / 24; + let hours = hours % 24; + format!("{}d {}h {}m", days, hours, minutes) + } + } + } +} + +/// Abbreviate a long name to fit into `size` characters. The abbreviation +/// is done by replacing the middle of the name with `..`. For example, if +/// `name` is `foo_bar_baz` and `size` is 10, the result will be +/// `foo.._baz`. If the name is shorter than `size`, it is returned +/// unchanged. 
+pub fn abbreviate(name: &str, size: usize) -> String { + if name.len() > size { + let fragment = size / 2 - 2; + let last = name.len() - fragment; + let mut name = name.to_string(); + name.replace_range(fragment..last, ".."); + let table = name.trim().to_string(); + table + } else { + name.to_string() + } +} + +pub fn date_time(date: &DateTime) -> String { + let date = DateTime::::from(*date); + date.format("%Y-%m-%d %H:%M:%S%Z").to_string() +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_human_duration() { + let duration = Duration::seconds(1); + assert_eq!(human_duration(duration), "1000ms"); + + let duration = Duration::seconds(10); + assert_eq!(human_duration(duration), "10s"); + + let duration = Duration::minutes(5); + assert_eq!(human_duration(duration), "5m"); + + let duration = Duration::hours(1); + assert_eq!(human_duration(duration), "60m"); + + let duration = Duration::minutes(100); + assert_eq!(human_duration(duration), "1h 40m"); + + let duration = Duration::days(1); + assert_eq!(human_duration(duration), "1d 0h 0m"); + + let duration = Duration::days(1) + Duration::minutes(35); + assert_eq!(human_duration(duration), "1d 0h 35m"); + + let duration = Duration::days(1) + Duration::minutes(95); + assert_eq!(human_duration(duration), "1d 1h 35m"); + } +} diff --git a/node/src/manager/mod.rs b/node/src/manager/mod.rs index 6a332653ca8..d95e5fbadc1 100644 --- a/node/src/manager/mod.rs +++ b/node/src/manager/mod.rs @@ -8,6 +8,7 @@ pub mod color; pub mod commands; pub mod deployment; mod display; +pub mod fmt; pub mod prompt; /// A dummy subscription manager that always panics diff --git a/node/src/network_setup.rs b/node/src/network_setup.rs index 1ebe2b5109c..55a4995eb6b 100644 --- a/node/src/network_setup.rs +++ b/node/src/network_setup.rs @@ -30,8 +30,8 @@ use graph_store_postgres::{BlockStore, ChainHeadUpdateListener}; use std::{any::Any, cmp::Ordering, sync::Arc, time::Duration}; use crate::chain::{ - create_all_ethereum_networks, create_firehose_networks, create_substreams_networks, - networks_as_chains, + create_ethereum_networks, create_firehose_networks, create_substreams_networks, + networks_as_chains, AnyChainFilter, ChainFilter, OneChainFilter, }; #[derive(Debug, Clone)] @@ -183,31 +183,38 @@ impl Networks { .await } - pub async fn from_config( + async fn from_config_inner( logger: Logger, config: &crate::config::Config, registry: Arc, endpoint_metrics: Arc, provider_checks: &[Arc], + chain_filter: &dyn ChainFilter, ) -> Result { if config.query_only(&config.node) { return Ok(Networks::noop()); } - let eth = create_all_ethereum_networks( + let eth = create_ethereum_networks( logger.cheap_clone(), registry, &config, endpoint_metrics.cheap_clone(), + chain_filter, ) .await?; let firehose = create_firehose_networks( logger.cheap_clone(), &config, endpoint_metrics.cheap_clone(), + chain_filter, + ); + let substreams = create_substreams_networks( + logger.cheap_clone(), + &config, + endpoint_metrics, + chain_filter, ); - let substreams = - create_substreams_networks(logger.cheap_clone(), &config, endpoint_metrics); let adapters: Vec<_> = eth .into_iter() .chain(firehose.into_iter()) @@ -217,6 +224,44 @@ impl Networks { Ok(Networks::new(&logger, adapters, provider_checks)) } + pub async fn from_config_for_chain( + logger: Logger, + config: &crate::config::Config, + registry: Arc, + endpoint_metrics: Arc, + provider_checks: &[Arc], + chain_name: &str, + ) -> Result { + let filter = OneChainFilter::new(chain_name.to_string()); + Self::from_config_inner( + 
logger, + config, + registry, + endpoint_metrics, + provider_checks, + &filter, + ) + .await + } + + pub async fn from_config( + logger: Logger, + config: &crate::config::Config, + registry: Arc, + endpoint_metrics: Arc, + provider_checks: &[Arc], + ) -> Result { + Self::from_config_inner( + logger, + config, + registry, + endpoint_metrics, + provider_checks, + &AnyChainFilter, + ) + .await + } + fn new( logger: &Logger, adapters: Vec, diff --git a/node/src/store_builder.rs b/node/src/store_builder.rs index 7fadf6b92c2..e1d1d38635f 100644 --- a/node/src/store_builder.rs +++ b/node/src/store_builder.rs @@ -1,20 +1,18 @@ use std::iter::FromIterator; use std::{collections::HashMap, sync::Arc}; -use graph::futures03::future::join_all; use graph::prelude::{o, MetricsRegistry, NodeId}; +use graph::slog::warn; use graph::url::Url; use graph::{ prelude::{info, CheapClone, Logger}, util::security::SafeDisplay, }; -use graph_store_postgres::connection_pool::{ - ConnectionPool, ForeignServer, PoolCoordinator, PoolName, -}; use graph_store_postgres::{ BlockStore as DieselBlockStore, ChainHeadUpdateListener as PostgresChainHeadUpdateListener, - ChainStoreMetrics, NotificationSender, Shard as ShardName, Store as DieselStore, SubgraphStore, - SubscriptionManager, PRIMARY_SHARD, + ChainStoreMetrics, ConnectionPool, ForeignServer, NotificationSender, PoolCoordinator, + PoolRole, Shard as ShardName, Store as DieselStore, SubgraphStore, SubscriptionManager, + PRIMARY_SHARD, }; use crate::config::{Config, Shard}; @@ -62,7 +60,7 @@ impl StoreBuilder { // attempt doesn't work for all of them because the database is // unavailable, they will try again later in the normal course of // using the pool - join_all(pools.values().map(|pool| pool.setup())).await; + coord.setup_all(logger).await; let chains = HashMap::from_iter(config.chains.chains.iter().map(|(name, chain)| { let shard = ShardName::new(chain.shard.to_string()) @@ -111,13 +109,28 @@ impl StoreBuilder { .collect::, _>>() .expect("connection url's contain enough detail"); let servers = Arc::new(servers); - let coord = Arc::new(PoolCoordinator::new(servers)); + let coord = Arc::new(PoolCoordinator::new(logger, servers)); let shards: Vec<_> = config .stores .iter() - .map(|(name, shard)| { + .filter_map(|(name, shard)| { let logger = logger.new(o!("shard" => name.to_string())); + let pool_size = shard.pool_size.size_for(node, name).unwrap_or_else(|_| { + panic!("cannot determine the pool size for store {}", name) + }); + if pool_size == 0 { + if name == PRIMARY_SHARD.as_str() { + panic!("pool size for primary shard must be greater than 0"); + } else { + warn!( + logger, + "pool size for shard {} is 0, ignoring this shard", name + ); + return None; + } + } + let conn_pool = Self::main_pool( &logger, node, @@ -138,7 +151,7 @@ impl StoreBuilder { let name = ShardName::new(name.to_string()).expect("shard names have been validated"); - (name, conn_pool, read_only_conn_pools, weights) + Some((name, conn_pool, read_only_conn_pools, weights)) }) .collect(); @@ -196,8 +209,8 @@ impl StoreBuilder { Arc::new(DieselStore::new(subgraph_store, block_store)) } - /// Create a connection pool for the main database of the primary shard - /// without connecting to all the other configured databases + /// Create a connection pool for the main (non-replica) database of a + /// shard pub fn main_pool( logger: &Logger, node: &NodeId, @@ -225,7 +238,7 @@ impl StoreBuilder { coord.create_pool( &logger, name, - PoolName::Main, + PoolRole::Main, shard.connection.clone(), pool_size, 
Some(fdw_pool_size), @@ -265,7 +278,7 @@ impl StoreBuilder { coord.clone().create_pool( &logger, name, - PoolName::Replica(pool), + PoolRole::Replica(pool), replica.connection.clone(), pool_size, None, diff --git a/runtime/derive/Cargo.toml b/runtime/derive/Cargo.toml index 9019e5ad36e..bc3f74ec9f6 100644 --- a/runtime/derive/Cargo.toml +++ b/runtime/derive/Cargo.toml @@ -9,5 +9,5 @@ proc-macro = true [dependencies] syn = { workspace = true } quote = "1.0" -proc-macro2 = "1.0.85" +proc-macro2 = "1.0.94" heck = "0.5" diff --git a/runtime/wasm/src/module/mod.rs b/runtime/wasm/src/module/mod.rs index 4b01b3a5fd8..b911542ffe5 100644 --- a/runtime/wasm/src/module/mod.rs +++ b/runtime/wasm/src/module/mod.rs @@ -70,23 +70,13 @@ impl ToAscPtr for offchain::TriggerData { } } -impl ToAscPtr for subgraph::TriggerData { - fn to_asc_ptr( - self, - heap: &mut H, - gas: &GasCounter, - ) -> Result, HostExportError> { - asc_new(heap, &self.entity, gas).map(|ptr| ptr.erase()) - } -} - impl ToAscPtr for subgraph::MappingEntityTrigger { fn to_asc_ptr( self, heap: &mut H, gas: &GasCounter, ) -> Result, HostExportError> { - asc_new(heap, &self.data.entity, gas).map(|ptr| ptr.erase()) + asc_new(heap, &self.data.entity.entity.sorted_ref(), gas).map(|ptr| ptr.erase()) } } diff --git a/runtime/wasm/src/to_from/external.rs b/runtime/wasm/src/to_from/external.rs index 9bbe0298abc..6bb7122613f 100644 --- a/runtime/wasm/src/to_from/external.rs +++ b/runtime/wasm/src/to_from/external.rs @@ -1,13 +1,11 @@ use ethabi; -use graph::blockchain::block_stream::{EntityOperationKind, EntitySourceOperation}; use graph::data::store::scalar::Timestamp; use graph::data::value::Word; use graph::prelude::{BigDecimal, BigInt}; use graph::runtime::gas::GasCounter; use graph::runtime::{ - asc_get, asc_new, AscIndexId, AscPtr, AscType, AscValue, HostExportError, IndexForAscTypeId, - ToAscObj, + asc_get, asc_new, AscIndexId, AscPtr, AscType, AscValue, HostExportError, ToAscObj, }; use graph::{data::store, runtime::DeterministicHostError}; use graph::{prelude::serde_json, runtime::FromAscObj}; @@ -474,39 +472,6 @@ pub enum AscSubgraphEntityOp { Delete, } -#[derive(AscType)] -pub struct AscEntityTrigger { - pub entity_op: AscSubgraphEntityOp, - pub entity_type: AscPtr, - pub entity: AscPtr, - pub vid: i64, -} - -impl ToAscObj for EntitySourceOperation { - fn to_asc_obj( - &self, - heap: &mut H, - gas: &GasCounter, - ) -> Result { - let entity_op = match self.entity_op { - EntityOperationKind::Create => AscSubgraphEntityOp::Create, - EntityOperationKind::Modify => AscSubgraphEntityOp::Modify, - EntityOperationKind::Delete => AscSubgraphEntityOp::Delete, - }; - - Ok(AscEntityTrigger { - entity_op, - entity_type: asc_new(heap, &self.entity_type.as_str(), gas)?, - entity: asc_new(heap, &self.entity.sorted_ref(), gas)?, - vid: self.vid, - }) - } -} - -impl AscIndexId for AscEntityTrigger { - const INDEX_ASC_TYPE_ID: IndexForAscTypeId = IndexForAscTypeId::AscEntityTrigger; -} - impl ToAscObj> for serde_yaml::Value { fn to_asc_obj( &self, diff --git a/server/graphman/src/resolvers/context.rs b/server/graphman/src/resolvers/context.rs index 8cc3e819c6d..14726b2ae30 100644 --- a/server/graphman/src/resolvers/context.rs +++ b/server/graphman/src/resolvers/context.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use async_graphql::Context; use async_graphql::Result; -use graph_store_postgres::connection_pool::ConnectionPool; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graph_store_postgres::Store; diff --git 
a/server/graphman/src/resolvers/deployment_mutation/reassign.rs b/server/graphman/src/resolvers/deployment_mutation/reassign.rs index 3887d67032a..026ef94ed9f 100644 --- a/server/graphman/src/resolvers/deployment_mutation/reassign.rs +++ b/server/graphman/src/resolvers/deployment_mutation/reassign.rs @@ -14,11 +14,14 @@ pub fn run( node: &NodeId, ) -> Result { let deployment = load_deployment(ctx.primary_pool.clone(), deployment)?; + let curr_node = deployment.assigned_node(ctx.primary_pool.clone())?; + let reassign_result = reassign_deployment( ctx.primary_pool.clone(), ctx.notification_sender.clone(), &deployment, &node, + curr_node, )?; Ok(reassign_result) } diff --git a/server/graphman/src/server.rs b/server/graphman/src/server.rs index ea71e7c2228..a969433cdea 100644 --- a/server/graphman/src/server.rs +++ b/server/graphman/src/server.rs @@ -10,8 +10,8 @@ use axum::Router; use graph::log::factory::LoggerFactory; use graph::prelude::ComponentLoggerConfig; use graph::prelude::ElasticComponentLoggerConfig; -use graph_store_postgres::connection_pool::ConnectionPool; use graph_store_postgres::graphman::GraphmanStore; +use graph_store_postgres::ConnectionPool; use graph_store_postgres::NotificationSender; use graph_store_postgres::Store; use slog::{info, Logger}; diff --git a/server/index-node/Cargo.toml b/server/index-node/Cargo.toml index 72b7ff869f7..63c68a311a8 100644 --- a/server/index-node/Cargo.toml +++ b/server/index-node/Cargo.toml @@ -11,4 +11,4 @@ graph-chain-arweave = { path = "../../chain/arweave" } graph-chain-ethereum = { path = "../../chain/ethereum" } graph-chain-near = { path = "../../chain/near" } graph-chain-substreams = { path = "../../chain/substreams" } -git-testament = "0.2.5" +git-testament = "0.2.6" diff --git a/server/index-node/src/resolver.rs b/server/index-node/src/resolver.rs index a60e5d35fd9..7974afe41db 100644 --- a/server/index-node/src/resolver.rs +++ b/server/index-node/src/resolver.rs @@ -777,8 +777,8 @@ fn entity_changes_to_graphql(entity_changes: Vec) -> r::Value { impl Resolver for IndexNodeResolver { const CACHEABLE: bool = false; - async fn query_permit(&self) -> Result { - self.store.query_permit().await.map_err(Into::into) + async fn query_permit(&self) -> QueryPermit { + self.store.query_permit().await } fn prefetch( diff --git a/store/postgres/Cargo.toml b/store/postgres/Cargo.toml index 9a746646807..c95b3cb83a4 100644 --- a/store/postgres/Cargo.toml +++ b/store/postgres/Cargo.toml @@ -7,7 +7,7 @@ edition.workspace = true async-trait = "0.1.50" blake3 = "1.6" chrono = { workspace = true } -derive_more = { version = "0.99.18" } +derive_more = { version = "2.0.1", features = ["full"] } diesel = { workspace = true } diesel-dynamic-schema = { workspace = true } diesel-derive-enum = { workspace = true } @@ -21,17 +21,17 @@ lazy_static = "1.5" lru_time_cache = "0.11" maybe-owned = "0.3.4" postgres = "0.19.1" -openssl = "0.10.71" +openssl = "0.10.72" postgres-openssl = "0.5.0" rand = "0.8.4" serde = { workspace = true } serde_json = { workspace = true } stable-hash_legacy = { git = "https://github.com/graphprotocol/stable-hash", branch = "old", package = "stable-hash" } anyhow = "1.0.86" -git-testament = "0.2.5" +git-testament = "0.2.6" itertools = "0.13.0" hex = "0.4.3" -pretty_assertions = "1.4.0" +pretty_assertions = "1.4.1" [dev-dependencies] clap.workspace = true diff --git a/store/postgres/migrations/2025-04-08-224710_add_prune_state/down.sql b/store/postgres/migrations/2025-04-08-224710_add_prune_state/down.sql new file 
mode 100644 index 00000000000..324bc18f154 --- /dev/null +++ b/store/postgres/migrations/2025-04-08-224710_add_prune_state/down.sql @@ -0,0 +1,2 @@ +drop table subgraphs.prune_table_state; +drop table subgraphs.prune_state; diff --git a/store/postgres/migrations/2025-04-08-224710_add_prune_state/up.sql b/store/postgres/migrations/2025-04-08-224710_add_prune_state/up.sql new file mode 100644 index 00000000000..8c767ed7384 --- /dev/null +++ b/store/postgres/migrations/2025-04-08-224710_add_prune_state/up.sql @@ -0,0 +1,60 @@ +create table subgraphs.prune_state( + -- diesel can't deal with composite primary keys + vid int primary key + generated always as identity, + + -- id of the deployment + id int not null, + -- how many times the deployment has been pruned + run int not null, + + -- from PruneRequest + first_block int not null, + final_block int not null, + latest_block int not null, + history_blocks int not null, + + started_at timestamptz not null, + finished_at timestamptz, + + constraint prune_state_id_run_uq unique(id, run) +); + +create table subgraphs.prune_table_state( + -- diesel can't deal with composite primary keys + vid int primary key + generated always as identity, + + id int not null, + run int not null, + table_name text not null, + -- 'r' (rebuild) or 'd' (delete) + strategy char not null, + phase text not null, + + start_vid int8, + final_vid int8, + nonfinal_vid int8, + rows int8, + + next_vid int8, + batch_size int8, + + started_at timestamptz, + finished_at timestamptz, + + constraint prune_table_state_id_run_table_name_uq + unique(id, run, table_name), + + constraint prune_table_state_strategy_ck + check(strategy in ('r', 'd')), + + constraint prune_table_state_phase_ck + check(phase in ('queued', 'started', 'copy_final', + 'copy_nonfinal', 'delete', 'done')), + + constraint prune_table_state_id_run_fk + foreign key(id, run) + references subgraphs.prune_state(id, run) + on delete cascade +); diff --git a/store/postgres/src/advisory_lock.rs b/store/postgres/src/advisory_lock.rs index bd60d34c634..85e2cf5a4ae 100644 --- a/store/postgres/src/advisory_lock.rs +++ b/store/postgres/src/advisory_lock.rs @@ -6,7 +6,7 @@ //! has more details on advisory locks. //! //! We use the following 64 bit locks: -//! * 1,2: to synchronize on migratons +//! * 1: to synchronize on migratons //! //! We use the following 2x 32-bit locks //! * 1, n: to lock copying of the deployment with id n in the destination @@ -69,17 +69,31 @@ const COPY: Scope = Scope { id: 1 }; const WRITE: Scope = Scope { id: 2 }; const PRUNE: Scope = Scope { id: 3 }; -/// Get a lock for running migrations. Blocks until we get the lock. -pub(crate) fn lock_migration(conn: &mut PgConnection) -> Result<(), StoreError> { - sql_query("select pg_advisory_lock(1)").execute(conn)?; +/// Block until we can get the migration lock, then run `f` and unlock when +/// it is done. This is used to make sure that only one node runs setup at a +/// time. 
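A minimal caller sketch for the new helper, assuming a `&mut PgConnection` named `conn` and a hypothetical `run_setup` function returning `Result<(), StoreError>`; the closure only runs while advisory lock 1 is held, and the lock is released whether or not setup succeeds:

    let result = with_migration_lock(&mut conn, |conn| {
        // run the one-node-at-a-time setup work while the lock is held
        let out = run_setup(conn);
        // the helper expects a future, so wrap the finished Result
        async move { out }
    })
    .await;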
+pub(crate) async fn with_migration_lock( + conn: &mut PgConnection, + f: F, +) -> Result +where + F: FnOnce(&mut PgConnection) -> Fut, + Fut: std::future::Future>, +{ + fn execute(conn: &mut PgConnection, query: &str, msg: &str) -> Result<(), StoreError> { + sql_query(query).execute(conn).map(|_| ()).map_err(|e| { + StoreError::from_diesel_error(&e) + .unwrap_or_else(|| StoreError::Unknown(anyhow::anyhow!("{}: {}", msg, e))) + }) + } - Ok(()) -} + const LOCK: &str = "select pg_advisory_lock(1)"; + const UNLOCK: &str = "select pg_advisory_unlock(1)"; -/// Release the migration lock. -pub(crate) fn unlock_migration(conn: &mut PgConnection) -> Result<(), StoreError> { - sql_query("select pg_advisory_unlock(1)").execute(conn)?; - Ok(()) + execute(conn, LOCK, "failed to acquire migration lock")?; + let res = f(conn).await; + execute(conn, UNLOCK, "failed to release migration lock")?; + res } /// Take the lock used to keep two copy operations to run simultaneously on diff --git a/store/postgres/src/block_range.rs b/store/postgres/src/block_range.rs index 7dbcaa29c00..d6044c644ad 100644 --- a/store/postgres/src/block_range.rs +++ b/store/postgres/src/block_range.rs @@ -1,3 +1,4 @@ +use derive_more::Constructor; use diesel::pg::Pg; use diesel::query_builder::{AstPass, QueryFragment}; use diesel::result::QueryResult; diff --git a/store/postgres/src/block_store.rs b/store/postgres/src/block_store.rs index 9af40b8d2a0..d34915248b3 100644 --- a/store/postgres/src/block_store.rs +++ b/store/postgres/src/block_store.rs @@ -17,13 +17,13 @@ use graph::{ prelude::{error, info, BlockNumber, BlockPtr, Logger, ENV_VARS}, slog::o, }; -use graph::{constraint_violation, prelude::CheapClone}; +use graph::{internal_error, prelude::CheapClone}; use graph::{prelude::StoreError, util::timed_cache::TimedCache}; use crate::{ chain_head_listener::ChainHeadUpdateSender, chain_store::{ChainStoreMetrics, Storage}, - connection_pool::ConnectionPool, + pool::ConnectionPool, primary::Mirror as PrimaryMirror, ChainStore, NotificationSender, Shard, PRIMARY_SHARD, }; @@ -55,12 +55,12 @@ pub mod primary { }; use graph::{ blockchain::{BlockHash, ChainIdentifier}, - constraint_violation, + internal_error, prelude::StoreError, }; use crate::chain_store::Storage; - use crate::{connection_pool::ConnectionPool, Shard}; + use crate::{ConnectionPool, Shard}; table! { chains(id) { @@ -92,7 +92,7 @@ pub mod primary { net_version: self.net_version.clone(), genesis_block_hash: BlockHash::try_from(self.genesis_block.as_str()).map_err( |e| { - constraint_violation!( + internal_error!( "the genesis block hash `{}` for chain `{}` is not a valid hash: {}", self.genesis_block, self.name, @@ -319,11 +319,7 @@ impl BlockStore { } pub(crate) async fn query_permit_primary(&self) -> QueryPermit { - self.mirror - .primary() - .query_permit() - .await - .expect("the primary is never disabled") + self.mirror.primary().query_permit().await } pub fn allocate_chain( @@ -370,7 +366,7 @@ impl BlockStore { let pool = self .pools .get(&chain.shard) - .ok_or_else(|| constraint_violation!("there is no pool for shard {}", chain.shard))? + .ok_or_else(|| internal_error!("there is no pool for shard {}", chain.shard))? 
.clone(); let sender = ChainHeadUpdateSender::new( self.mirror.primary().clone(), @@ -431,7 +427,7 @@ impl BlockStore { pub fn chain_head_block(&self, chain: &str) -> Result, StoreError> { let store = self .store(chain) - .ok_or_else(|| constraint_violation!("unknown network `{}`", chain))?; + .ok_or_else(|| internal_error!("unknown network `{}`", chain))?; store.chain_head_block(chain) } @@ -470,7 +466,7 @@ impl BlockStore { pub fn drop_chain(&self, chain: &str) -> Result<(), StoreError> { let chain_store = self .store(chain) - .ok_or_else(|| constraint_violation!("unknown chain {}", chain))?; + .ok_or_else(|| internal_error!("unknown chain {}", chain))?; // Delete from the primary first since that's where // deployment_schemas has a fk constraint on chains @@ -507,7 +503,7 @@ impl BlockStore { }; if let Some(head_block) = store.remove_cursor(&&store.chain)? { - let lower_bound = head_block.saturating_sub(ENV_VARS.reorg_threshold * 2); + let lower_bound = head_block.saturating_sub(ENV_VARS.reorg_threshold() * 2); info!(&self.logger, "Removed cursor for non-firehose chain, now cleaning shallow blocks"; "network" => &store.chain, "lower_bound" => lower_bound); store.cleanup_shallow_blocks(lower_bound)?; } diff --git a/store/postgres/src/catalog.rs b/store/postgres/src/catalog.rs index 1524a768acc..a6767082555 100644 --- a/store/postgres/src/catalog.rs +++ b/store/postgres/src/catalog.rs @@ -22,8 +22,8 @@ use graph::{ prelude::{lazy_static, StoreError}, }; -use crate::connection_pool::ForeignServer; use crate::{ + pool::ForeignServer, primary::{Namespace, Site, NAMESPACE_PUBLIC}, relational::SqlName, }; @@ -398,6 +398,16 @@ pub fn drop_foreign_schema(conn: &mut PgConnection, src: &Site) -> Result<(), St Ok(()) } +pub fn foreign_tables(conn: &mut PgConnection, nsp: &str) -> Result, StoreError> { + use foreign_tables as ft; + + ft::table + .filter(ft::foreign_table_schema.eq(nsp)) + .select(ft::foreign_table_name) + .get_results::(conn) + .map_err(StoreError::from) +} + /// Drop the schema `nsp` and all its contents if it exists, and create it /// again so that `nsp` is an empty schema pub fn recreate_schema(conn: &mut PgConnection, nsp: &str) -> Result<(), StoreError> { diff --git a/store/postgres/src/chain_head_listener.rs b/store/postgres/src/chain_head_listener.rs index b10ab46529f..301c1f19209 100644 --- a/store/postgres/src/chain_head_listener.rs +++ b/store/postgres/src/chain_head_listener.rs @@ -11,8 +11,8 @@ use std::sync::Arc; use lazy_static::lazy_static; use crate::{ - connection_pool::ConnectionPool, notification_listener::{JsonNotification, NotificationListener, SafeChannelName}, + pool::ConnectionPool, NotificationSender, }; use graph::blockchain::ChainHeadUpdateListener as ChainHeadUpdateListenerTrait; diff --git a/store/postgres/src/chain_store.rs b/store/postgres/src/chain_store.rs index 097aa799eff..a94c44a8870 100644 --- a/store/postgres/src/chain_store.rs +++ b/store/postgres/src/chain_store.rs @@ -30,12 +30,11 @@ use graph::prelude::{ BlockPtr, CachedEthereumCall, CancelableError, ChainStore as ChainStoreTrait, Error, EthereumCallCache, StoreError, }; -use graph::{constraint_violation, ensure}; +use graph::{ensure, internal_error}; use self::recent_blocks_cache::RecentBlocksCache; use crate::{ - block_store::ChainStatus, chain_head_listener::ChainHeadUpdateSender, - connection_pool::ConnectionPool, + block_store::ChainStatus, chain_head_listener::ChainHeadUpdateSender, pool::ConnectionPool, }; /// Our own internal notion of a block @@ -98,8 +97,8 @@ mod data { update, }; 
use graph::blockchain::{Block, BlockHash}; - use graph::constraint_violation; use graph::data::store::scalar::Bytes; + use graph::internal_error; use graph::prelude::ethabi::ethereum_types::H160; use graph::prelude::transaction_receipt::LightTransactionReceipt; use graph::prelude::web3::types::H256; @@ -176,7 +175,7 @@ mod data { if bytes.len() == H256::len_bytes() { Ok(H256::from_slice(bytes)) } else { - Err(constraint_violation!( + Err(internal_error!( "invalid H256 value `{}` has {} bytes instead of {}", graph::prelude::hex::encode(bytes), bytes.len(), @@ -1840,7 +1839,7 @@ impl ChainStore { number.map(|number| number.try_into()).transpose().map_err( |e: std::num::TryFromIntError| { - constraint_violation!( + internal_error!( "head block number for {} is {:?} which does not fit into a u32: {}", chain, number, @@ -2792,7 +2791,7 @@ impl EthereumCallCache for ChainStore { let mut resps = Vec::new(); for (id, retval, _) in rows { let idx = ids.iter().position(|i| i.as_ref() == id).ok_or_else(|| { - constraint_violation!( + internal_error!( "get_calls returned a call id that was not requested: {}", hex::encode(id) ) diff --git a/store/postgres/src/copy.rs b/store/postgres/src/copy.rs index f2f7e9f1d66..58420b053a5 100644 --- a/store/postgres/src/copy.rs +++ b/store/postgres/src/copy.rs @@ -13,12 +13,17 @@ //! `graph-node` was restarted while the copy was running. use std::{ convert::TryFrom, - ops::DerefMut, - sync::Arc, + future::Future, + pin::Pin, + sync::{ + atomic::{AtomicBool, AtomicI64, Ordering}, + Arc, Mutex, + }, time::{Duration, Instant}, }; use diesel::{ + connection::SimpleConnection as _, dsl::sql, insert_into, r2d2::{ConnectionManager, PooledConnection}, @@ -26,21 +31,26 @@ use diesel::{ QueryDsl, RunQueryDsl, }; use graph::{ - constraint_violation, - prelude::{info, o, warn, BlockNumber, BlockPtr, Logger, StoreError}, + futures03::{future::select_all, FutureExt as _}, + internal_error, + prelude::{ + info, lazy_static, o, warn, BlockNumber, BlockPtr, CheapClone, Logger, StoreError, ENV_VARS, + }, schema::EntityType, + slog::error, + tokio, }; use itertools::Itertools; use crate::{ advisory_lock, catalog, deployment, dynds::DataSourcesTable, - primary::{DeploymentId, Site}, - relational::index::IndexList, + primary::{DeploymentId, Primary, Site}, + relational::{index::IndexList, Layout, Table}, + relational_queries as rq, vid_batcher::{VidBatcher, VidRange}, + ConnectionPool, }; -use crate::{connection_pool::ConnectionPool, relational::Layout}; -use crate::{relational::Table, relational_queries as rq}; const LOG_INTERVAL: Duration = Duration::from_secs(3 * 60); @@ -54,6 +64,13 @@ const ACCEPTABLE_REPLICATION_LAG: Duration = Duration::from_secs(30); /// the lag again const REPLICATION_SLEEP: Duration = Duration::from_secs(10); +lazy_static! { + static ref STATEMENT_TIMEOUT: Option = ENV_VARS + .store + .batch_timeout + .map(|duration| format!("set local statement_timeout={}", duration.as_millis())); +} + table! { subgraphs.copy_state(dst) { // deployment_schemas.id @@ -85,32 +102,24 @@ table! { } } -// This is the same as primary::active_copies, but mapped into each shard -table! 
{ - primary_public.active_copies(dst) { - src -> Integer, - dst -> Integer, - cancelled_at -> Nullable, - } -} - -#[derive(Copy, Clone, PartialEq, Eq)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum Status { Finished, Cancelled, } -#[allow(dead_code)] struct CopyState { src: Arc, dst: Arc, target_block: BlockPtr, - tables: Vec, + finished: Vec, + unfinished: Vec, } impl CopyState { fn new( conn: &mut PgConnection, + primary: Primary, src: Arc, dst: Arc, target_block: BlockPtr, @@ -131,7 +140,7 @@ impl CopyState { Some((src_id, hash, number)) => { let stored_target_block = BlockPtr::from((hash, number)); if stored_target_block != target_block { - return Err(constraint_violation!( + return Err(internal_error!( "CopyState {} for copying {} to {} has incompatible block pointer {} instead of {}", dst.site.id, src.site.deployment, @@ -140,7 +149,7 @@ impl CopyState { target_block)); } if src_id != src.site.id { - return Err(constraint_violation!( + return Err(internal_error!( "CopyState {} for copying {} to {} has incompatible source {} instead of {}", dst.site.id, src.site.deployment, @@ -149,9 +158,9 @@ impl CopyState { src.site.id )); } - Self::load(conn, src, dst, target_block) + Self::load(conn, primary, src, dst, target_block) } - None => Self::create(conn, src, dst, target_block), + None => Self::create(conn, primary.cheap_clone(), src, dst, target_block), }?; Ok(state) @@ -159,21 +168,27 @@ impl CopyState { fn load( conn: &mut PgConnection, + primary: Primary, src: Arc, dst: Arc, target_block: BlockPtr, ) -> Result { - let tables = TableState::load(conn, src.as_ref(), dst.as_ref())?; + let tables = TableState::load(conn, primary, src.as_ref(), dst.as_ref())?; + let (finished, mut unfinished): (Vec<_>, Vec<_>) = + tables.into_iter().partition(|table| table.finished()); + unfinished.sort_by_key(|table| table.dst.object.to_string()); Ok(CopyState { src, dst, target_block, - tables, + finished, + unfinished, }) } fn create( conn: &mut PgConnection, + primary: Primary, src: Arc, dst: Arc, target_block: BlockPtr, @@ -190,7 +205,7 @@ impl CopyState { )) .execute(conn)?; - let mut tables: Vec<_> = dst + let mut unfinished: Vec<_> = dst .tables .values() .filter_map(|dst_table| { @@ -199,6 +214,7 @@ impl CopyState { .map(|src_table| { TableState::init( conn, + primary.cheap_clone(), dst.site.clone(), &src, src_table.clone(), @@ -208,9 +224,9 @@ impl CopyState { }) }) .collect::>()?; - tables.sort_by_key(|table| table.dst.object.to_string()); + unfinished.sort_by_key(|table| table.dst.object.to_string()); - let values = tables + let values = unfinished .iter() .map(|table| { ( @@ -228,7 +244,8 @@ impl CopyState { src, dst, target_block, - tables, + finished: Vec::new(), + unfinished, }) } @@ -258,7 +275,7 @@ impl CopyState { // drop_foreign_schema does), see that we do not have // metadata for `src` if crate::deployment::exists(conn, &self.src.site)? { - return Err(constraint_violation!( + return Err(internal_error!( "we think we are copying {}[{}] across shards from {} to {}, but the \ source subgraph is actually in this shard", self.src.site.deployment, @@ -272,6 +289,10 @@ impl CopyState { } Ok(()) } + + fn all_tables(&self) -> impl Iterator { + self.finished.iter().chain(self.unfinished.iter()) + } } pub(crate) fn source( @@ -295,6 +316,7 @@ pub(crate) fn source( /// transformation. See `CopyEntityBatchQuery` for the details of what /// exactly that means struct TableState { + primary: Primary, src: Arc, dst: Arc
, dst_site: Arc, @@ -305,6 +327,7 @@ struct TableState { impl TableState { fn init( conn: &mut PgConnection, + primary: Primary, dst_site: Arc, src_layout: &Layout, src: Arc
, @@ -314,6 +337,7 @@ impl TableState { let vid_range = VidRange::for_copy(conn, &src, target_block)?; let batcher = VidBatcher::load(conn, &src_layout.site.namespace, src.as_ref(), vid_range)?; Ok(Self { + primary, src, dst, dst_site, @@ -328,6 +352,7 @@ impl TableState { fn load( conn: &mut PgConnection, + primary: Primary, src_layout: &Layout, dst_layout: &Layout, ) -> Result, StoreError> { @@ -343,7 +368,7 @@ impl TableState { layout .table_for_entity(entity_type) .map_err(|e| { - constraint_violation!( + internal_error!( "invalid {} table {} in CopyState {} (table {}): {}", kind, entity_type, @@ -391,6 +416,7 @@ impl TableState { .with_batch_size(size as usize); Ok(TableState { + primary: primary.cheap_clone(), src, dst, dst_site: dst_layout.site.clone(), @@ -457,13 +483,8 @@ impl TableState { } fn is_cancelled(&self, conn: &mut PgConnection) -> Result { - use active_copies as ac; - let dst = self.dst_site.as_ref(); - let canceled = ac::table - .filter(ac::dst.eq(dst.id)) - .select(ac::cancelled_at.is_not_null()) - .get_result::(conn)?; + let canceled = self.primary.is_copy_cancelled(dst)?; if canceled { use copy_state as cs; @@ -495,37 +516,57 @@ impl TableState { Ok(Status::Finished) } + + fn set_batch_size(&mut self, conn: &mut PgConnection, size: usize) -> Result<(), StoreError> { + use copy_table_state as cts; + + self.batcher.set_batch_size(size); + + update( + cts::table + .filter(cts::dst.eq(self.dst_site.id)) + .filter(cts::entity_type.eq(self.dst.object.as_str())), + ) + .set(cts::batch_size.eq(self.batcher.batch_size() as i64)) + .execute(conn)?; + + Ok(()) + } } -// A helper for logging progress while data is being copied -struct CopyProgress<'a> { - logger: &'a Logger, - last_log: Instant, +// A helper for logging progress while data is being copied and +// communicating across all copy workers +struct CopyProgress { + logger: Logger, + last_log: Arc>, src: Arc, dst: Arc, - current_vid: i64, + /// The sum of all `target_vid` of tables that have finished + current_vid: AtomicI64, target_vid: i64, + cancelled: AtomicBool, } -impl<'a> CopyProgress<'a> { - fn new(logger: &'a Logger, state: &CopyState) -> Self { +impl CopyProgress { + fn new(logger: Logger, state: &CopyState) -> Self { let target_vid: i64 = state - .tables - .iter() + .all_tables() .map(|table| table.batcher.target_vid()) .sum(); let current_vid = state - .tables - .iter() + .all_tables() + .filter(|table| table.finished()) .map(|table| table.batcher.next_vid()) .sum(); + let current_vid = AtomicI64::new(current_vid); Self { logger, - last_log: Instant::now(), + last_log: Arc::new(Mutex::new(Instant::now())), src: state.src.site.clone(), dst: state.dst.site.clone(), current_vid, target_vid, + cancelled: AtomicBool::new(false), } } @@ -540,6 +581,16 @@ impl<'a> CopyProgress<'a> { ); } + fn start_table(&self, table: &TableState) { + info!( + self.logger, + "Starting to copy `{}` entities from {} to {}", + table.dst.object, + table.src.qualified_name, + table.dst.qualified_name + ); + } + fn progress_pct(current_vid: i64, target_vid: i64) -> f64 { // When a step is done, current_vid == target_vid + 1; don't report // more than 100% completion @@ -550,8 +601,21 @@ impl<'a> CopyProgress<'a> { } } - fn update(&mut self, entity_type: &EntityType, batcher: &VidBatcher) { - if self.last_log.elapsed() > LOG_INTERVAL { + fn update(&self, entity_type: &EntityType, batcher: &VidBatcher) { + let mut last_log = self.last_log.lock().unwrap_or_else(|err| { + // Better to clear the poison error and skip a log message than + // 
crash for no important reason + warn!( + self.logger, + "Lock for progress locking was poisoned, skipping a log message" + ); + let mut last_log = err.into_inner(); + *last_log = Instant::now(); + self.last_log.clear_poison(); + last_log + }); + if last_log.elapsed() > LOG_INTERVAL { + let total_current_vid = self.current_vid.load(Ordering::SeqCst) + batcher.next_vid(); info!( self.logger, "Copied {:.2}% of `{}` entities ({}/{} entity versions), {:.2}% of overall data", @@ -559,14 +623,15 @@ impl<'a> CopyProgress<'a> { entity_type, batcher.next_vid(), batcher.target_vid(), - Self::progress_pct(self.current_vid + batcher.next_vid(), self.target_vid) + Self::progress_pct(total_current_vid, self.target_vid) ); - self.last_log = Instant::now(); + *last_log = Instant::now(); } } - fn table_finished(&mut self, batcher: &VidBatcher) { - self.current_vid += batcher.next_vid(); + fn table_finished(&self, batcher: &VidBatcher) { + self.current_vid + .fetch_add(batcher.next_vid(), Ordering::SeqCst); } fn finished(&self) { @@ -575,6 +640,262 @@ impl<'a> CopyProgress<'a> { "Finished copying data into {}[{}]", self.dst.deployment, self.dst.namespace ); } + + fn cancel(&self) { + self.cancelled.store(true, Ordering::SeqCst); + } + + fn is_cancelled(&self) -> bool { + self.cancelled.load(Ordering::SeqCst) + } +} + +enum WorkerResult { + Ok(CopyTableWorker), + Err(StoreError), + Wake, +} + +impl From> for WorkerResult { + fn from(result: Result) -> Self { + match result { + Ok(worker) => WorkerResult::Ok(worker), + Err(e) => WorkerResult::Err(e), + } + } +} + +/// We pass connections back and forth between the control loop and various +/// workers. We need to make sure that we end up with the connection that +/// was used to acquire the copy lock in the right place so we can release +/// the copy lock which is only possible with the connection that acquired +/// it. +/// +/// This struct helps us with that. It wraps a connection and tracks whether +/// the connection was used to acquire the copy lock +struct LockTrackingConnection { + inner: PooledConnection>, + has_lock: bool, +} + +impl LockTrackingConnection { + fn new(inner: PooledConnection>) -> Self { + Self { + inner, + has_lock: false, + } + } + + fn transaction(&mut self, f: F) -> Result + where + F: FnOnce(&mut PgConnection) -> Result, + { + let conn = &mut self.inner; + conn.transaction(|conn| f(conn)) + } + + /// Put `self` into `other` if `self` has the lock. + fn extract(self, other: &mut Option) { + if self.has_lock { + *other = Some(self); + } + } + + fn lock(&mut self, logger: &Logger, dst: &Site) -> Result<(), StoreError> { + if self.has_lock { + warn!(logger, "already acquired copy lock for {}", dst); + return Ok(()); + } + advisory_lock::lock_copying(&mut self.inner, dst)?; + self.has_lock = true; + Ok(()) + } + + fn unlock(&mut self, logger: &Logger, dst: &Site) -> Result<(), StoreError> { + if !self.has_lock { + error!( + logger, + "tried to release copy lock for {} even though we are not the owner", dst + ); + return Ok(()); + } + advisory_lock::unlock_copying(&mut self.inner, dst)?; + self.has_lock = false; + Ok(()) + } +} + +/// A helper to run copying of one table. We need to thread `conn` and +/// `table` from the control loop to the background worker and back again to +/// the control loop. 
This worker facilitates that +struct CopyTableWorker { + conn: LockTrackingConnection, + table: TableState, + result: Result, +} + +impl CopyTableWorker { + fn new(conn: LockTrackingConnection, table: TableState) -> Self { + Self { + conn, + table, + result: Ok(Status::Cancelled), + } + } + + async fn run(mut self, logger: Logger, progress: Arc) -> WorkerResult { + let object = self.table.dst.object.cheap_clone(); + graph::spawn_blocking_allow_panic(move || { + self.result = self.run_inner(logger, &progress); + self + }) + .await + .map_err(|e| internal_error!("copy worker for {} panicked: {}", object, e)) + .into() + } + + fn run_inner(&mut self, logger: Logger, progress: &CopyProgress) -> Result { + use Status::*; + + let conn = &mut self.conn.inner; + progress.start_table(&self.table); + while !self.table.finished() { + // It is important that this check happens outside the write + // transaction so that we do not hold on to locks acquired + // by the check + if self.table.is_cancelled(conn)? || progress.is_cancelled() { + progress.cancel(); + return Ok(Cancelled); + } + + // Pause copying if replication is lagging behind to avoid + // overloading replicas + let mut lag = catalog::replication_lag(conn)?; + if lag > MAX_REPLICATION_LAG { + loop { + info!(logger, + "Replicas are lagging too much; pausing copying for {}s to allow them to catch up", + REPLICATION_SLEEP.as_secs(); + "lag_s" => lag.as_secs()); + std::thread::sleep(REPLICATION_SLEEP); + lag = catalog::replication_lag(conn)?; + if lag <= ACCEPTABLE_REPLICATION_LAG { + break; + } + } + } + + let status = { + loop { + if progress.is_cancelled() { + break Cancelled; + } + + match conn.transaction(|conn| { + if let Some(timeout) = STATEMENT_TIMEOUT.as_ref() { + conn.batch_execute(timeout)?; + } + self.table.copy_batch(conn) + }) { + Ok(status) => { + break status; + } + Err(StoreError::StatementTimeout) => { + let timeout = ENV_VARS + .store + .batch_timeout + .map(|t| t.as_secs().to_string()) + .unwrap_or_else(|| "unlimted".to_string()); + warn!( + logger, + "Current batch timed out. Retrying with a smaller batch size."; + "timeout_s" => timeout, + "table" => self.table.dst.qualified_name.as_str(), + "current_vid" => self.table.batcher.next_vid(), + "current_batch_size" => self.table.batcher.batch_size(), + ); + } + Err(e) => { + return Err(e); + } + } + // We hit a timeout. Reset the batch size to 1. + // That's small enough that we will make _some_ + // progress, assuming the timeout is set to a + // reasonable value (several minutes) + // + // Our estimation of batch sizes is generally good + // and stays within the prescribed bounds, but there + // are cases where proper estimation of the batch + // size is nearly impossible since the size of the + // rows in the table jumps sharply at some point + // that is hard to predict. This mechanism ensures + // that if our estimation is wrong, the consequences + // aren't too severe. + conn.transaction(|conn| self.table.set_batch_size(conn, 1))?; + } + }; + + if status == Cancelled { + progress.cancel(); + return Ok(Cancelled); + } + progress.update(&self.table.dst.object, &self.table.batcher); + } + progress.table_finished(&self.table.batcher); + Ok(Finished) + } +} + +/// A helper to manage the workers that are copying data. Besides the actual +/// workers it also keeps a worker that wakes us up periodically to give us +/// a chance to create more workers if there are database connections +/// available +struct Workers { + /// The list of workers that are currently running. 
This will always + /// include a future that wakes us up periodically + futures: Vec>>>, +} + +impl Workers { + fn new() -> Self { + Self { + futures: vec![Self::waker()], + } + } + + fn add(&mut self, worker: Pin>>) { + self.futures.push(worker); + } + + fn has_work(&self) -> bool { + self.futures.len() > 1 + } + + async fn select(&mut self) -> WorkerResult { + use WorkerResult::*; + + let futures = std::mem::take(&mut self.futures); + let (result, _idx, remaining) = select_all(futures).await; + self.futures = remaining; + match result { + Ok(_) | Err(_) => { /* nothing to do */ } + Wake => { + self.futures.push(Self::waker()); + } + } + result + } + + fn waker() -> Pin>> { + let sleep = tokio::time::sleep(ENV_VARS.store.batch_target_duration); + Box::pin(sleep.map(|()| WorkerResult::Wake)) + } + + /// Return the number of workers that are not the waker + fn len(&self) -> usize { + self.futures.len() - 1 + } } /// A helper for copying subgraphs @@ -582,12 +903,25 @@ pub struct Connection { /// The connection pool for the shard that will contain the destination /// of the copy logger: Logger, - conn: PooledConnection>, + /// We always have one database connection to make sure that copy jobs, + /// once started, can eventually finished so that we don't have + /// different copy jobs that are all half done and have to wait for + /// other jobs to finish + /// + /// This is an `Option` because we need to take this connection out of + /// `self` at some point to spawn a background task to copy an + /// individual table. Except for that case, this will always be + /// `Some(..)`. Most code shouldn't access `self.conn` directly, but use + /// `self.transaction` + conn: Option, + pool: ConnectionPool, + primary: Primary, + workers: usize, src: Arc, dst: Arc, target_block: BlockPtr, - src_manifest_idx_and_name: Vec<(i32, String)>, - dst_manifest_idx_and_name: Vec<(i32, String)>, + src_manifest_idx_and_name: Arc>, + dst_manifest_idx_and_name: Arc>, } impl Connection { @@ -599,6 +933,7 @@ impl Connection { /// is available. pub fn new( logger: &Logger, + primary: Primary, pool: ConnectionPool, src: Arc, dst: Arc, @@ -609,7 +944,7 @@ impl Connection { let logger = logger.new(o!("dst" => dst.site.namespace.to_string())); if src.site.schema_version != dst.site.schema_version { - return Err(StoreError::ConstraintViolation(format!( + return Err(StoreError::InternalError(format!( "attempted to copy between different schema versions, \ source version is {} but destination version is {}", src.site.schema_version, dst.site.schema_version @@ -624,9 +959,15 @@ impl Connection { } false })?; + let src_manifest_idx_and_name = Arc::new(src_manifest_idx_and_name); + let dst_manifest_idx_and_name = Arc::new(dst_manifest_idx_and_name); + let conn = Some(LockTrackingConnection::new(conn)); Ok(Self { logger, conn, + pool, + primary, + workers: ENV_VARS.store.batch_workers, src, dst, target_block, @@ -639,104 +980,249 @@ impl Connection { where F: FnOnce(&mut PgConnection) -> Result, { - self.conn.transaction(|conn| f(conn)) + let Some(conn) = self.conn.as_mut() else { + return Err(internal_error!( + "copy connection has been handed to background task but not returned yet (transaction)" + )); + }; + conn.transaction(|conn| f(conn)) } /// Copy private data sources if the source uses a schema version that /// has a private data sources table. The copying is done in its own /// transaction. 
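The `Workers` helper above multiplexes the per-table copy jobs with `select_all` and always keeps one pending sleep future as a waker, so the control loop periodically regains control and can start extra workers when idle fdw connections become available. A self-contained sketch of that pattern, using the plain `futures` and `tokio` crates directly (the patch reaches them through `graph::futures03` and `graph::tokio`):

    use std::future::Future;
    use std::pin::Pin;

    use futures::future::{select_all, FutureExt};
    use tokio::time::{sleep, Duration};

    enum Event {
        Done(usize),
        Wake,
    }

    type BoxedEvent = Pin<Box<dyn Future<Output = Event> + Send>>;

    // One always-present sleep future acts as the waker.
    fn waker(period: Duration) -> BoxedEvent {
        Box::pin(sleep(period).map(|()| Event::Wake))
    }

    #[tokio::main]
    async fn main() {
        let mut futures: Vec<BoxedEvent> = vec![waker(Duration::from_millis(100))];
        // stand-in for a copy job that takes a while to finish
        futures.push(Box::pin(sleep(Duration::from_millis(250)).map(|()| Event::Done(1))));

        // "has work" as long as there is more than just the waker
        while futures.len() > 1 {
            let (event, _idx, rest) = select_all(futures).await;
            futures = rest;
            match event {
                Event::Done(id) => println!("job {id} finished"),
                // keep exactly one waker in the set
                Event::Wake => futures.push(waker(Duration::from_millis(100))),
            }
        }
    }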
fn copy_private_data_sources(&mut self, state: &CopyState) -> Result<(), StoreError> { + let src_manifest_idx_and_name = self.src_manifest_idx_and_name.cheap_clone(); + let dst_manifest_idx_and_name = self.dst_manifest_idx_and_name.cheap_clone(); if state.src.site.schema_version.private_data_sources() { - let conn = &mut self.conn; - conn.transaction(|conn| { + self.transaction(|conn| { DataSourcesTable::new(state.src.site.namespace.clone()).copy_to( conn, &DataSourcesTable::new(state.dst.site.namespace.clone()), state.target_block.number, - &self.src_manifest_idx_and_name, - &self.dst_manifest_idx_and_name, + &src_manifest_idx_and_name, + &dst_manifest_idx_and_name, ) })?; } Ok(()) } - pub fn copy_data_internal(&mut self, index_list: IndexList) -> Result { + /// Create a worker using the connection in `self.conn`. This may return + /// `None` if there are no more tables that need to be copied. It is an + /// error to call this if `self.conn` is `None` + fn default_worker( + &mut self, + state: &mut CopyState, + progress: &Arc, + ) -> Option>>> { + let Some(conn) = self.conn.take() else { + return None; + }; + let Some(table) = state.unfinished.pop() else { + self.conn = Some(conn); + return None; + }; + + let worker = CopyTableWorker::new(conn, table); + Some(Box::pin( + worker.run(self.logger.cheap_clone(), progress.cheap_clone()), + )) + } + + /// Opportunistically create an extra worker if we have more tables to + /// copy and there are idle fdw connections. If there are no more tables + /// or no idle connections, this will return `None`. + fn extra_worker( + &mut self, + state: &mut CopyState, + progress: &Arc, + ) -> Option>>> { + // It's important that we get the connection before the table since + // we remove the table from the state and could drop it otherwise + let Some(conn) = self + .pool + .try_get_fdw(&self.logger, ENV_VARS.store.batch_worker_wait) + else { + return None; + }; + let Some(table) = state.unfinished.pop() else { + return None; + }; + let conn = LockTrackingConnection::new(conn); + + let worker = CopyTableWorker::new(conn, table); + Some(Box::pin( + worker.run(self.logger.cheap_clone(), progress.cheap_clone()), + )) + } + + /// Check that we can make progress, i.e., that we have at least one + /// worker that copies as long as there are unfinished tables. This is a + /// safety check to guard against `copy_data_internal` looping forever + /// because of some internal inconsistency + fn assert_progress(&self, num_workers: usize, state: &CopyState) -> Result<(), StoreError> { + if num_workers == 0 && !state.unfinished.is_empty() { + // Something bad happened. We should have at least one + // worker if there are still tables to copy + if self.conn.is_none() { + return Err(internal_error!( + "copy connection has been handed to background task but not returned yet (copy_data_internal)" + )); + } else { + return Err(internal_error!("no workers left but still tables to copy")); + } + } + Ok(()) + } + + /// Wait for all workers to finish. 
This is called when we a worker has + /// failed with an error that forces us to abort copying + async fn cancel_workers(&mut self, progress: Arc, mut workers: Workers) { + progress.cancel(); + error!( + self.logger, + "copying encountered an error; waiting for all workers to finish" + ); + while workers.has_work() { + use WorkerResult::*; + let result = workers.select().await; + match result { + Ok(worker) => { + worker.conn.extract(&mut self.conn); + } + Err(e) => { + /* Ignore; we had an error previously */ + error!(self.logger, "copy worker panicked: {}", e); + } + Wake => { /* Ignore; this is just a waker */ } + } + } + } + + async fn copy_data_internal(&mut self, index_list: IndexList) -> Result { let src = self.src.clone(); let dst = self.dst.clone(); let target_block = self.target_block.clone(); - let mut state = self.transaction(|conn| CopyState::new(conn, src, dst, target_block))?; + let primary = self.primary.cheap_clone(); + let mut state = + self.transaction(|conn| CopyState::new(conn, primary, src, dst, target_block))?; - let logger = &self.logger.clone(); - let mut progress = CopyProgress::new(logger, &state); + let progress = Arc::new(CopyProgress::new(self.logger.cheap_clone(), &state)); progress.start(); - for table in state.tables.iter_mut().filter(|table| !table.finished()) { - while !table.finished() { - // It is important that this check happens outside the write - // transaction so that we do not hold on to locks acquired - // by the check - if table.is_cancelled(&mut self.conn)? { - return Ok(Status::Cancelled); + // Run as many copy jobs as we can in parallel, up to `self.workers` + // many. We can always start at least one worker because of the + // connection in `self.conn`. If the fdw pool has idle connections + // and there are more tables to be copied, we can start more + // workers, up to `self.workers` many + // + // The loop has to be very careful about terminating early so that + // we do not ever leave the loop with `self.conn == None` + let mut workers = Workers::new(); + while !state.unfinished.is_empty() || workers.has_work() { + // We usually add at least one job here, except if we are out of + // tables to copy. In that case, we go through the `while` loop + // every time one of the tables we are currently copying + // finishes + if let Some(worker) = self.default_worker(&mut state, &progress) { + workers.add(worker); + } + loop { + if workers.len() >= self.workers { + break; } + let Some(worker) = self.extra_worker(&mut state, &progress) else { + break; + }; + workers.add(worker); + } - // Pause copying if replication is lagging behind to avoid - // overloading replicas - let mut lag = catalog::replication_lag(&mut self.conn)?; - if lag > MAX_REPLICATION_LAG { - loop { - info!(&self.logger, - "Replicas are lagging too much; pausing copying for {}s to allow them to catch up", - REPLICATION_SLEEP.as_secs(); - "lag_s" => lag.as_secs()); - std::thread::sleep(REPLICATION_SLEEP); - lag = catalog::replication_lag(&mut self.conn)?; - if lag <= ACCEPTABLE_REPLICATION_LAG { - break; + self.assert_progress(workers.len(), &state)?; + let result = workers.select().await; + + // Analyze `result` and take another trip through the loop if + // everything is ok; wait for pending workers and return if + // there was an error or if copying was cancelled. + use WorkerResult as W; + match result { + W::Err(e) => { + // This is a panic in the background task. 
We need to + // cancel all other tasks and return the error + error!(self.logger, "copy worker panicked: {}", e); + self.cancel_workers(progress, workers).await; + return Err(e); + } + W::Ok(worker) => { + // Put the connection back into self.conn so that we can use it + // in the next iteration. + worker.conn.extract(&mut self.conn); + + match (worker.result, progress.is_cancelled()) { + (Ok(Status::Finished), false) => { + // The worker finished successfully, and nothing was + // cancelled; take another trip through the loop + state.finished.push(worker.table); + } + (Ok(Status::Finished), true) => { + state.finished.push(worker.table); + self.cancel_workers(progress, workers).await; + return Ok(Status::Cancelled); + } + (Ok(Status::Cancelled), _) => { + self.cancel_workers(progress, workers).await; + return Ok(Status::Cancelled); + } + (Err(e), _) => { + error!(self.logger, "copy worker had an error: {}", e); + self.cancel_workers(progress, workers).await; + return Err(e); } } } - - let status = self.transaction(|conn| table.copy_batch(conn))?; - if status == Status::Cancelled { - return Ok(status); + W::Wake => { + // nothing to do, just try to create more workers by + // going through the loop again } - progress.update(&table.dst.object, &table.batcher); - } - progress.table_finished(&table.batcher); + }; } + debug_assert!(self.conn.is_some()); // Create indexes for all the attributes that were postponed at the start of // the copy/graft operations. // First recreate the indexes that existed in the original subgraph. - let conn = self.conn.deref_mut(); - for table in state.tables.iter() { + for table in state.all_tables() { let arr = index_list.indexes_for_table( &self.dst.site.namespace, &table.src.name.to_string(), &table.dst, true, + false, true, )?; for (_, sql) in arr { let query = sql_query(format!("{};", sql)); - query.execute(conn)?; + self.transaction(|conn| query.execute(conn).map_err(StoreError::from))?; } } // Second create the indexes for the new fields. // Here we need to skip those created in the first step for the old fields. - for table in state.tables.iter() { + for table in state.all_tables() { let orig_colums = table .src .columns .iter() .map(|c| c.name.to_string()) .collect_vec(); - for sql in table.dst.create_postponed_indexes(orig_colums).into_iter() { + for sql in table + .dst + .create_postponed_indexes(orig_colums, false) + .into_iter() + { let query = sql_query(sql); - query.execute(conn)?; + self.transaction(|conn| query.execute(conn).map_err(StoreError::from))?; } } @@ -764,7 +1250,7 @@ impl Connection { /// lower(v1.block_range) => v2.vid > v1.vid` and we can therefore stop /// the copying of each table as soon as we hit `max_vid = max { v.vid | /// lower(v.block_range) <= target_block.number }`. - pub fn copy_data(&mut self, index_list: IndexList) -> Result { + pub async fn copy_data(mut self, index_list: IndexList) -> Result { // We require sole access to the destination site, and that we get a // consistent view of what has been copied so far. In general, that // is always true. 
It can happen though that this function runs when @@ -777,9 +1263,31 @@ impl Connection { &self.logger, "Obtaining copy lock (this might take a long time if another process is still copying)" ); - advisory_lock::lock_copying(&mut self.conn, self.dst.site.as_ref())?; - let res = self.copy_data_internal(index_list); - advisory_lock::unlock_copying(&mut self.conn, self.dst.site.as_ref())?; + + let dst_site = self.dst.site.cheap_clone(); + let Some(conn) = self.conn.as_mut() else { + return Err(internal_error!("copy connection went missing (copy_data)")); + }; + conn.lock(&self.logger, &dst_site)?; + + let res = self.copy_data_internal(index_list).await; + + match self.conn.as_mut() { + None => { + // A background worker panicked and left us without our + // dedicated connection; we would need to get that + // connection to unlock the advisory lock. We can't do that, + // so we just log an error + warn!( + self.logger, + "can't unlock copy lock since the default worker panicked; lock will linger until session ends" + ); + } + Some(conn) => { + conn.unlock(&self.logger, &dst_site)?; + } + } + if matches!(res, Ok(Status::Cancelled)) { warn!(&self.logger, "Copying was cancelled and is incomplete"); } diff --git a/store/postgres/src/deployment.rs b/store/postgres/src/deployment.rs index 92181ac5a6c..49f42ba07b9 100644 --- a/store/postgres/src/deployment.rs +++ b/store/postgres/src/deployment.rs @@ -40,9 +40,9 @@ use stable_hash_legacy::crypto::SetHasher; use std::{collections::BTreeSet, convert::TryFrom, ops::Bound, time::Duration}; use std::{str::FromStr, sync::Arc}; -use crate::connection_pool::ForeignServer; +use crate::ForeignServer; use crate::{block_range::BLOCK_RANGE_COLUMN, primary::Site}; -use graph::constraint_violation; +use graph::internal_error; #[derive(DbEnum, Debug, Clone, Copy)] #[PgType = "text"] @@ -92,7 +92,7 @@ impl TryFrom<Option<&str>> for OnSync { None => Ok(OnSync::None), Some("activate") => Ok(OnSync::Activate), Some("replace") => Ok(OnSync::Replace), - _ => Err(constraint_violation!("illegal value for on_sync: {value}")), + _ => Err(internal_error!("illegal value for on_sync: {value}")), } } } @@ -466,7 +466,7 @@ pub fn transact_block( ))), // More than one matching row was found. - _ => Err(StoreError::ConstraintViolation( + _ => Err(StoreError::InternalError( "duplicate deployments in shard".to_owned(), )), } @@ -515,7 +515,7 @@ pub fn forward_block_ptr( }, // More than one matching row was found. - _ => Err(StoreError::ConstraintViolation( + _ => Err(StoreError::InternalError( "duplicate deployments in shard".to_owned(), )), } @@ -546,10 +546,14 @@ pub fn revert_block_ptr( // Work around a Diesel issue with serializing BigDecimals to numeric let number = format!("{}::numeric", ptr.number); + // The intention is to revert to a block lower than the reorg threshold; on the + // other hand, the earliest we can possibly go is the genesis block, so go to + // genesis even if it's within the reorg threshold. 
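For a concrete example, assuming the default reorg threshold of 250 blocks: reverting to `ptr.number = 100` yields `earliest_block = max(100 - 250, 0) = 0`, so the filter `earliest_block_number <= earliest_block` still matches a deployment whose history starts at genesis instead of comparing against a negative block number and updating nothing.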
+ let earliest_block = i32::max(ptr.number - ENV_VARS.reorg_threshold(), 0); let affected_rows = update( d::table .filter(d::deployment.eq(id.as_str())) - .filter(d::earliest_block_number.le(ptr.number - ENV_VARS.reorg_threshold)), + .filter(d::earliest_block_number.le(earliest_block)), ) .set(( d::latest_ethereum_block_number.eq(sql(&number)), @@ -608,7 +612,7 @@ pub fn initialize_block_ptr(conn: &mut PgConnection, site: &Site) -> Result<(), .select(d::latest_ethereum_block_hash) .first::>>(conn) .map_err(|e| { - constraint_violation!( + internal_error!( "deployment sgd{} must have been created before calling initialize_block_ptr but we got {}", site.id, e ) @@ -641,10 +645,10 @@ pub fn initialize_block_ptr(conn: &mut PgConnection, site: &Site) -> Result<(), fn convert_to_u32(number: Option, field: &str, subgraph: &str) -> Result { number - .ok_or_else(|| constraint_violation!("missing {} for subgraph `{}`", field, subgraph)) + .ok_or_else(|| internal_error!("missing {} for subgraph `{}`", field, subgraph)) .and_then(|number| { u32::try_from(number).map_err(|_| { - constraint_violation!( + internal_error!( "invalid value {:?} for {} in subgraph {}", number, field, @@ -1326,7 +1330,7 @@ pub fn set_on_sync( match n { 0 => Err(StoreError::DeploymentNotFound(site.to_string())), 1 => Ok(()), - _ => Err(constraint_violation!( + _ => Err(internal_error!( "multiple manifests for deployment {}", site.to_string() )), diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index b148129d924..1cb569730a0 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -37,8 +37,8 @@ use std::time::{Duration, Instant}; use graph::components::store::EntityCollection; use graph::components::subgraph::{ProofOfIndexingFinisher, ProofOfIndexingVersion}; -use graph::constraint_violation; use graph::data::subgraph::schema::{DeploymentCreate, SubgraphError}; +use graph::internal_error; use graph::prelude::{ anyhow, debug, info, o, warn, web3, AttributeNames, BlockNumber, BlockPtr, CheapClone, DeploymentHash, DeploymentState, Entity, EntityQuery, Error, Logger, QueryExecutionError, @@ -51,12 +51,12 @@ use crate::block_range::{BLOCK_COLUMN, BLOCK_RANGE_COLUMN}; use crate::deployment::{self, OnSync}; use crate::detail::ErrorDetail; use crate::dynds::DataSourcesTable; -use crate::primary::DeploymentId; +use crate::primary::{DeploymentId, Primary}; use crate::relational::index::{CreateIndex, IndexList, Method}; -use crate::relational::{Layout, LayoutCache, SqlName, Table}; +use crate::relational::{self, Layout, LayoutCache, SqlName, Table}; use crate::relational_queries::FromEntityData; use crate::{advisory_lock, catalog, retry}; -use crate::{connection_pool::ConnectionPool, detail}; +use crate::{detail, ConnectionPool}; use crate::{dynds, primary::Site}; /// When connected to read replicas, this allows choosing which DB server to use for an operation. 
@@ -93,6 +93,8 @@ type PruneHandle = JoinHandle>; pub struct StoreInner { logger: Logger, + primary: Primary, + pool: ConnectionPool, read_only_pools: Vec, @@ -130,6 +132,7 @@ impl Deref for DeploymentStore { impl DeploymentStore { pub fn new( logger: &Logger, + primary: Primary, pool: ConnectionPool, read_only_pools: Vec, mut pool_weights: Vec, @@ -160,6 +163,7 @@ impl DeploymentStore { // Create the store let store = StoreInner { logger: logger.clone(), + primary, pool, read_only_pools, replica_order, @@ -415,7 +419,7 @@ impl DeploymentStore { Ok(conn) } - pub(crate) async fn query_permit(&self, replica: ReplicaId) -> Result { + pub(crate) async fn query_permit(&self, replica: ReplicaId) -> QueryPermit { let pool = match replica { ReplicaId::Main => &self.pool, ReplicaId::ReadOnly(idx) => &self.read_only_pools[idx], @@ -423,7 +427,7 @@ impl DeploymentStore { pool.query_permit().await } - pub(crate) fn wait_stats(&self, replica: ReplicaId) -> Result { + pub(crate) fn wait_stats(&self, replica: ReplicaId) -> PoolWaitStats { match replica { ReplicaId::Main => self.pool.wait_stats(), ReplicaId::ReadOnly(idx) => self.read_only_pools[idx].wait_stats(), @@ -802,7 +806,7 @@ impl DeploymentStore { reorg_threshold: BlockNumber, ) -> Result<(), StoreError> { if history_blocks <= reorg_threshold { - return Err(constraint_violation!( + return Err(internal_error!( "the amount of history to keep for sgd{} can not be set to \ {history_blocks} since it must be more than the \ reorg threshold {reorg_threshold}", @@ -872,6 +876,19 @@ impl DeploymentStore { }) .await } + + pub(crate) async fn prune_viewer( + self: &Arc, + site: Arc, + ) -> Result { + let store = self.cheap_clone(); + let layout = self + .pool + .with_conn(move |conn, _| store.layout(conn, site.clone()).map_err(|e| e.into())) + .await?; + + Ok(relational::prune::Viewer::new(self.pool.clone(), layout)) + } } /// Methods that back the trait `WritableStore`, but have small variations in their signatures @@ -900,20 +917,12 @@ impl DeploymentStore { .await } - pub(crate) fn block_time( - &self, - site: Arc, - block: BlockNumber, - ) -> Result, StoreError> { + pub(crate) fn block_time(&self, site: Arc) -> Result, StoreError> { let store = self.cheap_clone(); let mut conn = self.get_conn()?; let layout = store.layout(&mut conn, site.cheap_clone())?; - if ENV_VARS.store.last_rollup_from_poi { - layout.block_time(&mut conn, block) - } else { - layout.last_rollup(&mut conn) - } + layout.last_rollup(&mut conn) } pub(crate) async fn get_proof_of_indexing( @@ -1212,9 +1221,7 @@ impl DeploymentStore { Some(Ok(Ok(()))) => Ok(false), Some(Ok(Err(err))) => Err(StoreError::PruneFailure(err.to_string())), Some(Err(join_err)) => Err(StoreError::PruneFailure(join_err.to_string())), - None => Err(constraint_violation!( - "prune handle is finished but not ready" - )), + None => Err(internal_error!("prune handle is finished but not ready")), } } Some(false) => { @@ -1234,6 +1241,15 @@ impl DeploymentStore { site: Arc, req: PruneRequest, ) -> Result<(), StoreError> { + { + if store.is_source(&site)? 
{ + debug!( + logger, + "Skipping pruning since this deployment is being copied" + ); + return Ok(()); + } + } let logger2 = logger.cheap_clone(); retry::forever_async(&logger2, "prune", move || { let store = store.cheap_clone(); @@ -1248,7 +1264,7 @@ impl DeploymentStore { let req = PruneRequest::new( &site.as_ref().into(), history_blocks, - ENV_VARS.reorg_threshold, + ENV_VARS.reorg_threshold(), earliest_block, latest_block, )?; @@ -1319,7 +1335,7 @@ impl DeploymentStore { // Sanity check on block numbers let from_number = block_ptr_from.map(|ptr| ptr.number); if from_number <= Some(block_ptr_to.number) { - constraint_violation!( + internal_error!( "truncate must go backwards, but would go from block {} to block {}", from_number.unwrap_or(0), block_ptr_to.number @@ -1345,7 +1361,7 @@ impl DeploymentStore { // Sanity check on block numbers let from_number = block_ptr_from.map(|ptr| ptr.number); if from_number <= Some(block_ptr_to.number) { - constraint_violation!( + internal_error!( "rewind must go backwards, but would go from block {} to block {}", from_number.unwrap_or(0), block_ptr_to.number @@ -1382,7 +1398,7 @@ impl DeploymentStore { let info = self.subgraph_info_with_conn(&mut conn, site.cheap_clone())?; if let Some(graft_block) = info.graft_block { if graft_block > block_ptr_to.number { - return Err(constraint_violation!( + return Err(internal_error!( "Can not revert subgraph `{}` to block {} as it was \ grafted at block {} and reverting past a graft point \ is not possible", @@ -1481,7 +1497,7 @@ impl DeploymentStore { /// to the graph point, so that calling this needlessly with `Some(..)` /// will remove any progress that might have been made since the last /// time the deployment was started. - pub(crate) fn start_subgraph( + pub(crate) async fn start_subgraph( &self, logger: &Logger, site: Arc, @@ -1509,8 +1525,9 @@ impl DeploymentStore { // as adding new tables in `self`; we only need to check that tables // that actually need to be copied from the source are compatible // with the corresponding tables in `self` - let mut copy_conn = crate::copy::Connection::new( + let copy_conn = crate::copy::Connection::new( logger, + self.primary.cheap_clone(), self.pool.clone(), src.clone(), dst.clone(), @@ -1518,7 +1535,7 @@ impl DeploymentStore { src_manifest_idx_and_name, dst_manifest_idx_and_name, )?; - let status = copy_conn.copy_data(index_list)?; + let status = copy_conn.copy_data(index_list).await?; if status == crate::copy::Status::Cancelled { return Err(StoreError::Canceled); } @@ -1543,6 +1560,12 @@ impl DeploymentStore { catalog::copy_account_like(conn, &src.site, &dst.site)?; + // Analyze all tables for this deployment + info!(logger, "Analyzing all {} tables", dst.tables.len()); + for entity_name in dst.tables.keys() { + self.analyze_with_conn(site.cheap_clone(), entity_name.as_str(), conn)?; + } + // Rewind the subgraph so that entity versions that are // clamped in the future (beyond `block`) become valid for // all blocks after `block`. 
`revert_block` gets rid of @@ -1553,6 +1576,7 @@ impl DeploymentStore { .number .checked_add(1) .expect("block numbers fit into an i32"); + info!(logger, "Rewinding to block {}", block.number); let count = dst.revert_block(conn, block_to_revert)?; deployment::update_entity_count(conn, &dst.site, count)?; @@ -1565,11 +1589,6 @@ impl DeploymentStore { src_deployment.manifest.history_blocks, )?; - // Analyze all tables for this deployment - for entity_name in dst.tables.keys() { - self.analyze_with_conn(site.cheap_clone(), entity_name.as_str(), conn)?; - } - // The `earliest_block` for `src` might have changed while // we did the copy if `src` was pruned while we copied; // adjusting it very late in the copy process ensures that @@ -1837,6 +1856,10 @@ impl DeploymentStore { }) .await } + + fn is_source(&self, site: &Site) -> Result { + self.primary.is_source(site) + } } /// Tries to fetch a [`Table`] either by its Entity name or its SQL name. diff --git a/store/postgres/src/detail.rs b/store/postgres/src/detail.rs index 807e238f4fe..168af5b5d51 100644 --- a/store/postgres/src/detail.rs +++ b/store/postgres/src/detail.rs @@ -17,7 +17,7 @@ use graph::prelude::{ BigDecimal, BlockPtr, DeploymentHash, StoreError, SubgraphDeploymentEntity, }; use graph::schema::InputSchema; -use graph::{constraint_violation, data::subgraph::status, prelude::web3::types::H256}; +use graph::{data::subgraph::status, internal_error, prelude::web3::types::H256}; use itertools::Itertools; use std::collections::HashMap; use std::convert::TryFrom; @@ -134,7 +134,7 @@ impl TryFrom for SubgraphError { _ => None, }; let subgraph_id = DeploymentHash::new(subgraph_id).map_err(|id| { - StoreError::ConstraintViolation(format!("invalid subgraph id `{}` in fatal error", id)) + StoreError::InternalError(format!("invalid subgraph id `{}` in fatal error", id)) })?; Ok(SubgraphError { subgraph_id, @@ -155,7 +155,7 @@ pub(crate) fn block( match (hash, number) { (Some(hash), Some(number)) => { let number = number.to_i32().ok_or_else(|| { - constraint_violation!( + internal_error!( "the block number {} for {} in {} is not representable as an i32", number, name, @@ -168,7 +168,7 @@ pub(crate) fn block( ))) } (None, None) => Ok(None), - (hash, number) => Err(constraint_violation!( + (hash, number) => Err(internal_error!( "the hash and number \ of a block pointer must either both be null or both have a \ value, but for `{}` the hash of {} is `{:?}` and the number is `{:?}`", @@ -208,7 +208,7 @@ pub(crate) fn info_from_details( let site = sites .iter() .find(|site| site.deployment.as_str() == deployment) - .ok_or_else(|| constraint_violation!("missing site for subgraph `{}`", deployment))?; + .ok_or_else(|| internal_error!("missing site for subgraph `{}`", deployment))?; // This needs to be filled in later since it lives in a // different shard @@ -227,7 +227,7 @@ pub(crate) fn info_from_details( latest_block, }; let entity_count = entity_count.to_u64().ok_or_else(|| { - constraint_violation!( + internal_error!( "the entityCount for {} is not representable as a u64", deployment ) @@ -438,13 +438,13 @@ impl StoredDeploymentEntity { .graft_base .map(DeploymentHash::new) .transpose() - .map_err(|b| constraint_violation!("invalid graft base `{}`", b))?; + .map_err(|b| internal_error!("invalid graft base `{}`", b))?; let debug_fork = detail .debug_fork .map(DeploymentHash::new) .transpose() - .map_err(|b| constraint_violation!("invalid debug fork `{}`", b))?; + .map_err(|b| internal_error!("invalid debug fork `{}`", b))?; 
Ok(SubgraphDeploymentEntity { manifest: manifest.as_manifest(schema), diff --git a/store/postgres/src/dynds/mod.rs b/store/postgres/src/dynds/mod.rs index 09385fb8a7d..27ab4e78a10 100644 --- a/store/postgres/src/dynds/mod.rs +++ b/store/postgres/src/dynds/mod.rs @@ -7,8 +7,8 @@ use crate::primary::Site; use diesel::PgConnection; use graph::{ components::store::{write, StoredDynamicDataSource}, - constraint_violation, data_source::CausalityRegion, + internal_error, prelude::{BlockNumber, StoreError}, }; @@ -60,7 +60,7 @@ pub(crate) fn update_offchain_status( true => { DataSourcesTable::new(site.namespace.clone()).update_offchain_status(conn, data_sources) } - false => Err(constraint_violation!( + false => Err(internal_error!( "shared schema does not support data source offchain_found", )), } diff --git a/store/postgres/src/dynds/private.rs b/store/postgres/src/dynds/private.rs index e8e7f4ce992..d4d21ad39c1 100644 --- a/store/postgres/src/dynds/private.rs +++ b/store/postgres/src/dynds/private.rs @@ -1,8 +1,9 @@ -use std::ops::Bound; +use std::{collections::HashMap, i32, ops::Bound}; use diesel::{ - pg::sql_types, + pg::{sql_types, Pg}, prelude::*, + query_builder::{AstPass, QueryFragment, QueryId}, sql_query, sql_types::{Binary, Bool, Integer, Jsonb, Nullable}, PgConnection, QueryDsl, RunQueryDsl, @@ -11,12 +12,12 @@ use diesel::{ use graph::{ anyhow::{anyhow, Context}, components::store::{write, StoredDynamicDataSource}, - constraint_violation, data_source::CausalityRegion, + internal_error, prelude::{serde_json, BlockNumber, StoreError}, }; -use crate::primary::Namespace; +use crate::{primary::Namespace, relational_queries::POSTGRES_MAX_PARAMETERS}; type DynTable = diesel_dynamic_schema::Table; type DynColumn = diesel_dynamic_schema::Column; @@ -163,7 +164,7 @@ impl DataSourcesTable { // Nested offchain data sources might not pass this check, as their `creation_block` // will be their parent's `creation_block`, not necessarily `block`. if causality_region == &CausalityRegion::ONCHAIN && creation_block != &Some(block) { - return Err(constraint_violation!( + return Err(internal_error!( "mismatching creation blocks `{:?}` and `{}`", creation_block, block @@ -226,16 +227,12 @@ impl DataSourcesTable { return Ok(count as usize); } - type Tuple = ( - (Bound, Bound), - i32, - Option>, - Option, - i32, - Option, - ); + let manifest_map = + ManifestIdxMap::new(src_manifest_idx_and_name, dst_manifest_idx_and_name); - let src_tuples = self + // Load all data sources that were created up to and including + // `target_block` and transform them ready for insertion + let dss: Vec<_> = self .table .clone() .filter( @@ -250,55 +247,18 @@ impl DataSourcesTable { &self.done_at, )) .order_by(&self.vid) - .load::(conn)?; + .load::(conn)? + .into_iter() + .map(|ds| ds.src_to_dst(target_block, &manifest_map, &self.namespace, &dst.namespace)) + .collect::>()?; + // Split all dss into chunks so that we never use more than + // `POSTGRES_MAX_PARAMETERS` bind variables per chunk + let chunk_size = POSTGRES_MAX_PARAMETERS / CopyDsQuery::BIND_PARAMS; let mut count = 0; - for (block_range, src_manifest_idx, param, context, causality_region, done_at) in src_tuples - { - let name = &src_manifest_idx_and_name - .iter() - .find(|(idx, _)| idx == &src_manifest_idx) - .with_context(|| { - anyhow!( - "the source {} does not have a template with index {}", - self.namespace, - src_manifest_idx - ) - })? 
- .1; - let dst_manifest_idx = dst_manifest_idx_and_name - .iter() - .find(|(_, n)| n == name) - .with_context(|| { - anyhow!( - "the destination {} is missing a template with name {}. The source {} created one at block {:?}", - dst.namespace, - name, self.namespace, block_range.0 - ) - })? - .0; - - let query = format!( - "\ - insert into {dst}(block_range, manifest_idx, param, context, causality_region, done_at) - values(case - when upper($2) <= $1 then $2 - else int4range(lower($2), null) - end, - $3, $4, $5, $6, $7) - ", - dst = dst.qname - ); - - count += sql_query(query) - .bind::(target_block) - .bind::, _>(block_range) - .bind::(dst_manifest_idx) - .bind::, _>(param) - .bind::, _>(context) - .bind::(causality_region) - .bind::, _>(done_at) - .execute(conn)?; + for chunk in dss.chunks(chunk_size) { + let query = CopyDsQuery::new(dst, chunk)?; + count += query.execute(conn)?; } // If the manifest idxes remained constant, we can test that both tables have the same @@ -333,7 +293,7 @@ impl DataSourcesTable { .execute(conn)?; if count > 1 { - return Err(constraint_violation!( + return Err(internal_error!( "expected to remove at most one offchain data source but would remove {}, causality region: {}", count, ds.causality_region @@ -361,3 +321,141 @@ impl DataSourcesTable { .optional()?) } } + +/// Map src manifest indexes to dst manifest indexes. If the +/// destination is missing an entry, put `None` as the value for the +/// source index +struct ManifestIdxMap { + map: HashMap, String)>, +} + +impl ManifestIdxMap { + fn new(src: &[(i32, String)], dst: &[(i32, String)]) -> Self { + let dst_idx_map: HashMap<&String, i32> = + HashMap::from_iter(dst.iter().map(|(idx, name)| (name, *idx))); + let map = src + .iter() + .map(|(src_idx, src_name)| { + ( + *src_idx, + (dst_idx_map.get(src_name).copied(), src_name.to_string()), + ) + }) + .collect(); + ManifestIdxMap { map } + } + + fn dst_idx( + &self, + src_idx: i32, + src_nsp: &Namespace, + src_created: BlockNumber, + dst_nsp: &Namespace, + ) -> Result { + let (dst_idx, name) = self.map.get(&src_idx).with_context(|| { + anyhow!( + "the source {src_nsp} does not have a template with \ + index {src_idx} but created one at block {src_created}" + ) + })?; + let dst_idx = dst_idx.with_context(|| { + anyhow!( + "the destination {dst_nsp} is missing a template with \ + name {name}. 
The source {src_nsp} created one at block {src_created}" + ) + })?; + Ok(dst_idx) + } +} + +#[derive(Queryable)] +struct DsForCopy { + block_range: (Bound, Bound), + idx: i32, + param: Option>, + context: Option, + causality_region: i32, + done_at: Option, +} + +impl DsForCopy { + fn src_to_dst( + mut self, + target_block: BlockNumber, + map: &ManifestIdxMap, + src_nsp: &Namespace, + dst_nsp: &Namespace, + ) -> Result { + // unclamp block range if it ends beyond target block + match self.block_range.1 { + Bound::Included(block) if block > target_block => self.block_range.1 = Bound::Unbounded, + Bound::Excluded(block) if block - 1 > target_block => { + self.block_range.1 = Bound::Unbounded + } + _ => { /* use block range as is */ } + } + // Translate manifest index + let src_created = match self.block_range.0 { + Bound::Included(block) => block, + Bound::Excluded(block) => block + 1, + Bound::Unbounded => 0, + }; + self.idx = map.dst_idx(self.idx, src_nsp, src_created, dst_nsp)?; + Ok(self) + } +} + +struct CopyDsQuery<'a> { + dst: &'a DataSourcesTable, + dss: &'a [DsForCopy], +} + +impl<'a> CopyDsQuery<'a> { + const BIND_PARAMS: usize = 6; + + fn new(dst: &'a DataSourcesTable, dss: &'a [DsForCopy]) -> Result { + Ok(CopyDsQuery { dst, dss }) + } +} + +impl<'a> QueryFragment for CopyDsQuery<'a> { + fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> QueryResult<()> { + out.unsafe_to_cache_prepared(); + out.push_sql("insert into "); + out.push_sql(&self.dst.qname); + out.push_sql( + "(block_range, manifest_idx, param, context, causality_region, done_at) values ", + ); + let mut first = true; + for ds in self.dss.iter() { + if first { + first = false; + } else { + out.push_sql(", "); + } + out.push_sql("("); + out.push_bind_param::, _>(&ds.block_range)?; + out.push_sql(", "); + out.push_bind_param::(&ds.idx)?; + out.push_sql(", "); + out.push_bind_param::, _>(&ds.param)?; + out.push_sql(", "); + out.push_bind_param::, _>(&ds.context)?; + out.push_sql(", "); + out.push_bind_param::(&ds.causality_region)?; + out.push_sql(", "); + out.push_bind_param::, _>(&ds.done_at)?; + out.push_sql(")"); + } + + Ok(()) + } +} + +impl<'a> QueryId for CopyDsQuery<'a> { + type QueryId = (); + + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl<'a, Conn> RunQueryDsl for CopyDsQuery<'a> {} diff --git a/store/postgres/src/dynds/shared.rs b/store/postgres/src/dynds/shared.rs index 34615a720e3..7fdec556ada 100644 --- a/store/postgres/src/dynds/shared.rs +++ b/store/postgres/src/dynds/shared.rs @@ -11,14 +11,14 @@ use diesel::{insert_into, pg::PgConnection}; use graph::{ components::store::{write, StoredDynamicDataSource}, - constraint_violation, data::store::scalar::ToPrimitive, data_source::CausalityRegion, + internal_error, prelude::{serde_json, BigDecimal, BlockNumber, DeploymentHash, StoreError}, }; -use crate::connection_pool::ForeignServer; use crate::primary::Site; +use crate::ForeignServer; table! 
{ subgraphs.dynamic_ethereum_contract_data_source (vid) { @@ -62,7 +62,7 @@ pub(super) fn load( let mut data_sources: Vec = Vec::new(); for (vid, name, context, address, creation_block) in dds.into_iter() { if address.len() != 20 { - return Err(constraint_violation!( + return Err(internal_error!( "Data source address `0x{:?}` for dynamic data source {} should be 20 bytes long but is {} bytes long", address, vid, address.len() @@ -72,7 +72,7 @@ pub(super) fn load( let manifest_idx = manifest_idx_and_name .iter() .find(|(_, manifest_name)| manifest_name == &name) - .ok_or_else(|| constraint_violation!("data source name {} not found", name))? + .ok_or_else(|| internal_error!("data source name {} not found", name))? .0; let creation_block = creation_block.to_i32(); let data_source = StoredDynamicDataSource { @@ -88,7 +88,7 @@ pub(super) fn load( }; if data_sources.last().and_then(|d| d.creation_block) > data_source.creation_block { - return Err(StoreError::ConstraintViolation( + return Err(StoreError::InternalError( "data sources not ordered by creation block".to_string(), )); } @@ -126,7 +126,7 @@ pub(super) fn insert( } = ds; if causality_region != &CausalityRegion::ONCHAIN { - return Err(constraint_violation!( + return Err(internal_error!( "using shared data source schema with file data sources" )); } @@ -134,17 +134,13 @@ pub(super) fn insert( let address = match param { Some(param) => param, None => { - return Err(constraint_violation!( - "dynamic data sources must have an address", - )); + return Err(internal_error!("dynamic data sources must have an address",)); } }; let name = manifest_idx_and_name .iter() .find(|(idx, _)| *idx == ds.manifest_idx) - .ok_or_else(|| { - constraint_violation!("manifest idx {} not found", ds.manifest_idx) - })? + .ok_or_else(|| internal_error!("manifest idx {} not found", ds.manifest_idx))? .1 .clone(); Ok(( diff --git a/store/postgres/src/fork.rs b/store/postgres/src/fork.rs index 1a8e7a7c4ec..40457fb1739 100644 --- a/store/postgres/src/fork.rs +++ b/store/postgres/src/fork.rs @@ -7,10 +7,10 @@ use std::{ use graph::{ block_on, components::store::SubgraphFork as SubgraphForkTrait, - constraint_violation, + internal_error, prelude::{ - info, r::Value as RValue, reqwest, serde_json, DeploymentHash, Entity, Logger, Serialize, - StoreError, Value, ValueType, + anyhow, info, r::Value as RValue, reqwest, serde_json, DeploymentHash, Entity, Logger, + Serialize, StoreError, Value, ValueType, }, schema::Field, url::Url, @@ -69,9 +69,7 @@ impl SubgraphForkTrait for SubgraphFork { let entity_type = self.schema.entity_type(&entity_type_name)?; let fields = &entity_type .object_type() - .map_err(|_| { - constraint_violation!("no object type called `{}` found", entity_type_name) - })? + .map_err(|_| internal_error!("no object type called `{}` found", entity_type_name))? 
.fields; let query = Query { @@ -211,11 +209,9 @@ query Query ($id: String) {{ map }; - Ok(Some( - schema - .make_entity(map) - .map_err(|e| StoreError::EntityValidationError(e))?, - )) + Ok(Some(schema.make_entity(map).map_err(|e| { + StoreError::Unknown(anyhow!("entity validation failed: {e}")) + })?)) } } diff --git a/store/postgres/src/graphman/mod.rs b/store/postgres/src/graphman/mod.rs index c9aba751f50..4f538cd6e23 100644 --- a/store/postgres/src/graphman/mod.rs +++ b/store/postgres/src/graphman/mod.rs @@ -6,7 +6,7 @@ use graphman_store::Execution; use graphman_store::ExecutionId; use graphman_store::ExecutionStatus; -use crate::connection_pool::ConnectionPool; +use crate::ConnectionPool; mod schema; diff --git a/store/postgres/src/jobs.rs b/store/postgres/src/jobs.rs index 17d2d279ce3..a150598427e 100644 --- a/store/postgres/src/jobs.rs +++ b/store/postgres/src/jobs.rs @@ -10,7 +10,7 @@ use graph::prelude::{error, Logger, MetricsRegistry, StoreError, ENV_VARS}; use graph::prometheus::Gauge; use graph::util::jobs::{Job, Runner}; -use crate::connection_pool::ConnectionPool; +use crate::ConnectionPool; use crate::{unused, Store, SubgraphStore}; pub fn register( diff --git a/store/postgres/src/lib.rs b/store/postgres/src/lib.rs index 759e8601313..baf4d523ed5 100644 --- a/store/postgres/src/lib.rs +++ b/store/postgres/src/lib.rs @@ -2,8 +2,6 @@ //! [Store] for the details of how the store is organized across //! different databases/shards. -#[macro_use] -extern crate derive_more; #[macro_use] extern crate diesel; #[macro_use] @@ -17,7 +15,6 @@ mod block_store; mod catalog; mod chain_head_listener; mod chain_store; -pub mod connection_pool; mod copy; mod deployment; mod deployment_store; @@ -27,6 +24,7 @@ mod fork; mod functions; mod jobs; mod notification_listener; +mod pool; mod primary; pub mod query_store; mod relational; @@ -63,6 +61,7 @@ pub use self::chain_store::{ChainStore, ChainStoreMetrics, Storage}; pub use self::detail::DeploymentDetail; pub use self::jobs::register as register_jobs; pub use self::notification_listener::NotificationSender; +pub use self::pool::{ConnectionPool, ForeignServer, PoolCoordinator, PoolRole}; pub use self::primary::{db_version, UnusedDeployment}; pub use self::store::Store; pub use self::store_events::SubscriptionManager; @@ -86,5 +85,6 @@ pub mod command_support { } pub use crate::deployment::{on_sync, OnSync}; pub use crate::primary::Namespace; + pub use crate::relational::prune::{Phase, PruneState, PruneTableState, Viewer}; pub use crate::relational::{Catalog, Column, ColumnType, Layout, SqlName}; } diff --git a/store/postgres/src/pool/coordinator.rs b/store/postgres/src/pool/coordinator.rs new file mode 100644 index 00000000000..f58a553b693 --- /dev/null +++ b/store/postgres/src/pool/coordinator.rs @@ -0,0 +1,315 @@ +use graph::cheap_clone::CheapClone; +use graph::futures03::future::join_all; +use graph::futures03::FutureExt as _; +use graph::internal_error; +use graph::prelude::MetricsRegistry; +use graph::prelude::{crit, debug, error, info, o, StoreError}; +use graph::slog::Logger; + +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; + +use crate::advisory_lock::with_migration_lock; +use crate::{Shard, PRIMARY_SHARD}; + +use super::{ConnectionPool, ForeignServer, MigrationCount, PoolInner, PoolRole, PoolState}; + +/// Helper to coordinate propagating schema changes from the database that +/// changes schema to all other shards so they can update their fdw mappings +/// of tables imported from that shard +pub struct PoolCoordinator 
{ + logger: Logger, + pools: Mutex>, + servers: Arc>, +} + +impl PoolCoordinator { + pub fn new(logger: &Logger, servers: Arc>) -> Self { + let logger = logger.new(o!("component" => "ConnectionPool", "component" => "Coordinator")); + Self { + logger, + pools: Mutex::new(HashMap::new()), + servers, + } + } + + pub fn create_pool( + self: Arc, + logger: &Logger, + name: &str, + pool_name: PoolRole, + postgres_url: String, + pool_size: u32, + fdw_pool_size: Option, + registry: Arc, + ) -> ConnectionPool { + let is_writable = !pool_name.is_replica(); + + let pool = ConnectionPool::create( + name, + pool_name, + postgres_url, + pool_size, + fdw_pool_size, + logger, + registry, + self.cheap_clone(), + ); + + // Ignore non-writable pools (replicas), there is no need (and no + // way) to coordinate schema changes with them + if is_writable { + self.pools + .lock() + .unwrap() + .insert(pool.shard.clone(), pool.inner.cheap_clone()); + } + + pool + } + + /// Propagate changes to the schema in `shard` to all other pools. Those + /// other pools will then recreate any tables that they imported from + /// `shard`. If `pool` is a new shard, we also map all other shards into + /// it. + /// + /// This tries to take the migration lock and must therefore be run from + /// code that does _not_ hold the migration lock as it will otherwise + /// deadlock + fn propagate(&self, pool: &PoolInner, count: MigrationCount) -> Result<(), StoreError> { + // We need to remap all these servers into `pool` if the list of + // tables that are mapped have changed from the code of the previous + // version. Since dropping and recreating the foreign table + // definitions can slow the startup of other nodes down because of + // locking, we try to only do this when it is actually needed + for server in self.servers.iter() { + if pool.needs_remap(server)? { + pool.remap(server)?; + } + } + + // pool had schema changes, refresh the import from pool into all + // other shards. This makes sure that schema changes to + // already-mapped tables are propagated to all other shards. Since + // we run `propagate` after migrations have been applied to `pool`, + // we can be sure that these mappings use the correct schema + if count.had_migrations() { + let server = self.server(&pool.shard)?; + for pool in self.pools.lock().unwrap().values() { + let pool = pool.get_unready(); + let remap_res = pool.remap(server); + if let Err(e) = remap_res { + error!(pool.logger, "Failed to map imports from {}", server.shard; "error" => e.to_string()); + return Err(e); + } + } + } + Ok(()) + } + + /// Return a list of all pools, regardless of whether they are ready or + /// not. + pub fn pools(&self) -> Vec> { + self.pools + .lock() + .unwrap() + .values() + .map(|state| state.get_unready()) + .collect::>() + } + + pub fn servers(&self) -> Arc> { + self.servers.clone() + } + + fn server(&self, shard: &Shard) -> Result<&ForeignServer, StoreError> { + self.servers + .iter() + .find(|server| &server.shard == shard) + .ok_or_else(|| internal_error!("unknown shard {shard}")) + } + + fn primary(&self) -> Result, StoreError> { + let map = self.pools.lock().unwrap(); + let pool_state = map.get(&*&PRIMARY_SHARD).ok_or_else(|| { + internal_error!("internal error: primary shard not found in pool coordinator") + })?; + + Ok(pool_state.get_unready()) + } + + /// Setup all pools the coordinator knows about and return the number of + /// pools that were successfully set up. 
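`propagate` above only recreates foreign-table mappings when `needs_remap` reports a difference, since dropping and recreating the definitions can block other nodes during startup. As a rough illustration of that check (mirroring the `different` helper that appears later in this diff in `ForeignServer::needs_remap`), a sorted comparison of the existing and expected table lists is enough; the table names below are taken from `PRIMARY_TABLES` and everything else is a stand-in.

```rust
// Illustrative helper: remap only when the set of currently mapped tables
// differs from what the code expects.
fn needs_remap(mut existing: Vec<String>, mut needed: Vec<String>) -> bool {
    existing.sort();
    needed.sort();
    existing != needed
}

fn main() {
    let existing = vec!["chains".to_string(), "deployment_schemas".to_string()];
    let needed = vec![
        "deployment_schemas".to_string(),
        "chains".to_string(),
        "active_copies".to_string(),
    ];
    // A newly added table means the fdw mappings must be recreated.
    assert!(needs_remap(existing, needed));
}
```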
+ /// + /// # Panics + /// + /// If any errors besides a database not being available happen during + /// the migration, the process panics + pub async fn setup_all(&self, logger: &Logger) -> usize { + let pools = self + .pools + .lock() + .unwrap() + .values() + .cloned() + .collect::>(); + + let res = self.setup(pools).await; + + match res { + Ok(count) => { + info!(logger, "Setup finished"; "shards" => count); + count + } + Err(e) => { + crit!(logger, "database setup failed"; "error" => format!("{e}")); + panic!("database setup failed: {}", e); + } + } + } + + /// A helper to call `setup` from a non-async context. Returns `true` if + /// the setup was actually run, i.e. if `pool` was available + pub(crate) fn setup_bg(self: Arc, pool: PoolState) -> Result { + let migrated = graph::spawn_thread("database-setup", move || { + graph::block_on(self.setup(vec![pool.clone()])) + }) + .join() + // unwrap: propagate panics + .unwrap()?; + Ok(migrated == 1) + } + + /// Setup all pools by doing the following steps: + /// 1. Get the migration lock in the primary. This makes sure that only + /// one node runs migrations + /// 2. Remove the views in `sharded` as they might interfere with + /// running migrations + /// 3. In parallel, do the following in each pool: + /// 1. Configure fdw servers + /// 2. Run migrations in all pools in parallel + /// 4. In parallel, do the following in each pool: + /// 1. Create/update the mappings in `shard__subgraphs` and in + /// `primary_public` + /// 5. Create the views in `sharded` again + /// 6. Release the migration lock + /// + /// This method tolerates databases that are not available and will + /// simply ignore them. The returned count is the number of pools that + /// were successfully set up. + /// + /// When this method returns, the entries from `states` that were + /// successfully set up will be marked as ready. The method returns the + /// number of pools that were set up + async fn setup(&self, states: Vec) -> Result { + type MigrationCounts = Vec<(PoolState, MigrationCount)>; + + /// Filter out pools that are not available. We don't want to fail + /// because one of the pools is not available. We will just ignore + /// them and continue with the others. 
+ fn filter_unavailable( + (state, res): (PoolState, Result), + ) -> Option> { + if let Err(StoreError::DatabaseUnavailable) = res { + error!( + state.logger, + "migrations failed because database was unavailable" + ); + None + } else { + Some(res.map(|count| (state, count))) + } + } + + /// Migrate all pools in parallel + async fn migrate( + pools: &[PoolState], + servers: &[ForeignServer], + ) -> Result { + let futures = pools + .iter() + .map(|state| { + state + .get_unready() + .cheap_clone() + .migrate(servers) + .map(|res| (state.cheap_clone(), res)) + }) + .collect::>(); + join_all(futures) + .await + .into_iter() + .filter_map(filter_unavailable) + .collect::, _>>() + } + + /// Propagate the schema changes to all other pools in parallel + async fn propagate( + this: &PoolCoordinator, + migrated: MigrationCounts, + ) -> Result, StoreError> { + let futures = migrated + .into_iter() + .map(|(state, count)| async move { + let pool = state.get_unready(); + let res = this.propagate(&pool, count); + (state.cheap_clone(), res) + }) + .collect::>(); + join_all(futures) + .await + .into_iter() + .filter_map(filter_unavailable) + .map(|res| res.map(|(state, ())| state)) + .collect::, _>>() + } + + let primary = self.primary()?; + + let mut pconn = primary.get().map_err(|_| StoreError::DatabaseUnavailable)?; + + let states: Vec<_> = states + .into_iter() + .filter(|pool| pool.needs_setup()) + .collect(); + if states.is_empty() { + return Ok(0); + } + + // Everything here happens under the migration lock. Anything called + // from here should not try to get that lock, otherwise the process + // will deadlock + debug!(self.logger, "Waiting for migration lock"); + let res = with_migration_lock(&mut pconn, |_| async { + debug!(self.logger, "Migration lock acquired"); + + // While we were waiting for the migration lock, another thread + // might have already run this + let states: Vec<_> = states + .into_iter() + .filter(|pool| pool.needs_setup()) + .collect(); + if states.is_empty() { + debug!(self.logger, "No pools to set up"); + return Ok(0); + } + + primary.drop_cross_shard_views()?; + + let migrated = migrate(&states, self.servers.as_ref()).await?; + + let propagated = propagate(&self, migrated).await?; + + primary.create_cross_shard_views(&self.servers)?; + + for state in &propagated { + state.set_ready(); + } + Ok(propagated.len()) + }) + .await; + debug!(self.logger, "Database setup finished"); + + res + } +} diff --git a/store/postgres/src/pool/foreign_server.rs b/store/postgres/src/pool/foreign_server.rs new file mode 100644 index 00000000000..3f8daf64b54 --- /dev/null +++ b/store/postgres/src/pool/foreign_server.rs @@ -0,0 +1,237 @@ +use diesel::{connection::SimpleConnection, pg::PgConnection}; + +use graph::{ + prelude::{ + anyhow::{self, anyhow, bail}, + StoreError, ENV_VARS, + }, + util::security::SafeDisplay, +}; + +use std::fmt::Write; + +use postgres::config::{Config, Host}; + +use crate::catalog; +use crate::primary::NAMESPACE_PUBLIC; +use crate::{Shard, PRIMARY_SHARD}; + +use super::{PRIMARY_PUBLIC, PRIMARY_TABLES, SHARDED_TABLES}; + +pub struct ForeignServer { + pub name: String, + pub shard: Shard, + pub user: String, + pub password: String, + pub host: String, + pub port: u16, + pub dbname: String, +} + +impl ForeignServer { + /// The name of the foreign server under which data for `shard` is + /// accessible + pub fn name(shard: &Shard) -> String { + format!("shard_{}", shard.as_str()) + } + + /// The name of the schema under which the `subgraphs` schema for + /// `shard` is 
accessible in shards that are not `shard`. In most cases + /// you actually want to use `metadata_schema_in` + pub fn metadata_schema(shard: &Shard) -> String { + format!("{}_subgraphs", Self::name(shard)) + } + + /// The name of the schema under which the `subgraphs` schema for + /// `shard` is accessible in the shard `current`. It is permissible for + /// `shard` and `current` to be the same. + pub fn metadata_schema_in(shard: &Shard, current: &Shard) -> String { + if shard == current { + "subgraphs".to_string() + } else { + Self::metadata_schema(&shard) + } + } + + pub fn new_from_raw(shard: String, postgres_url: &str) -> Result { + Self::new(Shard::new(shard)?, postgres_url) + } + + pub fn new(shard: Shard, postgres_url: &str) -> Result { + let config: Config = match postgres_url.parse() { + Ok(config) => config, + Err(e) => panic!( + "failed to parse Postgres connection string `{}`: {}", + SafeDisplay(postgres_url), + e + ), + }; + + let host = match config.get_hosts().get(0) { + Some(Host::Tcp(host)) => host.to_string(), + _ => bail!("can not find host name in `{}`", SafeDisplay(postgres_url)), + }; + + let user = config + .get_user() + .ok_or_else(|| anyhow!("could not find user in `{}`", SafeDisplay(postgres_url)))? + .to_string(); + let password = String::from_utf8( + config + .get_password() + .ok_or_else(|| { + anyhow!( + "could not find password in `{}`; you must provide one.", + SafeDisplay(postgres_url) + ) + })? + .into(), + )?; + let port = config.get_ports().first().cloned().unwrap_or(5432u16); + let dbname = config + .get_dbname() + .map(|s| s.to_string()) + .ok_or_else(|| anyhow!("could not find user in `{}`", SafeDisplay(postgres_url)))?; + + Ok(Self { + name: Self::name(&shard), + shard, + user, + password, + host, + port, + dbname, + }) + } + + /// Create a new foreign server and user mapping on `conn` for this foreign + /// server + pub(super) fn create(&self, conn: &mut PgConnection) -> Result<(), StoreError> { + let query = format!( + "\ + create server \"{name}\" + foreign data wrapper postgres_fdw + options (host '{remote_host}', \ + port '{remote_port}', \ + dbname '{remote_db}', \ + fetch_size '{fetch_size}', \ + updatable 'false'); + create user mapping + for current_user server \"{name}\" + options (user '{remote_user}', password '{remote_password}');", + name = self.name, + remote_host = self.host, + remote_port = self.port, + remote_db = self.dbname, + remote_user = self.user, + remote_password = self.password, + fetch_size = ENV_VARS.store.fdw_fetch_size, + ); + Ok(conn.batch_execute(&query)?) 
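`ForeignServer::new` above derives all connection details from the shard's connection string using the `postgres` crate's `Config` parser. The following stand-alone sketch shows that same parsing pattern; the helper name, defaults, and example URL are illustrative, not part of the store's API.

```rust
// Hedged sketch of the URL parsing done in `ForeignServer::new`, assuming the
// `postgres` crate is on the dependency list (as it is for this module).
use postgres::config::{Config, Host};

fn connection_parts(url: &str) -> Result<(String, u16, String, String), String> {
    let config: Config = url.parse().map_err(|e| format!("bad connection string: {e}"))?;
    let host = match config.get_hosts().first() {
        Some(Host::Tcp(host)) => host.clone(),
        _ => return Err("no TCP host in connection string".to_string()),
    };
    let port = config.get_ports().first().copied().unwrap_or(5432);
    let user = config.get_user().unwrap_or("postgres").to_string();
    let dbname = config.get_dbname().unwrap_or("postgres").to_string();
    Ok((host, port, user, dbname))
}

fn main() {
    let (host, port, user, db) =
        connection_parts("postgresql://graph:secret@db.example.com:6543/graph_node").unwrap();
    assert_eq!(
        (host.as_str(), port, user.as_str(), db.as_str()),
        ("db.example.com", 6543, "graph", "graph_node")
    );
}
```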
+ } + + /// Update an existing user mapping with possibly new details + pub(super) fn update(&self, conn: &mut PgConnection) -> Result<(), StoreError> { + let options = catalog::server_options(conn, &self.name)?; + let set_or_add = |option: &str| -> &'static str { + if options.contains_key(option) { + "set" + } else { + "add" + } + }; + + let query = format!( + "\ + alter server \"{name}\" + options (set host '{remote_host}', \ + {set_port} port '{remote_port}', \ + set dbname '{remote_db}', \ + {set_fetch_size} fetch_size '{fetch_size}'); + alter user mapping + for current_user server \"{name}\" + options (set user '{remote_user}', set password '{remote_password}');", + name = self.name, + remote_host = self.host, + set_port = set_or_add("port"), + set_fetch_size = set_or_add("fetch_size"), + remote_port = self.port, + remote_db = self.dbname, + remote_user = self.user, + remote_password = self.password, + fetch_size = ENV_VARS.store.fdw_fetch_size, + ); + Ok(conn.batch_execute(&query)?) + } + + /// Map key tables from the primary into our local schema. If we are the + /// primary, set them up as views. + pub(super) fn map_primary(conn: &mut PgConnection, shard: &Shard) -> Result<(), StoreError> { + catalog::recreate_schema(conn, PRIMARY_PUBLIC)?; + + let mut query = String::new(); + for table_name in PRIMARY_TABLES { + let create_stmt = if shard == &*PRIMARY_SHARD { + format!( + "create view {nsp}.{table_name} as select * from public.{table_name};", + nsp = PRIMARY_PUBLIC, + table_name = table_name + ) + } else { + catalog::create_foreign_table( + conn, + NAMESPACE_PUBLIC, + table_name, + PRIMARY_PUBLIC, + Self::name(&PRIMARY_SHARD).as_str(), + )? + }; + write!(query, "{}", create_stmt)?; + } + conn.batch_execute(&query)?; + Ok(()) + } + + /// Map the `subgraphs` schema from the foreign server `self` into the + /// database accessible through `conn` + pub(super) fn map_metadata(&self, conn: &mut PgConnection) -> Result<(), StoreError> { + let nsp = Self::metadata_schema(&self.shard); + catalog::recreate_schema(conn, &nsp)?; + let mut query = String::new(); + for (src_nsp, src_tables) in SHARDED_TABLES { + for src_table in src_tables { + let create_stmt = + catalog::create_foreign_table(conn, src_nsp, src_table, &nsp, &self.name)?; + write!(query, "{}", create_stmt)?; + } + } + Ok(conn.batch_execute(&query)?) 
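`map_primary` and `map_metadata` above both accumulate all DDL into a single string and send it with one `batch_execute` round trip. Below is a reduced sketch of that pattern, with the view statement copied from `map_primary` and the foreign-table branch elided as a comment (the real code generates it through `catalog::create_foreign_table`); the helper name is a stand-in.

```rust
// Build all mapping DDL into one string so it can be executed in a single
// batch. In the primary shard the "mapping" is just a local view; other
// shards would import the table through postgres_fdw instead.
use std::fmt::Write;

fn build_mapping_ddl(nsp: &str, tables: &[&str], is_primary: bool) -> String {
    let mut ddl = String::new();
    for table in tables {
        if is_primary {
            write!(ddl, "create view {nsp}.{table} as select * from public.{table};").unwrap();
        } else {
            // Placeholder: the diff emits a `create foreign table` statement here.
            writeln!(ddl, "-- import foreign table {nsp}.{table}").unwrap();
        }
    }
    ddl
}

fn main() {
    let ddl = build_mapping_ddl("primary_public", &["chains", "active_copies"], true);
    assert!(ddl.contains("create view primary_public.chains"));
}
```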
+ } + + pub(super) fn needs_remap(&self, conn: &mut PgConnection) -> Result { + fn different(mut existing: Vec, mut needed: Vec) -> bool { + existing.sort(); + needed.sort(); + existing != needed + } + + if &self.shard == &*PRIMARY_SHARD { + let existing = catalog::foreign_tables(conn, PRIMARY_PUBLIC)?; + let needed = PRIMARY_TABLES + .into_iter() + .map(String::from) + .collect::>(); + if different(existing, needed) { + return Ok(true); + } + } + + let existing = catalog::foreign_tables(conn, &Self::metadata_schema(&self.shard))?; + let needed = SHARDED_TABLES + .iter() + .flat_map(|(_, tables)| *tables) + .map(|table| table.to_string()) + .collect::>(); + Ok(different(existing, needed)) + } +} diff --git a/store/postgres/src/connection_pool.rs b/store/postgres/src/pool/mod.rs similarity index 51% rename from store/postgres/src/connection_pool.rs rename to store/postgres/src/pool/mod.rs index 374a1adc5ab..628a977ff9b 100644 --- a/store/postgres/src/connection_pool.rs +++ b/store/postgres/src/pool/mod.rs @@ -1,7 +1,7 @@ use diesel::r2d2::Builder; use diesel::{connection::SimpleConnection, pg::PgConnection}; use diesel::{ - r2d2::{self, event as e, ConnectionManager, HandleEvent, Pool, PooledConnection}, + r2d2::{ConnectionManager, Pool, PooledConnection}, Connection, }; use diesel::{sql_query, RunQueryDsl}; @@ -9,34 +9,48 @@ use diesel::{sql_query, RunQueryDsl}; use diesel_migrations::{EmbeddedMigrations, HarnessWithOutput}; use graph::cheap_clone::CheapClone; use graph::components::store::QueryPermit; -use graph::constraint_violation; +use graph::derive::CheapClone; +use graph::internal_error; use graph::prelude::tokio::time::Instant; +use graph::prelude::{ + anyhow::anyhow, crit, debug, error, info, o, tokio::sync::Semaphore, CancelGuard, CancelHandle, + CancelToken as _, CancelableError, Gauge, Logger, MovingStats, PoolWaitStats, StoreError, + ENV_VARS, +}; use graph::prelude::{tokio, MetricsRegistry}; use graph::slog::warn; use graph::util::timed_rw_lock::TimedMutex; -use graph::{ - prelude::{ - anyhow::{self, anyhow, bail}, - crit, debug, error, info, o, - tokio::sync::Semaphore, - CancelGuard, CancelHandle, CancelToken as _, CancelableError, Counter, Gauge, Logger, - MovingStats, PoolWaitStats, StoreError, ENV_VARS, - }, - util::security::SafeDisplay, -}; -use std::fmt::{self, Write}; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, Mutex}; +use std::fmt::{self}; +use std::sync::Arc; use std::time::Duration; use std::{collections::HashMap, sync::RwLock}; -use postgres::config::{Config, Host}; - -use crate::primary::{self, NAMESPACE_PUBLIC}; -use crate::{advisory_lock, catalog}; +use crate::catalog; +use crate::primary::{self, Mirror, Namespace}; use crate::{Shard, PRIMARY_SHARD}; +mod coordinator; +mod foreign_server; +mod state_tracker; + +pub use coordinator::PoolCoordinator; +pub use foreign_server::ForeignServer; +use state_tracker::{ErrorHandler, EventHandler, StateTracker}; + +/// The namespace under which the `PRIMARY_TABLES` are mapped into each +/// shard +pub(crate) const PRIMARY_PUBLIC: &'static str = "primary_public"; + +/// Tables that we map from the primary into `primary_public` in each shard +const PRIMARY_TABLES: [&str; 3] = ["deployment_schemas", "chains", "active_copies"]; + +/// The namespace under which we create views in the primary that union all +/// the `SHARDED_TABLES` +pub(crate) const CROSS_SHARD_NSP: &'static str = "sharded"; + +/// Tables that we map from each shard into each other shard into the +/// `shard__subgraphs` namespace 
const SHARDED_TABLES: [(&str, &[&str]); 2] = [ ("public", &["ethereum_networks"]), ( @@ -47,237 +61,153 @@ const SHARDED_TABLES: [(&str, &[&str]); 2] = [ "dynamic_ethereum_contract_data_source", "subgraph_deployment", "subgraph_error", - "subgraph_features", "subgraph_manifest", "table_stats", + "subgraph", + "subgraph_version", + "subgraph_deployment_assignment", + "prune_state", + "prune_table_state", ], ), ]; -pub struct ForeignServer { - pub name: String, - pub shard: Shard, - pub user: String, - pub password: String, - pub host: String, - pub port: u16, - pub dbname: String, -} +/// Make sure that the tables that `jobs::MirrorJob` wants to mirror are +/// actually mapped into the various shards. A failure here is simply a +/// coding mistake +fn check_mirrored_tables() { + for table in Mirror::PUBLIC_TABLES { + if !PRIMARY_TABLES.contains(&table) { + panic!("table {} is not in PRIMARY_TABLES", table); + } + } -impl ForeignServer { - pub(crate) const PRIMARY_PUBLIC: &'static str = "primary_public"; - pub(crate) const CROSS_SHARD_NSP: &'static str = "sharded"; + let subgraphs_tables = *SHARDED_TABLES + .iter() + .find(|(nsp, _)| *nsp == "subgraphs") + .map(|(_, tables)| tables) + .unwrap(); - /// The name of the foreign server under which data for `shard` is - /// accessible - pub fn name(shard: &Shard) -> String { - format!("shard_{}", shard.as_str()) + for table in Mirror::SUBGRAPHS_TABLES { + if !subgraphs_tables.contains(&table) { + panic!("table {} is not in SHARDED_TABLES[subgraphs]", table); + } } +} - /// The name of the schema under which the `subgraphs` schema for - /// `shard` is accessible in shards that are not `shard`. In most cases - /// you actually want to use `metadata_schema_in` - pub fn metadata_schema(shard: &Shard) -> String { - format!("{}_subgraphs", Self::name(shard)) - } +/// How long to keep connections in the `fdw_pool` around before closing +/// them on idle. This is much shorter than the default of 10 minutes. +const FDW_IDLE_TIMEOUT: Duration = Duration::from_secs(60); - /// The name of the schema under which the `subgraphs` schema for - /// `shard` is accessible in the shard `current`. It is permissible for - /// `shard` and `current` to be the same. - pub fn metadata_schema_in(shard: &Shard, current: &Shard) -> String { - if shard == current { - "subgraphs".to_string() - } else { - Self::metadata_schema(&shard) +enum PoolStateInner { + /// A connection pool, and all the servers for which we need to + /// establish fdw mappings when we call `setup` on the pool + Created(Arc, Arc), + /// The pool has been successfully set up + Ready(Arc), +} + +/// A pool goes through several states, and this struct tracks what state we +/// are in, together with the `state_tracker` field on `ConnectionPool`. +/// When first created, the pool is in state `Created`; once we successfully +/// called `setup` on it, it moves to state `Ready`. During use, we use the +/// r2d2 callbacks to determine if the database is available or not, and set +/// the `available` field accordingly. Tracking that allows us to fail fast +/// and avoids having to wait for a connection timeout every time we need a +/// database connection. 
That avoids overall undesirable states like buildup +/// of queries; instead of queueing them until the database is available, +/// they return almost immediately with an error +#[derive(Clone, CheapClone)] +pub(super) struct PoolState { + logger: Logger, + inner: Arc>, +} + +impl PoolState { + fn new(logger: Logger, inner: PoolStateInner, name: String) -> Self { + let pool_name = format!("pool-{}", name); + Self { + logger, + inner: Arc::new(TimedMutex::new(inner, pool_name)), } } - pub fn new_from_raw(shard: String, postgres_url: &str) -> Result { - Self::new(Shard::new(shard)?, postgres_url) + fn created(pool: Arc, coord: Arc) -> Self { + let logger = pool.logger.clone(); + let name = pool.shard.to_string(); + let inner = PoolStateInner::Created(pool, coord); + Self::new(logger, inner, name) } - pub fn new(shard: Shard, postgres_url: &str) -> Result { - let config: Config = match postgres_url.parse() { - Ok(config) => config, - Err(e) => panic!( - "failed to parse Postgres connection string `{}`: {}", - SafeDisplay(postgres_url), - e - ), - }; - - let host = match config.get_hosts().get(0) { - Some(Host::Tcp(host)) => host.to_string(), - _ => bail!("can not find host name in `{}`", SafeDisplay(postgres_url)), - }; - - let user = config - .get_user() - .ok_or_else(|| anyhow!("could not find user in `{}`", SafeDisplay(postgres_url)))? - .to_string(); - let password = String::from_utf8( - config - .get_password() - .ok_or_else(|| { - anyhow!( - "could not find password in `{}`; you must provide one.", - SafeDisplay(postgres_url) - ) - })? - .into(), - )?; - let port = config.get_ports().first().cloned().unwrap_or(5432u16); - let dbname = config - .get_dbname() - .map(|s| s.to_string()) - .ok_or_else(|| anyhow!("could not find user in `{}`", SafeDisplay(postgres_url)))?; - - Ok(Self { - name: Self::name(&shard), - shard, - user, - password, - host, - port, - dbname, - }) + fn ready(pool: Arc) -> Self { + let logger = pool.logger.clone(); + let name = pool.shard.to_string(); + let inner = PoolStateInner::Ready(pool); + Self::new(logger, inner, name) } - /// Create a new foreign server and user mapping on `conn` for this foreign - /// server - fn create(&self, conn: &mut PgConnection) -> Result<(), StoreError> { - let query = format!( - "\ - create server \"{name}\" - foreign data wrapper postgres_fdw - options (host '{remote_host}', \ - port '{remote_port}', \ - dbname '{remote_db}', \ - fetch_size '{fetch_size}', \ - updatable 'false'); - create user mapping - for current_user server \"{name}\" - options (user '{remote_user}', password '{remote_password}');", - name = self.name, - remote_host = self.host, - remote_port = self.port, - remote_db = self.dbname, - remote_user = self.user, - remote_password = self.password, - fetch_size = ENV_VARS.store.fdw_fetch_size, - ); - Ok(conn.batch_execute(&query)?) 
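The `PoolState` wrapper introduced here guards a `Created`/`Ready` enum behind a mutex so that `set_ready`, `get_ready`, and `needs_setup` can be called from any thread. Below is a reduced model of that state machine using a plain `std::sync::Mutex` in place of the store's `TimedMutex`; `Pool` is a stand-in for `Arc<PoolInner>` and the method bodies are simplified.

```rust
// Reduced sketch of the Created -> Ready transition described above.
use std::sync::{Arc, Mutex};

#[derive(Clone)]
struct Pool; // stands in for Arc<PoolInner>

enum Inner {
    Created(Pool),
    Ready(Pool),
}

#[derive(Clone)]
struct PoolState {
    inner: Arc<Mutex<Inner>>,
}

impl PoolState {
    fn created(pool: Pool) -> Self {
        PoolState { inner: Arc::new(Mutex::new(Inner::Created(pool))) }
    }

    fn needs_setup(&self) -> bool {
        matches!(&*self.inner.lock().unwrap(), Inner::Created(_))
    }

    fn set_ready(&self) {
        let mut guard = self.inner.lock().unwrap();
        if let Inner::Created(pool) = &*guard {
            *guard = Inner::Ready(pool.clone());
        }
    }
}

fn main() {
    let state = PoolState::created(Pool);
    assert!(state.needs_setup());
    state.set_ready();
    assert!(!state.needs_setup());
}
```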
+ fn set_ready(&self) { + use PoolStateInner::*; + + let mut guard = self.inner.lock(&self.logger); + match &*guard { + Created(pool, _) => *guard = Ready(pool.clone()), + Ready(_) => { /* nothing to do */ } + } } - /// Update an existing user mapping with possibly new details - fn update(&self, conn: &mut PgConnection) -> Result<(), StoreError> { - let options = catalog::server_options(conn, &self.name)?; - let set_or_add = |option: &str| -> &'static str { - if options.contains_key(option) { - "set" - } else { - "add" + /// Get a connection pool that is ready, i.e., has been through setup + /// and running migrations + fn get_ready(&self) -> Result, StoreError> { + // We have to be careful here that we do not hold a lock when we + // call `setup_bg`, otherwise we will deadlock + let (pool, coord) = { + let guard = self.inner.lock(&self.logger); + + use PoolStateInner::*; + match &*guard { + Created(pool, coord) => (pool.cheap_clone(), coord.cheap_clone()), + Ready(pool) => return Ok(pool.clone()), } }; - let query = format!( - "\ - alter server \"{name}\" - options (set host '{remote_host}', \ - {set_port} port '{remote_port}', \ - set dbname '{remote_db}, \ - {set_fetch_size} fetch_size '{fetch_size}'); - alter user mapping - for current_user server \"{name}\" - options (set user '{remote_user}', set password '{remote_password}');", - name = self.name, - remote_host = self.host, - set_port = set_or_add("port"), - set_fetch_size = set_or_add("fetch_size"), - remote_port = self.port, - remote_db = self.dbname, - remote_user = self.user, - remote_password = self.password, - fetch_size = ENV_VARS.store.fdw_fetch_size, - ); - Ok(conn.batch_execute(&query)?) - } + // self is `Created` and needs to have setup run + coord.setup_bg(self.cheap_clone())?; - /// Map key tables from the primary into our local schema. If we are the - /// primary, set them up as views. - fn map_primary(conn: &mut PgConnection, shard: &Shard) -> Result<(), StoreError> { - catalog::recreate_schema(conn, Self::PRIMARY_PUBLIC)?; - - let mut query = String::new(); - for table_name in ["deployment_schemas", "chains", "active_copies"] { - let create_stmt = if shard == &*PRIMARY_SHARD { - format!( - "create view {nsp}.{table_name} as select * from public.{table_name};", - nsp = Self::PRIMARY_PUBLIC, - table_name = table_name - ) - } else { - catalog::create_foreign_table( - conn, - NAMESPACE_PUBLIC, - table_name, - Self::PRIMARY_PUBLIC, - Self::name(&PRIMARY_SHARD).as_str(), - )? - }; - write!(query, "{}", create_stmt)?; + // We just tried to set up the pool; if it is still not set up and + // we didn't have an error, it means the database is not available + if self.needs_setup() { + return Err(StoreError::DatabaseUnavailable); + } else { + Ok(pool) } - conn.batch_execute(&query)?; - Ok(()) } - /// Map the `subgraphs` schema from the foreign server `self` into the - /// database accessible through `conn` - fn map_metadata(&self, conn: &mut PgConnection) -> Result<(), StoreError> { - let nsp = Self::metadata_schema(&self.shard); - catalog::recreate_schema(conn, &nsp)?; - let mut query = String::new(); - for (src_nsp, src_tables) in SHARDED_TABLES { - for src_table in src_tables { - let create_stmt = - catalog::create_foreign_table(conn, src_nsp, src_table, &nsp, &self.name)?; - write!(query, "{}", create_stmt)?; - } + /// Get the inner pool, regardless of whether it has been set up or not. 
+ /// Most uses should use `get_ready` instead + fn get_unready(&self) -> Arc { + use PoolStateInner::*; + + match &*self.inner.lock(&self.logger) { + Created(pool, _) | Ready(pool) => pool.cheap_clone(), } - Ok(conn.batch_execute(&query)?) } -} -/// How long to keep connections in the `fdw_pool` around before closing -/// them on idle. This is much shorter than the default of 10 minutes. -const FDW_IDLE_TIMEOUT: Duration = Duration::from_secs(60); + fn needs_setup(&self) -> bool { + let guard = self.inner.lock(&self.logger); -/// A pool goes through several states, and this enum tracks what state we -/// are in, together with the `state_tracker` field on `ConnectionPool`. -/// When first created, the pool is in state `Created`; once we successfully -/// called `setup` on it, it moves to state `Ready`. During use, we use the -/// r2d2 callbacks to determine if the database is available or not, and set -/// the `available` field accordingly. Tracking that allows us to fail fast -/// and avoids having to wait for a connection timeout every time we need a -/// database connection. That avoids overall undesirable states like buildup -/// of queries; instead of queueing them until the database is available, -/// they return almost immediately with an error -enum PoolState { - /// A connection pool, and all the servers for which we need to - /// establish fdw mappings when we call `setup` on the pool - Created(Arc, Arc), - /// The pool has been successfully set up - Ready(Arc), - /// The pool has been disabled by setting its size to 0 - Disabled, + use PoolStateInner::*; + match &*guard { + Created(_, _) => true, + Ready(_) => false, + } + } } - #[derive(Clone)] pub struct ConnectionPool { - inner: Arc>, - logger: Logger, + inner: PoolState, pub shard: Shard, - state_tracker: PoolStateTracker, + state_tracker: StateTracker, } impl fmt::Debug for ConnectionPool { @@ -288,60 +218,35 @@ impl fmt::Debug for ConnectionPool { } } -/// The name of the pool, mostly for logging, and what purpose it serves. +/// The role of the pool, mostly for logging, and what purpose it serves. /// The main pool will always be called `main`, and can be used for reading /// and writing. Replica pools can only be used for reading, and don't /// require any setup (migrations etc.) 
-pub enum PoolName { +pub enum PoolRole { Main, Replica(String), } -impl PoolName { +impl PoolRole { fn as_str(&self) -> &str { match self { - PoolName::Main => "main", - PoolName::Replica(name) => name, + PoolRole::Main => "main", + PoolRole::Replica(name) => name, } } fn is_replica(&self) -> bool { match self { - PoolName::Main => false, - PoolName::Replica(_) => true, - } - } -} - -#[derive(Clone)] -struct PoolStateTracker { - available: Arc, -} - -impl PoolStateTracker { - fn new() -> Self { - Self { - available: Arc::new(AtomicBool::new(true)), + PoolRole::Main => false, + PoolRole::Replica(_) => true, } } - - fn mark_available(&self) { - self.available.store(true, Ordering::Relaxed); - } - - fn mark_unavailable(&self) { - self.available.store(false, Ordering::Relaxed); - } - - fn is_available(&self) -> bool { - self.available.load(Ordering::Relaxed) - } } impl ConnectionPool { fn create( shard_name: &str, - pool_name: PoolName, + pool_name: PoolRole, postgres_url: String, pool_size: u32, fdw_pool_size: Option, @@ -349,33 +254,28 @@ impl ConnectionPool { registry: Arc, coord: Arc, ) -> ConnectionPool { - let state_tracker = PoolStateTracker::new(); + let state_tracker = StateTracker::new(); let shard = Shard::new(shard_name.to_string()).expect("shard_name is a valid name for a shard"); - let pool_state = { - if pool_size == 0 { - PoolState::Disabled + let inner = { + let pool = PoolInner::create( + shard.clone(), + pool_name.as_str(), + postgres_url, + pool_size, + fdw_pool_size, + logger, + registry, + state_tracker.clone(), + ); + if pool_name.is_replica() { + PoolState::ready(Arc::new(pool)) } else { - let pool = PoolInner::create( - shard.clone(), - pool_name.as_str(), - postgres_url, - pool_size, - fdw_pool_size, - logger, - registry, - state_tracker.clone(), - ); - if pool_name.is_replica() { - PoolState::Ready(Arc::new(pool)) - } else { - PoolState::Created(Arc::new(pool), coord) - } + PoolState::created(Arc::new(pool), coord) } }; ConnectionPool { - inner: Arc::new(TimedMutex::new(pool_state, format!("pool-{}", shard_name))), - logger: logger.clone(), + inner, shard, state_tracker, } @@ -384,11 +284,7 @@ impl ConnectionPool { /// This is only used for `graphman` to ensure it doesn't run migrations /// or other setup steps pub fn skip_setup(&self) { - let mut guard = self.inner.lock(&self.logger); - match &*guard { - PoolState::Created(pool, _) => *guard = PoolState::Ready(pool.clone()), - PoolState::Ready(_) | PoolState::Disabled => { /* nothing to do */ } - } + self.inner.set_ready(); } /// Return a pool that is ready, i.e., connected to the database. 
If the @@ -396,7 +292,6 @@ impl ConnectionPool { /// or the pool is marked as unavailable, return /// `StoreError::DatabaseUnavailable` fn get_ready(&self) -> Result, StoreError> { - let mut guard = self.inner.lock(&self.logger); if !self.state_tracker.is_available() { // We know that trying to use this pool is pointless since the // database is not available, and will only lead to other @@ -405,16 +300,12 @@ impl ConnectionPool { return Err(StoreError::DatabaseUnavailable); } - match &*guard { - PoolState::Created(pool, servers) => { - pool.setup(servers.clone())?; - let pool2 = pool.clone(); - *guard = PoolState::Ready(pool.clone()); + match self.inner.get_ready() { + Ok(pool) => { self.state_tracker.mark_available(); - Ok(pool2) + Ok(pool) } - PoolState::Ready(pool) => Ok(pool.clone()), - PoolState::Disabled => Err(StoreError::DatabaseDisabled), + Err(e) => Err(e), } } @@ -493,53 +384,32 @@ impl ConnectionPool { self.get_ready()?.get_fdw(logger, timeout) } - pub fn connection_detail(&self) -> Result { - let pool = self.get_ready()?; - ForeignServer::new(pool.shard.clone(), &pool.postgres_url).map_err(|e| e.into()) - } - - /// Check that we can connect to the database - pub fn check(&self) -> bool { - true - } - - /// Setup the database for this pool. This includes configuring foreign - /// data wrappers for cross-shard communication, and running any pending - /// schema migrations for this database. - /// - /// # Panics - /// - /// If any errors happen during the migration, the process panics - pub async fn setup(&self) { - let pool = self.clone(); - graph::spawn_blocking_allow_panic(move || { - pool.get_ready().ok(); - }) - .await - // propagate panics - .unwrap(); + /// Get a connection from the pool for foreign data wrapper access if + /// one is available + pub fn try_get_fdw( + &self, + logger: &Logger, + timeout: Duration, + ) -> Option>> { + let Ok(inner) = self.get_ready() else { + return None; + }; + self.state_tracker + .ignore_timeout(|| inner.try_get_fdw(logger, timeout)) } - pub(crate) async fn query_permit(&self) -> Result { - let pool = match &*self.inner.lock(&self.logger) { - PoolState::Created(pool, _) | PoolState::Ready(pool) => pool.clone(), - PoolState::Disabled => { - return Err(StoreError::DatabaseDisabled); - } - }; + pub(crate) async fn query_permit(&self) -> QueryPermit { + let pool = self.inner.get_unready(); let start = Instant::now(); let permit = pool.query_permit().await; - Ok(QueryPermit { + QueryPermit { permit, wait: start.elapsed(), - }) + } } - pub(crate) fn wait_stats(&self) -> Result { - match &*self.inner.lock(&self.logger) { - PoolState::Created(pool, _) | PoolState::Ready(pool) => Ok(pool.wait_stats.clone()), - PoolState::Disabled => Err(StoreError::DatabaseDisabled), - } + pub(crate) fn wait_stats(&self) -> PoolWaitStats { + self.inner.get_unready().wait_stats.cheap_clone() } /// Mirror key tables from the primary into our own schema. We do this @@ -553,166 +423,6 @@ impl ConnectionPool { } } -fn brief_error_msg(error: &dyn std::error::Error) -> String { - // For 'Connection refused' errors, Postgres includes the IP and - // port number in the error message. We want to suppress that and - // only use the first line from the error message. 
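`query_permit` above is now infallible: it grabs the inner pool regardless of setup state, waits on the pool's semaphore, and reports how long the caller waited. A minimal sketch of that timing pattern follows, assuming a `tokio` runtime with the `rt` and `macros` features; the `QueryPermit` struct here is a local stand-in for the one in `graph::components::store`.

```rust
// Measure how long the caller waited for a semaphore permit and hand both back.
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::{OwnedSemaphorePermit, Semaphore};

struct QueryPermit {
    permit: OwnedSemaphorePermit,
    wait: Duration,
}

async fn query_permit(semaphore: Arc<Semaphore>) -> QueryPermit {
    let start = Instant::now();
    // unwrap: the semaphore is never closed in this sketch
    let permit = semaphore.acquire_owned().await.unwrap();
    QueryPermit { permit, wait: start.elapsed() }
}

#[tokio::main]
async fn main() {
    let semaphore = Arc::new(Semaphore::new(1));
    let permit = query_permit(semaphore).await;
    println!("waited {:?} for a permit", permit.wait);
    drop(permit.permit);
}
```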
For more detailed - // analysis, 'Connection refused' manifests as a - // `ConnectionError(BadConnection("could not connect to server: - // Connection refused.."))` - error - .to_string() - .split('\n') - .next() - .unwrap_or("no error details provided") - .to_string() -} - -#[derive(Clone)] -struct ErrorHandler { - logger: Logger, - counter: Counter, - state_tracker: PoolStateTracker, -} - -impl ErrorHandler { - fn new(logger: Logger, counter: Counter, state_tracker: PoolStateTracker) -> Self { - Self { - logger, - counter, - state_tracker, - } - } -} -impl std::fmt::Debug for ErrorHandler { - fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result { - fmt::Result::Ok(()) - } -} - -impl r2d2::HandleError for ErrorHandler { - fn handle_error(&self, error: r2d2::Error) { - let msg = brief_error_msg(&error); - - // Don't count canceling statements for timeouts etc. as a - // connection error. Unfortunately, we only have the textual error - // and need to infer whether the error indicates that the database - // is down or if something else happened. When querying a replica, - // these messages indicate that a query was canceled because it - // conflicted with replication, but does not indicate that there is - // a problem with the database itself. - // - // This check will break if users run Postgres (or even graph-node) - // in a locale other than English. In that case, their database will - // be marked as unavailable even though it is perfectly fine. - if msg.contains("canceling statement") - || msg.contains("terminating connection due to conflict with recovery") - { - return; - } - - self.counter.inc(); - if self.state_tracker.is_available() { - error!(self.logger, "Postgres connection error"; "error" => msg); - } - self.state_tracker.mark_unavailable(); - } -} - -#[derive(Clone)] -struct EventHandler { - logger: Logger, - count_gauge: Gauge, - wait_gauge: Gauge, - size_gauge: Gauge, - wait_stats: PoolWaitStats, - state_tracker: PoolStateTracker, -} - -impl EventHandler { - fn new( - logger: Logger, - registry: Arc, - wait_stats: PoolWaitStats, - const_labels: HashMap, - state_tracker: PoolStateTracker, - ) -> Self { - let count_gauge = registry - .global_gauge( - "store_connection_checkout_count", - "The number of Postgres connections currently checked out", - const_labels.clone(), - ) - .expect("failed to create `store_connection_checkout_count` counter"); - let wait_gauge = registry - .global_gauge( - "store_connection_wait_time_ms", - "Average connection wait time", - const_labels.clone(), - ) - .expect("failed to create `store_connection_wait_time_ms` counter"); - let size_gauge = registry - .global_gauge( - "store_connection_pool_size_count", - "Overall size of the connection pool", - const_labels, - ) - .expect("failed to create `store_connection_pool_size_count` counter"); - EventHandler { - logger, - count_gauge, - wait_gauge, - wait_stats, - size_gauge, - state_tracker, - } - } - - fn add_conn_wait_time(&self, duration: Duration) { - self.wait_stats - .write() - .unwrap() - .add_and_register(duration, &self.wait_gauge); - } -} - -impl std::fmt::Debug for EventHandler { - fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result { - fmt::Result::Ok(()) - } -} - -impl HandleEvent for EventHandler { - fn handle_acquire(&self, _: e::AcquireEvent) { - self.size_gauge.inc(); - self.state_tracker.mark_available(); - } - - fn handle_release(&self, _: e::ReleaseEvent) { - self.size_gauge.dec(); - } - - fn handle_checkout(&self, event: e::CheckoutEvent) { - self.count_gauge.inc(); - 
self.add_conn_wait_time(event.duration()); - self.state_tracker.mark_available(); - } - - fn handle_timeout(&self, event: e::TimeoutEvent) { - self.add_conn_wait_time(event.timeout()); - if self.state_tracker.is_available() { - error!(self.logger, "Connection checkout timed out"; - "wait_ms" => event.timeout().as_millis() - ) - } - self.state_tracker.mark_unavailable(); - } - - fn handle_checkin(&self, _: e::CheckinEvent) { - self.count_gauge.dec(); - } -} - #[derive(Clone)] pub struct PoolInner { logger: Logger, @@ -751,8 +461,10 @@ impl PoolInner { fdw_pool_size: Option, logger: &Logger, registry: Arc, - state_tracker: PoolStateTracker, + state_tracker: StateTracker, ) -> PoolInner { + check_mirrored_tables(); + let logger_store = logger.new(o!("component" => "Store")); let logger_pool = logger.new(o!("component" => "ConnectionPool")); let const_labels = { @@ -935,18 +647,21 @@ impl PoolInner { self.pool.get().map_err(|_| StoreError::DatabaseUnavailable) } - pub fn get_with_timeout_warning( + /// Get the pool for fdw connections. It is an error if none is configured + fn fdw_pool( &self, logger: &Logger, - ) -> Result>, StoreError> { - loop { - match self.pool.get_timeout(ENV_VARS.store.connection_timeout) { - Ok(conn) => return Ok(conn), - Err(e) => error!(logger, "Error checking out connection, retrying"; - "error" => brief_error_msg(&e), - ), + ) -> Result<&Pool>, StoreError> { + let pool = match &self.fdw_pool { + Some(pool) => pool, + None => { + const MSG: &str = + "internal error: trying to get fdw connection on a pool that doesn't have any"; + error!(logger, "{}", MSG); + return Err(internal_error!(MSG)); } - } + }; + Ok(pool) } /// Get a connection from the pool for foreign data wrapper access; @@ -964,15 +679,7 @@ impl PoolInner { where F: FnMut() -> bool, { - let pool = match &self.fdw_pool { - Some(pool) => pool, - None => { - const MSG: &str = - "internal error: trying to get fdw connection on a pool that doesn't have any"; - error!(logger, "{}", MSG); - return Err(constraint_violation!(MSG)); - } - }; + let pool = self.fdw_pool(logger)?; loop { match pool.get() { Ok(conn) => return Ok(conn), @@ -985,6 +692,27 @@ impl PoolInner { } } + /// Get a connection from the fdw pool if one is available. We wait for + /// `timeout` for a connection which should be set just big enough to + /// allow establishing a connection + pub fn try_get_fdw( + &self, + logger: &Logger, + timeout: Duration, + ) -> Option>> { + // Any error trying to get a connection is treated as "couldn't get + // a connection in time". If there is a serious error with the + // database, e.g., because it's not available, the next database + // operation will run into it and report it. + let Ok(fdw_pool) = self.fdw_pool(logger) else { + return None; + }; + let Ok(conn) = fdw_pool.get_timeout(timeout) else { + return None; + }; + Some(conn) + } + pub fn connection_detail(&self) -> Result { ForeignServer::new(self.shard.clone(), &self.postgres_url).map_err(|e| e.into()) } @@ -998,73 +726,25 @@ impl PoolInner { .unwrap_or(false) } - /// Setup the database for this pool. This includes configuring foreign - /// data wrappers for cross-shard communication, and running any pending - /// schema migrations for this database. - /// - /// Returns `StoreError::DatabaseUnavailable` if we can't connect to the - /// database. Any other error causes a panic. 
- /// - /// # Panics - /// - /// If any errors happen during the migration, the process panics - fn setup(&self, coord: Arc) -> Result<(), StoreError> { - fn die(logger: &Logger, msg: &'static str, err: &dyn std::fmt::Display) -> ! { - crit!(logger, "{}", msg; "error" => format!("{:#}", err)); - panic!("{}: {}", msg, err); - } - - let pool = self.clone(); - let mut conn = self.get().map_err(|_| StoreError::DatabaseUnavailable)?; - - let start = Instant::now(); - - advisory_lock::lock_migration(&mut conn) - .unwrap_or_else(|err| die(&pool.logger, "failed to get migration lock", &err)); - // This code can cause a race in database setup: if pool A has had - // schema changes and pool B then tries to map tables from pool A, - // but does so before the concurrent thread running this code for - // pool B has at least finished `configure_fdw`, mapping tables will - // fail. In that case, the node must be restarted. The restart is - // guaranteed because this failure will lead to a panic in the setup - // for pool A - // - // This code can also leave the table mappings in a state where they - // have not been updated if the process is killed after migrating - // the schema but before finishing remapping in all shards. - // Addressing that would require keeping track of the need to remap - // in the database instead of just in memory - let result = pool - .configure_fdw(coord.servers.as_ref()) - .and_then(|()| pool.drop_cross_shard_views()) - .and_then(|()| migrate_schema(&pool.logger, &mut conn)) - .and_then(|count| { - pool.create_cross_shard_views(coord.servers.as_ref()) - .map(|()| count) - }); - debug!(&pool.logger, "Release migration lock"); - advisory_lock::unlock_migration(&mut conn).unwrap_or_else(|err| { - die(&pool.logger, "failed to release migration lock", &err); - }); - let result = result.and_then(|count| coord.propagate(&pool, count)); - result.unwrap_or_else(|err| die(&pool.logger, "migrations failed", &err)); - - // Locale check - if let Err(msg) = catalog::Locale::load(&mut conn)?.suitable() { - if &self.shard == &*PRIMARY_SHARD && primary::is_empty(&mut conn)? { - die( - &pool.logger, + fn locale_check( + &self, + logger: &Logger, + mut conn: PooledConnection>, + ) -> Result<(), StoreError> { + Ok( + if let Err(msg) = catalog::Locale::load(&mut conn)?.suitable() { + if &self.shard == &*PRIMARY_SHARD && primary::is_empty(&mut conn)? { + const MSG: &str = "Database does not use C locale. \ - Please check the graph-node documentation for how to set up the database locale", - &msg, - ); - } else { - warn!(pool.logger, "{}.\nPlease check the graph-node documentation for how to set up the database locale", msg); - } - } + Please check the graph-node documentation for how to set up the database locale"; - debug!(&pool.logger, "Setup finished"; "setup_time_s" => start.elapsed().as_secs()); - Ok(()) + crit!(logger, "{}: {}", MSG, msg); + panic!("{}: {}", MSG, msg); + } else { + warn!(logger, "{}.\nPlease check the graph-node documentation for how to set up the database locale", msg); + } + }, + ) } pub(crate) async fn query_permit(&self) -> tokio::sync::OwnedSemaphorePermit { @@ -1094,6 +774,28 @@ impl PoolInner { }) } + /// Do the part of database setup that only affects this pool. Those + /// steps are + /// 1. Configuring foreign servers and user mappings for talking to the + /// other shards + /// 2. Migrating the schema to the latest version + /// 3. 
Checking that the locale is set to C + async fn migrate( + self: Arc, + servers: &[ForeignServer], + ) -> Result { + self.configure_fdw(servers)?; + let mut conn = self.get()?; + let (this, count) = conn.transaction(|conn| -> Result<_, StoreError> { + let count = migrate_schema(&self.logger, conn)?; + Ok((self, count)) + })?; + + this.locale_check(&this.logger, conn)?; + + Ok(count) + } + /// If this is the primary shard, drop the namespace `CROSS_SHARD_NSP` fn drop_cross_shard_views(&self) -> Result<(), StoreError> { if self.shard != *PRIMARY_SHARD { @@ -1103,10 +805,7 @@ impl PoolInner { info!(&self.logger, "Dropping cross-shard views"); let mut conn = self.get()?; conn.transaction(|conn| { - let query = format!( - "drop schema if exists {} cascade", - ForeignServer::CROSS_SHARD_NSP - ); + let query = format!("drop schema if exists {} cascade", CROSS_SHARD_NSP); conn.batch_execute(&query)?; Ok(()) }) @@ -1137,14 +836,17 @@ impl PoolInner { return Ok(()); } - info!(&self.logger, "Creating cross-shard views"); let mut conn = self.get()?; + let sharded = Namespace::special(CROSS_SHARD_NSP); + if catalog::has_namespace(&mut conn, &sharded)? { + // We dropped the namespace before, but another node must have + // recreated it in the meantime so we don't need to do anything + return Ok(()); + } + info!(&self.logger, "Creating cross-shard views"); conn.transaction(|conn| { - let query = format!( - "create schema if not exists {}", - ForeignServer::CROSS_SHARD_NSP - ); + let query = format!("create schema {}", CROSS_SHARD_NSP); conn.batch_execute(&query)?; for (src_nsp, src_tables) in SHARDED_TABLES { // Pairs of (shard, nsp) for all servers @@ -1154,7 +856,7 @@ impl PoolInner { conn, src_nsp, src_table, - ForeignServer::CROSS_SHARD_NSP, + CROSS_SHARD_NSP, &nsps, )?; conn.batch_execute(&create_view)?; @@ -1178,9 +880,9 @@ impl PoolInner { .await } - // The foreign server `server` had schema changes, and we therefore need - // to remap anything that we are importing via fdw to make sure we are - // using this updated schema + /// The foreign server `server` had schema changes, and we therefore + /// need to remap anything that we are importing via fdw to make sure we + /// are using this updated schema pub fn remap(&self, server: &ForeignServer) -> Result<(), StoreError> { if &server.shard == &*PRIMARY_SHARD { info!(&self.logger, "Mapping primary"); @@ -1198,6 +900,15 @@ impl PoolInner { } Ok(()) } + + pub fn needs_remap(&self, server: &ForeignServer) -> Result { + if &server.shard == &self.shard { + return Ok(false); + } + + let mut conn = self.get()?; + server.needs_remap(&mut conn) + } } pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!("./migrations"); @@ -1211,10 +922,6 @@ impl MigrationCount { fn had_migrations(&self) -> bool { self.old != self.new } - - fn is_new(&self) -> bool { - self.old == 0 - } } /// Run all schema migrations. 
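The new `PoolInner::migrate` above covers only the per-shard part of setup: it maps the other shards via fdw, runs pending migrations inside a transaction, and checks the locale, returning a `MigrationCount`. Propagating a schema change to the other shards then goes through `needs_remap`/`remap`. The sketch below is purely illustrative of how those pieces could fit together; it is not the coordinator code in this PR, and it omits the migration advisory lock and the cross-shard view handling.

```rust
use std::sync::Arc;

use graph::cheap_clone::CheapClone;
use graph::internal_error;
use graph::prelude::StoreError;

// Hypothetical driver; `pools` and `servers` are assumed inputs, one entry
// per configured shard.
async fn migrate_all(
    pools: &[Arc<PoolInner>],
    servers: &[ForeignServer],
) -> Result<(), StoreError> {
    for pool in pools {
        // Per-shard step: fdw mappings, schema migrations, locale check
        let count = pool.cheap_clone().migrate(servers).await?;

        // If this shard's schema changed, every other shard that imports its
        // tables through fdw has to recreate those imports
        if count.had_migrations() {
            let server = servers
                .iter()
                .find(|srv| srv.shard == pool.shard)
                .ok_or_else(|| internal_error!("unknown shard {}", pool.shard))?;
            for other in pools.iter().filter(|p| p.shard != pool.shard) {
                if other.needs_remap(server)? {
                    other.remap(server)?;
                }
            }
        }
    }
    Ok(())
}
```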
@@ -1254,124 +961,9 @@ fn migrate_schema(logger: &Logger, conn: &mut PgConnection) -> Result>>, - servers: Arc>, -} - -impl PoolCoordinator { - pub fn new(servers: Arc>) -> Self { - Self { - pools: Mutex::new(HashMap::new()), - servers, - } - } - - pub fn create_pool( - self: Arc, - logger: &Logger, - name: &str, - pool_name: PoolName, - postgres_url: String, - pool_size: u32, - fdw_pool_size: Option, - registry: Arc, - ) -> ConnectionPool { - let is_writable = !pool_name.is_replica(); - - let pool = ConnectionPool::create( - name, - pool_name, - postgres_url, - pool_size, - fdw_pool_size, - logger, - registry, - self.cheap_clone(), - ); - - // Ignore non-writable pools (replicas), there is no need (and no - // way) to coordinate schema changes with them - if is_writable { - // It is safe to take this lock here since nobody has seen the pool - // yet. We remember the `PoolInner` so that later, when we have to - // call `remap()`, we do not have to take this lock as that will be - // already held in `get_ready()` - match &*pool.inner.lock(logger) { - PoolState::Created(inner, _) | PoolState::Ready(inner) => { - self.pools - .lock() - .unwrap() - .insert(pool.shard.clone(), inner.clone()); - } - PoolState::Disabled => { /* nothing to do */ } - } - } - pool - } - - /// Propagate changes to the schema in `shard` to all other pools. Those - /// other pools will then recreate any tables that they imported from - /// `shard`. If `pool` is a new shard, we also map all other shards into - /// it. - /// - /// This tries to take the migration lock and must therefore be run from - /// code that does _not_ hold the migration lock as it will otherwise - /// deadlock - fn propagate(&self, pool: &PoolInner, count: MigrationCount) -> Result<(), StoreError> { - // pool is a new shard, map all other shards into it - if count.is_new() { - for server in self.servers.iter() { - pool.remap(server)?; - } - } - // pool had schema changes, refresh the import from pool into all other shards - if count.had_migrations() { - let server = self.server(&pool.shard)?; - for pool in self.pools.lock().unwrap().values() { - let mut conn = pool.get()?; - let remap_res = { - advisory_lock::lock_migration(&mut conn)?; - let res = pool.remap(server); - advisory_lock::unlock_migration(&mut conn)?; - res - }; - if let Err(e) = remap_res { - error!(pool.logger, "Failed to map imports from {}", server.shard; "error" => e.to_string()); - return Err(e); - } - } - } - Ok(()) - } - - pub fn pools(&self) -> Vec> { - self.pools.lock().unwrap().values().cloned().collect() - } - - pub fn servers(&self) -> Arc> { - self.servers.clone() - } - - fn server(&self, shard: &Shard) -> Result<&ForeignServer, StoreError> { - self.servers - .iter() - .find(|server| &server.shard == shard) - .ok_or_else(|| constraint_violation!("unknown shard {shard}")) - } -} diff --git a/store/postgres/src/pool/state_tracker.rs b/store/postgres/src/pool/state_tracker.rs new file mode 100644 index 00000000000..231a66a9292 --- /dev/null +++ b/store/postgres/src/pool/state_tracker.rs @@ -0,0 +1,224 @@ +//! 
Event/error handlers for our r2d2 pools + +use diesel::r2d2::{self, event as e, HandleEvent}; + +use graph::prelude::error; +use graph::prelude::Counter; +use graph::prelude::Gauge; +use graph::prelude::MetricsRegistry; +use graph::prelude::PoolWaitStats; +use graph::slog::Logger; + +use std::collections::HashMap; +use std::fmt; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; +use std::sync::Arc; +use std::time::Duration; + +/// Track whether a database is available or not using the event and error +/// handlers from this module. The pool must be set up with these handlers +/// when it is created +#[derive(Clone)] +pub(super) struct StateTracker { + available: Arc, + ignore_timeout: Arc, +} + +impl StateTracker { + pub(super) fn new() -> Self { + Self { + available: Arc::new(AtomicBool::new(true)), + ignore_timeout: Arc::new(AtomicBool::new(false)), + } + } + + pub(super) fn mark_available(&self) { + self.available.store(true, Ordering::Relaxed); + } + + fn mark_unavailable(&self) { + self.available.store(false, Ordering::Relaxed); + } + + pub(super) fn is_available(&self) -> bool { + self.available.load(Ordering::Relaxed) + } + + fn timeout_is_ignored(&self) -> bool { + self.ignore_timeout.load(Ordering::Relaxed) + } + + pub(super) fn ignore_timeout(&self, f: F) -> R + where + F: FnOnce() -> R, + { + self.ignore_timeout.store(true, Ordering::Relaxed); + let res = f(); + self.ignore_timeout.store(false, Ordering::Relaxed); + res + } +} + +#[derive(Clone)] +pub(super) struct ErrorHandler { + logger: Logger, + counter: Counter, + state_tracker: StateTracker, +} + +impl ErrorHandler { + pub(super) fn new(logger: Logger, counter: Counter, state_tracker: StateTracker) -> Self { + Self { + logger, + counter, + state_tracker, + } + } +} +impl std::fmt::Debug for ErrorHandler { + fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result { + fmt::Result::Ok(()) + } +} + +impl r2d2::HandleError for ErrorHandler { + fn handle_error(&self, error: r2d2::Error) { + let msg = brief_error_msg(&error); + + // Don't count canceling statements for timeouts etc. as a + // connection error. Unfortunately, we only have the textual error + // and need to infer whether the error indicates that the database + // is down or if something else happened. When querying a replica, + // these messages indicate that a query was canceled because it + // conflicted with replication, but does not indicate that there is + // a problem with the database itself. + // + // This check will break if users run Postgres (or even graph-node) + // in a locale other than English. In that case, their database will + // be marked as unavailable even though it is perfectly fine. 
+ if msg.contains("canceling statement") + || msg.contains("terminating connection due to conflict with recovery") + { + return; + } + + self.counter.inc(); + if self.state_tracker.is_available() { + error!(self.logger, "Postgres connection error"; "error" => msg); + } + self.state_tracker.mark_unavailable(); + } +} + +#[derive(Clone)] +pub(super) struct EventHandler { + logger: Logger, + count_gauge: Gauge, + wait_gauge: Gauge, + size_gauge: Gauge, + wait_stats: PoolWaitStats, + state_tracker: StateTracker, +} + +impl EventHandler { + pub(super) fn new( + logger: Logger, + registry: Arc, + wait_stats: PoolWaitStats, + const_labels: HashMap, + state_tracker: StateTracker, + ) -> Self { + let count_gauge = registry + .global_gauge( + "store_connection_checkout_count", + "The number of Postgres connections currently checked out", + const_labels.clone(), + ) + .expect("failed to create `store_connection_checkout_count` counter"); + let wait_gauge = registry + .global_gauge( + "store_connection_wait_time_ms", + "Average connection wait time", + const_labels.clone(), + ) + .expect("failed to create `store_connection_wait_time_ms` counter"); + let size_gauge = registry + .global_gauge( + "store_connection_pool_size_count", + "Overall size of the connection pool", + const_labels, + ) + .expect("failed to create `store_connection_pool_size_count` counter"); + EventHandler { + logger, + count_gauge, + wait_gauge, + wait_stats, + size_gauge, + state_tracker, + } + } + + fn add_conn_wait_time(&self, duration: Duration) { + self.wait_stats + .write() + .unwrap() + .add_and_register(duration, &self.wait_gauge); + } +} + +impl std::fmt::Debug for EventHandler { + fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result { + fmt::Result::Ok(()) + } +} + +impl HandleEvent for EventHandler { + fn handle_acquire(&self, _: e::AcquireEvent) { + self.size_gauge.inc(); + self.state_tracker.mark_available(); + } + + fn handle_release(&self, _: e::ReleaseEvent) { + self.size_gauge.dec(); + } + + fn handle_checkout(&self, event: e::CheckoutEvent) { + self.count_gauge.inc(); + self.add_conn_wait_time(event.duration()); + self.state_tracker.mark_available(); + } + + fn handle_timeout(&self, event: e::TimeoutEvent) { + if self.state_tracker.timeout_is_ignored() { + return; + } + self.add_conn_wait_time(event.timeout()); + if self.state_tracker.is_available() { + error!(self.logger, "Connection checkout timed out"; + "wait_ms" => event.timeout().as_millis() + ) + } + self.state_tracker.mark_unavailable(); + } + + fn handle_checkin(&self, _: e::CheckinEvent) { + self.count_gauge.dec(); + } +} + +fn brief_error_msg(error: &dyn std::error::Error) -> String { + // For 'Connection refused' errors, Postgres includes the IP and + // port number in the error message. We want to suppress that and + // only use the first line from the error message. For more detailed + // analysis, 'Connection refused' manifests as a + // `ConnectionError(BadConnection("could not connect to server: + // Connection refused.."))` + error + .to_string() + .split('\n') + .next() + .unwrap_or("no error details provided") + .to_string() +} diff --git a/store/postgres/src/primary.rs b/store/postgres/src/primary.rs index ab6be9ee0ba..6b22b8c8e35 100644 --- a/store/postgres/src/primary.rs +++ b/store/postgres/src/primary.rs @@ -3,10 +3,10 @@ //! for the primary shard. 
use crate::{ block_range::UNVERSIONED_RANGE, - connection_pool::{ConnectionPool, ForeignServer}, detail::DeploymentDetail, + pool::PRIMARY_PUBLIC, subgraph_store::{unused, Shard, PRIMARY_SHARD}, - NotificationSender, + ConnectionPool, ForeignServer, NotificationSender, }; use diesel::{ connection::SimpleConnection, @@ -31,11 +31,12 @@ use diesel::{ }; use graph::{ components::store::DeploymentLocator, - constraint_violation, data::{ store::scalar::ToPrimitive, subgraph::{status, DeploymentFeatures}, }, + derive::CheapClone, + internal_error, prelude::{ anyhow, chrono::{DateTime, Utc}, @@ -53,9 +54,9 @@ use maybe_owned::MaybeOwnedMut; use std::{ borrow::Borrow, collections::HashMap, - convert::TryFrom, - convert::TryInto, + convert::{TryFrom, TryInto}, fmt, + sync::Arc, time::{SystemTime, UNIX_EPOCH}, }; @@ -266,6 +267,13 @@ impl Namespace { Namespace(format!("prune{id}")) } + /// A namespace that is not a deployment namespace. This is used for + /// special namespaces we use. No checking is done on `s` and the caller + /// must ensure it's a valid namespace name + pub fn special(s: impl Into) -> Self { + Namespace(s.into()) + } + pub fn as_str(&self) -> &str { &self.0 } @@ -376,9 +384,9 @@ impl TryFrom for Site { fn try_from(schema: Schema) -> Result { let deployment = DeploymentHash::new(&schema.subgraph) - .map_err(|s| constraint_violation!("Invalid deployment id {}", s))?; + .map_err(|s| internal_error!("Invalid deployment id {}", s))?; let namespace = Namespace::new(schema.name.clone()).map_err(|nsp| { - constraint_violation!( + internal_error!( "Invalid schema name {} for deployment {}", nsp, &schema.subgraph @@ -442,8 +450,9 @@ mod queries { use diesel::sql_types::Text; use graph::prelude::NodeId; use graph::{ - constraint_violation, + components::store::DeploymentId as GraphDeploymentId, data::subgraph::status, + internal_error, prelude::{DeploymentHash, StoreError, SubgraphName}, }; use std::{collections::HashMap, convert::TryFrom, convert::TryInto}; @@ -502,7 +511,7 @@ mod queries { .optional()?; match id { Some(id) => DeploymentHash::new(id) - .map_err(|id| constraint_violation!("illegal deployment id: {}", id)), + .map_err(|id| internal_error!("illegal deployment id: {}", id)), None => Err(StoreError::DeploymentNotFound(name.to_string())), } } @@ -638,18 +647,18 @@ mod queries { conn: &mut PgConnection, infos: &mut [status::Info], ) -> Result<(), StoreError> { - let ids: Vec<_> = infos.iter().map(|info| &info.subgraph).collect(); + let ids: Vec<_> = infos.iter().map(|info| &info.id).collect(); let nodes: HashMap<_, _> = a::table .inner_join(ds::table.on(ds::id.eq(a::id))) - .filter(ds::subgraph.eq_any(ids)) - .select((ds::subgraph, a::node_id, a::paused_at.is_not_null())) - .load::<(String, String, bool)>(conn)? + .filter(ds::id.eq_any(ids)) + .select((ds::id, a::node_id, a::paused_at.is_not_null())) + .load::<(GraphDeploymentId, String, bool)>(conn)? .into_iter() - .map(|(subgraph, node, paused)| (subgraph, (node, paused))) + .map(|(id, node, paused)| (id, (node, paused))) .collect(); for info in infos { - info.node = nodes.get(&info.subgraph).map(|(node, _)| node.clone()); - info.paused = nodes.get(&info.subgraph).map(|(_, paused)| *paused); + info.node = nodes.get(&info.id).map(|(node, _)| node.clone()); + info.paused = nodes.get(&info.id).map(|(_, paused)| *paused); } Ok(()) } @@ -665,7 +674,7 @@ mod queries { .optional()? 
.map(|node| { NodeId::new(&node).map_err(|()| { - constraint_violation!( + internal_error!( "invalid node id `{}` in assignment for `{}`", node, site.deployment @@ -690,7 +699,7 @@ mod queries { .optional()? .map(|(node, ts)| { let node_id = NodeId::new(&node).map_err(|()| { - constraint_violation!( + internal_error!( "invalid node id `{}` in assignment for `{}`", node, site.deployment @@ -829,7 +838,7 @@ impl<'a> Connection<'a> { DeploymentHash::new(hash) .map(|hash| AssignmentChange::removed(DeploymentLocator::new(id.into(), hash))) .map_err(|id| { - StoreError::ConstraintViolation(format!( + StoreError::InternalError(format!( "invalid id `{}` for deployment assignment", id )) @@ -1310,7 +1319,7 @@ impl<'a> Connection<'a> { .cloned() .ok_or_else(|| anyhow!("failed to read schema name for {} back", deployment))?; let namespace = Namespace::new(namespace).map_err(|name| { - constraint_violation!("Generated database schema name {} is invalid", name) + internal_error!("Generated database schema name {} is invalid", name) })?; Ok(Site { @@ -1514,7 +1523,7 @@ impl<'a> Connection<'a> { .transpose() // This can't really happen since we filtered by valid NodeId's .map_err(|node| { - constraint_violation!("database has assignment for illegal node name {:?}", node) + internal_error!("database has assignment for illegal node name {:?}", node) }) } @@ -1551,7 +1560,7 @@ impl<'a> Connection<'a> { .map(|(shard, _)| Shard::new(shard.to_string())) .transpose() // This can't really happen since we filtered by valid shards - .map_err(|e| constraint_violation!("database has illegal shard name: {}", e)) + .map_err(|e| internal_error!("database has illegal shard name: {}", e)) } #[cfg(debug_assertions)] @@ -1721,10 +1730,7 @@ impl<'a> Connection<'a> { let ts = chrono::offset::Local::now() .checked_sub_signed(duration) .ok_or_else(|| { - StoreError::ConstraintViolation(format!( - "duration {} is too large", - duration - )) + StoreError::InternalError(format!("duration {} is too large", duration)) })?; Ok(u::table .filter(u::removed_at.is_null()) @@ -1819,6 +1825,52 @@ impl<'a> Connection<'a> { } } +/// A limited interface to query the primary database. +#[derive(Clone, CheapClone)] +pub struct Primary { + pool: Arc, +} + +impl Primary { + pub fn new(pool: Arc) -> Self { + // This really indicates a programming error + if pool.shard != *PRIMARY_SHARD { + panic!("Primary pool must be the primary shard"); + } + + Primary { pool } + } + + /// Return `true` if the site is the source of a copy operation. The copy + /// operation might be just queued or in progress already. This method will + /// block until a fdw connection becomes available. 
+ pub fn is_source(&self, site: &Site) -> Result { + use active_copies as ac; + + let mut conn = self.pool.get()?; + + select(diesel::dsl::exists( + ac::table + .filter(ac::src.eq(site.id)) + .filter(ac::cancelled_at.is_null()), + )) + .get_result::(&mut conn) + .map_err(StoreError::from) + } + + pub fn is_copy_cancelled(&self, dst: &Site) -> Result { + use active_copies as ac; + + let mut conn = self.pool.get()?; + + ac::table + .filter(ac::dst.eq(dst.id)) + .select(ac::cancelled_at.is_not_null()) + .get_result::(&mut conn) + .map_err(StoreError::from) + } +} + /// Return `true` if we deem this installation to be empty, defined as /// having no deployments and no subgraph names in the database pub fn is_empty(conn: &mut PgConnection) -> Result { @@ -1839,6 +1891,20 @@ pub struct Mirror { } impl Mirror { + // The tables that we mirror + // + // `chains` needs to be mirrored before `deployment_schemas` because + // of the fk constraint on `deployment_schemas.network`. We don't + // care much about mirroring `active_copies` but it has a fk + // constraint on `deployment_schemas` and is tiny, therefore it's + // easiest to just mirror it + pub(crate) const PUBLIC_TABLES: [&str; 3] = ["chains", "deployment_schemas", "active_copies"]; + pub(crate) const SUBGRAPHS_TABLES: [&str; 3] = [ + "subgraph_deployment_assignment", + "subgraph", + "subgraph_version", + ]; + pub fn new(pools: &HashMap) -> Mirror { let primary = pools .get(&PRIMARY_SHARD) @@ -1895,18 +1961,6 @@ impl Mirror { conn: &mut PgConnection, handle: &CancelHandle, ) -> Result<(), StoreError> { - // `chains` needs to be mirrored before `deployment_schemas` because - // of the fk constraint on `deployment_schemas.network`. We don't - // care much about mirroring `active_copies` but it has a fk - // constraint on `deployment_schemas` and is tiny, therefore it's - // easiest to just mirror it - const PUBLIC_TABLES: [&str; 3] = ["chains", "deployment_schemas", "active_copies"]; - const SUBGRAPHS_TABLES: [&str; 3] = [ - "subgraph_deployment_assignment", - "subgraph", - "subgraph_version", - ]; - fn run_query(conn: &mut PgConnection, query: String) -> Result<(), StoreError> { conn.batch_execute(&query).map_err(StoreError::from) } @@ -1938,11 +1992,11 @@ impl Mirror { // Truncate all tables at once, otherwise truncation can fail // because of foreign key constraints - let tables = PUBLIC_TABLES + let tables = Self::PUBLIC_TABLES .iter() .map(|name| (NAMESPACE_PUBLIC, name)) .chain( - SUBGRAPHS_TABLES + Self::SUBGRAPHS_TABLES .iter() .map(|name| (NAMESPACE_SUBGRAPHS, name)), ) @@ -1953,13 +2007,8 @@ impl Mirror { check_cancel()?; // Repopulate `PUBLIC_TABLES` by copying their data wholesale - for table_name in PUBLIC_TABLES { - copy_table( - conn, - ForeignServer::PRIMARY_PUBLIC, - NAMESPACE_PUBLIC, - table_name, - )?; + for table_name in Self::PUBLIC_TABLES { + copy_table(conn, PRIMARY_PUBLIC, NAMESPACE_PUBLIC, table_name)?; check_cancel()?; } diff --git a/store/postgres/src/query_store.rs b/store/postgres/src/query_store.rs index 8fc2da822e4..fe7d084030b 100644 --- a/store/postgres/src/query_store.rs +++ b/store/postgres/src/query_store.rs @@ -112,7 +112,7 @@ impl QueryStoreTrait for QueryStore { self.chain_store.block_numbers(block_hashes).await } - fn wait_stats(&self) -> Result { + fn wait_stats(&self) -> PoolWaitStats { self.store.wait_stats(self.replica_id) } @@ -137,7 +137,7 @@ impl QueryStoreTrait for QueryStore { &self.site.network } - async fn query_permit(&self) -> Result { + async fn query_permit(&self) -> QueryPermit { 
self.store.query_permit(self.replica_id).await } diff --git a/store/postgres/src/relational.rs b/store/postgres/src/relational.rs index d148060efc2..35e35a35746 100644 --- a/store/postgres/src/relational.rs +++ b/store/postgres/src/relational.rs @@ -16,7 +16,7 @@ mod query_tests; pub(crate) mod dsl; pub(crate) mod index; -mod prune; +pub(crate) mod prune; mod rollup; pub(crate) mod value; @@ -32,12 +32,11 @@ use graph::blockchain::block_stream::{EntityOperationKind, EntitySourceOperation use graph::blockchain::BlockTime; use graph::cheap_clone::CheapClone; use graph::components::store::write::{RowGroup, WriteChunk}; -use graph::components::subgraph::PoICausalityRegion; -use graph::constraint_violation; use graph::data::graphql::TypeExt as _; use graph::data::query::Trace; use graph::data::value::Word; use graph::data_source::CausalityRegion; +use graph::internal_error; use graph::prelude::{q, EntityQuery, StopwatchMetrics, ENV_VARS}; use graph::schema::{ EntityKey, EntityType, Field, FulltextConfig, FulltextDefinition, InputSchema, @@ -69,7 +68,7 @@ use crate::{ }, }; use graph::components::store::{AttributeNames, DerivedEntityQuery}; -use graph::data::store::{Id, IdList, IdType, BYTES_SCALAR}; +use graph::data::store::{IdList, IdType, BYTES_SCALAR}; use graph::data::subgraph::schema::POI_TABLE; use graph::prelude::{ anyhow, info, BlockNumber, DeploymentHash, Entity, EntityOperation, Logger, @@ -78,7 +77,7 @@ use graph::prelude::{ use crate::block_range::{BoundSide, BLOCK_COLUMN, BLOCK_RANGE_COLUMN}; pub use crate::catalog::Catalog; -use crate::connection_pool::ForeignServer; +use crate::ForeignServer; use crate::{catalog, deployment}; use self::rollup::Rollup; @@ -504,7 +503,7 @@ impl Layout { let key = entity_type.key_in(entity_data.id(), CausalityRegion::from_entity(&entity_data)); if entities.contains_key(&key) { - return Err(constraint_violation!( + return Err(internal_error!( "duplicate entity {}[{}] in result set, block = {}", key.entity_type, key.entity_id, @@ -911,7 +910,7 @@ impl Layout { .map(|id| id.to_string()) .collect::>() .join(", "); - return Err(constraint_violation!( + return Err(internal_error!( "entities of type `{}` can not be updated since they are immutable. Entity ids are [{}]", group.entity_type, ids @@ -969,7 +968,7 @@ impl Layout { let table = self.table_for_entity(&group.entity_type)?; if table.immutable { - return Err(constraint_violation!( + return Err(internal_error!( "entities of type `{}` can not be deleted since they are immutable. Entity ids are [{}]", table.object, group.ids().join(", ") )); @@ -1113,32 +1112,6 @@ impl Layout { Ok(Arc::new(layout)) } - pub(crate) fn block_time( - &self, - conn: &mut PgConnection, - block: BlockNumber, - ) -> Result, StoreError> { - let block_time_name = self.input_schema.poi_block_time(); - let poi_type = self.input_schema.poi_type(); - let id = Id::String(Word::from(PoICausalityRegion::from_network( - &self.site.network, - ))); - let key = poi_type.key(id); - - let block_time = self - .find(conn, &key, block)? - .and_then(|entity| { - entity.get(&block_time_name).map(|value| { - value - .as_int8() - .ok_or_else(|| constraint_violation!("block_time must have type Int8")) - }) - }) - .transpose()? - .map(|value| BlockTime::since_epoch(value, 0)); - Ok(block_time) - } - /// Find the time of the last rollup for the subgraph. We do this by /// looking for the maximum timestamp in any aggregation table and /// adding a little bit more than the corresponding interval to it. 
This @@ -1165,11 +1138,11 @@ impl Layout { let source_type = mapping.source_type(schema); let source_table = tables .get(&source_type) - .ok_or_else(|| constraint_violation!("Table for {source_type} is missing"))?; + .ok_or_else(|| internal_error!("Table for {source_type} is missing"))?; let agg_type = mapping.agg_type(schema); let agg_table = tables .get(&agg_type) - .ok_or_else(|| constraint_violation!("Table for {agg_type} is missing"))?; + .ok_or_else(|| internal_error!("Table for {agg_type} is missing"))?; let aggregation = mapping.aggregation(schema); let rollup = Rollup::new( mapping.interval, @@ -1639,9 +1612,9 @@ impl Table { ) -> Result { SqlName::check_valid_identifier(defn.as_str(), "object")?; - let object_type = defn.object_type().map_err(|_| { - constraint_violation!("The type `{}` is not an object type", defn.as_str()) - })?; + let object_type = defn + .object_type() + .map_err(|_| internal_error!("The type `{}` is not an object type", defn.as_str()))?; let table_name = SqlName::from(defn.as_str()); let columns = object_type diff --git a/store/postgres/src/relational/ddl.rs b/store/postgres/src/relational/ddl.rs index 55e116272d1..a3c4ed6885e 100644 --- a/store/postgres/src/relational/ddl.rs +++ b/store/postgres/src/relational/ddl.rs @@ -269,7 +269,11 @@ impl Table { (method, index_expr) } - pub(crate) fn create_postponed_indexes(&self, skip_colums: Vec) -> Vec { + pub(crate) fn create_postponed_indexes( + &self, + skip_colums: Vec, + concurrently: bool, + ) -> Vec { let mut indexing_queries = vec![]; let columns = self.columns_to_index(); @@ -281,8 +285,9 @@ impl Table { && column.name.as_str() != "id" && !skip_colums.contains(&column.name.to_string()) { + let conc = if concurrently { "concurrently " } else { "" }; let sql = format!( - "create index concurrently if not exists attr_{table_index}_{column_index}_{table_name}_{column_name}\n on {qname} using {method}({index_expr});\n", + "create index {conc}if not exists attr_{table_index}_{column_index}_{table_name}_{column_name}\n on {qname} using {method}({index_expr});\n", table_index = self.position, table_name = self.name, column_name = column.name, @@ -403,15 +408,23 @@ impl Table { if index_def.is_some() && ENV_VARS.postpone_attribute_index_creation { let arr = index_def .unwrap() - .indexes_for_table(&self.nsp, &self.name.to_string(), &self, false, false) + .indexes_for_table( + &self.nsp, + &self.name.to_string(), + &self, + false, + false, + false, + ) .map_err(|_| fmt::Error)?; for (_, sql) in arr { writeln!(out, "{};", sql).expect("properly formated index statements") } } else { self.create_attribute_indexes(out)?; + self.create_aggregate_indexes(schema, out)?; } - self.create_aggregate_indexes(schema, out) + Ok(()) } pub fn exclusion_ddl(&self, out: &mut String) -> fmt::Result { diff --git a/store/postgres/src/relational/ddl_tests.rs b/store/postgres/src/relational/ddl_tests.rs index 86e9f232d49..b15a40cecfb 100644 --- a/store/postgres/src/relational/ddl_tests.rs +++ b/store/postgres/src/relational/ddl_tests.rs @@ -158,7 +158,7 @@ fn generate_postponed_indexes() { let layout = test_layout(THING_GQL); let table = layout.table(&SqlName::from("Scalar")).unwrap(); let skip_colums = vec!["id".to_string()]; - let query_vec = table.create_postponed_indexes(skip_colums); + let query_vec = table.create_postponed_indexes(skip_colums, true); assert!(query_vec.len() == 7); let queries = query_vec.join(" "); check_eqv(THING_POSTPONED_INDEXES, &queries) @@ -352,6 +352,97 @@ fn can_copy_from() { ); } +/// Check that we do not 
create the index on `block$` twice. There was a bug +/// that if an immutable entity type had a `block` field and index creation +/// was postponed, we would emit the index on `block$` twice, once from +/// `Table.create_time_travel_indexes` and once through +/// `IndexList.indexes_for_table` +#[test] +fn postponed_indexes_with_block_column() { + fn index_list() -> IndexList { + // To generate this list, print the output of `layout.as_ddl(None)`, run + // that in Postgres and do `select indexdef from pg_indexes where + // schemaname = 'sgd0815'` + const INDEX_DEFS: &[&str] = &[ + "CREATE UNIQUE INDEX data_pkey ON sgd0815.data USING btree (vid)", + "CREATE UNIQUE INDEX data_id_key ON sgd0815.data USING btree (id)", + "CREATE INDEX data_block ON sgd0815.data USING btree (block$)", + "CREATE INDEX attr_1_0_data_block ON sgd0815.data USING btree (block, \"block$\")", + ]; + + let mut indexes: HashMap> = HashMap::new(); + indexes.insert( + "data".to_string(), + INDEX_DEFS + .iter() + .map(|def| CreateIndex::parse(def.to_string())) + .collect(), + ); + IndexList { indexes } + } + + fn cr(index: &str) -> String { + format!("create index{}", index) + } + + fn cre(index: &str) -> String { + format!("create index if not exists{}", index) + } + + // Names of the two indexes we are interested in. Note the leading space + // to guard a little against overlapping names + const BLOCK_IDX: &str = " data_block"; + const ATTR_IDX: &str = " attr_1_0_data_block"; + + let layout = test_layout(BLOCK_GQL); + + // Create everything + let sql = layout.as_ddl(None).unwrap(); + assert!(sql.contains(&cr(BLOCK_IDX))); + assert!(sql.contains(&cr(ATTR_IDX))); + + // Defer attribute indexes + let sql = layout.as_ddl(Some(index_list())).unwrap(); + assert!(sql.contains(&cr(BLOCK_IDX))); + assert!(!sql.contains(ATTR_IDX)); + // This used to be duplicated + let count = sql.matches(BLOCK_IDX).count(); + assert_eq!(1, count); + + let table = layout.table(&SqlName::from("Data")).unwrap(); + let sql = table.create_postponed_indexes(vec![], false); + assert_eq!(1, sql.len()); + assert!(!sql[0].contains(BLOCK_IDX)); + assert!(sql[0].contains(&cre(ATTR_IDX))); + + let dst_nsp = Namespace::new("sgd2".to_string()).unwrap(); + let arr = index_list() + .indexes_for_table( + &dst_nsp, + &table.name.to_string(), + &table, + true, + false, + false, + ) + .unwrap(); + assert_eq!(1, arr.len()); + assert!(!arr[0].1.contains(BLOCK_IDX)); + assert!(arr[0].1.contains(&cr(ATTR_IDX))); + + let arr = index_list() + .indexes_for_table( + &dst_nsp, + &table.name.to_string(), + &table, + false, + false, + false, + ) + .unwrap(); + assert_eq!(0, arr.len()); +} + const THING_GQL: &str = r#" type Thing @entity { id: ID! @@ -1109,3 +1200,15 @@ on "sgd0815"."stats_3_day" using btree("volume"); create index stats_3_day_dims on "sgd0815"."stats_3_day"(group_2, group_1, timestamp); "#; + +const BLOCK_GQL: &str = r#" +type Block @entity(immutable: true) { + id: ID! + number: Int! +} + +type Data @entity(immutable: true) { + id: ID! + block: Block!
+} +"#; diff --git a/store/postgres/src/relational/index.rs b/store/postgres/src/relational/index.rs index 4f72e773ee6..efa82e901f0 100644 --- a/store/postgres/src/relational/index.rs +++ b/store/postgres/src/relational/index.rs @@ -123,7 +123,7 @@ impl Display for Expr { Expr::Column(s) => write!(f, "{s}")?, Expr::Prefix(s, _) => write!(f, "{s}")?, Expr::Vid => write!(f, "vid")?, - Expr::Block => write!(f, "block")?, + Expr::Block => write!(f, "{BLOCK_COLUMN}")?, Expr::BlockRange => write!(f, "block_range")?, Expr::BlockRangeLower => write!(f, "lower(block_range)")?, Expr::BlockRangeUpper => write!(f, "upper(block_range)")?, @@ -488,12 +488,29 @@ impl CreateIndex { && columns[1] == Expr::BlockRange } Method::Brin => false, - Method::BTree | Method::Gin => { + Method::Gin => { + // 'using gin()' columns.len() == 1 && columns[0].is_attribute() && cond.is_none() && with.is_none() } + Method::BTree => { + match columns.len() { + 1 => { + // 'using btree()' + columns[0].is_attribute() && cond.is_none() && with.is_none() + } + 2 => { + // 'using btree(, block$)' + columns[0].is_attribute() + && columns[1] == Expr::Block + && cond.is_none() + && with.is_none() + } + _ => false, + } + } Method::Unknown(_) => false, } } @@ -537,6 +554,7 @@ impl CreateIndex { None, ), dummy(false, BTree, &[Expr::BlockRangeUpper], Some(Cond::Closed)), + dummy(false, BTree, &[Expr::Block], None), ] }; } @@ -630,7 +648,7 @@ impl CreateIndex { } pub fn fields_exist_in_dest<'a>(&self, dest_table: &'a Table) -> bool { - fn column_exists<'a>(it: &mut impl Iterator, column_name: &String) -> bool { + fn column_exists<'a>(it: &mut impl Iterator, column_name: &str) -> bool { it.any(|c| *c == *column_name) } @@ -667,7 +685,7 @@ impl CreateIndex { } Expr::Vid => (), Expr::Block => { - if !column_exists(cols, &"block".to_string()) { + if !dest_table.immutable { return false; } } @@ -768,7 +786,8 @@ impl IndexList { table_name: &String, dest_table: &Table, postponed: bool, - concurrent_if_not_exist: bool, + concurrent: bool, + if_not_exists: bool, ) -> Result, String)>, Error> { let mut arr = vec![]; if let Some(vec) = self.indexes.get(table_name) { @@ -776,7 +795,7 @@ impl IndexList { // First we check if the fields do exist in the destination subgraph. // In case of grafting that is not given. if ci.fields_exist_in_dest(dest_table) - // Then we check if the index is one of the default indexes not based on + // Then we check if the index is one of the default indexes not based on // the attributes. Those will be created anyway and we should skip them. && !ci.is_default_non_attr_index() // Then ID based indexes in the immutable tables are also created initially @@ -789,7 +808,7 @@ impl IndexList { { if let Ok(sql) = ci .with_nsp(namespace.to_string())? - .to_sql(concurrent_if_not_exist, concurrent_if_not_exist) + .to_sql(concurrent, if_not_exists) { arr.push((ci.name(), sql)) } @@ -813,7 +832,7 @@ impl IndexList { let namespace = &layout.catalog.site.namespace; for table in layout.tables.values() { for (ind_name, create_query) in - self.indexes_for_table(namespace, &table.name.to_string(), table, true, true)? + self.indexes_for_table(namespace, &table.name.to_string(), table, true, true, true)? 
{ if let Some(index_name) = ind_name { let table_name = table.name.clone(); diff --git a/store/postgres/src/relational/prune.rs b/store/postgres/src/relational/prune.rs index 5c3035ce172..6d5295e5535 100644 --- a/store/postgres/src/relational/prune.rs +++ b/store/postgres/src/relational/prune.rs @@ -28,6 +28,8 @@ use super::{ Catalog, Layout, Namespace, }; +pub use status::{Phase, PruneState, PruneTableState, Viewer}; + /// Utility to copy relevant data out of a source table and into a new /// destination table and replace the source table with the destination /// table @@ -90,6 +92,7 @@ impl TablePair { &self, conn: &mut PgConnection, reporter: &mut dyn PruneReporter, + tracker: &status::Tracker, earliest_block: BlockNumber, final_block: BlockNumber, cancel: &CancelHandle, @@ -99,6 +102,7 @@ impl TablePair { // Determine the last vid that we need to copy let range = VidRange::for_prune(conn, &self.src, earliest_block, final_block)?; let mut batcher = VidBatcher::load(conn, &self.src_nsp, &self.src, range)?; + tracker.start_copy_final(conn, &self.src, range)?; while !batcher.finished() { let (_, rows) = batcher.step(|start, end| { @@ -132,11 +136,13 @@ impl TablePair { .map_err(StoreError::from) }) })?; + let rows = rows.unwrap_or(0); + tracker.finish_batch(conn, &self.src, rows as i64, &batcher)?; cancel.check_cancel()?; reporter.prune_batch( self.src.name.as_str(), - rows.unwrap_or(0), + rows, PrunePhase::CopyFinal, batcher.finished(), ); @@ -151,6 +157,7 @@ impl TablePair { &self, conn: &mut PgConnection, reporter: &mut dyn PruneReporter, + tracker: &status::Tracker, final_block: BlockNumber, ) -> Result<(), StoreError> { let column_list = self.column_list(); @@ -158,6 +165,7 @@ impl TablePair { // Determine the last vid that we need to copy let range = VidRange::for_prune(conn, &self.src, final_block + 1, BLOCK_NUMBER_MAX)?; let mut batcher = VidBatcher::load(conn, &self.src.nsp, &self.src, range)?; + tracker.start_copy_nonfinal(conn, &self.src, range)?; while !batcher.finished() { let (_, rows) = batcher.step(|start, end| { @@ -186,10 +194,13 @@ impl TablePair { .map_err(StoreError::from) }) })?; + let rows = rows.unwrap_or(0); + + tracker.finish_batch(conn, &self.src, rows as i64, &batcher)?; reporter.prune_batch( self.src.name.as_str(), - rows.unwrap_or(0), + rows, PrunePhase::CopyNonfinal, batcher.finished(), ); @@ -222,6 +233,7 @@ impl TablePair { query, "select setval('{dst_nsp}.{vid_seq}', nextval('{src_nsp}.{vid_seq}'));" )?; + writeln!(query, "drop sequence {src_nsp}.{vid_seq} cascade;")?; } writeln!(query, "drop table {src_qname};")?; @@ -352,18 +364,21 @@ impl Layout { /// time. The rebuild strategy never blocks reads, it only ever blocks /// writes. 
pub fn prune( - &self, + self: Arc, logger: &Logger, reporter: &mut dyn PruneReporter, conn: &mut PgConnection, req: &PruneRequest, cancel: &CancelHandle, ) -> Result<(), CancelableError> { + let tracker = status::Tracker::new(conn, self.clone())?; + reporter.start(req); let stats = self.version_stats(conn, reporter, true, cancel)?; let prunable_tables: Vec<_> = self.prunable_tables(&stats, req).into_iter().collect(); + tracker.start(conn, req, &prunable_tables)?; // create a shadow namespace where we will put the copies of our // tables, but only create it in the database if we really need it @@ -382,6 +397,7 @@ impl Layout { // is the definition of 'final' for (table, strat) in &prunable_tables { reporter.start_table(table.name.as_str()); + tracker.start_table(conn, table)?; match strat { PruningStrategy::Rebuild => { if recreate_dst_nsp { @@ -401,6 +417,7 @@ impl Layout { pair.copy_final_entities( conn, reporter, + &tracker, req.earliest_block, req.final_block, cancel, @@ -410,7 +427,7 @@ impl Layout { // see also: deployment-lock-for-update reporter.start_switch(); deployment::with_lock(conn, &self.site, |conn| -> Result<_, StoreError> { - pair.copy_nonfinal_entities(conn, reporter, req.final_block)?; + pair.copy_nonfinal_entities(conn, reporter, &tracker, req.final_block)?; cancel.check_cancel().map_err(CancelableError::from)?; conn.transaction(|conn| pair.switch(logger, conn))?; @@ -426,6 +443,7 @@ impl Layout { let range = VidRange::for_prune(conn, &table, 0, req.earliest_block)?; let mut batcher = VidBatcher::load(conn, &self.site.namespace, &table, range)?; + tracker.start_delete(conn, table, range, &batcher)?; while !batcher.finished() { let (_, rows) = batcher.step(|start, end| {sql_query(format!( "/* controller=prune,phase=delete,start_vid={start},batch_size={batch_size} */ \ @@ -439,10 +457,13 @@ impl Layout { .bind::(start) .bind::(end) .execute(conn).map_err(StoreError::from)})?; + let rows = rows.unwrap_or(0); + + tracker.finish_batch(conn, table, -(rows as i64), &batcher)?; reporter.prune_batch( table.name.as_str(), - rows.unwrap_or(0), + rows, PrunePhase::Delete, batcher.finished(), ); @@ -450,6 +471,7 @@ impl Layout { } } reporter.finish_table(table.name.as_str()); + tracker.finish_table(conn, table)?; } // Get rid of the temporary prune schema if we actually created it if !recreate_dst_nsp { @@ -465,7 +487,436 @@ impl Layout { self.analyze_tables(conn, reporter, tables, cancel)?; reporter.finish(); + tracker.finish(conn)?; Ok(()) } } + +mod status { + use std::sync::Arc; + + use chrono::{DateTime, Utc}; + use diesel::{ + deserialize::FromSql, + dsl::insert_into, + pg::{Pg, PgValue}, + query_builder::QueryFragment, + serialize::{Output, ToSql}, + sql_types::Text, + table, update, AsChangeset, Connection, ExpressionMethods as _, OptionalExtension, + PgConnection, QueryDsl as _, RunQueryDsl as _, + }; + use graph::{ + components::store::{PruneRequest, PruningStrategy, StoreResult}, + env::ENV_VARS, + prelude::StoreError, + }; + + use crate::{ + relational::{Layout, Table}, + vid_batcher::{VidBatcher, VidRange}, + ConnectionPool, + }; + + table! { + subgraphs.prune_state(vid) { + vid -> Integer, + // Deployment id (sgd) + id -> Integer, + run -> Integer, + // The first block in the subgraph when the prune started + first_block -> Integer, + final_block -> Integer, + latest_block -> Integer, + // The amount of history configured + history_blocks -> Integer, + + started_at -> Timestamptz, + finished_at -> Nullable, + } + } + + table! 
{ + subgraphs.prune_table_state(vid) { + vid -> Integer, + // Deployment id (sgd) + id -> Integer, + run -> Integer, + table_name -> Text, + + strategy -> Char, + // see enum Phase + phase -> Text, + + start_vid -> Nullable, + final_vid -> Nullable, + nonfinal_vid -> Nullable, + rows -> Nullable, + + next_vid -> Nullable, + batch_size -> Nullable, + + started_at -> Nullable, + finished_at -> Nullable, + } + } + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow)] + #[diesel(sql_type = Text)] + pub enum Phase { + Queued, + Started, + /// Only used when strategy is Rebuild + CopyFinal, + /// Only used when strategy is Rebuild + CopyNonfinal, + /// Only used when strategy is Delete + Delete, + Done, + /// Not a real phase, indicates that the database has an invalid + /// value + Unknown, + } + + impl Phase { + pub fn from_str(phase: &str) -> Self { + use Phase::*; + match phase { + "queued" => Queued, + "started" => Started, + "copy_final" => CopyFinal, + "copy_nonfinal" => CopyNonfinal, + "delete" => Delete, + "done" => Done, + _ => Unknown, + } + } + + pub fn as_str(&self) -> &str { + use Phase::*; + match self { + Queued => "queued", + Started => "started", + CopyFinal => "copy_final", + CopyNonfinal => "copy_nonfinal", + Delete => "delete", + Done => "done", + Unknown => "*unknown*", + } + } + } + + impl ToSql for Phase { + fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result { + let phase = self.as_str(); + >::to_sql(phase, &mut out.reborrow()) + } + } + + impl FromSql for Phase { + fn from_sql(bytes: PgValue) -> diesel::deserialize::Result { + Ok(Phase::from_str(std::str::from_utf8(bytes.as_bytes())?)) + } + } + + /// Information about one pruning run for a deployment + #[derive(Queryable)] + pub struct PruneState { + pub vid: i32, + pub id: i32, + pub run: i32, + pub first_block: i32, + pub final_block: i32, + pub latest_block: i32, + pub history_blocks: i32, + + pub started_at: DateTime, + pub finished_at: Option>, + } + + /// Per-table information about the pruning run for a deployment + #[derive(Queryable)] + pub struct PruneTableState { + pub vid: i32, + pub id: i32, + pub run: i32, + pub table_name: String, + + // 'r' for rebuild or 'd' for delete + pub strategy: String, + pub phase: Phase, + + pub start_vid: Option, + pub final_vid: Option, + pub nonfinal_vid: Option, + pub rows: Option, + + pub next_vid: Option, + pub batch_size: Option, + + pub started_at: Option>, + pub finished_at: Option>, + } + + /// A helper to persist pruning progress in the database + pub(super) struct Tracker { + layout: Arc, + run: i32, + } + + impl Tracker { + pub(super) fn new(conn: &mut PgConnection, layout: Arc) -> StoreResult { + use prune_state as ps; + let run = ps::table + .filter(ps::id.eq(layout.site.id)) + .order(ps::run.desc()) + .select(ps::run) + .get_result::(conn) + .optional() + .map_err(StoreError::from)? + .unwrap_or(0) + + 1; + + // Delete old prune state. Keep the initial run and the last + // `prune_keep_history` runs (including this one) + diesel::delete(ps::table) + .filter(ps::id.eq(layout.site.id)) + .filter(ps::run.gt(1)) + .filter(ps::run.lt(run - (ENV_VARS.store.prune_keep_history as i32 - 1))) + .execute(conn) + .map_err(StoreError::from)?; + + Ok(Tracker { layout, run }) + } + + pub(super) fn start( + &self, + conn: &mut PgConnection, + req: &PruneRequest, + prunable_tables: &[(&Arc
, PruningStrategy)], + ) -> StoreResult<()> { + use prune_state as ps; + use prune_table_state as pts; + + conn.transaction(|conn| { + insert_into(ps::table) + .values(( + ps::id.eq(self.layout.site.id), + ps::run.eq(self.run), + ps::first_block.eq(req.first_block), + ps::final_block.eq(req.final_block), + ps::latest_block.eq(req.latest_block), + ps::history_blocks.eq(req.history_blocks), + ps::started_at.eq(diesel::dsl::now), + )) + .execute(conn)?; + + for (table, strat) in prunable_tables { + let strat = match strat { + PruningStrategy::Rebuild => "r", + PruningStrategy::Delete => "d", + }; + insert_into(pts::table) + .values(( + pts::id.eq(self.layout.site.id), + pts::run.eq(self.run), + pts::table_name.eq(table.name.as_str()), + pts::strategy.eq(strat), + pts::phase.eq(Phase::Queued), + )) + .execute(conn)?; + } + Ok(()) + }) + } + + pub(crate) fn start_table( + &self, + conn: &mut PgConnection, + table: &Table, + ) -> StoreResult<()> { + use prune_table_state as pts; + + self.update_table_state( + conn, + table, + ( + pts::started_at.eq(diesel::dsl::now), + pts::phase.eq(Phase::Started), + ), + )?; + + Ok(()) + } + + pub(crate) fn start_copy_final( + &self, + conn: &mut PgConnection, + table: &Table, + range: VidRange, + ) -> StoreResult<()> { + use prune_table_state as pts; + + let values = ( + pts::phase.eq(Phase::CopyFinal), + pts::start_vid.eq(range.min), + pts::next_vid.eq(range.min), + pts::final_vid.eq(range.max), + pts::rows.eq(0), + ); + + self.update_table_state(conn, table, values) + } + + pub(crate) fn start_copy_nonfinal( + &self, + conn: &mut PgConnection, + table: &Table, + range: VidRange, + ) -> StoreResult<()> { + use prune_table_state as pts; + + let values = ( + pts::phase.eq(Phase::CopyNonfinal), + pts::nonfinal_vid.eq(range.max), + ); + self.update_table_state(conn, table, values) + } + + pub(crate) fn finish_batch( + &self, + conn: &mut PgConnection, + src: &Table, + rows: i64, + batcher: &VidBatcher, + ) -> StoreResult<()> { + use prune_table_state as pts; + + let values = ( + pts::next_vid.eq(batcher.next_vid()), + pts::batch_size.eq(batcher.batch_size() as i64), + pts::rows.eq(pts::rows + rows), + ); + + self.update_table_state(conn, src, values) + } + + pub(crate) fn finish_table( + &self, + conn: &mut PgConnection, + table: &Table, + ) -> StoreResult<()> { + use prune_table_state as pts; + + let values = ( + pts::finished_at.eq(diesel::dsl::now), + pts::phase.eq(Phase::Done), + ); + + self.update_table_state(conn, table, values) + } + + pub(crate) fn start_delete( + &self, + conn: &mut PgConnection, + table: &Table, + range: VidRange, + batcher: &VidBatcher, + ) -> StoreResult<()> { + use prune_table_state as pts; + + let values = ( + pts::phase.eq(Phase::Delete), + pts::start_vid.eq(range.min), + pts::final_vid.eq(range.max), + pts::nonfinal_vid.eq(range.max), + pts::rows.eq(0), + pts::next_vid.eq(range.min), + pts::batch_size.eq(batcher.batch_size() as i64), + ); + + self.update_table_state(conn, table, values) + } + + fn update_table_state( + &self, + conn: &mut PgConnection, + table: &Table, + values: V, + ) -> StoreResult<()> + where + V: AsChangeset, + C: QueryFragment, + { + use prune_table_state as pts; + + update(pts::table) + .filter(pts::id.eq(self.layout.site.id)) + .filter(pts::run.eq(self.run)) + .filter(pts::table_name.eq(table.name.as_str())) + .set(values) + .execute(conn)?; + Ok(()) + } + + pub(crate) fn finish(&self, conn: &mut PgConnection) -> StoreResult<()> { + use prune_state as ps; + + update(ps::table) + 
.filter(ps::id.eq(self.layout.site.id)) + .filter(ps::run.eq(self.run)) + .set((ps::finished_at.eq(diesel::dsl::now),)) + .execute(conn)?; + Ok(()) + } + } + + /// A helper to read pruning progress from the database + pub struct Viewer { + pool: ConnectionPool, + layout: Arc, + } + + impl Viewer { + pub fn new(pool: ConnectionPool, layout: Arc) -> Self { + Self { pool, layout } + } + + pub fn runs(&self) -> StoreResult> { + use prune_state as ps; + + let mut conn = self.pool.get()?; + let runs = ps::table + .filter(ps::id.eq(self.layout.site.id)) + .select(ps::run) + .order(ps::run.asc()) + .load::(&mut conn) + .map_err(StoreError::from)?; + let runs = runs.into_iter().map(|run| run as usize).collect::>(); + Ok(runs) + } + + pub fn state(&self, run: usize) -> StoreResult)>> { + use prune_state as ps; + use prune_table_state as pts; + + let mut conn = self.pool.get()?; + + let ptss = pts::table + .filter(pts::id.eq(self.layout.site.id)) + .filter(pts::run.eq(run as i32)) + .order(pts::table_name.asc()) + .load::(&mut conn) + .map_err(StoreError::from)?; + + ps::table + .filter(ps::id.eq(self.layout.site.id)) + .filter(ps::run.eq(run as i32)) + .first::(&mut conn) + .optional() + .map_err(StoreError::from) + .map(|state| state.map(|state| (state, ptss))) + } + } +} diff --git a/store/postgres/src/relational/rollup.rs b/store/postgres/src/relational/rollup.rs index b9177a0052b..9a9830f6b5a 100644 --- a/store/postgres/src/relational/rollup.rs +++ b/store/postgres/src/relational/rollup.rs @@ -63,8 +63,8 @@ use diesel::{sql_query, PgConnection, RunQueryDsl as _}; use diesel::sql_types::{Integer, Nullable, Timestamptz}; use graph::blockchain::BlockTime; use graph::components::store::{BlockNumber, StoreError}; -use graph::constraint_violation; use graph::data::store::IdType; +use graph::internal_error; use graph::schema::{ Aggregate, AggregateFn, Aggregation, AggregationInterval, ExprVisitor, VisitExpr, }; @@ -111,7 +111,7 @@ fn rewrite<'a>(table: &'a Table, expr: &str) -> Result<(String, Vec<&'a str>), S fn not_supported(&mut self, msg: String) { if self.error.is_none() { - self.error = Some(constraint_violation!( + self.error = Some(internal_error!( "Schema validation should have found expression errors: {}", msg )); diff --git a/store/postgres/src/relational_queries.rs b/store/postgres/src/relational_queries.rs index 19f9400c470..533990c42b9 100644 --- a/store/postgres/src/relational_queries.rs +++ b/store/postgres/src/relational_queries.rs @@ -53,7 +53,7 @@ use crate::{ const BASE_SQL_COLUMNS: [&str; 2] = ["id", "vid"]; /// The maximum number of bind variables that can be used in a query -const POSTGRES_MAX_PARAMETERS: usize = u16::MAX as usize; // 65535 +pub(crate) const POSTGRES_MAX_PARAMETERS: usize = u16::MAX as usize; // 65535 const SORT_KEY_COLUMN: &str = "sort_key$"; @@ -94,9 +94,9 @@ impl From for diesel::result::Error { } } -// Similar to graph::prelude::constraint_violation, but returns a Diesel +// Similar to graph::prelude::internal_error, but returns a Diesel // error for use in the guts of query generation -macro_rules! constraint_violation { +macro_rules! 
internal_error { ($msg:expr) => {{ diesel::result::Error::QueryBuilderError(anyhow!("{}", $msg).into()) }}; @@ -431,7 +431,7 @@ pub fn parse_id(id_type: IdType, json: serde_json::Value) -> Result SqlValue<'a> { String(s) => match column_type { ColumnType::String|ColumnType::Enum(_)|ColumnType::TSVector(_) => S::Text(s), ColumnType::Int8 => S::Int8(s.parse::().map_err(|e| { - constraint_violation!("failed to convert `{}` to an Int8: {}", s, e.to_string()) + internal_error!("failed to convert `{}` to an Int8: {}", s, e.to_string()) })?), ColumnType::Bytes => { let bytes = scalar::Bytes::from_str(s) @@ -913,7 +913,7 @@ impl PrefixType { match column.column_type() { ColumnType::String => Ok(PrefixType::String), ColumnType::Bytes => Ok(PrefixType::Bytes), - _ => Err(constraint_violation!( + _ => Err(internal_error!( "cannot setup prefix comparison for column {} of type {}", column, column.column_type().sql_type() @@ -1086,7 +1086,7 @@ impl<'a> QueryFragment for PrefixComparison<'a> { // For `op` either `<=` or `>=`, we can write (using '<=' as an example) // uv <= st <=> u < s || u = s && uv <= st let large = self.kind.is_large(&self.value).map_err(|()| { - constraint_violation!( + internal_error!( "column {} has type {} and can't be compared with the value `{}` using {}", self.column, self.column.column_type().sql_type(), @@ -2237,7 +2237,7 @@ impl<'a> InsertRow<'a> { .filter_map(|field| row.entity.get(field)) .map(|value| match value { Value::String(s) => Ok(s), - _ => Err(constraint_violation!( + _ => Err(internal_error!( "fulltext fields must be strings but got {:?}", value )), @@ -3178,7 +3178,7 @@ impl<'a> FilterCollection<'a> { if windows.iter().map(FilterWindow::parent_type).all_equal() { Ok(Some(windows[0].parent_type()?)) } else { - Err(graph::constraint_violation!( + Err(graph::internal_error!( "all implementors of an interface must use the same type for their `id`" )) } @@ -3448,7 +3448,7 @@ impl<'a> SortKey<'a> { true => ( parent_table.primary_key(), child_table.column_for_field(&join_attribute).map_err(|_| { - graph::constraint_violation!( + graph::internal_error!( "Column for a join attribute `{}` of `{}` table not found", join_attribute, child_table.name() @@ -3459,7 +3459,7 @@ impl<'a> SortKey<'a> { parent_table .column_for_field(&join_attribute) .map_err(|_| { - graph::constraint_violation!( + graph::internal_error!( "Column for a join attribute `{}` of `{}` table not found", join_attribute, parent_table.name() @@ -3535,7 +3535,7 @@ impl<'a> SortKey<'a> { child_table .column_for_field(&child.join_attribute) .map_err(|_| { - graph::constraint_violation!( + graph::internal_error!( "Column for a join attribute `{}` of `{}` table not found", child.join_attribute, child_table.name() @@ -3546,7 +3546,7 @@ impl<'a> SortKey<'a> { parent_table .column_for_field(&child.join_attribute) .map_err(|_| { - graph::constraint_violation!( + graph::internal_error!( "Column for a join attribute `{}` of `{}` table not found", child.join_attribute, parent_table.name() @@ -3586,7 +3586,7 @@ impl<'a> SortKey<'a> { direction: SortDirection, ) -> Result, QueryExecutionError> { if entity_types.is_empty() { - return Err(QueryExecutionError::ConstraintViolation( + return Err(QueryExecutionError::InternalError( "Cannot order by child interface with no implementing entity types".to_string(), )); } @@ -3744,7 +3744,7 @@ impl<'a> SortKey<'a> { direction: _, } => { if column.is_primary_key() { - return Err(constraint_violation!("SortKey::Key never uses 'id'")); + return Err(internal_error!("SortKey::Key never 
uses 'id'")); } match select_statement_level { @@ -3764,7 +3764,7 @@ impl<'a> SortKey<'a> { match nested { ChildKey::Single(child) => { if child.sort_by_column.is_primary_key() { - return Err(constraint_violation!("SortKey::Key never uses 'id'")); + return Err(internal_error!("SortKey::Key never uses 'id'")); } match select_statement_level { @@ -3781,7 +3781,7 @@ impl<'a> SortKey<'a> { ChildKey::Many(_, children) => { for child in children.iter() { if child.sort_by_column.is_primary_key() { - return Err(constraint_violation!("SortKey::Key never uses 'id'")); + return Err(internal_error!("SortKey::Key never uses 'id'")); } out.push_sql(", "); child.sort_by_column.walk_ast(out.reborrow())?; @@ -3930,9 +3930,7 @@ impl<'a> SortKey<'a> { ) -> QueryResult<()> { if column.is_primary_key() { // This shouldn't happen since we'd use SortKey::IdAsc/Desc - return Err(constraint_violation!( - "sort_expr called with primary key column" - )); + return Err(internal_error!("sort_expr called with primary key column")); } fn push_prefix(prefix: Option<&str>, out: &mut AstPass) { @@ -3990,14 +3988,14 @@ impl<'a> SortKey<'a> { let sort_by = &child.sort_by_column; if sort_by.is_primary_key() { // This shouldn't happen since we'd use SortKey::ManyIdAsc/ManyDesc - return Err(constraint_violation!( + return Err(internal_error!( "multi_sort_expr called with primary key column" )); } match sort_by.column_type() { ColumnType::TSVector(_) => { - return Err(constraint_violation!("TSVector is not supported")); + return Err(internal_error!("TSVector is not supported")); } _ => {} } @@ -4565,7 +4563,7 @@ impl<'a> ClampRangeQuery<'a> { block: BlockNumber, ) -> Result { if table.immutable { - Err(graph::constraint_violation!( + Err(graph::internal_error!( "immutable entities can not be deleted or updated (table `{}`)", table.qualified_name )) @@ -4674,7 +4672,7 @@ pub struct RevertClampQuery<'a> { impl<'a> RevertClampQuery<'a> { pub(crate) fn new(table: &'a Table, block: BlockNumber) -> Result { if table.immutable { - Err(graph::constraint_violation!( + Err(graph::internal_error!( "can not revert clamping in immutable table `{}`", table.qualified_name )) @@ -4809,7 +4807,7 @@ impl<'a> QueryFragment for CopyEntityBatchQuery<'a> { fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> QueryResult<()> { out.unsafe_to_cache_prepared(); - let has_vid_seq = self.src.object.has_vid_seq(); + let has_vid_seq = self.dst.object.has_vid_seq(); // Construct a query // insert into {dst}({columns}) @@ -4894,7 +4892,7 @@ impl<'a> QueryFragment for CopyEntityBatchQuery<'a> { out.push_sql(", 0"); } (true, false) => { - return Err(constraint_violation!( + return Err(internal_error!( "can not copy entity type {} to {} because the src has a causality region but the dst does not", self.src.object.as_str(), self.dst.object.as_str() diff --git a/store/postgres/src/store.rs b/store/postgres/src/store.rs index 50a5e4b21e0..bda5b2da136 100644 --- a/store/postgres/src/store.rs +++ b/store/postgres/src/store.rs @@ -9,8 +9,8 @@ use graph::{ StatusStore, Store as StoreTrait, }, }, - constraint_violation, data::subgraph::status, + internal_error, prelude::{ web3::types::Address, BlockNumber, BlockPtr, CheapClone, DeploymentHash, PartialBlockPtr, QueryExecutionError, StoreError, @@ -87,7 +87,7 @@ impl QueryStoreManager for Store { .and_then(|x| x)?; let chain_store = self.block_store.chain_store(&site.network).ok_or_else(|| { - constraint_violation!( + internal_error!( "Subgraphs index a known network, but {} indexes `{}` which we do not know about. 
This is most likely a configuration error.", site.deployment, site.network @@ -167,8 +167,8 @@ impl StatusStore for Store { .await } - async fn query_permit(&self) -> Result { + async fn query_permit(&self) -> QueryPermit { // Status queries go to the primary shard. - Ok(self.block_store.query_permit_primary().await) + self.block_store.query_permit_primary().await } } diff --git a/store/postgres/src/subgraph_store.rs b/store/postgres/src/subgraph_store.rs index e9f5f2cce34..d19cc68f44a 100644 --- a/store/postgres/src/subgraph_store.rs +++ b/store/postgres/src/subgraph_store.rs @@ -21,9 +21,9 @@ use graph::{ PruneReporter, PruneRequest, SubgraphFork, }, }, - constraint_violation, data::query::QueryTarget, data::subgraph::{schema::DeploymentCreate, status, DeploymentFeatures}, + internal_error, prelude::{ anyhow, lazy_static, o, web3::types::Address, ApiVersion, BlockNumber, BlockPtr, ChainStore, DeploymentHash, EntityOperation, Logger, MetricsRegistry, NodeId, @@ -37,15 +37,15 @@ use graph::{ }; use crate::{ - connection_pool::ConnectionPool, deployment::{OnSync, SubgraphHealth}, - primary::{self, DeploymentId, Mirror as PrimaryMirror, Site}, + primary::{self, DeploymentId, Mirror as PrimaryMirror, Primary, Site}, relational::{ + self, index::{IndexList, Method}, Layout, }, writable::{SourceableStore, WritableStore}, - NotificationSender, + ConnectionPool, NotificationSender, }; use crate::{ deployment_store::{DeploymentStore, ReplicaId}, @@ -360,6 +360,12 @@ impl SubgraphStoreInner { sender: Arc, registry: Arc, ) -> Self { + let primary = stores + .iter() + .find(|(name, _, _, _)| name == &*PRIMARY_SHARD) + .map(|(_, pool, _, _)| Primary::new(Arc::new(pool.clone()))) + .expect("primary shard must be present"); + let mirror = { let pools = HashMap::from_iter( stores @@ -376,6 +382,7 @@ impl SubgraphStoreInner { name, Arc::new(DeploymentStore::new( &logger, + primary.cheap_clone(), main_pool, read_only_pools, weights, @@ -436,7 +443,7 @@ impl SubgraphStoreInner { fn evict(&self, id: &DeploymentHash) -> Result<(), StoreError> { if let Some((site, _)) = self.sites.remove(id) { let store = self.stores.get(&site.shard).ok_or_else(|| { - constraint_violation!( + internal_error!( "shard {} for deployment sgd{} not found when evicting", site.shard, site.id @@ -533,9 +540,7 @@ impl SubgraphStoreInner { let placement = self .placer .place(name.as_str(), network_name) - .map_err(|msg| { - constraint_violation!("illegal indexer name in deployment rule: {}", msg) - })?; + .map_err(|msg| internal_error!("illegal indexer name in deployment rule: {}", msg))?; match placement { None => Ok((PRIMARY_SHARD.clone(), default_node)), @@ -699,12 +704,6 @@ impl SubgraphStoreInner { ))); } let deployment = src_store.load_deployment(src.clone())?; - if deployment.failed { - return Err(StoreError::Unknown(anyhow!( - "can not copy deployment {} because it has failed", - src_loc - ))); - } let index_def = src_store.load_indexes(src.clone())?; // Transmogrify the deployment into a new one @@ -984,7 +983,7 @@ impl SubgraphStoreInner { pub(crate) fn version_info(&self, version: &str) -> Result { if let Some((deployment_id, created_at)) = self.mirror.version_info(version)? 
{ let id = DeploymentHash::new(deployment_id.clone()) - .map_err(|id| constraint_violation!("illegal deployment id {}", id))?; + .map_err(|id| internal_error!("illegal deployment id {}", id))?; let (store, site) = self.store(&id)?; let statuses = store.deployment_statuses(&[site.clone()])?; let status = statuses @@ -993,7 +992,7 @@ impl SubgraphStoreInner { let chain = status .chains .first() - .ok_or_else(|| constraint_violation!("no chain info for {}", deployment_id))?; + .ok_or_else(|| internal_error!("no chain info for {}", deployment_id))?; let latest_ethereum_block_number = chain.latest_block.as_ref().map(|block| block.number()); let subgraph_info = store.subgraph_info(site.cheap_clone())?; @@ -1251,6 +1250,16 @@ impl SubgraphStoreInner { store.prune(reporter, site, req).await } + pub async fn prune_viewer( + &self, + deployment: &DeploymentLocator, + ) -> Result { + let site = self.find_site(deployment.id.into())?; + let store = self.for_site(&site)?; + + store.prune_viewer(site).await + } + pub fn set_history_blocks( &self, deployment: &DeploymentLocator, @@ -1600,7 +1609,7 @@ impl SubgraphStoreTrait for SubgraphStore { fn active_locator(&self, hash: &str) -> Result, StoreError> { let sites = self.mirror.find_sites(&[hash.to_string()], true)?; if sites.len() > 1 { - return Err(constraint_violation!( + return Err(internal_error!( "There are {} active deployments for {hash}, there should only be one", sites.len() )); diff --git a/store/postgres/src/vid_batcher.rs b/store/postgres/src/vid_batcher.rs index 81da5382e3d..c1e69ebe017 100644 --- a/store/postgres/src/vid_batcher.rs +++ b/store/postgres/src/vid_batcher.rs @@ -112,20 +112,6 @@ pub(crate) struct VidBatcher { } impl VidBatcher { - fn histogram_bounds( - conn: &mut PgConnection, - nsp: &Namespace, - table: &Table, - range: VidRange, - ) -> Result, StoreError> { - let bounds = catalog::histogram_bounds(conn, nsp, &table.name, VID_COLUMN)? - .into_iter() - .filter(|bound| range.min < *bound && range.max > *bound) - .chain(vec![range.min, range.max].into_iter()) - .collect::>(); - Ok(bounds) - } - /// Initialize a batcher for batching through entries in `table` with /// `vid` in the given `vid_range` /// @@ -138,7 +124,7 @@ impl VidBatcher { table: &Table, vid_range: VidRange, ) -> Result { - let bounds = Self::histogram_bounds(conn, nsp, table, vid_range)?; + let bounds = catalog::histogram_bounds(conn, nsp, &table.name, VID_COLUMN)?; let batch_size = AdaptiveBatchSize::new(table); Self::new(bounds, vid_range, batch_size) } @@ -150,6 +136,26 @@ impl VidBatcher { ) -> Result { let start = range.min; + let bounds = { + // Keep only histogram bounds that are relevant for the range + let mut bounds = bounds + .into_iter() + .filter(|bound| range.min <= *bound && range.max >= *bound) + .collect::>(); + // The first and last entry in `bounds` are Postgres' estimates + // of the min and max `vid` values in the table.
We use the + // actual min and max `vid` values from the `vid_range` instead + let len = bounds.len(); + if len > 1 { + bounds[0] = range.min; + bounds[len - 1] = range.max; + } else { + // If Postgres doesn't have a histogram, just use one bucket + // from min to max + bounds = vec![range.min, range.max]; + } + bounds + }; let mut ogive = if range.is_empty() { None } else { @@ -220,16 +226,25 @@ impl VidBatcher { let duration = self.step_timer.elapsed(); let batch_size = self.batch_size.adapt(duration); - self.start = self.end + 1; + // We can't possibly copy farther than `max_vid` + self.start = (self.end + 1).min(self.max_vid + 1); self.end = ogive.next_point(self.start, batch_size as usize)?; Ok((duration, Some(res))) } } } + + pub(crate) fn set_batch_size(&mut self, size: usize) { + self.batch_size.size = size as i64; + self.end = match &self.ogive { + Some(ogive) => ogive.next_point(self.start, size as usize).unwrap(), + None => self.start + size as i64, + }; + } } -#[derive(Copy, Clone, QueryableByName)] +#[derive(Debug, Copy, Clone, QueryableByName)] pub(crate) struct VidRange { #[diesel(sql_type = BigInt, column_name = "min_vid")] pub min: i64, @@ -248,7 +263,10 @@ impl VidRange { } pub fn is_empty(&self) -> bool { - self.max == -1 + // min > max can happen when we restart a copy job that has finished + // some tables. For those, min (the next_vid) will be larger than + // max (the target_vid) + self.max == -1 || self.min > self.max } pub fn size(&self) -> usize { @@ -371,6 +389,17 @@ mod tests { } } + impl std::fmt::Debug for Batcher { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Batcher") + .field("start", &self.vid.start) + .field("end", &self.vid.end) + .field("size", &self.vid.batch_size.size) + .field("duration", &self.vid.batch_size.target.as_secs()) + .finish() + } + } + #[test] fn simple() { let bounds = vec![10, 20, 30, 40, 49]; @@ -422,4 +451,122 @@ mod tests { batcher.at(360, 359, 80); batcher.step(360, 359, S010); } + + #[test] + fn vid_batcher_adjusts_bounds() { + // The first and last entry in `bounds` are estimats of the min and + // max that are slightly off compared to the actual min and max we + // put in `vid_range`. Check that `VidBatcher` uses the actual min + // and max from `vid_range`. + let bounds = vec![639, 20_000, 40_000, 60_000, 80_000, 90_000]; + let vid_range = VidRange::new(1, 100_000); + let batch_size = AdaptiveBatchSize { + size: 1000, + target: S100, + }; + + let vid_batcher = VidBatcher::new(bounds, vid_range, batch_size).unwrap(); + let ogive = vid_batcher.ogive.as_ref().unwrap(); + assert_eq!(1, ogive.start()); + assert_eq!(100_000, ogive.end()); + } + + #[test] + fn vid_batcher_handles_large_vid() { + // An example with very large `vid` values which come from the new + // schema of setting the `vid` to `block_num << 32 + sequence_num`. 
+ // These values are taken from an actual example subgraph and cuased + // errors because of numerical roundoff issues + const MIN: i64 = 186155521970012263; + const MAX: i64 = 187989601854423140; + const BOUNDS: &[i64] = &[ + 186155521970012263, + 186155552034783334, + 186166744719556711, + 187571594162339943, + 187571628522078310, + 187576619274076263, + 187576649338847334, + 187580570643988583, + 187590242910339175, + 187590268680142950, + 187963647367053415, + 187970828552372324, + 187986749996138596, + 187989601854423140, + ]; + + // The start, end, and batch size we expect when we run through the + // `vid_batcher` we set up below with `MIN`, `MAX` and `BOUNDS` + const STEPS: &[(i64, i64, i64)] = &[ + (186155521970012263, 186155521970012265, 2), + (186155521970012266, 186155521970012269, 3), + (186155521970012270, 186155521970012276, 6), + (186155521970012277, 186155521970012289, 12), + (186155521970012290, 186155521970012312, 22), + (186155521970012313, 186155521970012353, 40), + (186155521970012354, 186155521970012426, 72), + (186155521970012427, 186155521970012557, 130), + (186155521970012558, 186155521970012792, 234), + (186155521970012793, 186155521970013215, 422), + (186155521970013216, 186155521970013976, 760), + (186155521970013977, 186155521970015346, 1369), + (186155521970015347, 186155521970017812, 2465), + (186155521970017813, 186155521970022250, 4437), + (186155521970022251, 186155521970030238, 7987), + (186155521970030239, 186155521970044616, 14377), + (186155521970044617, 186155521970070495, 25878), + (186155521970070496, 186155521970117077, 46581), + (186155521970117078, 186155521970200925, 83847), + (186155521970200926, 186155521970351851, 150925), + (186155521970351852, 186155521970623517, 271665), + (186155521970623518, 186155521971112515, 488997), + (186155521971112516, 186155521971992710, 880194), + (186155521971992711, 186155521973577061, 1584350), + (186155521973577062, 186155521976428893, 2851831), + (186155521976428894, 186155521981562190, 5133296), + (186155521981562191, 186155521990802124, 9239933), + (186155521990802125, 186155522007434004, 16631879), + (186155522007434005, 186155522037371388, 29937383), + (186155522037371389, 186155522091258678, 53887289), + (186155522091258679, 186155522188255800, 96997121), + (186155522188255801, 186155522362850619, 174594818), + (186155522362850620, 186155522677121292, 314270672), + (186155522677121293, 186155523242808503, 565687210), + (186155523242808504, 186155524261045483, 1018236979), + (186155524261045484, 186155526093872046, 1832826562), + (186155526093872047, 186155529392959859, 3299087812), + (186155529392959860, 186155535331317922, 5938358062), + (186155535331317923, 186155546020362436, 10689044513), + (186155546020362437, 186160475833232786, 4929812870349), + (186160475833232787, 186998193536485260, 837717703252473), + (186998193536485261, 187574948946679478, 576755410194217), + (187574948946679479, 187590253155585376, 15304208905897), + (187590253155585377, 187989601854423140, 399348698837763), + ]; + + let vid_range = VidRange::new(MIN, MAX); + let batch_size = AdaptiveBatchSize { + size: 10000, + target: Duration::from_secs(180), + }; + + let mut vid_batcher = VidBatcher::new(BOUNDS.to_vec(), vid_range, batch_size).unwrap(); + vid_batcher.step_timer.set(Duration::from_secs(100)); + + // Run through the entire `vid_batcher`, collecting start and end in + // `steps` + let steps = std::iter::from_fn(|| { + vid_batcher + .step(|start, end| Ok((start, end, end - start))) + .unwrap() + .1 + }) + 
.fold(Vec::new(), |mut steps, (start, end, step)| { + steps.push((start, end, step)); + steps + }); + + assert_eq!(STEPS, &steps); + } } diff --git a/store/postgres/src/writable.rs b/store/postgres/src/writable.rs index 07d116790c0..628b1741e24 100644 --- a/store/postgres/src/writable.rs +++ b/store/postgres/src/writable.rs @@ -9,10 +9,10 @@ use async_trait::async_trait; use graph::blockchain::block_stream::{EntitySourceOperation, FirehoseCursor}; use graph::blockchain::BlockTime; use graph::components::store::{Batch, DeploymentCursorTracker, DerivedEntityQuery, ReadStore}; -use graph::constraint_violation; use graph::data::store::IdList; use graph::data::subgraph::schema; use graph::data_source::CausalityRegion; +use graph::internal_error; use graph::prelude::{ BlockNumber, CacheWeight, Entity, MetricsRegistry, SubgraphDeploymentEntity, SubgraphStore as _, BLOCK_NUMBER_MAX, @@ -95,8 +95,8 @@ impl LastRollup { let kind = match (has_aggregations, block) { (false, _) => LastRollup::NotNeeded, (true, None) => LastRollup::Unknown, - (true, Some(block)) => { - let block_time = store.block_time(site, block)?; + (true, Some(_)) => { + let block_time = store.block_time(site)?; block_time .map(|b| LastRollup::Some(b)) .unwrap_or(LastRollup::Unknown) @@ -133,7 +133,7 @@ impl LastRollupTracker { *last = LastRollup::Some(block_time); } (LastRollup::Some(_) | LastRollup::Unknown, None) => { - constraint_violation!("block time cannot be unset"); + internal_error!("block time cannot be unset"); } } @@ -220,8 +220,10 @@ impl SyncStore { } None => None, }; - self.writable - .start_subgraph(logger, self.site.clone(), graft_base)?; + graph::block_on( + self.writable + .start_subgraph(logger, self.site.clone(), graft_base), + )?; self.store.primary_conn()?.copy_finished(self.site.as_ref()) }) } @@ -238,9 +240,7 @@ impl SyncStore { firehose_cursor, )?; - let block_time = self - .writable - .block_time(self.site.cheap_clone(), block_ptr_to.number)?; + let block_time = self.writable.block_time(self.site.cheap_clone())?; self.last_rollup.set(block_time) }) } @@ -684,8 +684,8 @@ impl Request { let batch = batch.read().unwrap(); if let Some(err) = &batch.error { // This can happen when appending to the batch failed - // because of a constraint violation. Returning an `Err` - // here will poison and shut down the queue + // because of an internal error. 
Returning an `Err` here + // will poison and shut down the queue return Err(err.clone()); } let res = store @@ -1342,7 +1342,7 @@ impl Writer { // If there was an error, report that instead of a naked 'writer not running' queue.check_err()?; if join_handle.is_finished() { - Err(constraint_violation!( + Err(internal_error!( "Subgraph writer for {} is not running", queue.store.site )) @@ -1679,7 +1679,7 @@ impl WritableStoreTrait for WritableStore { if let Some(block_ptr) = self.block_ptr.lock().unwrap().as_ref() { if block_ptr_to.number <= block_ptr.number { - return Err(constraint_violation!( + return Err(internal_error!( "transact_block_operations called for block {} but its head is already at {}", block_ptr_to, block_ptr diff --git a/store/test-store/Cargo.toml b/store/test-store/Cargo.toml index fe05f12233e..2435b447570 100644 --- a/store/test-store/Cargo.toml +++ b/store/test-store/Cargo.toml @@ -18,4 +18,4 @@ prost-types = { workspace = true } [dev-dependencies] hex = "0.4.3" -pretty_assertions = "1.4.0" +pretty_assertions = "1.4.1" diff --git a/store/test-store/src/store.rs b/store/test-store/src/store.rs index 2fa96148ba9..b191916a9b6 100644 --- a/store/test-store/src/store.rs +++ b/store/test-store/src/store.rs @@ -25,10 +25,9 @@ use graph_graphql::prelude::{ use graph_graphql::test_support::GraphQLMetrics; use graph_node::config::{Config, Opt}; use graph_node::store_builder::StoreBuilder; -use graph_store_postgres::layout_for_tests::FAKE_NETWORK_SHARED; -use graph_store_postgres::{connection_pool::ConnectionPool, Shard, SubscriptionManager}; use graph_store_postgres::{ - BlockStore as DieselBlockStore, DeploymentPlacer, SubgraphStore as DieselSubgraphStore, + layout_for_tests::FAKE_NETWORK_SHARED, BlockStore as DieselBlockStore, ConnectionPool, + DeploymentPlacer, Shard, SubgraphStore as DieselSubgraphStore, SubscriptionManager, PRIMARY_SHARD, }; use hex_literal::hex; diff --git a/store/test-store/tests/chain/ethereum/manifest.rs b/store/test-store/tests/chain/ethereum/manifest.rs index 9d094ae5817..f025be2e626 100644 --- a/store/test-store/tests/chain/ethereum/manifest.rs +++ b/store/test-store/tests/chain/ethereum/manifest.rs @@ -47,9 +47,10 @@ specVersion: 1.3.0 "; const SOURCE_SUBGRAPH_SCHEMA: &str = " -type TestEntity @entity { id: ID! } -type User @entity { id: ID! } -type Profile @entity { id: ID! } +type TestEntity @entity(immutable: true) { id: ID! } +type MutableEntity @entity { id: ID! } +type User @entity(immutable: true) { id: ID! } +type Profile @entity(immutable: true) { id: ID! } type TokenData @entity(timeseries: true) { id: Int8! 
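The manifest tests that follow exercise the new rule that a subgraph data source handler may only use an immutable entity type as its mapping entity. A minimal sketch of that rule, assuming a simplified EntityType and a hypothetical free-standing helper (graph-node applies the check during manifest resolution, not through this exact function):

struct EntityType {
    name: String,
    immutable: bool,
}

// Returns the same error message the new manifest tests assert on.
fn validate_mapping_entity(entity: &EntityType) -> Result<(), String> {
    if entity.immutable {
        Ok(())
    } else {
        Err(format!(
            "Entity {} is not immutable and cannot be used as a mapping entity",
            entity.name
        ))
    }
}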
@@ -1761,6 +1762,7 @@ specVersion: 1.3.0 let result = try_resolve_manifest(yaml, SPEC_VERSION_1_3_0).await; assert!(result.is_err()); let err = result.unwrap_err(); + println!("Error: {}", err); assert!(err .to_string() .contains("Subgraph datasources cannot be used alongside onchain datasources")); @@ -1852,8 +1854,78 @@ specVersion: 1.3.0 assert!(matches!(e, SubgraphManifestResolveError::ResolveError(_))); let error_msg = e.to_string(); println!("{}", error_msg); - assert!(error_msg.contains("Nested subgraph data sources are not supported.")); + assert!(error_msg + .contains("Nested subgraph data sources [SubgraphSource] are not supported.")); } } }) } + +#[tokio::test] +async fn subgraph_ds_manifest_mutable_entities_should_fail() { + let yaml = " +schema: + file: + /: /ipfs/Qmschema +dataSources: + - name: SubgraphSource + kind: subgraph + entities: + - Gravatar + network: mainnet + source: + address: 'QmSource' + startBlock: 9562480 + mapping: + apiVersion: 0.0.6 + language: wasm/assemblyscript + entities: + - TestEntity + file: + /: /ipfs/Qmmapping + handlers: + - handler: handleEntity + entity: MutableEntity # This is a mutable entity and should fail +specVersion: 1.3.0 +"; + + let result = try_resolve_manifest(yaml, SPEC_VERSION_1_3_0).await; + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err + .to_string() + .contains("Entity MutableEntity is not immutable and cannot be used as a mapping entity")); +} + +#[tokio::test] +async fn subgraph_ds_manifest_immutable_entities_should_succeed() { + let yaml = " +schema: + file: + /: /ipfs/Qmschema +dataSources: + - name: SubgraphSource + kind: subgraph + entities: + - Gravatar + network: mainnet + source: + address: 'QmSource' + startBlock: 9562480 + mapping: + apiVersion: 0.0.6 + language: wasm/assemblyscript + entities: + - TestEntity + file: + /: /ipfs/Qmmapping + handlers: + - handler: handleEntity + entity: User # This is an immutable entity and should succeed +specVersion: 1.3.0 +"; + + let result = try_resolve_manifest(yaml, SPEC_VERSION_1_3_0).await; + + assert!(result.is_ok()); +} diff --git a/store/test-store/tests/graphql/introspection.rs b/store/test-store/tests/graphql/introspection.rs index 6139e673767..8bc76213e6b 100644 --- a/store/test-store/tests/graphql/introspection.rs +++ b/store/test-store/tests/graphql/introspection.rs @@ -53,15 +53,15 @@ impl Resolver for MockResolver { Ok(r::Value::Null) } - async fn query_permit(&self) -> Result { + async fn query_permit(&self) -> QueryPermit { let permit = Arc::new(tokio::sync::Semaphore::new(1)) .acquire_owned() .await .unwrap(); - Ok(QueryPermit { + QueryPermit { permit, wait: Duration::from_secs(0), - }) + } } } diff --git a/store/test-store/tests/postgres/writable.rs b/store/test-store/tests/postgres/writable.rs index 2e3e138d567..d83ec8cbf48 100644 --- a/store/test-store/tests/postgres/writable.rs +++ b/store/test-store/tests/postgres/writable.rs @@ -449,6 +449,7 @@ fn read_range_pool_created_test() { let pool_created_type = TEST_SUBGRAPH_SCHEMA.entity_type("PoolCreated").unwrap(); let entity_types = vec![pool_created_type.clone()]; + let mut last_op: Option = None; for count in (1..=2).map(|x| x as i64) { let id = if count == 1 { "0xff80818283848586" @@ -478,6 +479,7 @@ fn read_range_pool_created_test() { data, }; + last_op = Some(op.clone()); transact_entity_operations( &subgraph_store, &deployment, @@ -500,5 +502,21 @@ fn read_range_pool_created_test() { let a = result_entities[index as usize].clone(); assert_eq!(a, format!("{:?}", en)); } + + // 
Make sure we get a constraint violation + let op = last_op.take().unwrap(); + + transact_entity_operations(&subgraph_store, &deployment, block_pointer(3), vec![op]) + .await + .unwrap(); + let res = writable.flush().await; + let exp = "duplicate key value violates unique constraint \"pool_created_pkey\": Key (vid)=(2) already exists."; + match res { + Ok(_) => panic!("Expected error, but got success"), + Err(StoreError::ConstraintViolation(msg)) => { + assert_eq!(msg, exp); + } + Err(e) => panic!("Expected constraint violation, but got {:?}", e), + } }) } diff --git a/tests/Cargo.toml b/tests/Cargo.toml index ad4a4a9c785..6f5e317fa8b 100644 --- a/tests/Cargo.toml +++ b/tests/Cargo.toml @@ -19,7 +19,7 @@ graph-runtime-wasm = { path = "../runtime/wasm" } serde = { workspace = true } serde_yaml = { workspace = true } slog = { version = "2.7.0", features = ["release_max_level_trace", "max_level_trace"] } -tokio = { version = "1.38.0", features = ["rt", "macros", "process"] } +tokio = { version = "1.44.2", features = ["rt", "macros", "process"] } # Once graph upgrades to web3 0.19, we don't need this anymore. The version # here needs to be kept in sync with the web3 version that the graph crate # uses until then diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml index 9f05a680e7c..7385b4b08a2 100644 --- a/tests/docker-compose.yml +++ b/tests/docker-compose.yml @@ -1,7 +1,7 @@ version: '3' services: ipfs: - image: docker.io/ipfs/kubo:v0.17.0 + image: docker.io/ipfs/kubo:v0.34.1 ports: - '127.0.0.1:3001:5001' postgres: @@ -20,10 +20,10 @@ services: POSTGRES_DB: graph-node POSTGRES_INITDB_ARGS: "-E UTF8 --locale=C" anvil: - image: ghcr.io/foundry-rs/foundry:latest + image: ghcr.io/foundry-rs/foundry:stable ports: - '3021:8545' - command: "'anvil --host 0.0.0.0 --gas-limit 100000000000 --base-fee 1 --block-time 5 --mnemonic \"test test test test test test test test test test test junk\"'" + command: "'anvil --host 0.0.0.0 --gas-limit 100000000000 --base-fee 1 --block-time 2 --timestamp 1743944919 --mnemonic \"test test test test test test test test test test test junk\"'" # graph-node ports: # json-rpc: 8020 diff --git a/tests/integration-tests/base/abis/Contract.abi b/tests/integration-tests/base/abis/Contract.abi new file mode 100644 index 00000000000..02da1a9e7f3 --- /dev/null +++ b/tests/integration-tests/base/abis/Contract.abi @@ -0,0 +1,33 @@ +[ + { + "inputs": [], + "stateMutability": "nonpayable", + "type": "constructor" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": false, + "internalType": "uint16", + "name": "x", + "type": "uint16" + } + ], + "name": "Trigger", + "type": "event" + }, + { + "inputs": [ + { + "internalType": "uint16", + "name": "x", + "type": "uint16" + } + ], + "name": "emitTrigger", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + } +] diff --git a/tests/integration-tests/base/package.json b/tests/integration-tests/base/package.json new file mode 100644 index 00000000000..2cfb6b94def --- /dev/null +++ b/tests/integration-tests/base/package.json @@ -0,0 +1,25 @@ +{ + "name": "base-subgraph", + "version": "0.1.0", + "scripts": { + "build-contracts": "../../common/build-contracts.sh", + "codegen": "graph codegen --skip-migrations", + "test": "yarn build-contracts && truffle test --compile-none --network test", + "create:test": "graph create test/base-subgraph --node $GRAPH_NODE_ADMIN_URI", + "deploy:test": "graph deploy test/base-subgraph --version-label v0.0.1 --ipfs $IPFS_URI --node $GRAPH_NODE_ADMIN_URI" + }, + 
"devDependencies": { + "@graphprotocol/graph-cli": "0.69.0", + "@graphprotocol/graph-ts": "0.34.0", + "solc": "^0.8.2" + }, + "dependencies": { + "@truffle/contract": "^4.3", + "@truffle/hdwallet-provider": "^1.2", + "apollo-fetch": "^0.7.0", + "babel-polyfill": "^6.26.0", + "babel-register": "^6.26.0", + "gluegun": "^4.6.1", + "truffle": "^5.2" + } +} \ No newline at end of file diff --git a/tests/integration-tests/base/schema.graphql b/tests/integration-tests/base/schema.graphql new file mode 100644 index 00000000000..f7034353d73 --- /dev/null +++ b/tests/integration-tests/base/schema.graphql @@ -0,0 +1,5 @@ +type BaseData @entity(immutable: true) { + id: ID! + data: String! + blockNumber: BigInt! +} \ No newline at end of file diff --git a/tests/integration-tests/base/src/mapping.ts b/tests/integration-tests/base/src/mapping.ts new file mode 100644 index 00000000000..11767070a5b --- /dev/null +++ b/tests/integration-tests/base/src/mapping.ts @@ -0,0 +1,9 @@ +import { ethereum } from '@graphprotocol/graph-ts' +import { BaseData } from '../generated/schema' + +export function handleBlock(block: ethereum.Block): void { + let entity = new BaseData(block.number.toString()) + entity.data = 'from base' + entity.blockNumber = block.number + entity.save() +} \ No newline at end of file diff --git a/tests/integration-tests/base/subgraph.yaml b/tests/integration-tests/base/subgraph.yaml new file mode 100644 index 00000000000..808b446c622 --- /dev/null +++ b/tests/integration-tests/base/subgraph.yaml @@ -0,0 +1,25 @@ +specVersion: 0.0.5 +description: Base Subgraph +repository: https://github.com/graphprotocol/graph-node +schema: + file: ./schema.graphql +dataSources: + - kind: ethereum/contract + name: SimpleContract + network: test + source: + address: "0x5FbDB2315678afecb367f032d93F642f64180aa3" + abi: SimpleContract + startBlock: 0 + mapping: + kind: ethereum/events + apiVersion: 0.0.6 + language: wasm/assemblyscript + entities: + - BaseData + abis: + - name: SimpleContract + file: ./abis/Contract.abi + blockHandlers: + - handler: handleBlock + file: ./src/mapping.ts \ No newline at end of file diff --git a/tests/integration-tests/grafted/abis/Contract.abi b/tests/integration-tests/grafted/abis/Contract.abi new file mode 100644 index 00000000000..02da1a9e7f3 --- /dev/null +++ b/tests/integration-tests/grafted/abis/Contract.abi @@ -0,0 +1,33 @@ +[ + { + "inputs": [], + "stateMutability": "nonpayable", + "type": "constructor" + }, + { + "anonymous": false, + "inputs": [ + { + "indexed": false, + "internalType": "uint16", + "name": "x", + "type": "uint16" + } + ], + "name": "Trigger", + "type": "event" + }, + { + "inputs": [ + { + "internalType": "uint16", + "name": "x", + "type": "uint16" + } + ], + "name": "emitTrigger", + "outputs": [], + "stateMutability": "nonpayable", + "type": "function" + } +] diff --git a/tests/integration-tests/grafted/package.json b/tests/integration-tests/grafted/package.json new file mode 100644 index 00000000000..d45b6fc6727 --- /dev/null +++ b/tests/integration-tests/grafted/package.json @@ -0,0 +1,25 @@ +{ + "name": "grafted-subgraph", + "version": "0.1.0", + "scripts": { + "build-contracts": "../../common/build-contracts.sh", + "codegen": "graph codegen --skip-migrations", + "test": "yarn build-contracts && truffle test --compile-none --network test", + "create:test": "graph create test/grafted-subgraph --node $GRAPH_NODE_ADMIN_URI", + "deploy:test": "graph deploy test/grafted-subgraph --version-label v0.0.1 --ipfs $IPFS_URI --node 
$GRAPH_NODE_ADMIN_URI" + }, + "devDependencies": { + "@graphprotocol/graph-cli": "0.69.0", + "@graphprotocol/graph-ts": "0.34.0", + "solc": "^0.8.2" + }, + "dependencies": { + "@truffle/contract": "^4.3", + "@truffle/hdwallet-provider": "^1.2", + "apollo-fetch": "^0.7.0", + "babel-polyfill": "^6.26.0", + "babel-register": "^6.26.0", + "gluegun": "^4.6.1", + "truffle": "^5.2" + } +} \ No newline at end of file diff --git a/tests/integration-tests/grafted/schema.graphql b/tests/integration-tests/grafted/schema.graphql new file mode 100644 index 00000000000..b83083fd466 --- /dev/null +++ b/tests/integration-tests/grafted/schema.graphql @@ -0,0 +1,5 @@ +type GraftedData @entity(immutable: true) { + id: ID! + data: String! + blockNumber: BigInt! +} \ No newline at end of file diff --git a/tests/integration-tests/grafted/src/mapping.ts b/tests/integration-tests/grafted/src/mapping.ts new file mode 100644 index 00000000000..742d5d67c54 --- /dev/null +++ b/tests/integration-tests/grafted/src/mapping.ts @@ -0,0 +1,9 @@ +import { ethereum } from '@graphprotocol/graph-ts' +import { GraftedData } from '../generated/schema' + +export function handleBlock(block: ethereum.Block): void { + let entity = new GraftedData(block.number.toString()) + entity.data = 'to grafted' + entity.blockNumber = block.number + entity.save() +} \ No newline at end of file diff --git a/tests/integration-tests/grafted/subgraph.yaml b/tests/integration-tests/grafted/subgraph.yaml new file mode 100644 index 00000000000..f946f201941 --- /dev/null +++ b/tests/integration-tests/grafted/subgraph.yaml @@ -0,0 +1,30 @@ +specVersion: 0.0.6 +description: Grafted Subgraph +repository: https://github.com/graphprotocol/graph-node +schema: + file: ./schema.graphql +dataSources: + - kind: ethereum/contract + name: SimpleContract + network: test + source: + address: "0x5FbDB2315678afecb367f032d93F642f64180aa3" + abi: SimpleContract + startBlock: 0 + mapping: + kind: ethereum/events + apiVersion: 0.0.6 + language: wasm/assemblyscript + entities: + - GraftedData + abis: + - name: SimpleContract + file: ./abis/Contract.abi + blockHandlers: + - handler: handleBlock + file: ./src/mapping.ts +features: + - grafting +graft: + base: QmQpiC9bJGFssQfeZippfQ7rcTv7QA67X7jUejc8nV125F + block: 2 \ No newline at end of file diff --git a/tests/integration-tests/multiple-subgraph-datasources/src/mapping.ts b/tests/integration-tests/multiple-subgraph-datasources/src/mapping.ts index 649d92d3f5f..373ddd7e99e 100644 --- a/tests/integration-tests/multiple-subgraph-datasources/src/mapping.ts +++ b/tests/integration-tests/multiple-subgraph-datasources/src/mapping.ts @@ -1,28 +1,26 @@ import { dataSource, EntityTrigger, log } from '@graphprotocol/graph-ts' import { AggregatedData } from '../generated/schema' -import { SourceAData } from '../generated/subgraph-QmPWnNsD4m8T9EEF1ec5d8wetFxrMebggLj1efFHzdnZhx' -import { SourceBData } from '../generated/subgraph-Qma4Rk2D1w6mFiP15ZtHHx7eWkqFR426RWswreLiDanxej' +import { SourceAData } from '../generated/subgraph-QmYHp1bPEf7EoYBpEtJUpZv1uQHYQfWE4AhvR6frjB1Huj' +import { SourceBData } from '../generated/subgraph-QmYBEzastJi7bsa722ac78tnZa6xNnV9vvweerY4kVyJtq' -export function handleSourceAData(data: EntityTrigger): void { - let aggregated = AggregatedData.load(data.data.id) - if (!aggregated) { - aggregated = new AggregatedData(data.data.id) - aggregated.sourceA = data.data.data - aggregated.first = 'sourceA' - } else { - aggregated.sourceA = data.data.data - } + +// We know this handler will run first 
since its defined first in the manifest +// So we dont need to check if the Aggregated data exists +export function handleSourceAData(data: SourceAData): void { + let aggregated = new AggregatedData(data.id) + aggregated.sourceA = data.data + aggregated.first = 'sourceA' aggregated.save() } -export function handleSourceBData(data: EntityTrigger): void { - let aggregated = AggregatedData.load(data.data.id) +export function handleSourceBData(data: SourceBData): void { + let aggregated = AggregatedData.load(data.id) if (!aggregated) { - aggregated = new AggregatedData(data.data.id) - aggregated.sourceB = data.data.data + aggregated = new AggregatedData(data.id) + aggregated.sourceB = data.data aggregated.first = 'sourceB' } else { - aggregated.sourceB = data.data.data + aggregated.sourceB = data.data } aggregated.save() } diff --git a/tests/integration-tests/multiple-subgraph-datasources/subgraph.yaml b/tests/integration-tests/multiple-subgraph-datasources/subgraph.yaml index 296777c578c..4dc4fc7a9b6 100644 --- a/tests/integration-tests/multiple-subgraph-datasources/subgraph.yaml +++ b/tests/integration-tests/multiple-subgraph-datasources/subgraph.yaml @@ -6,7 +6,7 @@ dataSources: name: SourceA network: test source: - address: 'QmPWnNsD4m8T9EEF1ec5d8wetFxrMebggLj1efFHzdnZhx' + address: 'QmYHp1bPEf7EoYBpEtJUpZv1uQHYQfWE4AhvR6frjB1Huj' startBlock: 0 mapping: apiVersion: 0.0.7 @@ -22,7 +22,7 @@ dataSources: name: SourceB network: test source: - address: 'Qma4Rk2D1w6mFiP15ZtHHx7eWkqFR426RWswreLiDanxej' + address: 'QmYBEzastJi7bsa722ac78tnZa6xNnV9vvweerY4kVyJtq' startBlock: 0 mapping: apiVersion: 0.0.7 diff --git a/tests/integration-tests/source-subgraph-a/schema.graphql b/tests/integration-tests/source-subgraph-a/schema.graphql index 10be822d900..2348c9b5c57 100644 --- a/tests/integration-tests/source-subgraph-a/schema.graphql +++ b/tests/integration-tests/source-subgraph-a/schema.graphql @@ -1,4 +1,4 @@ -type SourceAData @entity { +type SourceAData @entity(immutable: true) { id: ID! data: String! blockNumber: BigInt! diff --git a/tests/integration-tests/source-subgraph-b/schema.graphql b/tests/integration-tests/source-subgraph-b/schema.graphql index 9a84bdcbba3..0b012273112 100644 --- a/tests/integration-tests/source-subgraph-b/schema.graphql +++ b/tests/integration-tests/source-subgraph-b/schema.graphql @@ -1,4 +1,4 @@ -type SourceBData @entity { +type SourceBData @entity(immutable: true) { id: ID! data: String! blockNumber: BigInt! diff --git a/tests/integration-tests/source-subgraph/schema.graphql b/tests/integration-tests/source-subgraph/schema.graphql index 15bb2a33921..4fab5be71b9 100644 --- a/tests/integration-tests/source-subgraph/schema.graphql +++ b/tests/integration-tests/source-subgraph/schema.graphql @@ -1,11 +1,10 @@ -type Block @entity { +type Block @entity(immutable: true) { id: ID! number: BigInt! hash: Bytes! - testMessage: String } -type Block2 @entity { +type Block2 @entity(immutable: true) { id: ID! number: BigInt! hash: Bytes! 
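The schema changes above make the source subgraph entities immutable, which lines up with the store-side behavior touched earlier in this diff: ClampRangeQuery and RevertClampQuery refuse to operate on immutable tables. A minimal sketch of that guard, with a simplified Table type standing in for graph-node's and a plain String in place of StoreError:

struct Table {
    qualified_name: String,
    immutable: bool,
}

// Same message the clamp/revert queries return for immutable tables.
fn ensure_mutable(table: &Table) -> Result<(), String> {
    if table.immutable {
        Err(format!(
            "immutable entities can not be deleted or updated (table `{}`)",
            table.qualified_name
        ))
    } else {
        Ok(())
    }
}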
diff --git a/tests/integration-tests/source-subgraph/src/mapping.ts b/tests/integration-tests/source-subgraph/src/mapping.ts index ad27c43c2a3..119fb9b912b 100644 --- a/tests/integration-tests/source-subgraph/src/mapping.ts +++ b/tests/integration-tests/source-subgraph/src/mapping.ts @@ -1,6 +1,5 @@ import { ethereum, log, store } from '@graphprotocol/graph-ts'; import { Block, Block2 } from '../generated/schema'; -import { BigInt } from '@graphprotocol/graph-ts'; export function handleBlock(block: ethereum.Block): void { log.info('handleBlock {}', [block.number.toString()]); @@ -21,37 +20,6 @@ export function handleBlock(block: ethereum.Block): void { let blockEntity3 = new Block2(id3); blockEntity3.number = block.number; blockEntity3.hash = block.hash; + blockEntity3.testMessage = block.number.toString().concat('-message'); blockEntity3.save(); - - if (block.number.equals(BigInt.fromI32(1))) { - let id = 'TEST'; - let entity = new Block(id); - entity.number = block.number; - entity.hash = block.hash; - entity.testMessage = 'Created at block 1'; - log.info('Created entity at block 1', []); - entity.save(); - } - - if (block.number.equals(BigInt.fromI32(2))) { - let id = 'TEST'; - let blockEntity1 = Block.load(id); - if (blockEntity1) { - // Update the block entity - blockEntity1.testMessage = 'Updated at block 2'; - log.info('Updated entity at block 2', []); - blockEntity1.save(); - } - } - - if (block.number.equals(BigInt.fromI32(3))) { - let id = 'TEST'; - let blockEntity1 = Block.load(id); - if (blockEntity1) { - blockEntity1.testMessage = 'Deleted at block 3'; - log.info('Deleted entity at block 3', []); - blockEntity1.save(); - store.remove('Block', id); - } - } } diff --git a/tests/integration-tests/subgraph-data-sources/src/mapping.ts b/tests/integration-tests/subgraph-data-sources/src/mapping.ts index 45ecbd41076..9062970361a 100644 --- a/tests/integration-tests/subgraph-data-sources/src/mapping.ts +++ b/tests/integration-tests/subgraph-data-sources/src/mapping.ts @@ -1,26 +1,26 @@ -import { Entity, log, store, BigInt, EntityTrigger, EntityOp } from '@graphprotocol/graph-ts'; -import { Block } from '../generated/subgraph-QmVz1Pt7NhgCkz4gfavmNrMhojnMT9hW81QDqVjy56ZMUP'; +import { log, store } from '@graphprotocol/graph-ts'; +import { Block, Block2 } from '../generated/subgraph-QmWi3H11QFE2PiWx6WcQkZYZdA5UasaBptUJqGn54MFux5'; import { MirrorBlock } from '../generated/schema'; -export function handleEntity(trigger: EntityTrigger): void { - let blockEntity = trigger.data; - let id = blockEntity.id; +export function handleEntity(block: Block): void { + let id = block.id; - if (trigger.operation === EntityOp.Remove) { - log.info('Removing block entity with id: {}', [id]); - store.remove('MirrorBlock', id); - return; - } + let blockEntity = loadOrCreateMirrorBlock(id); + blockEntity.number = block.number; + blockEntity.hash = block.hash; - let block = loadOrCreateMirrorBlock(id); - block.number = blockEntity.number; - block.hash = blockEntity.hash; - - if (blockEntity.testMessage) { - block.testMessage = blockEntity.testMessage; - } + blockEntity.save(); +} + +export function handleEntity2(block: Block2): void { + let id = block.id; + + let blockEntity = loadOrCreateMirrorBlock(id); + blockEntity.number = block.number; + blockEntity.hash = block.hash; + blockEntity.testMessage = block.testMessage; - block.save(); + blockEntity.save(); } export function loadOrCreateMirrorBlock(id: string): MirrorBlock { diff --git a/tests/integration-tests/subgraph-data-sources/subgraph.yaml 
b/tests/integration-tests/subgraph-data-sources/subgraph.yaml index 3fdc76ac089..92dc7140514 100644 --- a/tests/integration-tests/subgraph-data-sources/subgraph.yaml +++ b/tests/integration-tests/subgraph-data-sources/subgraph.yaml @@ -6,7 +6,7 @@ dataSources: name: Contract network: test source: - address: 'QmVz1Pt7NhgCkz4gfavmNrMhojnMT9hW81QDqVjy56ZMUP' + address: 'QmWi3H11QFE2PiWx6WcQkZYZdA5UasaBptUJqGn54MFux5' startBlock: 0 mapping: apiVersion: 0.0.7 @@ -16,6 +16,6 @@ dataSources: handlers: - handler: handleEntity entity: Block - - handler: handleEntity + - handler: handleEntity2 entity: Block2 file: ./src/mapping.ts diff --git a/tests/src/config.rs b/tests/src/config.rs index 09e3b55fa47..6cdd97a216f 100644 --- a/tests/src/config.rs +++ b/tests/src/config.rs @@ -175,7 +175,8 @@ impl Config { .stdout(stdout) .stderr(stderr) .args(args) - .env("GRAPH_STORE_WRITE_BATCH_DURATION", "5"); + .env("GRAPH_STORE_WRITE_BATCH_DURATION", "5") + .env("ETHEREUM_REORG_THRESHOLD", "0"); status!( "graph-node", @@ -210,7 +211,6 @@ impl Config { let setup = format!( r#" create extension pg_trgm; - create extension pg_stat_statements; create extension btree_gist; create extension postgres_fdw; grant usage on foreign data wrapper postgres_fdw to "{}"; diff --git a/tests/src/contract.rs b/tests/src/contract.rs index 4fdf767b041..05fda947839 100644 --- a/tests/src/contract.rs +++ b/tests/src/contract.rs @@ -7,7 +7,7 @@ use graph::prelude::{ api::{Eth, Namespace}, contract::{tokens::Tokenize, Contract as Web3Contract, Options}, transports::Http, - types::{Address, Bytes, TransactionReceipt}, + types::{Address, Block, BlockId, BlockNumber, Bytes, TransactionReceipt, H256}, }, }; // web3 version 0.18 does not expose this; once the graph crate updates to @@ -165,4 +165,13 @@ impl Contract { } Ok(contracts) } + + pub async fn latest_block() -> Option> { + let eth = Self::eth(); + let block = eth + .block(BlockId::Number(BlockNumber::Latest)) + .await + .unwrap_or_default(); + block + } } diff --git a/tests/src/fixture/ethereum.rs b/tests/src/fixture/ethereum.rs index fc651a512db..d93ac25c235 100644 --- a/tests/src/fixture/ethereum.rs +++ b/tests/src/fixture/ethereum.rs @@ -64,7 +64,7 @@ pub async fn chain( triggers_adapter, Arc::new(NoopRuntimeAdapterBuilder {}), eth_adapters, - ENV_VARS.reorg_threshold, + ENV_VARS.reorg_threshold(), ENV_VARS.ingestor_polling_interval, // We assume the tested chain is always ingestible for now true, diff --git a/tests/src/subgraph.rs b/tests/src/subgraph.rs index 810b87cbb78..92e42836b68 100644 --- a/tests/src/subgraph.rs +++ b/tests/src/subgraph.rs @@ -164,7 +164,7 @@ impl Subgraph { } /// Make a GraphQL query to the index node API - pub async fn index_with_vars(&self, text: &str, vars: Value) -> anyhow::Result { + pub async fn query_with_vars(text: &str, vars: Value) -> anyhow::Result { let endpoint = CONFIG.graph_node.index_node_uri(); graphql_query_with_vars(&endpoint, text, vars).await } diff --git a/tests/tests/integration_tests.rs b/tests/tests/integration_tests.rs index d10df25698b..9df36f7145a 100644 --- a/tests/tests/integration_tests.rs +++ b/tests/tests/integration_tests.rs @@ -11,7 +11,7 @@ use std::future::Future; use std::pin::Pin; -use std::time::{Duration, Instant}; +use std::time::{self, Duration, Instant}; use anyhow::{anyhow, bail, Context, Result}; use graph::futures03::StreamExt; @@ -25,6 +25,8 @@ use tokio::process::{Child, Command}; use tokio::task::JoinError; use tokio::time::sleep; +const SUBGRAPH_LAST_GRAFTING_BLOCK: i32 = 3; + type TestFn = Box< dyn 
FnOnce(TestContext) -> Pin> + Send>> + Sync @@ -110,6 +112,15 @@ impl TestCase { } } + fn new_with_grafting(name: &str, test: fn(TestContext) -> T, base_subgraph: &str) -> Self + where + T: Future> + Send + 'static, + { + let mut test_case = Self::new(name, test); + test_case.source_subgraph = Some(base_subgraph.to_string()); + test_case + } + fn new_with_source_subgraph( name: &str, test: fn(TestContext) -> T, @@ -246,7 +257,7 @@ impl TestCase { let subgraph = self.deploy_and_wait(source, contracts).await?; status!( source, - "source subgraph deployed with hash {}", + "Source subgraph deployed with hash {}", subgraph.deployment ); } @@ -456,9 +467,8 @@ async fn test_block_handlers(ctx: TestContext) -> anyhow::Result<()> { .await?; // test subgraphFeatures endpoint returns handlers correctly - let subgraph_features = subgraph - .index_with_vars( - "query GetSubgraphFeatures($deployment: String!) { + let subgraph_features = Subgraph::query_with_vars( + "query GetSubgraphFeatures($deployment: String!) { subgraphFeatures(subgraphId: $deployment) { specVersion apiVersion @@ -468,9 +478,9 @@ async fn test_block_handlers(ctx: TestContext) -> anyhow::Result<()> { handlers } }", - json!({ "deployment": subgraph.deployment }), - ) - .await?; + json!({ "deployment": subgraph.deployment }), + ) + .await?; let handlers = &subgraph_features["data"]["subgraphFeatures"]["handlers"]; assert!( handlers.is_array(), @@ -523,79 +533,34 @@ async fn subgraph_data_sources(ctx: TestContext) -> anyhow::Result<()> { assert!(subgraph.healthy); let expected_response = json!({ "mirrorBlocks": [ - { "id": "1-v1", "number": "1" }, - { "id": "1-v2", "number": "1" }, - { "id": "1-v3", "number": "1" }, - { "id": "2-v1", "number": "2" }, - { "id": "2-v2", "number": "2" }, - { "id": "2-v3", "number": "2" }, - { "id": "3-v1", "number": "3" }, - { "id": "3-v2", "number": "3" }, - { "id": "3-v3", "number": "3" }, - { "id": "4-v1", "number": "4" }, - { "id": "4-v2", "number": "4" }, - { "id": "4-v3", "number": "4" }, - { "id": "5-v1", "number": "5" }, - { "id": "5-v2", "number": "5" }, - { "id": "5-v3", "number": "5" }, - { "id": "6-v1", "number": "6" }, - { "id": "6-v2", "number": "6" }, - { "id": "6-v3", "number": "6" }, - { "id": "7-v1", "number": "7" }, - { "id": "7-v2", "number": "7" }, - { "id": "7-v3", "number": "7" }, - { "id": "8-v1", "number": "8" }, - { "id": "8-v2", "number": "8" }, - { "id": "8-v3", "number": "8" }, - { "id": "9-v1", "number": "9" }, - { "id": "9-v2", "number": "9" }, - { "id": "9-v3", "number": "9" }, - { "id": "10-v1", "number": "10" }, - { "id": "10-v2", "number": "10" }, - { "id": "10-v3", "number": "10" }, + { "id": "1-v1", "number": "1", "testMessage": null }, + { "id": "1-v2", "number": "1", "testMessage": null }, + { "id": "1-v3", "number": "1", "testMessage": "1-message" }, + { "id": "2-v1", "number": "2", "testMessage": null }, + { "id": "2-v2", "number": "2", "testMessage": null }, + { "id": "2-v3", "number": "2", "testMessage": "2-message" }, + { "id": "3-v1", "number": "3", "testMessage": null }, + { "id": "3-v2", "number": "3", "testMessage": null }, + { "id": "3-v3", "number": "3", "testMessage": "3-message" }, ] }); query_succeeds( - "Blocks should be right", - &subgraph, - "{ mirrorBlocks(where: {number_lte: 10}, orderBy: number) { id, number } }", - expected_response, - ) - .await?; - - let expected_response = json!({ - "mirrorBlock": { "id": "TEST", "number": "1", "testMessage": "Created at block 1" }, - }); - - query_succeeds( - "Blocks should be right", - &subgraph, - "{ 
mirrorBlock(id: \"TEST\", block: {number: 1}) { id, number, testMessage } }", - expected_response, - ) - .await?; - - let expected_response = json!({ - "mirrorBlock": { "id": "TEST", "number": "1", "testMessage": "Updated at block 2" }, - }); - - query_succeeds( - "Blocks should be right", + "Query all blocks with testMessage", &subgraph, - "{ mirrorBlock(id: \"TEST\", block: {number: 2}) { id, number, testMessage } }", + "{ mirrorBlocks(where: {number_lte: 3}, orderBy: number) { id, number, testMessage } }", expected_response, ) .await?; let expected_response = json!({ - "mirrorBlock": null, + "mirrorBlock": { "id": "1-v3", "number": "1", "testMessage": "1-message" }, }); query_succeeds( - "Blocks should be right", + "Query specific block with testMessage", &subgraph, - "{ mirrorBlock(id: \"TEST\", block: {number: 3}) { id, number, testMessage } }", + "{ mirrorBlock(id: \"1-v3\") { id, number, testMessage } }", expected_response, ) .await?; @@ -742,9 +707,8 @@ async fn test_non_fatal_errors(ctx: TestContext) -> anyhow::Result<()> { } }"; - let resp = subgraph - .index_with_vars(query, json!({ "deployment" : subgraph.deployment })) - .await?; + let resp = + Subgraph::query_with_vars(query, json!({ "deployment" : subgraph.deployment })).await?; let subgraph_features = &resp["data"]["subgraphFeatures"]; let exp = json!({ "specVersion": "0.0.4", @@ -841,6 +805,82 @@ async fn test_remove_then_update(ctx: TestContext) -> anyhow::Result<()> { Ok(()) } +async fn test_subgraph_grafting(ctx: TestContext) -> anyhow::Result<()> { + async fn get_block_hash(block_number: i32) -> Option { + const FETCH_BLOCK_HASH: &str = r#" + query blockHashFromNumber($network: String!, $blockNumber: Int!) { + hash: blockHashFromNumber( + network: $network, + blockNumber: $blockNumber, + ) } "#; + let vars = json!({ + "network": "test", + "blockNumber": block_number + }); + + let resp = Subgraph::query_with_vars(FETCH_BLOCK_HASH, vars) + .await + .unwrap(); + assert_eq!(None, resp.get("errors")); + resp["data"]["hash"].as_str().map(|s| s.to_owned()) + } + + let subgraph = ctx.subgraph; + + assert!(subgraph.healthy); + + let block_hashes: Vec<&str> = vec![ + "384c705d4d1933ae8ba89026f016f09854057a267e1143e47bb7511d772a35d4", + "b90423eead33404dae0684169d35edd494b36802b721fb8de0bb8bc036c10480", + "2a6c4b65d659e0485371a93bc1ac0f0d7bc0f25a454b5f23a842335fea0638d5", + ]; + + let pois: Vec<&str> = vec![ + "0xde9e5650e22e61def6990d3fc4bd5915a4e8e0dd54af0b6830bf064aab16cc03", + "0x5d790dca3e37bd9976345d32d437b84ba5ea720a0b6ea26231a866e9f078bd52", + "0x719c04b78e01804c86f2bd809d20f481e146327af07227960e2242da365754ef", + ]; + + for i in 1..4 { + let block_hash = get_block_hash(i).await.unwrap(); + // We need to make sure that the preconditions for POI are fulfiled + // namely that the blockchain produced the proper block hashes for the + // blocks of which we will check the POI. + assert_eq!(block_hash, block_hashes[(i - 1) as usize]); + + const FETCH_POI: &str = r#" + query proofOfIndexing($subgraph: String!, $blockNumber: Int!, $blockHash: String!, $indexer: String!) 
{ + proofOfIndexing( + subgraph: $subgraph, + blockNumber: $blockNumber, + blockHash: $blockHash, + indexer: $indexer + ) } "#; + + let zero_addr = "0000000000000000000000000000000000000000"; + let vars = json!({ + "subgraph": subgraph.deployment, + "blockNumber": i, + "blockHash": block_hash, + "indexer": zero_addr, + }); + let resp = Subgraph::query_with_vars(FETCH_POI, vars).await?; + assert_eq!(None, resp.get("errors")); + assert!(resp["data"]["proofOfIndexing"].is_string()); + let poi = resp["data"]["proofOfIndexing"].as_str().unwrap(); + // Check the expected value of the POI. The transition from the old legacy + // hashing to the new one is done in the block #2 anything before that + // should not change as the legacy code will not be updated. Any change + // after that might indicate a change in the way new POI is now calculated. + // Change on the block #2 would mean a change in the transitioning + // from the old to the new algorithm hence would be reflected only + // subgraphs that are grafting from pre 0.0.5 to 0.0.6 or newer. + assert_eq!(poi, pois[(i - 1) as usize]); + } + + Ok(()) +} + async fn test_poi_for_failed_subgraph(ctx: TestContext) -> anyhow::Result<()> { let subgraph = ctx.subgraph; const INDEXING_STATUS: &str = r#" @@ -874,9 +914,9 @@ async fn test_poi_for_failed_subgraph(ctx: TestContext) -> anyhow::Result<()> { } async fn fetch_status(subgraph: &Subgraph) -> anyhow::Result { - let resp = subgraph - .index_with_vars(INDEXING_STATUS, json!({ "subgraphName": subgraph.name })) - .await?; + let resp = + Subgraph::query_with_vars(INDEXING_STATUS, json!({ "subgraphName": subgraph.name })) + .await?; assert_eq!(None, resp.get("errors")); let statuses = &resp["data"]["statuses"]; assert_eq!(1, statuses.as_array().unwrap().len()); @@ -922,7 +962,7 @@ async fn test_poi_for_failed_subgraph(ctx: TestContext) -> anyhow::Result<()> { "blockNumber": block_number, "blockHash": status.latest_block["hash"], }); - let resp = subgraph.index_with_vars(FETCH_POI, vars).await?; + let resp = Subgraph::query_with_vars(FETCH_POI, vars).await?; assert_eq!(None, resp.get("errors")); assert!(resp["data"]["proofOfIndexing"].is_string()); Ok(()) @@ -960,6 +1000,25 @@ async fn test_multiple_subgraph_datasources(ctx: TestContext) -> anyhow::Result< Ok(()) } +async fn wait_for_blockchain_block(block_number: i32) -> bool { + // Wait up to 5 minutes for the expected block to appear + const STATUS_WAIT: Duration = Duration::from_secs(300); + const REQUEST_REPEATING: Duration = time::Duration::from_secs(1); + let start = Instant::now(); + while start.elapsed() < STATUS_WAIT { + let latest_block = Contract::latest_block().await; + if let Some(latest_block) = latest_block { + if let Some(number) = latest_block.number { + if number >= block_number.into() { + return true; + } + } + } + tokio::time::sleep(REQUEST_REPEATING).await; + } + false +} + /// The main test entrypoint. 
#[tokio::test] async fn integration_tests() -> anyhow::Result<()> { @@ -981,6 +1040,7 @@ async fn integration_tests() -> anyhow::Result<()> { TestCase::new("timestamp", test_timestamp), TestCase::new("ethereum-api-tests", test_eth_api), TestCase::new("topic-filter", test_topic_filters), + TestCase::new_with_grafting("grafted", test_subgraph_grafting, "base"), TestCase::new_with_source_subgraph( "subgraph-data-sources", subgraph_data_sources, @@ -1003,6 +1063,11 @@ async fn integration_tests() -> anyhow::Result<()> { cases }; + // Here we wait for a block in the blockchain in order not to influence + // block hashes for all the blocks until the end of the grafting tests. + // Currently the last used block for grafting test is the block 3. + assert!(wait_for_blockchain_block(SUBGRAPH_LAST_GRAFTING_BLOCK).await); + let contracts = Contract::deploy_all().await?; status!("setup", "Resetting database"); diff --git a/tests/tests/runner_tests.rs b/tests/tests/runner_tests.rs index ac645884b5d..261c886dfea 100644 --- a/tests/tests/runner_tests.rs +++ b/tests/tests/runner_tests.rs @@ -12,7 +12,7 @@ use graph::data::store::scalar::Bytes; use graph::data::subgraph::schema::{SubgraphError, SubgraphHealth}; use graph::data::value::Word; use graph::data_source::CausalityRegion; -use graph::env::EnvVars; +use graph::env::{EnvVars, TEST_WITH_NO_REORG}; use graph::ipfs; use graph::ipfs::test_utils::add_files_to_local_ipfs_node_for_testing; use graph::object; @@ -109,6 +109,8 @@ fn assert_eq_ignore_backtrace(err: &SubgraphError, expected: &SubgraphError) { #[tokio::test] async fn data_source_revert() -> anyhow::Result<()> { + *TEST_WITH_NO_REORG.lock().unwrap() = true; + let RunnerTestRecipe { stores, test_info } = RunnerTestRecipe::new("data_source_revert", "data-source-revert").await; @@ -179,6 +181,8 @@ async fn data_source_revert() -> anyhow::Result<()> { // since it uses the same deployment id. data_source_long_revert().await.unwrap(); + *TEST_WITH_NO_REORG.lock().unwrap() = false; + Ok(()) }
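The runner test above wraps data_source_revert in the TEST_WITH_NO_REORG flag, and the integration test harness now exports ETHEREUM_REORG_THRESHOLD=0; together with the switch from the ENV_VARS.reorg_threshold field to a reorg_threshold() method call in the fixture, the intent is to let tests run with reorg handling disabled. A plausible shape for such an accessor, shown only as an illustration under those assumptions (this is not graph-node's actual implementation):

use std::sync::Mutex;

// Test-only switch, mirroring how the runner tests toggle the flag around a test.
static TEST_WITH_NO_REORG: Mutex<bool> = Mutex::new(false);

struct EnvVars {
    reorg_threshold: i32,
}

impl EnvVars {
    // Hypothetical accessor: the configured threshold, unless a test asked
    // for reorg handling to be turned off entirely.
    fn reorg_threshold(&self) -> i32 {
        if *TEST_WITH_NO_REORG.lock().unwrap() {
            0
        } else {
            self.reorg_threshold
        }
    }
}

fn main() {
    // Example values only; 250 stands in for a configured threshold.
    let env = EnvVars { reorg_threshold: 250 };
    assert_eq!(env.reorg_threshold(), 250);
    *TEST_WITH_NO_REORG.lock().unwrap() = true;
    assert_eq!(env.reorg_threshold(), 0);
}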