Compare commits

...

20 commits

Author SHA1 Message Date
d1b08f1b07 Remove old lints 2024-05-04 19:48:38 +02:00
34e665154d Improve tracing 2024-05-04 16:10:45 +02:00
3d0a05f3a9 Add tracing 2024-05-04 13:50:04 +02:00
8f2ea89301 Partial inbox handler 2024-05-03 23:44:01 +02:00
288c181cc9 make post federation work 2024-05-03 18:35:05 +02:00
f0d7d793ca Use LEGACY fetch mode for better compat 2024-05-03 17:16:59 +02:00
21b47409f1 More macros 2024-05-02 23:03:31 +02:00
edc21b4403 Improve error handling in server::api 2024-05-02 21:23:45 +02:00
564771931f Major refactor
* Reorganize the fetch component
* Organize the server code a little more
* Move verification to the server and clean it up
* Improve the error handling around the fetch code
2024-05-02 19:41:23 +02:00
8d350e8cd9 A whole bunch of different refactors 2024-05-02 19:41:12 +02:00
09cf289b75 [wip] remove space from signature header formatting 2024-04-30 00:09:53 +02:00
fc4e4595c2 [wip] YET MORE signatures cleanup and fixing 2024-04-29 23:36:57 +02:00
9eaad3d7bb [wip] http signatures refactor 2024-04-29 20:17:56 +02:00
9845603846 [wip] signatures refactor 2024-04-29 13:14:25 +02:00
c784966d20 Split fetch::signatures into its own file 2024-04-29 00:09:17 +02:00
37acb67aa5 Major cleanup
* Rename `fetch::keys` to `fetch::signatures`
* Clean up the public api of `fetch::signatures`
* Switch from axum to hyper
* Add request signature validation (buggy, wip)
2024-04-28 23:40:37 +02:00
b91da3c4ab god forsaken http signatures 2024-04-27 22:01:28 +02:00
bb26926edb Serve actors by ID 2024-04-27 09:32:00 +02:00
7ea8938c49 Documented some stuff, improved follow request logic 2024-04-26 23:56:46 +02:00
29f90ad918 Store api overhaul 2024-04-24 23:18:19 +02:00
36 changed files with 5450 additions and 1356 deletions

493
Cargo.lock generated
View file

@ -89,78 +89,12 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "async-trait"
version = "0.1.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.60",
]
[[package]]
name = "autocfg"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80"
[[package]]
name = "axum"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
dependencies = [
"async-trait",
"axum-core",
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-util",
"itoa",
"matchit",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
"sync_wrapper 1.0.1",
"tokio",
"tower",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "axum-core"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper 0.1.2",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "backtrace"
version = "0.3.71"
@ -176,18 +110,18 @@ dependencies = [
"rustc-demangle",
]
[[package]]
name = "base64"
version = "0.21.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
[[package]]
name = "base64"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51"
[[package]]
name = "base64ct"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "bincode"
version = "2.0.0-rc.3"
@ -239,12 +173,27 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "bumpalo"
version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "bytes"
version = "1.6.0"
@ -381,6 +330,12 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "const-oid"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "convert_case"
version = "0.4.0"
@ -403,6 +358,25 @@ version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
[[package]]
name = "cpufeatures"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504"
dependencies = [
"libc",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "csv"
version = "1.3.0"
@ -424,6 +398,17 @@ dependencies = [
"memchr",
]
[[package]]
name = "der"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0"
dependencies = [
"const-oid",
"pem-rfc7468",
"zeroize",
]
[[package]]
name = "derive_more"
version = "0.99.17"
@ -437,6 +422,17 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"const-oid",
"crypto-common",
]
[[package]]
name = "either"
version = "1.11.0"
@ -478,9 +474,18 @@ checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984"
name = "fetch"
version = "0.0.0"
dependencies = [
"base64",
"chrono",
"derive_more",
"http",
"http-body-util",
"pem",
"rand",
"reqwest",
"rsa",
"serde_json",
"sigh",
"spki",
"tracing",
]
[[package]]
@ -552,6 +557,16 @@ dependencies = [
"pin-utils",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getrandom"
version = "0.2.14"
@ -804,6 +819,9 @@ name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
dependencies = [
"spin",
]
[[package]]
name = "lazycell"
@ -827,6 +845,12 @@ dependencies = [
"windows-targets 0.52.5",
]
[[package]]
name = "libm"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "librocksdb-sys"
version = "0.17.0+9.0.0"
@ -886,10 +910,14 @@ dependencies = [
]
[[package]]
name = "matchit"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
name = "macro"
version = "0.0.0"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.60",
]
[[package]]
name = "memchr"
@ -957,6 +985,53 @@ dependencies = [
"minimal-lexical",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]]
name = "num-bigint-dig"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151"
dependencies = [
"byteorder",
"lazy_static",
"libm",
"num-integer",
"num-iter",
"num-traits",
"rand",
"smallvec",
"zeroize",
]
[[package]]
name = "num-integer"
version = "0.1.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
dependencies = [
"num-traits",
]
[[package]]
name = "num-iter"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.18"
@ -964,6 +1039,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a"
dependencies = [
"autocfg",
"libm",
]
[[package]]
@ -1035,6 +1111,12 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking_lot"
version = "0.12.1"
@ -1058,6 +1140,25 @@ dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "pem"
version = "3.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e459365e590736a54c3fa561947c84837534b8e9af6fc5bf781307e82658fae"
dependencies = [
"base64",
"serde",
]
[[package]]
name = "pem-rfc7468"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412"
dependencies = [
"base64ct",
]
[[package]]
name = "percent-encoding"
version = "2.3.1"
@ -1096,6 +1197,27 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkcs1"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f"
dependencies = [
"der",
"pkcs8",
"spki",
]
[[package]]
name = "pkcs8"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
dependencies = [
"der",
"spki",
]
[[package]]
name = "pkg-config"
version = "0.3.30"
@ -1124,14 +1246,21 @@ dependencies = [
"clap",
"cli-table",
"puppy",
"tokio",
]
[[package]]
name = "puppy"
version = "0.0.0"
dependencies = [
"bincode",
"chrono",
"derive_more",
"either",
"fetch",
"serde_json",
"store",
"tracing",
]
[[package]]
@ -1217,7 +1346,7 @@ version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e6cc1e89e689536eb5aeede61520e874df5a4707df811cd5da4aa5fbb2aae19"
dependencies = [
"base64 0.22.0",
"base64",
"bytes",
"encoding_rs",
"futures-core",
@ -1241,7 +1370,7 @@ dependencies = [
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper 0.1.2",
"sync_wrapper",
"system-configuration",
"tokio",
"tokio-native-tls",
@ -1262,6 +1391,27 @@ dependencies = [
"librocksdb-sys",
]
[[package]]
name = "rsa"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc"
dependencies = [
"const-oid",
"digest",
"num-bigint-dig",
"num-integer",
"num-traits",
"pkcs1",
"pkcs8",
"rand_core",
"sha2",
"signature",
"spki",
"subtle",
"zeroize",
]
[[package]]
name = "rustc-demangle"
version = "0.1.23"
@ -1302,7 +1452,7 @@ version = "2.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d"
dependencies = [
"base64 0.22.0",
"base64",
"rustls-pki-types",
]
@ -1312,12 +1462,6 @@ version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ecd36cc4259e3e4514335c4a138c6b43171a8d61d8f5c9348f9fc7529416f247"
[[package]]
name = "rustversion"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47"
[[package]]
name = "ryu"
version = "1.0.17"
@ -1399,16 +1543,6 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_path_to_error"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6"
dependencies = [
"itoa",
"serde",
]
[[package]]
name = "serde_urlencoded"
version = "0.7.1"
@ -1425,9 +1559,37 @@ dependencies = [
name = "server"
version = "0.0.0"
dependencies = [
"axum",
"derive_more",
"http",
"http-body-util",
"hyper",
"hyper-util",
"puppy",
"serde_json",
"tokio",
"tracing",
"tracing-forest",
"tracing-subscriber",
]
[[package]]
name = "sha2"
version = "0.10.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]]
@ -1436,19 +1598,6 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "sigh"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46bdb4cc44c46a3f0f0a6d1de27c63fccd7fa3384d8d370016c21c8f4a8b89a2"
dependencies = [
"base64 0.21.7",
"http",
"nom",
"openssl",
"thiserror",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.1"
@ -1458,6 +1607,16 @@ dependencies = [
"libc",
]
[[package]]
name = "signature"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
dependencies = [
"digest",
"rand_core",
]
[[package]]
name = "slab"
version = "0.4.9"
@ -1483,6 +1642,22 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "spin"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]]
name = "spki"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
dependencies = [
"base64ct",
"der",
]
[[package]]
name = "store"
version = "0.0.0"
@ -1490,6 +1665,8 @@ dependencies = [
"bincode",
"chrono",
"derive_more",
"either",
"macro",
"rocksdb",
"tempfile",
"ulid",
@ -1501,6 +1678,12 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "subtle"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
[[package]]
name = "syn"
version = "1.0.109"
@ -1529,12 +1712,6 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
[[package]]
name = "sync_wrapper"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394"
[[package]]
name = "system-configuration"
version = "0.5.1"
@ -1597,6 +1774,16 @@ dependencies = [
"syn 2.0.60",
]
[[package]]
name = "thread_local"
version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "tinyvec"
version = "1.6.0"
@ -1702,9 +1889,21 @@ checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
dependencies = [
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-attributes"
version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.60",
]
[[package]]
name = "tracing-core"
version = "0.1.32"
@ -1712,6 +1911,44 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
dependencies = [
"once_cell",
"valuable",
]
[[package]]
name = "tracing-forest"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee40835db14ddd1e3ba414292272eddde9dad04d3d4b65509656414d1c42592f"
dependencies = [
"smallvec",
"thiserror",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
dependencies = [
"nu-ansi-term",
"sharded-slab",
"smallvec",
"thread_local",
"tracing-core",
"tracing-log",
]
[[package]]
@ -1720,6 +1957,12 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ulid"
version = "1.1.2"
@ -1775,12 +2018,24 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "virtue"
version = "0.0.13"
@ -2077,6 +2332,12 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "zeroize"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d"
[[package]]
name = "zstd-sys"
version = "2.0.10+zstd.1.5.6"

View file

@ -3,6 +3,7 @@ members = [
"lib/puppy",
"lib/store",
"lib/fetch",
"lib/macro",
"bin/server",
"bin/pupctl",
]

View file

@ -6,3 +6,4 @@ edition = "2021"
puppy = { path = "../../lib/puppy" }
clap = { version = "*", features = ["derive"] }
cli-table = "*"
tokio = { version = "*", features = ["full"] }

View file

@ -1,86 +1,115 @@
//! Control program for the ActivityPub federated social media server.
#![feature(iterator_try_collect)]
use puppy::{
store::{
self,
alias::Username,
arrow::{FollowRequested, Follows},
mixin::Profile,
Error,
},
tl::Post,
Bite, Key, Store,
actor::Actor,
config::Config,
data::{FollowRequest, Object, Profile},
post::Author,
Context,
};
fn main() -> store::Result<()> {
// Store::nuke(".state")?;
let db = Store::open(".state")?;
println!("creating actors");
let riley = get_or_create_actor(&db, "riley")?;
let linen = get_or_create_actor(&db, "linen")?;
if false {
println!("creating posts");
puppy::create_post(&db, riley, "@linen <3")?;
puppy::create_post(&db, linen, "@riley <3")?;
}
if false {
println!("making riley follow linen");
if !db.exists::<Follows>((riley, linen))? {
println!("follow relation does not exist yet");
if !db.exists::<FollowRequested>((riley, linen))? {
println!("no pending follow request; creating");
puppy::fr::create(&db, riley, linen)?;
} else {
println!("accepting the pending follow request");
puppy::fr::accept(&db, riley, linen)?;
}
} else {
println!("riley already follows linen");
#[tokio::main]
async fn main() -> puppy::Result<()> {
// puppy::store::Store::nuke(".state")?;
let config = Config {
ap_domain: "test.piss-on.me".to_string(),
wf_domain: "test.piss-on.me".to_string(),
state_dir: ".state".to_string(),
port: 1312,
};
let cx = Context::load(config)?;
let riley = get_or_create_actor(&cx, "riley")?;
cx.run(|tx| {
println!("\nRiley's following:");
for FollowRequest { id, origin, .. } in
riley.pending_requests(&tx).try_collect::<Vec<_>>()?
{
let Profile { account_name, .. } = tx.get_mixin(origin)?.unwrap();
let Object { id, .. } = tx.get_mixin(id)?.unwrap();
println!("- @{account_name} ({origin}) (request url = {id})");
}
Ok(())
})?;
// let post = puppy::post::create_post(&cx, riley.key, "i like boys")?;
// puppy::post::federate_post(&cx, post).await
// let linen = get_or_create_actor(&cx, "linen")?;
// if true {
// println!("creating posts");
// puppy::post::create_post(&cx, riley.key, "@linen <3")?;
// puppy::post::create_post(&cx, linen.key, "@riley <3")?;
// }
// if true {
// println!("making riley follow linen");
// cx.run(|tx| {
// if !riley.follows(&tx, &linen)? {
// println!("follow relation does not exist yet");
// if let Some(req) = linen
// .pending_requests(&tx)
// .find_ok(|r| r.origin == riley.key)?
// {
// println!("accepting the pending follow request");
// linen.do_accept_request(&cx, req)
// } else {
// println!("no pending follow request; creating");
// riley.do_follow_request(&cx, &linen).map(|_| ())
// }
// } else {
// println!("riley already follows linen");
// Ok(())
// }
// })?;
// }
println!("\nPosts on the instance:");
for post in puppy::post::fetch_timeline(cx.store(), .., None)?.posts() {
let Author { ref handle, .. } = post.author;
let content = post.content.content.as_ref().unwrap();
println!("- {:?} by {handle}:\n{content}", post.id)
}
println!("Posts on the instance:");
for Post {
id,
content,
author,
} in puppy::tl::fetch_all(&db)?
{
let (_, Profile { account_name, .. }) = db.lookup(author)?;
let content = content.content.unwrap();
println!("- {id} by @{account_name} ({author}):\n{content}",)
}
println!("Linen's followers:");
for id in puppy::fr::followers_of(&db, linen)? {
let (_, Profile { account_name, .. }) = db.lookup(id)?;
println!("- @{account_name} ({id})");
}
println!("Riley's following:");
for id in puppy::fr::following_of(&db, riley)? {
let (_, Profile { account_name, .. }) = db.lookup(id)?;
println!("- @{account_name} ({id})");
}
println!("Biting riley");
puppy::bite_actor(&db, linen, riley).unwrap();
for Bite { id, biter, .. } in puppy::bites_on(&db, riley).unwrap() {
let (_, Profile { account_name, .. }) = db.lookup(biter).unwrap();
println!("riley was bitten by @{account_name} at {}", id.timestamp());
}
store::OK
Ok(())
// cx.run(|tx| {
// println!("\nLinen's followers:");
// for id in linen.followers(&tx).try_collect::<Vec<_>>()? {
// let Profile { account_name, .. } = db.get_mixin(id)?.unwrap();
// println!("- @{account_name} ({id})");
// }
// println!("\nRiley's following:");
// for id in riley.following(&tx).try_collect::<Vec<_>>()? {
// let Profile { account_name, .. } = db.get_mixin(id)?.unwrap();
// println!("- @{account_name} ({id})");
// }
// if false {
// println!("Biting riley");
// linen.do_bite(&cx, &riley)?;
// for Bite { id, biter, .. } in riley.bites_suffered(&tx).try_collect::<Vec<_>>()? {
// let Profile { account_name, .. } = db.get_mixin(biter)?.unwrap();
// println!("riley was bitten by @{account_name} at {}", id.timestamp());
// }
// }
// Ok(())
// })
}
fn get_or_create_actor(db: &Store, username: &str) -> Result<Key, Error> {
let user = db.translate::<Username>(username);
fn get_or_create_actor(cx: &Context, username: &str) -> puppy::Result<Actor> {
let user = cx.run(|tx| Actor::by_username(tx, username))?;
match user {
Ok(key) => {
println!("found '{username}' ({key})");
Some(key) => {
println!("found '{username}' ({key:?})");
Ok(key)
}
Err(Error::Missing) => {
None => {
println!("'{username}' doesn't exist yet, creating");
let r = puppy::create_actor(&db, username);
let r = puppy::actor::create_local(cx, username);
if let Ok(ref key) = r {
println!("created '{username}' with key {key}");
println!("created '{username}' with key {key:?}");
}
r
}
Err(e) => Err(e),
}
}

View file

@ -4,5 +4,13 @@ edition = "2021"
[dependencies]
puppy = { path = "../../lib/puppy" }
hyper = { version = "*", features = ["full"] }
tokio = { version = "*", features = ["full"] }
axum = "*"
http-body-util = "*"
hyper-util = { version = "*", features = ["full"] }
serde_json = "*"
http = "*"
derive_more = "*"
tracing = "*"
tracing-subscriber = "*"
tracing-forest = "*"

310
bin/server/src/api.rs Normal file
View file

@ -0,0 +1,310 @@
//! API endpoints and request handlers.
use std::convert::Infallible;
use std::net::SocketAddr;
use std::sync::Arc;
use http_body_util::{BodyExt as _, Full};
use hyper::body::Bytes;
use hyper::server::conn::http1;
use hyper::service::service_fn;
use hyper_util::rt::TokioIo;
use hyper::Method;
use puppy::Context;
use serde_json::{json, Value};
use tokio::net::TcpListener;
use tracing::{error, info, info_span, trace_span, Instrument as _};
use crate::sig::{Signer, Verdict, Verifier, VERIFIER_MOUNT};
use self::error::Message;
// A simple macro for returning an error message.
// Usage: `fuck!(404: "no {} found", thing)` — expands to an early `return Err(...)`
// carrying the given status code and a `format!`-style message, with no structured
// `detail` payload. Only usable inside functions returning `Result<_, Message>`.
macro_rules! fuck {
    ($code:literal: $($arg:tt)*) => {
        return Err(crate::api::error::Message {
            status: $code,
            error: format!($($arg)*),
            detail: None,
        })
    };
}
// Makes a response.
// The arguments are spliced in as `Resp` struct fields (e.g. `respond!(code: 201)`);
// any field not given falls back to `Resp::default()`, and the result is converted
// into a `Response` through the `From<Resp>` impl.
macro_rules! respond {
    ($($arg:tt)*) => {
        crate::api::Resp {
            $($arg)*,
            .. crate::api::Resp::default()
        }.into()
    };
}
/// Parameters for a response; turned into an actual [`Response`] by the `From` impl.
struct Resp<'s> {
    /// JSON value to serialize into the response body; `None` means an empty body.
    body: Option<&'s Value>,
    /// Value for the `content-type` header.
    kind: &'s str,
    /// HTTP status code of the response.
    code: u16,
}
impl<'s> Default for Resp<'s> {
fn default() -> Self {
Resp {
body: None,
kind: "application/json",
code: 200,
}
}
}
impl<'a> From<Resp<'a>> for Response {
    /// Render the response parameters into a concrete [`Response`].
    fn from(resp: Resp<'_>) -> Response {
        let Resp { body, kind, code } = resp;
        // Serialize the JSON payload if one was given, otherwise send an empty body.
        let payload = match body {
            Some(data) => Full::new(serde_json::to_vec(data).unwrap().into()),
            None => Full::new(Bytes::default()),
        };
        Response::<()>::builder()
            .status(code)
            .header("content-type", kind)
            .body(payload)
            .unwrap()
    }
}
/// ActivityPub endpoints (objects, inboxes, proxy/outbox helpers).
pub mod ap;
/// WebFinger resolution endpoint.
pub mod wf;
/// An incoming request, with a streaming (not-yet-buffered) hyper body.
type Request = hyper::Request<hyper::body::Incoming>;
/// An outgoing response; defaults to a fully buffered body.
type Response<T = Full<Bytes>> = hyper::Response<T>;
/// Initialize the http server loop.
///
/// Binds to 127.0.0.1 on the configured port and serves each accepted connection on
/// its own tokio task; this function only returns if binding or accepting fails.
pub async fn start(context: Context) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    let bind_addr = SocketAddr::from(([127, 0, 0, 1], context.config().port));
    let listener = TcpListener::bind(bind_addr).await?;
    // The signature verifier is shared by every connection task.
    let verifier = Arc::new(Verifier::load(context.config()));
    loop {
        let (socket, _remote) = listener.accept().await?;
        let io = TokioIo::new(socket);
        let cx = context.clone();
        let verifier = Arc::clone(&verifier);
        tokio::spawn(async move {
            let service = service_fn(|req| {
                // Attach a tracing span with the request line and user agent to every handler call.
                let agent = req
                    .headers()
                    .get("user-agent")
                    .and_then(|h| h.to_str().ok());
                let span = info_span!(
                    "request",
                    target = format!("{} {}", req.method().as_str(), req.uri().to_string()),
                    "user-agent" = agent,
                );
                handle(req, &verifier, cx.clone()).instrument(span)
            });
            if let Err(err) = http1::Builder::new().serve_connection(io, service).await {
                error!("Error serving connection: {:?}", err);
            }
        });
    }
}
/// A parsed HTTP request for easy handling.
struct Req<'a> {
    /// The request method (GET, POST, ...).
    method: &'a Method,
    /// The fully buffered request body.
    body: Bytes,
    // The content-types in the accept header
    accept: Vec<&'a str>,
    // URI bits
    /// Query-string parameters as `(key, value)` pairs, in order of appearance.
    params: Vec<(&'a str, &'a str)>,
    /// Non-empty path segments, split on '/'.
    path: Vec<&'a str>,
}
impl Req<'_> {
    /// Get the path segments (non-empty parts of the path string separated by the '/' character).
    fn path(&self) -> &[&str] {
        &self.path
    }
    /// Turn an HTTP request into a more simple form so we can process it more easily.
    fn simplify<'x>(r: &'x http::Request<Bytes>) -> Req<'x> {
        let path: Vec<&str> = r
            .uri()
            .path()
            .split('/')
            .filter(|s| !s.is_empty())
            .collect();
        let params: Vec<(&str, &str)> = r
            .uri()
            .query()
            .into_iter()
            .flat_map(|s| s.split('&'))
            .filter_map(|s| s.split_once('='))
            .collect();
        // The `accept` header is a *comma*-separated list of media ranges (RFC 9110
        // §12.5.1), e.g. `application/json, text/html;q=0.9`. Splitting on spaces left
        // trailing commas attached to the entries ("application/json,"), which breaks
        // exact comparisons against content types downstream — split on ',' and trim
        // surrounding whitespace instead.
        let accept = r
            .headers()
            .iter()
            .find_map(|(k, v)| (k == "accept").then_some(v))
            .and_then(|val| val.to_str().ok())
            .iter()
            .flat_map(|s| s.split(','))
            .map(|s| s.trim())
            .filter(|s| !s.is_empty())
            .collect();
        Req {
            method: r.method(),
            body: r.body().clone(),
            accept,
            params,
            path,
        }
    }
}
/// The request handler.
async fn handle(req: Request, verifier: &Verifier, cx: Context) -> Result<Response, Infallible> {
// We need to fetch the entire body of the request for signature validation, because that involves making
// a digest of the request body in some cases.
// TODO: defer loading the body until it is needed.
let request = {
let (req, body) = req.into_parts();
let Ok(body) = body.collect().await.map(|b| b.to_bytes()) else {
panic!("could not collect body!");
};
http::Request::from_parts(req, body)
};
// Simplified representation of a request, so we can pattern match on it more easily in the dispatchers.
let req = Req::simplify(&request);
// We'll use the path to pick where specifically to send the request.
// Check request signature at the door. Even if it isn't needed for a particular endpoint, failing fast
// with a clear error message will save anyone trying to get *their* signatures implementation a major
// headache.
let res = match verifier.verify(&request).await {
// If the request was signed and the signature was accepted, they can access the protected endpoints.
Verdict::Verified(sig) => dispatch_signed(cx, &verifier, &req, sig).await,
// Unsigned requests can see a smaller subset of endpoints, most notably the verification actor.
Verdict::Unsigned => dispatch_public(cx, &verifier, &req).await,
// If a signature was provided *but it turned out to be unverifiable*, show them the error message.
Verdict::Rejected { reason, signature_str } => Err(Message {
error: String::from("signature verification failed for request"),
status: 403,
detail: Some(json!({
"signature": signature_str,
"reason": reason,
})),
}),
};
// If one of the endpoints gave us an error message, we convert that into a response and then
// serve it to the client. In either case, we just serve a response.
let response = res.unwrap_or_else(|msg| {
info!("{}: {msg}", msg.status);
req.error(msg)
});
Ok(response)
}
// Shorthands so the dispatch tables below can pattern-match on `req.method` directly.
const POST: &Method = &Method::POST;
const GET: &Method = &Method::GET;
/// Handle a signed and verified request.
///
/// This function is where all requests to a protected endpoint have to go through. If the request
/// was signed but does not target a protected endpoint, this function will fall back to the
/// [`dispatch_public`] handler.
///
/// `sig` is the verified signer identity produced by [`Verifier::verify`]; it is
/// forwarded to the inbox handler, which needs to know who sent the activity.
#[tracing::instrument(level = "DEBUG", target = "router", skip_all)]
async fn dispatch_signed(
    cx: Context,
    verifier: &Verifier,
    req: &Req<'_>,
    sig: Signer,
) -> Result<Response, Message> {
    match (req.method, req.path()) {
        // Viewing ActivityPub objects requires a signed request, i.e. "authorized fetch".
        // The one exception for this is `/s/request-verifier`, which is where the request
        // verification actor lives.
        (GET, ["o", ulid]) => ap::serve_object(&cx, ulid),
        // POSTs to an actor's inbox need to be signed to prevent impersonation.
        (POST, ["o", ulid, "inbox"]) => ap::inbox(&cx, ulid, sig, &req.body).await,
        // Try the resources for which no signature is required as well.
        _ => dispatch_public(cx, verifier, req).await,
    }
}
/// Dispatch `req` to an unprotected endpoint.
///
/// Unknown paths yield a 404. A path that *does* exist but sits behind a signature
/// check also yields a 404 from here, so unsigned callers can't probe for routes.
#[tracing::instrument(level = "DEBUG", target = "router", skip_all)]
async fn dispatch_public(
    cx: Context,
    verifier: &Verifier,
    req: &Req<'_>,
) -> Result<Response, Message> {
    let route = (req.method, req.path());
    match route {
        (GET, ["proxy"]) => ap::proxy(&cx, &req.params).await,
        (GET, ["outbox"]) => ap::outbox(&cx, &req.params).await,
        (GET, [".well-known", "webfinger"]) => wf::resolve(&cx, &req.params),
        // TODO: nicer solution for this
        (GET, VERIFIER_MOUNT) => Ok(ap::serve_verifier_actor(&verifier)),
        _ => fuck!(404: "not found"),
    }
}
mod error {
    //! Pre-baked error responses.
    use serde_json::{json, Value};
    use super::Response;
    /// An error message shown to an end user of the API.
    #[derive(Debug)]
    pub struct Message {
        /// The main error message.
        pub error: String,
        /// Only shown if the `accept` header included json.
        pub detail: Option<Value>,
        /// The status code for the response.
        pub status: u16,
    }
    impl std::fmt::Display for Message {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "{}", self.error)
        }
    }
    impl super::Req<'_> {
        /// Generate an error response for the request.
        ///
        /// Clients that accept json get a structured body; everyone else gets plain text.
        pub fn error(&self, err: Message) -> Response {
            let builder = Response::<()>::builder().status(err.status);
            if self.accepts_json() {
                let payload = json!({
                    "error": err.error,
                    "details": err.detail,
                });
                let bytes = serde_json::to_vec_pretty(&payload).unwrap();
                builder
                    .header("content-type", "application/json")
                    .body(bytes.try_into().unwrap())
                    .unwrap()
            } else {
                builder
                    .header("content-type", "text/plain")
                    .body(err.error.try_into().unwrap())
                    .unwrap()
            }
        }
        /// Check whether the requester wants json from us.
        ///
        /// Matches any `application/*` media type whose subtype is `json` or has a
        /// `+json` suffix (e.g. `application/activity+json`).
        pub fn accepts_json(&self) -> bool {
            self.accept
                .iter()
                .filter_map(|value| value.split_once('/'))
                .any(|(ty, subty)| ty == "application" && subty.split('+').any(|part| part == "json"))
        }
    }
}

119
bin/server/src/api/ap.rs Normal file
View file

@ -0,0 +1,119 @@
//! ActivityPub handlers.
use http_body_util::Full;
use hyper::body::Bytes;
use puppy::{
actor::{get_signing_key, Actor},
fetch::object::Activity,
get_local_ap_object, Context, Error, Key,
};
use serde_json::Value;
use tracing::{info, instrument};
use crate::sig::{Signer, Verifier};
use super::{error::Message, Response};
/// Proxy a request through the instance.
pub async fn proxy(cx: &Context, params: &[(&str, &str)]) -> Result<Response, Message> {
// Extract our query parameters.
let Some(user) = params.iter().find_map(|(k, v)| (*k == "user").then_some(v)) else {
fuck!(400: "expected `user` query param");
};
let Some(url) = params.iter().find_map(|(k, v)| (*k == "url").then_some(v)) else {
fuck!(400: "expected `url` query param");
};
// Look up the actor's key in the store (which is accessible through the puppy context).
let Ok(signing_key) = cx.run(|tx| {
let actor = Actor::by_username(&tx, user)?.unwrap();
get_signing_key(tx, actor).map_err(Error::from)
}) else {
fuck!(500: "failed to get signing key");
};
eprintln!("proxy: params: {params:?}");
// Proxy the request through our fetcher.
let resp = puppy::fetch::forward(&signing_key, url).await.unwrap();
eprintln!("proxy: status = {}", resp.status());
// Convert the http-types request to a hyper request.
Ok(resp.map(Bytes::from).map(Full::new).into())
}
pub async fn outbox(cx: &Context, params: &[(&str, &str)]) -> Result<Response, Message> {
// Extract our query parameters.
let Some(user) = params.iter().find_map(|(k, v)| (*k == "user").then_some(v)) else {
fuck!(400: "expected `user` query param");
};
let Some(content) = params
.iter()
.find_map(|(k, v)| (*k == "content").then_some(v))
else {
fuck!(400: "expected `url` query param");
};
let Ok(Some(actor)) = cx.run(|tx| Actor::by_username(&tx, user)) else {
fuck!(500: "failed actor by name {user}");
};
let post = puppy::post::create_local_post(&cx, actor.key, content.to_string()).unwrap();
puppy::post::federate_post(&cx, post).await.unwrap();
Ok(respond! {
code: 200
})
}
/// Handle POSTs to actor inboxes. Requires request signature.
#[instrument(skip_all)]
pub async fn inbox(
cx: &Context,
actor_id: &str,
sig: Signer,
body: &[u8],
) -> Result<Response, Message> {
let receiver = actor_id.parse::<Key>().unwrap();
let json: Value = serde_json::from_slice(body).unwrap();
let id = json["id"].to_string();
info! {
inbox = receiver.to_string(),
signature = sig.ap_id,
"processing object '{id}'",
};
match Activity::from_json(json) {
Ok(activity) => {
puppy::ingest(&cx, receiver, &activity).await.unwrap();
match puppy::interpret(&cx, activity) {
Ok(_) => Ok(respond!(code: 202)),
Err(err) => fuck!(400: "error interpreting activity: {err}"),
}
}
Err(err) => fuck!(400: "invalid payload: {err}"),
}
}
/// Serve an ActivityPub object as json-ld.
pub fn serve_object(cx: &Context, object_ulid: &str) -> Result<Response, Message> {
let Ok(parsed) = object_ulid.parse::<Key>() else {
fuck!(400: "improperly formatted ulid");
};
let result = cx.run(|tx| get_local_ap_object(&tx, parsed));
let Ok(object) = result else {
fuck!(404: "object does not exist");
};
Ok(respond! {
kind: "application/activity+json",
body: Some(&object.to_json_ld())
})
}
/// Serve the special actor used for signing requests.
///
/// This endpoint is intentionally reachable without a request signature, so that
/// remote servers can fetch our key to verify *our* signed requests.
pub fn serve_verifier_actor(verifier: &Verifier) -> Response {
    respond! {
        kind: "application/activity+json",
        body: Some(&verifier.to_json_ld())
    }
}

72
bin/server/src/api/wf.rs Normal file
View file

@ -0,0 +1,72 @@
//! WebFinger endpoints and related stuff.
use puppy::{
data::{Id, Username},
Context,
};
use serde_json::{json, Value};
use derive_more::Display;
use super::{error::Message, Response};
const WF_CONTENT_TYPE: (&str, &str) = ("content-type", "application/jrd+json");
/// Respond to a webfinger request.
pub fn resolve(cx: &Context, params: &[(&str, &str)]) -> Result<Response, Message> {
match params.iter().find_map(get_handle) {
Some(handle) if cx.config().wf_domain == handle.instance => {
let username = Username(handle.username.to_string());
let Ok(Some(user)) = cx.store().lookup(username) else {
fuck!(404: "no user {}@{} exists", handle.username, handle.instance);
};
let Ok(Some(Id(id))) = cx.store().get_alias(user) else {
fuck!(500: "internal error");
};
let jrd = make_jrd(handle, &id);
Ok(respond! {
body: Some(&jrd),
kind: "application/jrd+json"
})
}
Some(_) | None => fuck!(400: "missing/invalid resource param"),
}
}
/// A webfinger-style handle (`@user@instance`), borrowed from the query string.
#[derive(Clone, Copy, Display)]
#[display(fmt = "@{username}@{instance}")]
pub struct Handle<'x> {
    // The account name (the part before the separating `@`).
    username: &'x str,
    // The domain the account lives on (the part after the separating `@`).
    instance: &'x str,
}
/// Parse the `resource` parameter into a [`Handle`].
///
/// Returns `None` for any query parameter other than `resource`, and for values that
/// don't look like `acct:user@instance` (an extra leading `@` on the user is tolerated).
fn get_handle<'x>((k, v): &'x (&str, &str)) -> Option<Handle<'x>> {
    // Only the `resource` query parameter is of interest here.
    if *k != "resource" {
        return None;
    }
    // The `acct:` prefix needs to exist according to spec; some implementations also
    // prefix the username with `@`, which is fine either way, hence trim instead of strip.
    let rest = v.strip_prefix("acct:")?.trim_start_matches('@');
    // The remaining `@` separates the username from the instance.
    let (username, instance) = rest.split_once('@')?;
    Some(Handle { username, instance })
}
/// Construct a "JSON resource descriptor".
///
/// The descriptor links the webfinger `acct:` subject to its ActivityPub `id`.
fn make_jrd(handle: Handle<'_>, id: &str) -> Value {
    let subject = format!("acct:{}@{}", handle.username, handle.instance);
    let self_link = json!({
        "rel": "self",
        "type": "application/activity+json",
        "href": id
    });
    json!({
        "subject": subject,
        "links": [self_link]
    })
}

View file

@ -1,8 +1,39 @@
use axum::{routing::get, Router};
//! The ActivityPuppy social media server.
//!
//! This crate contains the implementation of the ActivityPuppy's server binary. Also see the library,
//! [`puppy`], and the other two major components: [`store`] for persistence and [`fetch`] for the
//! federation implementation.
//!
//! [`store`]: puppy::store
//! [`fetch`]: puppy::fetch
#![feature(try_blocks, yeet_expr)]
use puppy::{config::Config, Context};
use tracing::Level;
use tracing_forest::ForestLayer;
use tracing_subscriber::{
filter::filter_fn, layer::SubscriberExt as _, util::SubscriberInitExt as _, Registry,
};
mod sig;
mod api;
/// Starts up the whole shebang.
#[tokio::main]
async fn main() {
let app = Router::new().route("/", get(|| async { "Hello, World!" }));
let sock = tokio::net::TcpListener::bind("0.0.0.0:1312").await.unwrap();
axum::serve(sock, app).await.unwrap();
Registry::default()
.with(filter_fn(|meta| !meta.target().starts_with("reqwest")))
.with(filter_fn(|meta| *meta.level() < Level::DEBUG))
.with(ForestLayer::default())
.init();
// TODO: load the config from a file or something.
let config = Config {
ap_domain: "test.piss-on.me".to_string(),
wf_domain: "test.piss-on.me".to_string(),
state_dir: ".state".to_string(),
port: 1312,
};
let context = Context::load(config).unwrap();
// Start the web server
api::start(context).await.unwrap();
}

164
bin/server/src/sig.rs Normal file
View file

@ -0,0 +1,164 @@
//! Verification of HTTP signatures.
use http::Request;
use puppy::config::Config;
use puppy::fetch::{
signatures::{Private, Public, Signature, SigningKey, VerificationKey, Key},
FetchError,
};
use serde_json::{json, Value};
use tracing::{debug, info, trace};
/// Checks request signatures.
#[derive(Clone)]
pub struct Verifier {
    // ActivityPub ID (a URL) of the special verifier actor.
    actor_id: String,
    // ID of this actor's public key; embedded in the actor JSON-LD.
    key_id: String,
    // Private half of the keypair; signs our outgoing public-key fetches.
    private: Private,
    // Public half of the keypair; published as `publicKeyPem`.
    public: Public,
}
/// The URL path at which the request verification actor is served (string form).
const VERIFIER_PATH: &str = "/s/request-verifier";
/// The path at which the request verification actor will present itself.
pub const VERIFIER_MOUNT: &[&str] = &["s", "request-verifier"];
/// A "verdict" about a signed request, passed by a [`Verifier`].
#[derive(Debug)]
pub enum Verdict {
    /// The signature checks out.
    Verified(Signer),
    /// The signature does not contain a signature header. This may be intentional, or a client error.
    Unsigned,
    /// The signature failed to verify due to an error related to the signature itself.
    Rejected {
        // The raw `signature` header value, echoed back for debugging.
        signature_str: String,
        // A human-readable explanation of why verification failed.
        reason: String,
    },
}
impl Verifier {
    /// Get the JSON-LD representation of the verifier actor.
    pub fn to_json_ld(&self) -> Value {
        json!({
            "@context": [
                "https://www.w3.org/ns/activitystreams",
                "https://w3id.org/security/v1",
            ],
            "id": self.actor_id,
            "name": "Public key fetcher",
            "publicKey": {
                "id": self.key_id,
                "owner": self.actor_id,
                "publicKeyPem": self.public.encode_pem()
            },
            "type": "Service",
        })
    }
    /// Load the server's verifier actor.
    ///
    /// Each server has one special actor for fetching public keys. Unlike all other objects,
    /// acquiring that actor's JSON-LD representation does not require a request signature.
    ///
    /// It doesn't have any data in the data store. Due to its exceptional nature, we just put
    /// the private key in the [`state_dir`][Config::state_dir]. The very first time you load
    /// the verifier, it generates the required private keys.
    pub fn load(cfg: &Config) -> Verifier {
        let Config { ap_domain, state_dir, .. } = cfg;
        let key_path = format!("{state_dir}/fetcher.pem");
        // Read the private key from the state directory, or generate a new one if it couldn't
        // be read.
        let private = Private::load(&key_path).unwrap_or_else(|| {
            let (private, _) = Private::gen();
            private.save(key_path);
            private
        });
        Verifier {
            actor_id: format!("https://{ap_domain}{VERIFIER_PATH}"),
            key_id: format!("https://{ap_domain}{VERIFIER_PATH}#sig-key"),
            public: private.get_public(),
            private,
        }
    }
    /// Does the HTTP signature verification process, and returns a "proof" of the signature in the form
    /// of the [`Signer`], which contains information about who signed a particular request.
    #[tracing::instrument(level = "DEBUG", skip_all)]
    pub async fn verify<B>(&self, req: &Request<B>) -> Verdict {
        // TODO: implement the whole verification thing as a middleware so we can intercept requests
        // like these, instead of coupling this tightly with the router.
        if req.uri().path() == VERIFIER_PATH {
            // HACK: Allow access to the request verifier actor without checking the signature.
            debug!("allowing request to verifier to pass without checking signature");
            return Verdict::Unsigned;
        }
        let Some(header) = req.headers().get("signature") else {
            debug!("request not signed");
            return Verdict::Unsigned;
        };
        // The header value is remote-controlled; `HeaderValue::to_str` fails for bytes
        // outside visible ASCII, so reject such requests instead of panicking on them.
        let signature_str = match header.to_str() {
            Ok(s) => s.to_string(),
            Err(_) => {
                info!("rejecting signature header with non-ascii bytes");
                return Verdict::Rejected {
                    signature_str: String::new(),
                    reason: "signature header value is not valid ascii".to_string(),
                };
            }
        };
        let sig = match Signature::derive(&req).map_err(|e| e.to_string()) {
            Err(reason) => {
                info!(reason, signature_str, "invalid signature");
                return Verdict::Rejected { signature_str, reason };
            }
            Ok(signature) => {
                trace!("signature parsed");
                signature
            }
        };
        // Fetch the signer's public key using our private key.
        let fetch_result = self.fetch_public_key(sig.key_id()).await;
        let public_key = match fetch_result {
            Ok(public_key) => public_key,
            Err(err) => {
                info!(reason = err.to_string(), "failed to fetch pubkey");
                return Verdict::Rejected {
                    reason: format!("could not fetch public key: {err}"),
                    signature_str,
                };
            }
        };
        // TODO: verify digest also
        if let Err(error) = public_key.verify(&sig) {
            info!(reason = error, "rejected");
            Verdict::Rejected { signature_str, reason: error }
        } else {
            debug!(key_owner = public_key.owner, "accepted");
            Verdict::Verified(Signer { ap_id: public_key.owner })
        }
    }
    /// Send a request to get the public key from an ID. This request will be signed with the
    /// verifier actor's public key.
    #[tracing::instrument(level = "TRACE", skip_all)]
    async fn fetch_public_key(&self, uri: &str) -> Result<VerificationKey, FetchError> {
        let json = puppy::fetch::resolve(&self.signing_key(), uri).await?;
        let Some(key) = Key::from_json(json) else {
            return Err(FetchError::BadJson(
                "invalid public key structure".to_string(),
            ));
        };
        Ok(key.upgrade())
    }
    /// Get the key that the verification actor signs requests with.
    fn signing_key(&self) -> SigningKey {
        Key {
            id: self.key_id.clone(),
            owner: self.actor_id.clone(),
            inner: self.private.clone(),
        }
    }
}
/// An ActivityPub actor that signed a request.
///
/// Handed out by [`Verifier::verify`] when a signature checks out, so downstream
/// handlers can attribute the request to this actor.
#[derive(Debug)]
pub struct Signer {
    /// The ActivityPub ID (a URL) of the signer of the request.
    pub ap_id: String,
}

View file

@ -6,6 +6,15 @@ edition = "2021"
path = "src/lib.rs"
[dependencies]
reqwest = "*"
sigh = "*"
serde_json = "*"
reqwest = { version = "*", features = ["json"] }
serde_json = "*"
derive_more = "*"
http = "*"
chrono = "*"
base64 = "*"
rsa = { version = "*", features = ["sha2"] }
spki = "*"
http-body-util = "*"
rand = "*"
pem = "*"
tracing = "*"

207
lib/fetch/src/client.rs Normal file
View file

@ -0,0 +1,207 @@
use chrono::Utc;
use http::Method;
use http_body_util::BodyExt as _;
use reqwest::Body;
use serde_json::{error, Value};
use derive_more::Display;
use tracing::{debug, error, info, instrument, warn};
use crate::{
object::Activity,
signatures::{SigningKey, Options},
FetchError,
};
/// The name of the server software, used for generating the user agent string.
///
/// See also [`VERSION`].
pub const SOFTWARE: &str = "ActivityPuppy";
/// The current version of the server software, which is incorporated into the user agent string
/// for all outbound requests made by ActivityPuppy.
pub const VERSION: &str = "0.0.1-dev";
/// Content-type/accept header for ActivityPub requests.
///
/// Used as `content-type` on deliveries and as `accept` on resolutions.
pub const ACTIVITYPUB_TYPE: &str = "application/activity+json";
/// A client for sending ActivityPub and WebFinger requests with.
///
/// Wraps a [`reqwest::Client`]; reqwest clients hold an internal connection pool, so
/// prefer reusing one `Client` over constructing a new one per request.
#[derive(Clone)]
pub struct Client {
    // The underlying HTTP client; cheap to clone.
    inner: reqwest::Client,
}
impl Client {
/// Constructs a new federation client.
pub fn new() -> Client {
Client { inner: reqwest::Client::new() }
}
/// Deliver an [`Activity`] to a particular `inbox`.
///
/// Note that in order for the request to be considered valid by most implementations, `key.owner`
/// must equal `payload.actor`.
#[instrument(target = "fetch.delivery", skip_all, fields(activity = payload.id, url = inbox, key = key.id))]
pub async fn deliver(&self, key: &SigningKey, payload: &Activity, inbox: &str) {
let system = Subsystem::Delivery;
let body = serde_json::to_string(&payload.to_json_ld()).unwrap();
let mut req = system
.new_request(inbox)
.unwrap()
.method(Method::POST)
.header("content-type", ACTIVITYPUB_TYPE)
.body(body)
.unwrap();
key.sign_with_digest(Options::LEGACY, &mut req)
.map_err(FetchError::Sig)
.expect("signature generation to work")
.commit(&mut req);
let request = req.map(Body::from).try_into().unwrap();
self.inner.execute(request).await.unwrap();
}
/// A high-level function to resolve a single ActivityPub ID using a signed request.
#[instrument(target = "fetch.resolver", skip_all, fields(url = url, key = key.id))]
pub async fn resolve(&self, key: &SigningKey, url: &str) -> Result<Value, FetchError> {
let system = Subsystem::Resolver;
let mut req = system
.new_request(url)?
.header("accept", ACTIVITYPUB_TYPE)
.body(())
.unwrap();
match key.sign(Options::LEGACY, &req) {
Ok(signature) => signature.commit(&mut req),
Err(error) => {
// This shouldn't happen during normal operation
warn!("failed to sign request: {error}");
return Err(FetchError::Sig(error));
}
}
let request = req.map(|()| Body::default()).try_into()?;
let response = self.inner.execute(request).await?;
if response.status().is_success() {
debug!("resolution successful");
response.json().await.map_err(From::from)
} else {
let status = response.status().as_u16();
let body = response.text().await?;
debug!(status, "resolution failed: {body}");
Err(FetchError::NotSuccess {
url: url.to_string(),
status,
body,
})
}
}
/// Forwards a request and returns the raw response, so that it can be analyzed for debugging.
///
/// It exists solely as a debugging tool!
#[instrument(target = "fetch.devproxy", skip_all, fields(url, key = key.id))]
pub async fn proxy(
&self,
key: &SigningKey,
url: &str,
) -> Result<http::Response<String>, FetchError> {
let system = Subsystem::DevProxy;
let mut req = system
.new_request(url)?
.header("accept", ACTIVITYPUB_TYPE)
.body(())
.unwrap();
key.sign(Options::LEGACY, &req)
.expect("signing error")
.commit(&mut req);
let resp = self
.inner
.execute(req.map(|_| Body::default()).try_into().unwrap())
.await?;
let http_resp: http::Response<reqwest::Body> = resp.into();
let (res, body) = http_resp.into_parts();
let body = body.collect().await.unwrap().to_bytes();
let http_resp =
http::Response::from_parts(res, String::from_utf8_lossy(body.as_ref()).into_owned());
Ok(http_resp)
}
}
/// Identifies a specific subsystem that makes an outgoing request.
///
/// This allows us to precisely track each outgoing request, as well as generate a meaningful
/// user-agent header. It is also used to generate a "base request".
#[derive(Clone, Copy, Display)]
enum Subsystem {
    /// The subsystem that dereferences ActivityPub URLs to JSON values.
    ///
    /// In addition, the resolver is used for resolving webfinger handles to ActivityPub actors.
    #[display(fmt = "resolver")]
    Resolver,
    /// The subsystem responsible for delivering activities to inboxes.
    #[display(fmt = "delivery")]
    Delivery,
    /// For testing the resolver and signatures.
    #[display(fmt = "devproxy")]
    DevProxy,
}
impl Subsystem {
    /// Get the user agent string for the subsystem.
    fn user_agent(&self) -> String {
        let name = self.as_str();
        format!("{SOFTWARE}/{VERSION} [{name}]")
    }
    /// Get a str representation of this subsystem.
    #[inline]
    const fn as_str(self) -> &'static str {
        match self {
            Subsystem::Resolver => "resolver",
            Subsystem::Delivery => "delivery",
            Subsystem::DevProxy => "devproxy",
        }
    }
    /// Construct a new request for this subsystem.
    ///
    /// This will set the following headers, which are common to all requests made by the fetch
    /// system:
    ///
    /// - `user-agent`, which depends on the particular subsystem in use
    /// - `date`, which is generated from the current time
    /// - `host`, which is derived from `target`
    ///
    /// This function returns an error if the `target` is not a valid URI, or if it has no
    /// host component (i.e. it is relative).
    fn new_request(self, target: &str) -> Result<http::request::Builder, FetchError> {
        // Format our time like "Sun, 06 Nov 1994 08:49:37 GMT"
        const RFC_822: &str = "%a, %d %b %Y %H:%M:%S GMT";
        let uri = target
            .parse::<http::Uri>()
            .map_err(|e| FetchError::InvalidURI {
                url: target.to_string(),
                error: e.to_string(),
            })?;
        let host = match uri.host() {
            Some(host) => host,
            None => {
                // SECURITY: Refuse to resolve URLs to local resources using local keys.
                error!(target: "security", "refusing to resolve a relative URL: {target}");
                return Err(FetchError::InvalidURI {
                    url: target.to_string(),
                    error: "Relative URI".to_string(),
                });
            }
        };
        let date = Utc::now().format(RFC_822).to_string();
        Ok(http::Request::builder()
            .uri(target)
            .header("user-agent", self.user_agent())
            .header("date", date)
            .header("host", host))
    }
}

View file

@ -0,0 +1,88 @@
#![feature(iter_intersperse, yeet_expr, iterator_try_collect, try_blocks)]
use std::error::Error;
use derive_more::Display;
use serde_json::Value;
use object::Activity;
use signatures::SigningKey;
pub use http;
pub mod signatures;
pub mod object;
pub use client::Client;
mod client;
/// Deliver an activity to an inbox.
///
/// Convenience wrapper that spins up a one-shot [`Client`].
pub async fn deliver(key: &SigningKey, activity: &Activity, inbox: &str) {
    let client = Client::new();
    client.deliver(key, activity, inbox).await
}
/// Resolve an ActivityPub ID to a JSON value.
///
/// Note: This creates a new [`Client`] every time you call it, so if you're gonna call it more than just
/// a couple of times, create a `Client` and call its inherent methods instead.
pub async fn resolve(key: &SigningKey, target: &str) -> Result<Value, FetchError> {
    let client = Client::new();
    client.resolve(key, target).await
}
/// Proxy a GET request through this server.
///
/// Should only be used for manually testing stuff.
pub async fn forward(key: &SigningKey, target: &str) -> Result<http::Response<String>, FetchError> {
    let client = Client::new();
    client.proxy(key, target).await
}
/// Errors that may occur during the execution of HTTP request routines.
#[derive(Debug, Display)]
pub enum FetchError {
    /// Some error internal to the request sending process occurred.
    #[display(fmt = "internal error: {error} (url={url:?})")]
    Internal { url: Option<String>, error: String },
    /// The URI was not valid and therefore the request could not be made.
    #[display(fmt = "invalid uri: {error} (url={url})")]
    InvalidURI { url: String, error: String },
    /// A non-success status code was encountered.
    #[display(fmt = "non-2xx status code: {status} (url={url})")]
    NotSuccess {
        // The HTTP status code of the response.
        status: u16,
        // The URL that was requested.
        url: String,
        // The response body, kept for diagnostics.
        body: String,
    },
    /// The JSON body of a response could not be loaded. The string inside is the error
    /// message produced by the JSON deserializer.
    #[display(fmt = "deserialization error: {}", self.0)]
    BadJson(String),
    /// A JSON-LD document could not be deserialized because it does not conform to our expectations.
    #[display(fmt = "parsing error: {}", self.0)]
    BadObject(String),
    /// An error that occurred while generating a signature for a a request.
    #[display(fmt = "signing error: {}", self.0)]
    Sig(String),
}
impl FetchError {
    /// Check whether the error is due to a 403 FORBIDDEN response status code.
    ///
    /// NOTE(review): despite the name, this matches 403 (Forbidden), not 401
    /// (Unauthorized) — presumably named after the authorized-fetch use case; verify
    /// and consider renaming.
    pub fn is_unauthorized(&self) -> bool {
        matches!(self, FetchError::NotSuccess { status: 403, .. })
    }
    /// Check whether the error is due to a 404 NOT FOUND response status code.
    pub fn is_not_found(&self) -> bool {
        matches!(self, FetchError::NotSuccess { status: 404, .. })
    }
}
#[doc(hidden)]
impl From<reqwest::Error> for FetchError {
fn from(error: reqwest::Error) -> FetchError {
match error.source().and_then(|e| e.downcast_ref()) {
Some(e @ serde_json::Error { .. }) => FetchError::BadJson(e.to_string()),
None => {
let url = error.url().map(|u| u.to_string());
FetchError::Internal { url, error: error.to_string() }
}
}
}
}

222
lib/fetch/src/object.rs Normal file
View file

@ -0,0 +1,222 @@
//! ActivityPub vocabulary as interpreted by ActivityPuppy.
use serde_json::{json, Value};
use derive_more::From;
pub use crate::signatures::Key as PublicKey;
/// An ActivityPub activity: an `actor` doing something (`kind`) to an `object`.
#[derive(Debug)]
pub struct Activity<T = String> {
    /// The ActivityPub ID (a URL) of the activity itself.
    pub id: String,
    /// The ID of the actor performing the activity.
    pub actor: String,
    /// The thing being acted on; boxed because an [`Object`] can itself contain an activity.
    pub object: Box<Object>,
    /// The activity's `type` property; generic so callers can substitute a typed kind.
    pub kind: T,
}
impl<K> Activity<K> {
    /// Render the activity as a JSON-LD document.
    pub fn to_json_ld(&self) -> Value
    where
        K: ToString,
    {
        let kind = self.kind.to_string();
        json!({
            "@context": [
                "https://www.w3.org/ns/activitystreams",
                { "Bite": "https://ns.mia.jetzt/as#Bite" },
            ],
            "id": self.id,
            "actor": self.actor,
            "object": self.object.to_json_ld(),
            "type": kind,
        })
    }
}
impl Activity {
    /// Parse an [`Activity`] out of a JSON-LD document.
    ///
    /// Produces a human-readable error message when a required property (`id`, `actor`,
    /// `type`, `object`) is missing or malformed.
    pub fn from_json(mut json: Value) -> Result<Activity, String> {
        let map = json.as_object().ok_or("expected an object")?;
        let id = map
            .get("id")
            .and_then(Value::as_str)
            .map(str::to_owned)
            .ok_or("missing `id` property")?;
        let actor = map
            .get("actor")
            .and_then(Value::as_str)
            .map(str::to_owned)
            .ok_or_else(|| format!("missing `actor` property for activity {id}"))?;
        let kind = map
            .get("type")
            .and_then(Value::as_str)
            .map(str::to_owned)
            .ok_or_else(|| format!("missing `type` property for activity {id}"))?;
        // TODO: make this behave gracefully when we only get an ID.
        // (The mutable borrow here is fine: the shared borrow through `map` ends above.)
        let object = json
            .get_mut("object")
            .map(Value::take)
            .map(Object::from_json)
            .transpose()?
            .map(Box::new)
            .ok_or_else(|| format!("missing or invalid `object` property for activity {id}"))?;
        Ok(Activity { id, actor, object, kind })
    }
}
/// An actor is an entity capable of producing Takes.
#[derive(Debug)]
pub struct Actor {
    /// The URL pointing to this object.
    pub id: String,
    /// Where others should send activities.
    pub inbox: String,
    /// Note: this maps to the `preferredUsername` property.
    pub account_name: String,
    /// Note: this maps to the `name` property.
    pub display_name: Option<String>,
    /// Public counterpart to the signing key used to sign activities
    /// generated by the actor.
    pub public_key: PublicKey,
}
impl Actor {
    /// Render the actor as a JSON-LD `Person` document, including its public key.
    pub fn to_json_ld(&self) -> Value {
        json!({
            "@context": [
                "https://www.w3.org/ns/activitystreams",
                "https://w3id.org/security/v1",
            ],
            "id": self.id.to_string(),
            "inbox": self.inbox.to_string(),
            // NOTE(review): this replaces *every* occurrence of "inbox" in the inbox
            // URL, not just the final path segment — an id containing "inbox" elsewhere
            // would produce a corrupted outbox URL. Consider `strip_suffix` instead.
            "outbox": self.inbox.to_string().replace("inbox", "outbox"),
            "preferredUsername": self.account_name,
            "name": self.display_name,
            "type": "Person",
            "publicKey": {
                "id": self.public_key.id,
                "publicKeyPem": self.public_key.inner,
                "owner": self.id.to_string(),
            }
        })
    }
    /// Parse an [`Actor`] out of a JSON-LD document.
    ///
    /// `id`, `inbox`, `preferredUsername` and `publicKey` are required; `name` is
    /// optional. Each failure yields a human-readable error message.
    pub fn from_json(json: Value) -> Result<Actor, String> {
        let Value::Object(map) = json else {
            do yeet format!("expected json object")
        };
        Ok(Actor {
            id: map
                .get("id")
                .ok_or("id is required")?
                .as_str()
                .ok_or("id must be a str")?
                .to_string(),
            inbox: map
                .get("inbox")
                .ok_or("inbox is required")?
                .as_str()
                .ok_or("inbox must be a str")?
                .to_string(),
            account_name: map
                .get("preferredUsername")
                .ok_or("preferredUsername is required")?
                .as_str()
                .ok_or("preferredUsername must be a str")?
                .to_string(),
            display_name: map.get("name").and_then(|v| v.as_str()).map(str::to_owned),
            public_key: map
                .get("publicKey")
                .cloned()
                .and_then(PublicKey::from_json)
                .ok_or("publicKey property could not be parsed")?,
        })
    }
}
/// Any ActivityPub object puppy understands, or a bare reference to one by ID.
#[derive(From, Debug)]
pub enum Object {
    /// A reference to an object by its ID only.
    #[from(ignore)]
    Id {
        id: String,
    },
    /// A nested activity.
    Activity(Activity),
    /// An actor (person, service, application, ...).
    Actor(Actor),
    /// Any other typed object; parsed as a note-like content object.
    Note(Note),
}
impl Object {
    /// Get the ActivityPub ID (a URL) of the object, regardless of its variant.
    pub fn id(&self) -> &str {
        match self {
            Object::Activity(a) => &a.id,
            Object::Actor(a) => &a.id,
            Object::Note(n) => &n.id,
            Object::Id { id } => id,
        }
    }
    /// Parse an [`Object`] out of a JSON value.
    ///
    /// A bare string is treated as an ID reference. For JSON objects the `type`
    /// property selects the variant; any type that is not a known actor or activity
    /// type falls through to [`Note`].
    pub fn from_json(json: Value) -> Result<Object, String> {
        if let Value::String(id) = json {
            Ok(Object::Id { id })
        } else if let Value::Object(ref map) = json {
            match map.get("type").and_then(Value::as_str) {
                Some("System" | "Application" | "Person" | "Service") => {
                    Actor::from_json(json).map(Object::Actor)
                }
                Some("Create" | "Follow" | "Accept" | "Reject" | "Bite") => {
                    Activity::from_json(json).map(Object::Activity)
                }
                // Everything else is treated as note-like content.
                Some(kind) => Ok(Object::Note(Note {
                    id: map
                        .get("id")
                        .ok_or("id is required")?
                        .as_str()
                        .ok_or("id must be a str")?
                        .to_string(),
                    kind: kind.to_string(),
                    author: map
                        .get("attributedTo")
                        .ok_or("attributedTo is required")?
                        .as_str()
                        .ok_or("attributedTo must be a str")?
                        .to_string(),
                    content: map
                        .get("content")
                        .and_then(|v| v.as_str())
                        .map(str::to_owned),
                    summary: map
                        .get("summary")
                        .and_then(|v| v.as_str())
                        .map(str::to_owned),
                })),
                None => do yeet "could not determine type of object",
            }
        } else {
            Err(format!("expected a json object or an id, got {json:#?}"))
        }
    }
    /// Render the object as JSON-LD; ID references become a bare string.
    pub fn to_json_ld(&self) -> Value {
        match self {
            Object::Id { id } => json!(id),
            Object::Activity(a) => a.to_json_ld(),
            Object::Actor(a) => a.to_json_ld(),
            // Notes are addressed to the public collection when serialized.
            Object::Note(Note {
                id,
                kind,
                content,
                summary,
                author,
            }) => json!({
                "to": [
                    "https://www.w3.org/ns/activitystreams#Public",
                ],
                "id": id.to_string(),
                "type": kind,
                "attributedTo": author,
                "content": content,
                "summary": summary,
            }),
        }
    }
}
/// A note-like content object (anything that isn't an actor or an activity).
#[derive(Debug)]
pub struct Note {
    /// The ActivityPub ID (a URL) of the object.
    pub id: String,
    /// Maps to the `attributedTo` property.
    pub author: String,
    /// Maps to the `content` property, if present.
    pub content: Option<String>,
    /// Maps to the `summary` property, if present.
    pub summary: Option<String>,
    /// The `type` of the object (anything not recognized as an actor or activity).
    pub kind: String,
}

735
lib/fetch/src/signatures.rs Normal file
View file

@ -0,0 +1,735 @@
//! Containment zone for the funny math that doesn't make much sense to puppy.
//!
//! This module provides ActivityPuppy's HTTP signatures implementation. The state of HTTP signatures implementations
//! is, to put it mildly, *een fucking kutzooi*. For historical reasons, no one implements it *exactly* right (much
//! like URI parsers). This implementation aims to be as broadly compatible as possible.
//!
//! The only non-deprecated [`Algorithm`] is [`"hs2019"`][HS2019], but not everyone implements it, because the initial
//! round of implementations of the spec were based on a draft, and [`"rsa-sha256"`][RSA_SHA256] is kinda the de facto
//! standard.
//!
//! # Behavior
//!
//! By default, puppy will sign with `algorithm="hs2019"` (using `(created)` and `(expires)` pseudo-headers), and retry
//! in legacy mode (using `algorithm="rsa-sha256"` with `date` header) if the signature gets rejected.
//!
//! Currently, `"hs2019"` is treated as equivalent to `"rsa-sha256"` for verification purposes. Support for elliptic
//! curve keys is planned, but not a priority.
//!
//! # Links
//!
//! More information about http signatures:
//!
//! - <https://swicg.github.io/activitypub-http-signature>
//! - <https://datatracker.ietf.org/doc/html/draft-cavage-http-signatures>
use std::path::Path;
use chrono::{DateTime, TimeDelta, Utc};
use http::{HeaderValue, Request};
use rsa::{
pkcs8::{
DecodePrivateKey, DecodePublicKey, EncodePrivateKey as _, EncodePublicKey as _, LineEnding,
},
sha2::Sha256,
signature::{SignatureEncoding as _, Signer as _, Verifier as _},
RsaPrivateKey,
};
use serde_json::{Map, Value};
use self::new::{decode, encode, sha256, IR};
/// Size of the RSA private keys puppy generates (in bits).
const KEY_SIZE: usize = 2048;
/// A key that can be used to verify a request signature.
pub type VerificationKey = Key<Public>;
/// A key that can be used to sign a request.
pub type SigningKey = Key<Private>;
/// A key used for authorized fetch.
///
/// It comes in several flavors:
///
/// - `Key` (`K` = [`String`]): PEM-encoded, can be turned into a JSON object.
/// - [`VerificationKey`] (`K` = [`Public`]): used as an input in the request signature validation process.
/// - [`SigningKey`] (`K` = [`Private`]): used as an input in the generation of a signed request.
#[derive(Debug)]
pub struct Key<K = String> {
    /// The `"id"` property of the public key, which should equal the `keyId` part of a signature.
    pub id: String,
    /// The `"owner"` property.
    pub owner: String,
    /// Maps to the `"publicKeyPem"` property of an actor's `"publicKey"` when (de)serializing, and when the
    /// key is used for doing signatures.
    pub inner: K,
}
impl Key {
    /// Tries to find the PEM-encoded public key from the result of fetching a key id.
    pub fn from_json(json: Value) -> Option<Key> {
        // Try the toplevel object first. If that fails, look inside `"publicKey"`:
        // because of how mastodon deals with pubkey resolution, most implementations
        // serve the whole actor object instead of just the key, so that location is
        // the de facto standard.
        match json.as_object().and_then(Key::from_map) {
            Some(key) => Some(key),
            None => json.get("publicKey")?.as_object().and_then(Key::from_map),
        }
    }
    /// Try to interpret the given map as a public key.
    fn from_map(map: &Map<String, Value>) -> Option<Key> {
        let id = map.get("id")?.as_str()?.to_owned();
        let owner = map.get("owner")?.as_str()?.to_owned();
        let inner = map.get("publicKeyPem")?.as_str()?.to_owned();
        Some(Key { id, owner, inner })
    }
    /// "Upgrade" a pem-encoded public key to a key that can actually be used for requests.
    ///
    /// The inverse of this is [`Key::serialize`], which turns `inner` back into a string.
    ///
    /// [`Key::serialize`]: Key::<Public>::serialize
    pub fn upgrade(self) -> Key<Public> {
        let Key { id, owner, inner } = self;
        Key {
            id,
            owner,
            inner: Public::decode_pem(&inner),
        }
    }
}
/// A key that can be used to generate signatures.
#[derive(Clone)]
pub struct Private(rsa::RsaPrivateKey);
impl Private {
    /// Generate a new keypair of [`KEY_SIZE`]-bit keys.
    pub fn gen() -> (Private, Public) {
        let mut rng = rand::thread_rng();
        let private = RsaPrivateKey::new(&mut rng, KEY_SIZE).unwrap();
        let public = private.to_public_key();
        (Private(private), Public(public))
    }
    /// Get the public counterpart to this key.
    pub fn get_public(&self) -> Public {
        Public(self.0.to_public_key())
    }
    /// Load a private key from a PKCS#8 PEM file on disk.
    ///
    /// Returns `None` if the file cannot be read or does not contain a valid key.
    pub fn load(path: impl AsRef<Path>) -> Option<Private> {
        // `DecodePrivateKey` is imported at module level; the previous function-local
        // `use` was redundant.
        DecodePrivateKey::read_pkcs8_pem_file(path.as_ref())
            .map(Private)
            .ok()
    }
    /// Store the private key at `path`.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be written.
    pub fn save(&self, path: impl AsRef<Path>) {
        // `EncodePrivateKey as _` at module level brings the method into scope.
        self.0
            .write_pkcs8_pem_file(path, LineEnding::default())
            .expect("writing a private key to a file should not fail")
    }
    /// PEM-encode the key PKCS#8 style.
    pub fn encode_pem(&self) -> String {
        self.0
            .to_pkcs8_pem(LineEnding::default())
            .unwrap()
            .to_string()
    }
    /// Decode the key from a PKCS#8 PEM-encoded string.
    ///
    /// # Panics
    ///
    /// Panics if `pkcs8_pem` is not a valid PKCS#8 PEM-encoded private key.
    pub fn decode_pem(pkcs8_pem: &str) -> Private {
        DecodePrivateKey::from_pkcs8_pem(pkcs8_pem)
            .map(Private)
            .unwrap()
    }
}
/// A key that can be used to verify signatures.
#[derive(Clone)]
pub struct Public(rsa::RsaPublicKey);
impl Public {
    /// PEM-encode the public key in accordance with PKCS#8.
    ///
    /// # Panics
    ///
    /// Panics if encoding fails (not expected for a valid key).
    pub fn encode_pem(&self) -> String {
        self.0
            .to_public_key_pem(LineEnding::default())
            .unwrap()
            .to_string()
    }
    /// Decode a PKCS#8 PEM-encoded public key from a string.
    ///
    /// NOTE(review): unlike [`Private::decode_pem`], this goes through `pem::parse` and then
    /// reads the contents as SPKI DER — presumably to be lenient about PEM labels used by
    /// other implementations. Confirm before simplifying to `from_public_key_pem`.
    ///
    /// # Panics
    ///
    /// Panics on invalid PEM or an invalid SPKI public key.
    pub fn decode_pem(pkcs8_pem: &str) -> Public {
        let doc = pem::parse(pkcs8_pem).unwrap();
        <rsa::RsaPublicKey as DecodePublicKey>::from_public_key_der(doc.contents())
            .map(Public)
            .unwrap()
    }
}
impl std::fmt::Debug for Public {
    // Debug output delegates to formatting the PEM encoding of the key.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.encode_pem().fmt(f)
    }
}
impl SigningKey {
    /// Create a signature for `req` using the given options.
    pub fn sign<T>(&self, opt: Options, req: &Request<T>) -> Result<Signature, String> {
        IR::partial(req, opt, |ir| ir.signed(self)?.to_signature())
    }
    /// Create a signature for `req` using the given algorithm, and calculate and attach the `digest` header to
    /// the request (if it doesn't already have one).
    ///
    /// This is required by most implementations when POSTing to an inbox.
    pub fn sign_with_digest<T>(
        &self,
        opt: Options,
        req: &mut Request<T>,
    ) -> Result<Signature, String>
    where
        T: AsRef<[u8]>,
    {
        // Hash the body up front; only attach it if no digest header exists yet.
        let body_digest = format!("sha-256={}", encode(sha256(req.body())));
        req.headers_mut()
            .entry("digest")
            .or_insert_with(|| body_digest.try_into().unwrap());
        self.sign(opt, req)
    }
}
impl VerificationKey {
    /// Test the signature against three requirements:
    ///
    /// 1. The signature must not be expired.
    /// 2. The signature's `keyId` must be the same as `self`'s.
    /// 3. The `signed_str` must have been signed by the private counterpart of `self`.
    pub fn verify(&self, sig: &Signature) -> Result<(), String> {
        if sig.is_expired_at(Utc::now()) {
            return Err(format!(
                "Signature is expired: (deadline was {})",
                sig.expires()
            ));
        }
        if sig.key_id() != &self.id {
            return Err(format!(
                "Mismatched key id; signature's key id is '{}', while presented key has '{}'",
                sig.key_id(),
                self.id
            ));
        }
        if !sig.was_signed_by(&self.inner)? {
            return Err(String::from(
                "Signature was not generated by the presented key's private counterpart",
            ));
        }
        Ok(())
    }
    /// Encode a verification key so that it can be presented in a json.
    pub fn serialize(self) -> Key {
        Key {
            id: self.id,
            owner: self.owner,
            inner: self.inner.encode_pem(),
        }
    }
}
/// The algorithm to sign with.
///
/// The wrapped string is the algorithm name exactly as it appears in the `algorithm`
/// signature parameter on the wire.
///
/// Your two options are:
///
/// - [`hs2019`][HS2019], the *correct* option
/// - [`"rsa-sha256"`][RSA_SHA256], the most compatible option
#[derive(PartialEq, Debug, Clone, Copy)]
pub struct Algorithm(&'static str);
/// `hs2019`, the only non-deprecated HTTP signatures algorithm.
pub const HS2019: Algorithm = Algorithm("hs2019");
/// The HTTP signatures algorithm everyone uses, `rsa-sha256`.
pub const RSA_SHA256: Algorithm = Algorithm("rsa-sha256");
/// A signature derived from an [`http::Request`].
#[derive(Debug)]
pub struct Signature {
    /// The `keyId` parameter: identifies the key that (allegedly) produced the signature.
    key_id: String,
    /// Owned copies of the ordered (pseudo-)header name/value pairs that make up the signing string.
    components: Vec<(String, String)>,
    /// `(created, expires)` — the window in which the signature is considered valid.
    time_range: (DateTime<Utc>, DateTime<Utc>),
    /// The reconstructed `(request-target)` string, e.g. `get /inbox`.
    target_str: String,
    /// The exact string that was (or is to be) signed.
    signed_str: String,
    /// The base64-encoded signature bytes.
    signature: String,
    /// The algorithm the signature declares itself to be using.
    algorithm: Algorithm,
}
impl Signature {
    /// Attempt to extract a signature from a request.
    ///
    /// The options passed here only influence how the `(request-target)` pseudo-header is
    /// reconstructed (i.e. whether the query string is kept).
    pub fn derive<T>(req: &Request<T>) -> Result<Signature, String> {
        new::with_ir(req, Options::MODERN, |ir| ir.to_signature())
    }
    /// Obtain the key id for the signature.
    pub fn key_id(&self) -> &str {
        &self.key_id
    }
    /// Get the time the signature was created. This information is extracted from the `(created)`
    /// pseudo-header if it is defined, and the `date` http header otherwise.
    pub fn created(&self) -> DateTime<Utc> {
        self.time_range.0
    }
    /// If specified, get the `(expires)` header, otherwise get the creation time + the configured grace window.
    pub fn expires(&self) -> DateTime<Utc> {
        self.time_range.1
    }
    /// Retrieve the algorithm used for the signature.
    pub fn algorithm(&self) -> Algorithm {
        self.algorithm
    }
    /// Attach `self` to `req` as the `signature` header.
    pub fn commit<T>(self, req: &mut Request<T>) {
        req.headers_mut().insert("signature", self.make_header());
    }
    /// Determine whether `self` was signed by the private counterpart of `key`.
    pub fn was_signed_by(&self, key: &Public) -> Result<bool, String> {
        // NB: this local `Signature` is rsa's signature type, shadowing the outer struct.
        use rsa::pkcs1v15::{VerifyingKey, Signature};
        let Public(inner) = key.clone();
        let key = VerifyingKey::<Sha256>::new(inner);
        let raw_buf = decode(&self.signature)?;
        let Ok(sig) = Signature::try_from(raw_buf.as_slice()) else {
            do yeet "Failed to construct signature from decoded signature";
        };
        // A failed verification surfaces as `Err`; `Ok(false)` is never actually returned.
        key.verify(self.signed_str.as_bytes(), &sig)
            .map_err(|s| format!("{s:?}"))?;
        Ok(true)
    }
    /// Check whether the given `time` falls within the window for valid signatures.
    ///
    /// The window is half-open: `time == expires()` counts as expired, and times *before*
    /// `created()` are rejected as well.
    pub fn is_expired_at(&self, time: DateTime<Utc>) -> bool {
        !(self.created()..self.expires()).contains(&time)
    }
    /// Turn the signature into an HTTP header value.
    fn make_header(self) -> HeaderValue {
        IR::<&str>::from_signature(&self)
            .to_header()
            .try_into()
            .unwrap()
    }
}
/// `rsa-sha256` is created using an rsa key and a sha256 hash.
///
/// This same primitive backs both `rsa-sha256` and (for now) `hs2019` signatures.
fn sign_rsa_sha256(signing_string: &str, key: &Private) -> Result<Vec<u8>, String> {
    use rsa::pkcs1v15::SigningKey;
    let Private(rsa) = key.clone();
    let signer = SigningKey::<Sha256>::new(rsa);
    Ok(signer.sign(signing_string.as_bytes()).to_vec())
}
/// Maximum time difference between the creation time of the signature and the current time before the
/// signature will be rejected. This is a measure to increase the difficulty of a replay attack.
const EXPIRY_WINDOW: TimeDelta = TimeDelta::minutes(5);
/// Configuration for the behavior of the signing and verification routines.
///
/// This struct is non-exhaustive.
///
/// NOTE(review): the doc line above claims non-exhaustive, but the `#[non_exhaustive]`
/// attribute is not actually applied — confirm which is intended.
#[derive(Clone, Copy)]
pub struct Options {
    /// Whether to use the `(created)` and `(expires)`. If `false`, the `date` header is used instead.
    ///
    /// Defaults to `true`.
    pub use_created: bool,
    /// Quirk for older mastodon versions, which don't incorporate the query string into the signing
    /// string during verification.
    ///
    /// Defaults to `false`.
    pub strip_query: bool,
    /// For how long the signature is valid.
    ///
    /// For signing, this only has an effect if `use_created` is also set. For verification, this is only
    /// used if the `(expires)` pseudo-header is not present.
    ///
    /// NOTE(review): the verification path (`IR::to_signature`) currently falls back to the
    /// `EXPIRY_WINDOW` constant rather than this field — confirm which is intended.
    ///
    /// Defaults to 5 minutes.
    pub expires_after: TimeDelta,
    /// Which signature algorithm to use.
    ///
    /// Defaults to [`"hs2019"`][super::HS2019].
    pub algorithm: Algorithm,
}
impl Options {
    /// Use hs2019 with the `(created)` pseudo-header.
    pub const MODERN: Options = Options {
        use_created: true,
        strip_query: false,
        expires_after: EXPIRY_WINDOW,
        algorithm: HS2019,
    };
    /// Use rsa-sha256 with the `date` header.
    ///
    /// All other fields (`strip_query`, `expires_after`) are inherited from [`Options::MODERN`].
    pub const LEGACY: Options = Options {
        use_created: false,
        algorithm: RSA_SHA256,
        ..Options::MODERN
    };
}
impl Default for Options {
fn default() -> Self {
Options {
use_created: true,
strip_query: false,
expires_after: EXPIRY_WINDOW,
algorithm: HS2019,
}
}
}
mod new {
use base64::prelude::*;
use chrono::{DateTime, Utc};
use http::{Method, Request};
use rsa::sha2::{Digest, Sha256};
use super::{sign_rsa_sha256, Options, Signature, SigningKey, EXPIRY_WINDOW, HS2019, RSA_SHA256};
/// Calculate the SHA256 hash of something.
pub fn sha256(buf: impl AsRef<[u8]>) -> Vec<u8> {
    <Sha256 as Digest>::digest(buf.as_ref()).to_vec()
}
/// Base64-encode something (standard alphabet).
pub fn encode(buf: impl AsRef<[u8]>) -> String {
    BASE64_STANDARD.encode(buf.as_ref())
}
/// Base64-decode something, stringifying the error to match this module's `Result<_, String>` style.
pub fn decode(buf: &str) -> Result<Vec<u8>, String> {
    BASE64_STANDARD.decode(buf).map_err(|e| e.to_string())
}
/// Intermediate representation of a signature: everything needed to produce either a
/// signing string or a `signature` header.
///
/// The `S` parameter tracks completeness: `()` for a partial (unsigned) IR, a string type
/// once the key id and signature bytes are known.
pub struct IR<'s, S = &'s str> {
    /// The `(request-target)` pseudo-header value, e.g. `get /inbox`.
    target: &'s str,
    /// Ordered (pseudo-)header name/value pairs that make up the signing string.
    inputs: Vec<(&'s str, &'s str)>,
    /// The algorithm name, as it appears in the `algorithm` parameter.
    alg: &'s str,
    /// The key id (`keyId` parameter).
    key: S,
    /// The base64-encoded signature.
    sig: S,
}
/// Allocates a new [`IR`] for doing signature operations with.
///
/// Fails if the request carries no `signature` header, or if that header cannot be parsed.
pub fn with_ir<T, U>(
    req: &Request<T>,
    opt: Options,
    f: impl FnOnce(IR<'_>) -> Result<U, String>,
) -> Result<U, String> {
    let target = &format_target(req, opt);
    let map = make_header_map(req);
    let header = get(&map, "signature")
        .ok_or_else(|| String::from("Missing required `signature` header"))?;
    let (inputs, key, alg, sig) = parse_header(header, target, &map)?;
    f(IR { target, inputs, alg, key, sig })
}
impl<S> IR<'_, S> {
    /// Create an HTTP header from the IR.
    ///
    /// Requires a complete IR, i.e. one whose key id and signature are string-like.
    pub fn to_header(&self) -> String
    where
        S: AsRef<str>,
    {
        format_header(&self.inputs, self.key.as_ref(), self.alg, self.sig.as_ref())
    }
    /// Validate and upgrade the IR to a structured signature.
    pub fn to_signature(&self) -> Result<Signature, String>
    where
        S: ToString,
    {
        // Parse all time-related inputs up front; any individual parse failure aborts.
        let times: Result<_, String> = try {
            let date = get(&self.inputs, "date").map(from_rfc822).transpose()?;
            let created = get(&self.inputs, "(created)").map(from_secs).transpose()?;
            let expires = get(&self.inputs, "(expires)").map(from_secs).transpose()?;
            (date, created, expires)
        };
        let (date, created, expires) =
            times.map_err(|e: String| format!("Failed to parse time: {e}"))?;
        // Determine the validity window: `(created)`/`(expires)` take precedence, the
        // `date` header is the fallback; a missing end of the window defaults to the
        // grace period.
        //
        // NOTE(review): the fallback uses the EXPIRY_WINDOW const, not Options::expires_after,
        // even though the `Options` docs claim the latter — confirm intent.
        let (created, expires) = match (created, expires) {
            (Some(created), None) => (created, created + EXPIRY_WINDOW),
            (Some(created), Some(expires)) => (created, expires),
            (None, _) => {
                let Some(date) = date else {
                    do yeet "Cannot determine validity window";
                };
                (date, date + EXPIRY_WINDOW)
            }
        };
        let algorithm = match self.alg {
            "rsa-sha256" => RSA_SHA256,
            "hs2019" => HS2019,
            a => do yeet format!("Unsupported algorithm {a}"),
        };
        let signed_str = make_signing_string(&self.inputs);
        // Turn the borrowed inputs into owned pairs so the Signature can outlive `self`.
        let components = self
            .inputs
            .iter()
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect();
        Ok(Signature {
            key_id: self.key.to_string(),
            signature: self.sig.to_string(),
            time_range: (created, expires),
            target_str: self.target.to_string(),
            components,
            signed_str,
            algorithm,
        })
    }
    /// Create an IR from a signature (the inverse of [`IR::to_signature`], borrowing from `sig`).
    pub fn from_signature<'s>(sig: &'s Signature) -> IR<'s> {
        IR {
            target: &sig.target_str,
            inputs: sig
                .components
                .iter()
                .map(|(a, b)| (a.as_str(), b.as_str()))
                .collect(),
            key: &sig.key_id,
            alg: sig.algorithm.0,
            sig: &sig.signature,
        }
    }
    /// Create a signing string.
    pub fn to_signing_str(&self) -> String {
        make_signing_string(&self.inputs)
    }
}
impl<'s> IR<'s, ()> {
    /// Create a partial, unsigned IR.
    pub fn partial<'r, T, U>(
        req: &'r Request<T>,
        opt: Options,
        f: impl FnOnce(IR<'_, ()>) -> Result<U, String>,
    ) -> Result<U, String> {
        let map = make_header_map(req);
        // POST bodies carry a digest header, which then must be part of the signing string.
        let use_digest = req.method() == Method::POST;
        let expires_after = opt.use_created.then_some(opt.expires_after);
        let created = Utc::now();
        let expires = created + expires_after.unwrap_or(EXPIRY_WINDOW);
        let target = &format_target(req, opt);
        let created = created.timestamp().to_string();
        let expires = expires.timestamp().to_string();
        // Association list mapping pseudo header names to concrete values.
        //
        // NB: the pseudo-header is spelled `(expires)` per the http signatures draft. It was
        // previously misspelled `(expired)`, which leaked a non-standard name into the
        // `headers` list and kept `format_header` from ever emitting the `expires` parameter.
        #[rustfmt::skip]
        let pseudo = &[
            ("(request-target)", target.as_str()),
            ("(created)", created.as_str()),
            ("(expires)", expires.as_str()),
        ];
        let inputs = match compute_inputs(&map, pseudo, opt, use_digest) {
            Err(error) => do yeet format!("computing inputs: {error}"),
            Ok(inputs) => inputs,
        };
        f(IR {
            target,
            inputs,
            alg: opt.algorithm.0,
            key: (),
            sig: (),
        })
    }
    /// Sign a partially constructed IR to make it actually useful.
    pub fn signed(self, key: &'s SigningKey) -> Result<IR<'s, String>, String> {
        let sig_str = self.to_signing_str();
        let signature = match sign_rsa_sha256(&sig_str, &key.inner).map(encode) {
            Err(error) => do yeet format!("RSA error: {error}"),
            Ok(signature) => signature,
        };
        Ok(IR {
            target: self.target,
            inputs: self.inputs,
            alg: self.alg,
            key: key.id.to_string(),
            sig: signature,
        })
    }
}
/// With the given options and headers, compute a set of headers and pseudo-headers that (in order) are to be
/// turned into the signing string.
fn compute_inputs<'a>(
    headers: &[(&'a str, &'a str)],
    pseudo: &[(&'a str, &'a str)],
    opt: Options,
    use_digest: bool,
) -> Result<Vec<(&'a str, &'a str)>, String> {
    // List of input names that we want. Pseudo-headers are ordered before normal headers.
    // (Must use the spec spelling `(expires)` to match the `pseudo` list built above.)
    let needed = ["(request-target)"]
        .into_iter()
        .chain(if opt.use_created {
            vec!["(created)", "(expires)"]
        } else {
            vec!["date"]
        })
        .chain(use_digest.then_some("digest"))
        .chain(["host"]);
    let assoc = |k| {
        get(headers, k)
            .or_else(|| get(&pseudo, k))
            .ok_or_else(|| format!("Missing (pseudo)header `{k}`"))
            .map(|v| (k, v))
    };
    needed.map(assoc).try_collect()
}
/// Allocate a `(request-target)` buffer.
///
/// `strip_query` mirrors the quirk of older mastodon versions that sign the path without
/// its query string.
fn format_target<T>(req: &Request<T>, opt: Options) -> String {
    let uri = req.uri();
    let path = if opt.strip_query {
        uri.path()
    } else {
        uri.path_and_query()
            .map_or_else(|| uri.path(), |pq| pq.as_str())
    };
    let method = req.method().as_str().to_ascii_lowercase();
    format!("{method} {path}")
}
fn format_header(inputs: &[(&str, &str)], key: &str, alg: &str, sig: &str) -> String {
// Format all the headers in the order that we used them in the signing string.
let headers: String = inputs
.iter()
.map(|(k, _)| k.as_ref())
.intersperse(" ")
.collect();
// Get the time-based parameters, if they exist.
let created = get(inputs, "(created)").map(|v| ("created", v));
let expires = get(inputs, "(expires)").map(|v| ("expires", v));
// These parameters are always produced.
#[rustfmt::skip]
let table = [
("keyId", key),
("algorithm", alg),
("signature", sig),
("headers", &headers),
];
// Now we need to format the whole shebang.
table
.into_iter()
// `(created)` is part of a newer draft that not everyone implements.
.chain(created)
// `(expires)` is optional per the spec
.chain(expires)
// Step 1: all the values need to be surrounded by quotes
.map(|(k, v)| (k, format!(r#""{v}""#)))
// Step 2. join each pair together
.map(|(k, v)| format!("{k}={v}"))
// Step 3. comma separate everything
.intersperse(",".to_string())
// Step 4. fold the entire thing into one
.collect::<String>()
}
fn parse_header<'s>(
header: &'s str,
target: &'s str,
extra: &[(&'s str, &'s str)],
) -> Result<(Vec<(&'s str, &'s str)>, &'s str, &'s str, &'s str), String> {
// Parse the top-level table.
let table: Vec<(&str, &str)> = header
// Split into entries
.split(",")
// Split entries into key-value pairs
.filter_map(|pair| {
pair.trim_end_matches(' ') // QUIRK: akkoma does not put a space between entries
.split_once('=')
})
// Undo quoting of the values
.filter_map(|(k, v)| v.strip_prefix('"')?.strip_suffix('"').map(|v| (k, v)))
.collect();
let Some(headers) = get(&table, "headers") else {
do yeet "Missing `headers` field";
};
let Some(key) = get(&table, "keyId") else {
do yeet "Missing `keyId` field";
};
let Some(algorithm) = get(&table, "algorithm") else {
do yeet "Missing `algorithm` field";
};
let Some(signature) = get(&table, "signature") else {
do yeet "Missing `signature` field"
};
let inputs: Vec<(&str, &str)> = headers
// Headers and pseudo-headers are separated by spaces in the order in which they appear.
.split(' ')
// Map created and expires pseudo-headers to the ones specified in the inputs table.
.map(|k| match k {
"(request-target)" => Ok(("(request-target)", target)),
// If these exist, the table must have them, but other than that they're optional.
"(created)" => get(&table, "created")
.ok_or("`(created)` pseudo-header is listed, but does not exist".to_string())
.map(|v| ("(created)", v)),
"(expires)" => get(&table, "expires")
.ok_or("`(expires)` pseudo-header is listed, but does not exist".to_string())
.map(|v| ("(expires)", v)),
// For anything else, we don't have the required information, and we'll need access
// to the entire request in order to fill in the blanks.
k => get(&extra, k)
.ok_or(format!("header '{k}' is missing"))
.map(|v| (k, v)),
})
.try_collect()?;
Ok((inputs, key, algorithm, signature))
}
/// Make an association list associating header names to header values.
///
/// Allocates a new vector, but not any strings. Headers whose values are not valid
/// UTF-8 are skipped.
fn make_header_map<'r, T>(req: &'r Request<T>) -> Vec<(&'r str, &'r str)> {
    let mut map = Vec::new();
    for (name, value) in req.headers() {
        if let Ok(value) = value.to_str() {
            map.push((name.as_str(), value));
        }
    }
    map
}
/// Quick utility function to get stuff from an association list.
///
/// Keys are matched case-insensitively; the first match wins.
fn get<'x>(map: &[(&str, &'x str)], key: &str) -> Option<&'x str> {
    for &(name, value) in map {
        if name.eq_ignore_ascii_case(key) {
            return Some(value);
        }
    }
    None
}
/// Build the signing string: one `name: value` line per input, newline-separated.
fn make_signing_string(data: &[(&str, &str)]) -> String {
    let lines: Vec<String> = data.iter().map(|(k, v)| format!("{k}: {v}")).collect();
    lines.join("\n")
}
/// Parse a unix timestamp (seconds, as a decimal string) into a UTC time.
fn from_secs(s: &str) -> Result<DateTime<Utc>, String> {
    let secs = s.parse::<i64>().map_err(|e| e.to_string())?;
    DateTime::from_timestamp(secs, 0).ok_or_else(|| String::from("Timestamp out of range"))
}
/// Parse an RFC 2822 `date` header value into a UTC time.
fn from_rfc822(s: &str) -> Result<DateTime<Utc>, String> {
    match DateTime::parse_from_rfc2822(s) {
        Ok(time) => Ok(time.to_utc()),
        Err(e) => Err(e.to_string()),
    }
}
}

13
lib/macro/Cargo.toml Normal file
View file

@ -0,0 +1,13 @@
[package]
name = "macro"
edition = "2021"
[lib]
path = "src/lib.rs"
proc-macro = true
[dependencies]
syn = { version = '2', features = ['full'] }
quote = '*'
proc-macro2 = '*'
heck = '*'

142
lib/macro/src/arrow.rs Normal file
View file

@ -0,0 +1,142 @@
use heck::AsKebabCase;
use proc_macro::TokenStream;
use quote::{quote, ToTokens};
use syn::{parse_macro_input, Data, DeriveInput, Field, Ident};
/// Entry point for `#[derive(Arrow)]`: dispatch on the shape of the struct.
pub fn arrow(item: TokenStream) -> TokenStream {
    let input = parse_macro_input!(item as DeriveInput);
    // Only structs can be arrows; reject enums and unions.
    let structure = match input.data {
        Data::Struct(s) => s,
        _ => panic!("Only structs are supported as arrows"),
    };
    match structure.fields {
        // Named fields: origin/target (and optionally identity) fields.
        syn::Fields::Named(fields) => from_named(&input.ident, fields),
        // A one-field tuple struct delegates its arrow kind to the wrapped type.
        syn::Fields::Unnamed(f) if f.unnamed.len() == 1 => {
            from_newtype(&input.ident, f.unnamed.first().unwrap())
        }
        _ => panic!(
            "Only newtype structs and structs with named fields can have a derived arrow impl"
        ),
    }
}
/// Derive an arrow impl for a struct with named fields.
///
/// The presence of an `identity` field upgrades the arrow from basic to multi.
fn from_named(name: &Ident, fields: syn::FieldsNamed) -> TokenStream {
    let (origin, target, identity) = extract_idents(fields);
    if let Some(id) = identity {
        make_multi_arrow(name, origin, target, id)
    } else {
        make_basic_arrow(name, origin, target)
    }
}
/// Emit the `Arrow` impl (default basic kind) plus lossless `From` conversions in both
/// directions between the user's struct and `store::arrow::Basic`.
fn make_basic_arrow(name: &Ident, origin: Ident, target: Ident) -> TokenStream {
    let spec = gen_spec(name);
    TokenStream::from(quote! {
        #spec
        impl store::arrow::Arrow for #name {}
        impl From<store::arrow::Basic> for #name {
            fn from(v: store::arrow::Basic) -> #name {
                #name {
                    #origin: v.origin,
                    #target: v.target,
                }
            }
        }
        impl From<#name> for store::arrow::Basic {
            fn from(v: #name) -> store::arrow::Basic {
                store::arrow::Basic {
                    origin: v.#origin,
                    target: v.#target,
                }
            }
        }
    })
}
/// Emit the `Arrow` impl with `Kind = Multi` (carrying an identity key) plus lossless
/// `From` conversions in both directions between the user's struct and `store::arrow::Multi`.
fn make_multi_arrow(name: &Ident, origin: Ident, target: Ident, id: Ident) -> TokenStream {
    let spec = gen_spec(name);
    TokenStream::from(quote! {
        #spec
        impl store::arrow::Arrow for #name {
            type Kind = store::arrow::Multi;
        }
        impl From<store::arrow::Multi> for #name {
            fn from(v: store::arrow::Multi) -> #name {
                #name {
                    #id: v.identity,
                    #origin: v.origin,
                    #target: v.target,
                }
            }
        }
        impl From<#name> for store::arrow::Multi {
            fn from(v: #name) -> store::arrow::Multi {
                store::arrow::Multi {
                    identity: v.#id,
                    origin: v.#origin,
                    target: v.#target,
                }
            }
        }
    })
}
/// Pull out the `origin` and `target` field idents (mandatory) and the optional
/// `identity` ident, which decides the arrow kind.
fn extract_idents(fields: syn::FieldsNamed) -> (Ident, Ident, Option<Ident>) {
    (
        extract_ident("origin", &fields).unwrap(),
        extract_ident("target", &fields).unwrap(),
        extract_ident("identity", &fields),
    )
}
fn extract_ident(name: &str, fields: &syn::FieldsNamed) -> Option<Ident> {
// Prefer marked fields and default to correctly named fields.
fields
.named
.iter()
.find(|field| {
field
.attrs
.iter()
.filter_map(|attr| attr.meta.path().get_ident())
.any(|id| id == name)
})
.and_then(|f| f.ident.clone())
.or_else(|| {
fields
.named
.iter()
.filter_map(|f| f.ident.clone())
.find(|id| id == name)
})
}
/// Generate the `store::types::DataType` impl for an arrow type.
///
/// Keyspace names derive from the kebab-cased type name, e.g. `FollowRequest` gets
/// `follow-request/by-origin` and `follow-request/by-target`.
fn gen_spec(name: &Ident) -> impl ToTokens {
    let prefix = AsKebabCase(name.to_string());
    let by_origin = format!("{prefix}/by-origin");
    let by_target = format!("{prefix}/by-target");
    quote! {
        impl store::types::DataType for #name {
            type Type = store::types::ArrowSpec;
            const SPEC: Self::Type = store::types::ArrowSpec {
                by_origin: store::types::Keyspace(#by_origin),
                by_target: store::types::Keyspace(#by_target),
            };
        }
    }
}
/// Derive an arrow impl for a single-field newtype: the wrapped type becomes the arrow
/// `Kind`, and `From` conversions are generated in both directions.
fn from_newtype(name: &Ident, field: &Field) -> TokenStream {
    let spec = gen_spec(name);
    let typ = &field.ty;
    TokenStream::from(quote! {
        #spec
        impl store::arrow::Arrow for #name {
            type Kind = #typ;
        }
        impl From<#typ> for #name {
            fn from(v: #typ) -> #name { #name(v) }
        }
        impl From<#name> for #typ {
            fn from(v: #name) -> #typ { v.0 }
        }
    })
}

73
lib/macro/src/lib.rs Normal file
View file

@ -0,0 +1,73 @@
use proc_macro::TokenStream;
mod arrow;
/// Derive `store::arrow::Arrow` (plus its `DataType` spec and `From` conversions).
///
/// Recognized field attributes: `#[origin]`, `#[target]`, and — for multi-arrows —
/// `#[identity]`. See the `arrow` module for the actual expansion.
#[proc_macro_derive(Arrow, attributes(origin, target, identity))]
pub fn arrow(item: TokenStream) -> TokenStream {
    arrow::arrow(item)
}
/// Derive `store::Alias` for a string newtype struct.
#[proc_macro_derive(Alias)]
pub fn alias(item: TokenStream) -> TokenStream {
    let input = syn::parse_macro_input!(item as syn::DeriveInput);
    // Aliases must be structs…
    let structure = match input.data {
        syn::Data::Struct(s) => s,
        _ => panic!("Only structs are supported as aliases"),
    };
    // …and specifically single-field newtype structs.
    let fields = match structure.fields {
        syn::Fields::Unnamed(f) if f.unnamed.len() == 1 => f,
        _ => panic!("Only string newtype structs are allowed as aliases"),
    };
    make_alias_impl(&input.ident, fields.unnamed.first().unwrap())
}
/// Generate the alias machinery for a newtype: the `DataType` spec (kebab-cased keyspaces
/// `{name}/keyspace` and `{name}/reversed`), the `store::Alias` marker impl, and
/// `AsRef<str>`/`From` conversions.
fn make_alias_impl(name: &syn::Ident, field: &syn::Field) -> TokenStream {
    let typ = &field.ty;
    let prefix = heck::AsKebabCase(name.to_string());
    let keyspace = format!("{prefix}/keyspace");
    let reversed = format!("{prefix}/reversed");
    let spec = quote::quote! {
        impl store::types::DataType for #name {
            type Type = store::types::AliasSpec;
            const SPEC: Self::Type = store::types::AliasSpec {
                keyspace: store::types::Keyspace(#keyspace),
                reversed: store::types::Keyspace(#reversed),
            };
        }
    };
    TokenStream::from(quote::quote! {
        #spec
        impl store::Alias for #name {}
        impl AsRef<str> for #name {
            fn as_ref(&self) -> &str { self.0.as_ref() }
        }
        impl From<#typ> for #name {
            fn from(v: #typ) -> #name { #name(v) }
        }
    })
}
/// Derive `store::Mixin` plus the `DataType` spec for a struct.
///
/// The keyspace name comes from the kebab-cased type name, e.g. `PublicKey` gets
/// `public-key/main`. The `#[index]` helper attribute is accepted but not (yet) read here.
#[proc_macro_derive(Mixin, attributes(index))]
pub fn mixin(item: TokenStream) -> TokenStream {
    let input = syn::parse_macro_input!(item as syn::DeriveInput);
    let name = input.ident;
    let prefix = heck::AsKebabCase(name.to_string());
    let keyspace = format!("{prefix}/main");
    // Wire the type to its keyspace via a `DataType` impl.
    let spec = quote::quote! {
        impl store::types::DataType for #name {
            type Type = store::types::MixinSpec;
            const SPEC: Self::Type = store::types::MixinSpec {
                keyspace: store::types::Keyspace(#keyspace),
            };
        }
    };
    TokenStream::from(quote::quote! {
        #spec
        impl store::Mixin for #name {}
    })
}

View file

@ -8,3 +8,9 @@ path = "src/lib.rs"
[dependencies]
store = { path = "../store" }
fetch = { path = "../fetch" }
bincode = "2.0.0-rc.3"
chrono = "*"
either = "*"
derive_more = "*"
serde_json = "*"
tracing = "*"

54
lib/puppy/src/context.rs Normal file
View file

@ -0,0 +1,54 @@
use fetch::Client;
use store::{Key, Store, Transaction};
use crate::{config::Config, Result};
/// The context of a running ActivityPuppy.
///
/// This type provides access to the data store and configuration.
#[derive(Clone)]
pub struct Context {
    /// Server configuration (e.g. `state_dir`, `ap_domain`).
    config: Config,
    /// Federation HTTP client; exposed via [`Context::resolver`].
    client: Client,
    /// Handle to the data store; exposed via [`Context::store`] and [`Context::run`].
    store: Store,
}
impl Context {
    /// Load the server context from the configuration.
    ///
    /// Opens the data store in `config.state_dir` using the application schema, and sets
    /// up a fresh federation client.
    pub fn load(config: Config) -> Result<Context> {
        let store = Store::open(&config.state_dir, crate::data::schema())?;
        let client = Client::new();
        Ok(Context { config, store, client })
    }
    /// Do a data store [transaction][store::Transaction].
    pub fn run<T>(&self, f: impl FnOnce(&Transaction<'_>) -> Result<T>) -> Result<T> {
        self.store.run(f)
    }
    /// Access the store directly.
    pub fn store(&self) -> &Store {
        &self.store
    }
    /// Access the configuration.
    pub fn config(&self) -> &Config {
        &self.config
    }
    /// Create an ActivityPub object ID from a key.
    ///
    /// The result has the shape `https://{ap_domain}/o/{key}`.
    pub fn mk_url(&self, key: Key) -> String {
        format!("https://{}/o/{key}", self.config.ap_domain)
    }
    /// Access the federation client.
    ///
    /// NOTE(review): named `resolver` while the field is called `client` — consider
    /// unifying the two names.
    pub fn resolver(&self) -> &Client {
        &self.client
    }
}
/// Load a context for running tests in.
///
/// Uses [`Store::test`] instead of opening a store from disk, so the `state_dir` in the
/// given `config` is presumably never touched (confirm against `store`).
#[cfg(test)]
pub fn test_context<T>(
    config: Config,
    schema: store::types::Schema,
    test: impl FnOnce(Context) -> Result<T>,
) -> Result<T> {
    let client = Client::new();
    Store::test(schema, |store| test(Context { config, store, client }))
}

215
lib/puppy/src/data.rs Normal file
View file

@ -0,0 +1,215 @@
//! Datas for the data store!
//!
//! This module contains the definitions for the data store.
use bincode::{Decode, Encode};
use derive_more::Display;
use store::{types::Schema, Alias, Arrow, Key, Mixin};
/// *Bites you*
#[derive(Arrow, Debug, PartialEq, Eq, Clone, Copy)]
pub struct Bite {
#[identity]
pub id: Key,
#[origin]
pub biter: Key,
#[target]
pub victim: Key,
}
/// Properties of ActivityPub objects.
#[derive(Mixin, Encode, Decode, Debug, Clone)]
pub struct Object {
/// The ActivityPub id of the object.
#[index]
pub id: Id,
/// What kind of object is it?
pub kind: ObjectKind,
/// Whether or not the object resides on this server or on another one.
pub local: bool,
}
/// Allows case analysis on the type of ActivityPub objects.
#[derive(Encode, Decode, Debug, Clone)]
pub enum ObjectKind {
Actor,
Activity(ActivityKind),
Notelike(String),
}
/// The type of an activity.
#[derive(Encode, Decode, Debug, Clone)]
pub enum ActivityKind {
/// Used for posting stuff!
Create = 0,
/// Represents a follow request.
Follow = 1,
/// Used to signal that a follow request was accepted.
Accept = 2,
/// Used to reject a follow request.
Reject = 3,
/// See [`bites`](crate::bites).
Bite = 4,
}
#[derive(Mixin, Encode, Decode, Debug, Clone)]
pub struct Channel {
pub inbox: String,
// TODO: add public key here
}
/// A predicate; `follower` "follows" `followed`.
#[derive(Arrow, Debug, PartialEq, Eq, Clone, Copy)]
pub struct Follows {
#[origin]
pub follower: Key,
#[target]
pub followed: Key,
}
/// An instance of a request from some `origin` user to follow a `target` user.
///
/// This should not be used to determine whether two actors are following each other. For that, use
/// [`Follows`], a basic arrow for exactly this purpose. *This* arrow is used to identify specific
/// instances of *requests*, and serves mostly as a historical reference and for synchronizing with
/// other servers.
///
/// Used to represent a `Follow` activity.
///
/// Mixins always present for the `id`:
///
/// - [`Status`], carrying the status of the request.
#[derive(Arrow, Debug, PartialEq, Eq, Clone, Copy)]
pub struct FollowRequest {
/// The unique ID of this particular request.
#[identity]
pub id: Key,
/// The "follower", the user that made the request.
pub origin: Key,
/// The one the request is made to.
pub target: Key,
}
/// The status of a [`FollowRequest`].
///
/// Valid state transitions:
///
/// ```text
/// ┌──────────────▶ Rejected
/// │
/// │
/// │
///
/// None ─────────▶ Pending ────────▶ Accepted
///
/// │ │
/// │ │
/// │ │
/// ▼ │
/// Withdrawn ◀────────────┘
/// ```
///
/// In addition, a follow request will be deleted if either endpoint is removed from the graph.
#[derive(Mixin, Encode, Decode, Eq, PartialEq, Clone)]
pub enum Status {
/// The follow request was previously pending or accepted, but since withdrawn.
///
/// This can happen when someone cancels their follow request or unfollows the target.
Withdrawn,
/// The follow request was accepted.
Accepted,
/// The follow request was denied.
Rejected,
/// The follow request is still under review.
Pending,
}
/// An ActivityPub ID, used to look up remote objects by their canonical URL.
#[derive(Alias, Encode, Decode, Clone, PartialEq, Eq, Debug, Hash, Display)]
pub struct Id(pub String);
/// A "profile" in the social media sense.
///
/// Contains all presentation information about someone making posts.
#[derive(Mixin, Encode, Decode, Debug, Clone)]
pub struct Profile {
/// How many posts has this user made?
pub post_count: usize,
/// The name used for the profile's handle.
#[index] // <- currently doesnt do anything but i have an idea
pub account_name: Username,
/// The name displayed above their posts.
pub display_name: Option<String>,
/// The "bio", a freeform "about me" field.
pub about_string: Option<String>,
/// Arbitrary custom metadata fields.
pub about_fields: Vec<(String, String)>,
}
/// A unique name for an actor that is part of their "handle".
#[derive(Alias, Encode, Decode, Clone, PartialEq, Eq, Debug, Hash, Display)]
pub struct Username(pub String);
/// The relation that `author` has constructed and published `object`.
#[derive(Arrow, Debug, PartialEq, Eq, Clone, Copy)]
pub struct AuthorOf {
#[origin]
pub author: Key,
#[target]
pub object: Key,
}
/// The contents of a post.
#[derive(Mixin, Encode, Decode, Debug, Clone, Default)]
pub struct Content {
/// Main post body.
pub content: Option<String>,
/// Content warning for the post.
pub warning: Option<String>,
}
/// A public key used for verifying requests.
#[derive(Mixin, Encode, Decode, Debug, Clone)]
pub struct PublicKey {
pub key_id: String,
pub key_pem: String,
}
/// A private key for signing requests with.
#[derive(Mixin, Encode, Decode, Clone)]
pub struct PrivateKey {
pub key_pem: String,
}
/// Represents a `Create` activity.
#[derive(Arrow)]
pub struct Create {
#[identity]
pub id: Key,
#[origin]
pub actor: Key,
#[target]
pub object: Key,
}
/// Construct the schema.
///
/// NOTE(review): every mixin, alias, and arrow type used by the application appears to
/// need registration here — confirm against `store::types::Schema`.
pub fn schema() -> Schema {
    Schema::new()
        // Mixins
        .has::<Profile>()
        .has::<Content>()
        .has::<Status>()
        .has::<Object>()
        .has::<Channel>()
        .has::<PrivateKey>()
        .has::<PublicKey>()
        // Aliases
        .has::<Username>()
        .has::<Id>()
        // Arrows
        .has::<Bite>()
        .has::<FollowRequest>()
        .has::<AuthorOf>()
        .has::<Follows>()
        .has::<Create>()
}

233
lib/puppy/src/interact.rs Normal file
View file

@ -0,0 +1,233 @@
//! Interactions between actors.
use store::{util::IterExt as _, Key, StoreError, Transaction};
use crate::{
actor::Actor,
data::{FollowRequest, Bite, Status, Follows},
Context, Error, Result,
};
/// Interactions with other objects.
impl Actor {
/// Create a [`Bite`] from `self` to `victim`, with a freshly generated id.
///
/// This only constructs the arrow; it does not persist anything.
pub fn bite(&self, victim: Actor) -> Bite {
    Bite {
        id: Key::gen(),
        biter: self.key,
        victim: victim.key,
    }
}
/// Construct a [`FollowRequest`].
pub fn follow_request(&self, target: Actor) -> FollowRequest {
FollowRequest {
origin: self.key,
target: target.key,
id: Key::gen(),
}
}
/// Makes `biter` bite `victim` and inserts the records into the database.
pub fn do_bite(&self, cx: &Context, victim: Actor) -> Result<Bite> {
let bite = self.bite(victim);
cx.run(|tx| tx.create(bite).map_err(Error::Store))?;
Ok(bite)
}
/// Creates a follow request from `self` to `target`.
pub fn do_follow_request(&self, cx: &Context, target: Actor) -> Result<FollowRequest> {
let req = self.follow_request(target);
cx.run(|tx| {
tx.create(req)?;
tx.add_mixin(req.id, Status::Pending)?;
Ok(req)
})
}
/// Accept a follow request.
pub fn do_accept_request(&self, cx: &Context, req: FollowRequest) -> Result<()> {
debug_assert! {
self.key == req.target,
"only the target of a follow request may accept it"
};
cx.run(|tx| try {
let fr = tx
.between::<FollowRequest>(req.origin, req.target)
// Get the one that is equal to `req`.
.filter(|fr| fr.as_ref().is_ok_and(|f| f.id == req.id))
.last()
.unwrap()?;
// Only apply the update if the follow request is still in a pending state.
if let Some(Status::Pending) = tx.get_mixin(fr.id)? {
tx.update(fr.id, |_| Status::Accepted)?;
tx.create(Follows {
follower: req.origin,
followed: req.target,
})?;
}
})
}
/// Reject a follow request.
pub fn do_reject_request(&self, cx: &Context, req: FollowRequest) -> Result<()> {
debug_assert! {
self.key == req.target,
"only the target of a follow request may accept it"
};
cx.run(|tx| try { tx.update(req.id, |_| Status::Rejected)? })?;
Ok(())
}
/// Get all pending follow request for `self`.
pub fn pending_requests<'c>(
&self,
tx: &'c Transaction<'c>,
) -> impl Iterator<Item = Result<FollowRequest>> + 'c {
tx.incoming::<FollowRequest>(self.key)
.map_err(Error::Store)
.filter_bind_results(|req| Ok(if req.is_pending(tx)? { Some(req) } else { None }))
}
/// Get all nodes `self` is following.
pub fn following<'c>(&self, tx: &'c Transaction<'c>) -> impl Iterator<Item = Result<Key>> + 'c {
tx.outgoing::<Follows>(self.key)
.map_err(Error::Store)
.map_ok(|a| a.followed)
}
/// Get all followers of `self`.
pub fn followers<'c>(&self, tx: &'c Transaction<'c>) -> impl Iterator<Item = Result<Key>> + 'c {
tx.incoming::<Follows>(self.key)
.map_err(Error::Store)
.map_ok(|a| a.follower)
}
/// List all specific times `self` was bitten.
pub fn bites_suffered<'c>(
&self,
tx: &'c Transaction<'c>,
) -> impl Iterator<Item = Result<Bite>> + 'c {
tx.incoming::<Bite>(self.key).map_err(Error::Store)
}
/// Check whether `self` follows `other`.
pub fn follows(&self, tx: &Transaction<'_>, other: &Actor) -> Result<bool> {
try { tx.exists::<Follows>(self.key, other.key)? }
}
}
impl FollowRequest {
    /// Determine if this follow request is pending.
    ///
    /// # Errors
    ///
    /// Fails with [`StoreError::Missing`] if the request has no [`Status`]
    /// mixin, which indicates an inconsistent store.
    pub fn is_pending(&self, tx: &Transaction<'_>) -> Result<bool> {
        // The status is stored as a mixin, so we need to get it.
        let Some(st) = tx.get_mixin::<Status>(self.id)? else {
            // If we don't have a status for a follow request, something is borked.
            return Err(StoreError::Missing.into());
        };
        // If the status of the follow request is pending, it can't also be true that the follows
        // relation already exists. (Debug-only consistency check.)
        debug_assert! {
            !(st == Status::Pending)
                || tx.exists::<Follows>(self.origin, self.target).map(|x| !x)?,
            "fr.is_pending -> !(fr.origin follows fr.target)"
        };
        Ok(st == Status::Pending)
    }
}
#[cfg(test)]
mod tests {
    use store::util::IterExt as _;
    use crate::{
        actor::{create_local as create_actor, Actor},
        config::Config,
        data::{schema, FollowRequest, Follows},
        test_context, Context, Result,
    };
    /// Create two fresh local actors, "alice" and "bob", for use in tests.
    fn make_test_actors(cx: &Context) -> Result<(Actor, Actor)> {
        let alice = create_actor(&cx, "alice")?;
        let bob = create_actor(&cx, "bob")?;
        eprintln!("alice={alice:?}, bob={bob:?}");
        Ok((alice, bob))
    }
    /// Minimal configuration for unit tests.
    ///
    /// NOTE(review): `todo!()` panics as soon as this function is called, so every
    /// test that uses this config will abort until `state_dir` is given a real
    /// (temporary) directory.
    fn test_config() -> Config {
        Config {
            ap_domain: String::from("unit-test.puppy.gay"),
            wf_domain: String::from("unit-test.puppy.gay"),
            state_dir: todo!(), // TODO: make this a temp dir
            port: 0,
        }
    }
    /// Creating a follow request records the request but not the follow itself.
    #[test]
    fn create_fr() -> Result<()> {
        test_context(test_config(), schema(), |cx| {
            let (alice, bob) = make_test_actors(&cx)?;
            alice.do_follow_request(&cx, bob)?;
            assert!(
                cx.store().exists::<FollowRequest>(alice.key, bob.key)?,
                "(alice -> bob) ∈ follow-requested"
            );
            assert!(
                !cx.store().exists::<Follows>(alice.key, bob.key)?,
                "(alice -> bob) ∉ follows"
            );
            let pending_for_bob = cx.run(|tx| {
                bob.pending_requests(&tx)
                    .map_ok(|fr| fr.origin)
                    .try_collect::<Vec<_>>()
            })?;
            assert_eq!(pending_for_bob, vec![alice.key], "bob.pending = {{alice}}");
            Ok(())
        })
    }
    /// Accepting a follow request creates the (one-directional) follow relation
    /// and clears the pending set.
    #[test]
    fn accept_fr() -> Result<()> {
        test_context(test_config(), schema(), |cx| {
            let db = cx.store();
            let (alice, bob) = make_test_actors(&cx)?;
            let req = alice.do_follow_request(&cx, bob)?;
            bob.do_accept_request(&cx, req)?;
            assert!(
                db.exists::<Follows>(alice.key, bob.key)?,
                "(alice -> bob) ∈ follows"
            );
            assert!(
                !db.exists::<Follows>(bob.key, alice.key)?,
                "(bob -> alice) ∉ follows"
            );
            cx.run(|tx| try {
                let pending_for_bob: Vec<_> = bob.pending_requests(&tx).try_collect()?;
                assert!(pending_for_bob.is_empty(), "bob.pending = ∅");
                let followers_of_bob: Vec<_> = bob.followers(&tx).try_collect()?;
                assert_eq!(
                    followers_of_bob,
                    vec![alice.key],
                    "bob.followers = {{alice}}"
                );
            })
        })
    }
    /// `followers` and `following` agree after an accepted follow request.
    #[test]
    fn listing_follow_relations() -> Result<()> {
        test_context(test_config(), schema(), |cx| try {
            let (alice, bob) = make_test_actors(&cx)?;
            let req = alice.do_follow_request(&cx, bob)?;
            bob.do_accept_request(&cx, req)?;
            cx.run(|tx| try {
                let followers_of_bob: Vec<_> = bob.followers(&tx).try_collect()?;
                assert_eq!(
                    followers_of_bob,
                    vec![alice.key],
                    "bob.followers = {{alice}}"
                );
                let following_of_alice: Vec<_> = alice.following(&tx).try_collect()?;
                assert_eq!(
                    following_of_alice,
                    vec![bob.key],
                    "alice.following = {{bob}}"
                );
            })?
        })
    }
}

View file

@ -1,244 +1,347 @@
#![feature(iterator_try_collect)]
pub use store::{self, Key, Store};
use store::{
alias::Username,
arrow::{self, multi::MultiArrow, AuthorOf},
mixin::{Content, Profile},
util::IterExt,
Keylike, Tag,
};
//! If you're an ActivityPub developer looking for information about ActivityPuppy's federation behavior,
//! you should take a look at [`fetch`].
mod tags {
//! Type tags for vertices.
// Working with result types is such a bitch without these.
#![feature(iterator_try_collect, try_blocks, once_cell_try, box_into_inner)]
use store::Tag;
use std::hint::unreachable_unchecked;
pub const ACTOR: Tag = Tag(0);
pub const POST: Tag = Tag(1);
pub const BITE: Tag = Tag(2);
use actor::get_signing_key;
pub use context::Context;
#[cfg(test)]
pub use context::test_context;
use data::{ActivityKind, AuthorOf, Channel, Content, Create, Id, ObjectKind, Profile, PublicKey};
use fetch::object::{Activity, Note, Object};
use store::Transaction;
pub use store::{self, Key, StoreError};
pub use fetch::{self, FetchError};
mod context;
pub mod data;
pub mod post;
mod interact;
use derive_more::{From, Display};
use tracing::{instrument, warn};
/// Retrieve an ActivityPub object from the database.
///
/// Fails with [`Error::MissingData`] if the required properties are not present.
///
/// NOTE(review): several branches below `panic!`/`todo!` instead of returning
/// `Error::MissingData`; consider making those paths return errors as well.
pub fn get_local_ap_object(tx: &Transaction<'_>, key: Key) -> Result<fetch::object::Object> {
    let Some(obj) = tx.get_mixin::<data::Object>(key)? else {
        // We need this data in order to determine the object type. If the passed key does not
        // have this data, it must not be an ActivityPub object.
        return Err(Error::MissingData { node: key, prop: "Object" });
    };
    match obj.kind {
        // Actors need a profile, an inbox channel and a public key.
        ObjectKind::Actor => {
            let Some(Profile { account_name, display_name, .. }) = tx.get_mixin(key)? else {
                return Err(Error::MissingData { node: key, prop: "Profile" });
            };
            let Some(Channel { inbox }) = tx.get_mixin(key)? else {
                return Err(Error::MissingData { node: key, prop: "Channel" });
            };
            let Some(PublicKey { key_id, key_pem }) = tx.get_mixin(key)? else {
                return Err(Error::MissingData { node: key, prop: "PublicKey" });
            };
            Ok(fetch::object::Object::Actor(fetch::object::Actor {
                id: obj.id.0.clone().into(),
                inbox: inbox.into(),
                account_name: account_name.0,
                display_name,
                public_key: fetch::object::PublicKey {
                    owner: obj.id.0.into(),
                    id: key_id.into(),
                    inner: key_pem,
                },
            }))
        }
        // A `Create` activity wraps the object it created.
        ObjectKind::Activity(ActivityKind::Create) => {
            let Some(Create { object, actor, .. }) = tx.get_arrow(key)? else {
                panic!("expected a `Create`");
            };
            let Id(actor) = tx.get_alias(actor)?.unwrap();
            Ok(fetch::object::Object::Activity(fetch::object::Activity {
                id: obj.id.0.into(),
                actor: actor.into(),
                // Recursively serialize the created object.
                object: Box::new(get_local_ap_object(tx, object)?),
                kind: String::from("Create"),
            }))
        }
        // Note-like objects: contents plus a reference to the author's AP id.
        ObjectKind::Notelike(kind) => {
            let Some(Content { content, warning, .. }) = tx.get_mixin(key)? else {
                panic!()
            };
            let Some(AuthorOf { author, .. }) = tx.incoming(key).next().transpose()? else {
                panic!()
            };
            let Some(Id(author)) = tx.get_alias(author)? else {
                todo!()
            };
            Ok(fetch::object::Object::Note(Note {
                id: obj.id.0.clone().into(),
                summary: warning,
                content,
                author,
                kind,
            }))
        }
        _ => todo!(),
    }
}
pub fn create_post(db: &Store, author: Key, content: impl ToString) -> store::Result<Key> {
let key = Key::gen();
db.transaction(|tx| {
tx.create_vertex(key, tags::POST)?;
tx.update::<Profile>(author, |_, mut profile| {
profile.post_count += 1;
Ok(profile)
})?;
tx.insert(key, Content {
content: Some(content.to_string()),
summary: None,
})?;
tx.insert_arrow((author, key), AuthorOf)?;
Ok(key)
})
}
pub mod actor {
use fetch::{
object,
signatures::{Private, SigningKey},
};
use store::{Key, StoreError, Transaction};
pub fn create_actor(db: &Store, username: impl ToString) -> store::Result<Key> {
let key = Key::gen();
db.transaction(|tx| {
tx.create_vertex(key, tags::ACTOR)?;
tx.insert_alias(key, Username(username.to_string()))?;
tx.insert(key, Profile {
use crate::{
data::{Channel, Id, Object, ObjectKind, PrivateKey, Profile, PublicKey, Username},
Context, Error, Result,
};
/// A reference to an actor.
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
pub struct Actor {
/// The key identifying the actor in the data store.
pub key: Key,
}
impl Actor {
/// Get a local actor from the store by their username.
pub fn by_username(tx: &Transaction<'_>, username: impl ToString) -> Result<Option<Actor>> {
let maybe_key = tx
.lookup(Username(username.to_string()))
.map_err(Error::Store)?;
Ok(maybe_key.map(|key| Actor { key }))
}
}
/// Create a fresh local actor.
pub fn create_local(cx: &Context, username: impl ToString) -> Result<Actor> {
let key = Key::gen();
cx.run(|tx| {
let username: Username = username.to_string().into();
// Federation stuff
mixin_ap_actor(tx, key, &cx.config().ap_domain, true)?;
mixin_priv_key(tx, key, &cx.config().ap_domain)?;
// Social properties
tx.add_alias(key, username.clone())?;
tx.add_mixin(key, Profile {
post_count: 0,
account_name: username,
display_name: None,
about_string: None,
about_fields: Vec::new(),
})?;
Ok(Actor { key })
})
}
/// Register an actor from another server.
pub fn create_remote(tx: &Transaction<'_>, object: object::Actor) -> Result<Actor> {
let key = Key::gen();
tx.add_alias(key, Id(object.id.clone()))?;
tx.add_mixin(key, Channel { inbox: object.inbox })?;
tx.add_mixin(key, Object {
kind: ObjectKind::Actor,
id: Id(object.id),
local: false,
})?;
tx.add_mixin(key, Profile {
post_count: 0,
account_name: username.to_string(),
display_name: None,
account_name: Username(object.account_name),
display_name: object.display_name,
about_string: None,
about_fields: Vec::new(),
})?;
Ok(key)
})
}
pub fn list_posts_by_author(
db: &Store,
author: impl Keylike,
) -> store::Result<Vec<(Key, Content)>> {
db.transaction(|tx| {
tx.list_outgoing::<AuthorOf>(author)
.bind_results(|(post_key, _)| tx.lookup::<Content>(post_key))
.collect()
})
}
pub struct Bite {
pub id: Key,
pub biter: Key,
pub victim: Key,
}
impl MultiArrow for Bite {
const TYPE: Tag = tags::BITE;
}
pub fn bite_actor(db: &Store, biter: Key, victim: Key) -> store::Result<Key> {
db.transaction(|tx| {
// Bites are represented as multiedges.
let key = arrow::multi::insert::<Bite>(&tx, biter, victim)?;
// We can treat particular arrows in a quiver as a vertex by registering it.
tx.create_vertex(key, tags::BITE)?;
Ok(key)
})
}
pub fn bites_on(db: &Store, victim: Key) -> store::Result<Vec<Bite>> {
db.transaction(|tx| {
arrow::multi::list_incoming::<Bite>(&tx, victim)
.map_ok(|(biter, id)| Bite { id, biter, victim })
.try_collect()
})
}
pub mod tl {
//! Timelines
use store::{arrow::AuthorOf, mixin::Content, util::IterExt as _, Error, Key, Result, Store};
pub struct Post {
pub id: Key,
pub author: Key,
pub content: Content,
tx.add_mixin(key, PublicKey {
key_id: object.public_key.id,
key_pem: object.public_key.inner,
})?;
Ok(Actor { key })
}
pub fn fetch_all(db: &Store) -> Result<Vec<Post>> {
db.transaction(|tx| {
let iter = tx.list::<Content>();
iter.bind_results(|(id, content)| {
let author = tx
.list_incoming::<AuthorOf>(id)
.keys()
.next_or(Error::Missing)?;
Ok(Post {
id,
author,
content,
})
})
.collect()
})
/// Add properties related to local ActivityPub actors to a vertex.
pub fn mixin_ap_actor(
tx: &Transaction<'_>,
vertex: Key,
domain: &str,
local: bool,
) -> Result<(), StoreError> {
let id = Id(format!("https://{domain}/o/{vertex}"));
tx.add_alias(vertex, id.clone())?;
tx.add_mixin(vertex, Channel { inbox: format!("{id}/inbox") })?;
tx.add_mixin(vertex, Object {
kind: ObjectKind::Actor,
local,
id,
})?;
store::OK
}
/// Generate and attach a public/private key pair to the vertex.
pub fn mixin_priv_key(
tx: &Transaction<'_>,
vertex: Key,
domain: &str,
) -> Result<(), StoreError> {
let key_id = format!("https://{domain}/o/{vertex}#sig-key");
let (private, public) = Private::gen();
tx.add_mixin(vertex, PublicKey {
key_pem: public.encode_pem(),
key_id,
})?;
tx.add_mixin(vertex, PrivateKey { key_pem: private.encode_pem() })?;
store::OK
}
pub fn get_signing_key(tx: &Transaction<'_>, actor: Actor) -> Result<SigningKey, StoreError> {
let (PrivateKey { key_pem, .. }, PublicKey { key_id, .. }) =
tx.get_mixin_many(actor.key)?;
let Id(owner) = tx.get_alias(actor.key)?.unwrap();
let inner = Private::decode_pem(&key_pem);
Ok(SigningKey { id: key_id, owner, inner })
}
}
pub mod fr {
//! Follow requests
pub type Result<T, E = Error> = std::result::Result<T, E>;
use store::{
arrow::{FollowRequested, Follows},
util::IterExt as _,
Key, Store, OK,
#[derive(From, Debug, Display)]
pub enum Error {
/// An error internal to the store.
#[display(fmt = "store error: {}", self.0)]
Store(StoreError),
/// An error generated by the [fetch] subsystem.
#[display(fmt = "fetch error: {}", self.0)]
Fetch(FetchError),
/// Expected `node` to have some property that it doesn't have.
#[display(fmt = "missing data: {node} is missing {prop}")]
MissingData {
/// The node that is missing the data.
node: Key,
/// Name of the thing it is missing.
prop: &'static str,
},
#[display(fmt = "invalid data: {}", self.0)]
Invalid(String),
}
pub mod config {
#[derive(Clone)]
pub struct Config {
pub ap_domain: String,
pub wf_domain: String,
pub state_dir: String,
pub port: u16,
}
}
/// Interpret an *incoming* activity. Outgoing activities are *never* interpreted through this function,
/// because their changes are already in the database.
// TODO: figure out if that is the behavior we actually want
#[instrument(skip_all, fields(activity.id = activity.id))]
pub fn interpret(cx: &Context, activity: Activity) -> Result<()> {
// Fetch our actor from the database
let Some(actor) = cx.store().lookup(Id(activity.actor.clone()))? else {
panic!(
"actor {} does not exist in the database (id={})",
activity.actor, activity.id
)
};
pub fn create(db: &Store, requester: Key, target: Key) -> store::Result<()> {
db.transaction(|tx| {
tx.insert_arrow((requester, target), FollowRequested)?;
OK
})
}
// Fetch our object from the database. The object must already exist in the database.
let id = activity.object.id();
let Some(object) = cx.store().lookup(Id(id.to_owned()))? else {
panic!(
"object {} does not exist in the database (id={})",
activity.object.id(),
activity.id
)
};
pub fn accept(db: &Store, requester: Key, target: Key) -> store::Result<()> {
db.transaction(|tx| {
tx.remove_arrow::<FollowRequested>((requester, target))?;
tx.insert_arrow((requester, target), Follows)?;
OK
})
}
pub fn reject(db: &Store, requester: Key, target: Key) -> store::Result<()> {
db.transaction(|tx| {
tx.remove_arrow::<FollowRequested>((requester, target))?;
OK
})
}
pub fn list_pending(db: &Store, target: Key) -> store::Result<Vec<Key>> {
db.transaction(|tx| tx.list_incoming::<FollowRequested>(target).keys().collect())
}
pub fn following_of(db: &Store, actor: Key) -> store::Result<Vec<Key>> {
db.transaction(|tx| tx.list_outgoing::<Follows>(actor).keys().collect())
}
pub fn followers_of(db: &Store, actor: Key) -> store::Result<Vec<Key>> {
db.transaction(|tx| tx.list_incoming::<Follows>(actor).keys().collect())
}
#[cfg(test)]
mod tests {
use store::{
arrow::{FollowRequested, Follows},
Key, Store, OK,
};
use crate::create_actor;
fn make_test_actors(db: &Store) -> store::Result<(Key, Key)> {
let alice = create_actor(&db, "alice")?;
let bob = create_actor(&db, "bob")?;
eprintln!("alice={alice}, bob={bob}");
Ok((alice, bob))
let actor = actor::Actor { key: actor };
let (key, tag) = match activity.kind.as_str() {
"Bite" => {
let object = actor::Actor { key: object };
(actor.do_bite(&cx, object)?.id, ActivityKind::Bite)
}
#[test]
fn create_fr() -> store::Result<()> {
Store::with_tmp(|db| {
let (alice, bob) = make_test_actors(&db)?;
super::create(&db, alice, bob)?;
assert!(
db.exists::<FollowRequested>((alice, bob))?,
"(alice -> bob) ∈ follow-requested"
);
assert!(
!db.exists::<Follows>((alice, bob))?,
"(alice -> bob) ∉ follows"
);
let pending_for_bob = super::list_pending(&db, bob)?;
assert_eq!(pending_for_bob, vec![alice], "bob.pending = {{alice}}");
OK
})
"Create" => {
// NOTE: due to the ingesting, we already have this information.
// TODO: change this. for god's sake
return Ok(());
}
"Follow" => {
let object = actor::Actor { key: object };
#[test]
fn accept_fr() -> store::Result<()> {
Store::with_tmp(|db| {
let (alice, bob) = make_test_actors(&db)?;
super::create(&db, alice, bob)?;
super::accept(&db, alice, bob)?;
assert!(
db.exists::<Follows>((alice, bob))?,
"(alice -> bob) ∈ follows"
);
assert!(
!db.exists::<Follows>((bob, alice))?,
"(bob -> alice) ∉ follows"
);
let pending_for_bob = super::list_pending(&db, bob)?;
assert!(pending_for_bob.is_empty(), "bob.pending = ∅");
let followers_of_bob = super::followers_of(&db, bob)?;
assert_eq!(followers_of_bob, vec![alice], "bob.followers = {{alice}}");
OK
})
let req = actor.do_follow_request(&cx, object)?;
(req.id, ActivityKind::Follow)
}
#[test]
fn listing_follow_relations() -> store::Result<()> {
Store::with_tmp(|db| {
let (alice, bob) = make_test_actors(&db)?;
super::create(&db, alice, bob)?;
super::accept(&db, alice, bob)?;
let followers_of_bob = super::followers_of(&db, bob)?;
assert_eq!(followers_of_bob, vec![alice], "bob.followers = {{alice}}");
let following_of_alice = super::following_of(&db, alice)?;
assert_eq!(following_of_alice, vec![bob], "alice.following = {{bob}}");
OK
})
tag @ ("Accept" | "Reject") => {
// Follow requests are multi-arrows in our graph, and they have their own activitypub id.
let Some(req) = cx.store().get_arrow(object)? else {
panic!(
"follow request does not exist: {object} (id={})",
activity.id
)
};
// Dispatch to the actual method based on the tag
let tag = match tag {
"Accept" => actor
.do_accept_request(&cx, req)
.map(|_| ActivityKind::Accept)?,
"Reject" => actor
.do_reject_request(&cx, req)
.map(|_| ActivityKind::Reject)?,
_ => unsafe {
// SAFETY: this branch of the outer match only matches if the tag is either "Accept" or "Reject",
// so this inner branch is truly unreachable.
unreachable_unchecked()
},
};
(Key::gen(), tag)
}
}
k => {
warn!(activity.id, "unsupported activity type {k}");
return Err(Error::Invalid(format!("activity type '{k}' not supported")));
}
};
cx.run(|tx| {
tx.add_alias(key, Id(activity.id.clone()))?;
tx.add_mixin(key, data::Object {
id: Id(activity.id.clone()),
kind: ObjectKind::Activity(tag),
local: false,
})?;
Ok(())
})
}
/// Make sure all the interesting bits of an activity are here.
///
/// For both the activity's actor and its object, resolves the remote object
/// over HTTP (signed with `auth`'s key) unless its id is already known locally.
///
/// NOTE(review): `Object::from_json(json).unwrap()` panics on malformed remote
/// JSON — consider propagating that as an error instead.
#[instrument(skip_all, fields(activity.id = activity.id, key = auth.to_string()))]
pub async fn ingest(cx: &Context, auth: Key, activity: &Activity) -> Result<()> {
    let key = cx.run(|tx| get_signing_key(tx, actor::Actor { key: auth }).map_err(Error::Store))?;
    for id in [activity.actor.as_str(), activity.object.id()] {
        if cx.store().lookup(Id(id.to_owned()))?.is_some() {
            // Skip ingesting if we already know this ID.
            continue;
        }
        let json = cx.resolver().resolve(&key, &id).await?;
        let object = Object::from_json(json).unwrap();
        match object {
            Object::Activity(a) => interpret(&cx, a)?,
            Object::Actor(a) => cx.run(|tx| actor::create_remote(tx, a).map(void))?,
            Object::Note(a) => post::create_post_from_note(cx, a).map(void)?,
            _ => todo!(),
        }
    }
    Ok(())
}
/// Discard the argument, producing `()`.
fn void<T>(_: T) {}

262
lib/puppy/src/post.rs Normal file
View file

@ -0,0 +1,262 @@
//! Timelines: where you go to view the posts.
use std::ops::RangeBounds;
use chrono::{DateTime, Utc};
use either::Either::{Left, Right};
use fetch::object::{Activity, Note, Object};
use store::{util::IterExt as _, Key, Store, StoreError, Transaction};
use crate::{
actor::{get_signing_key, Actor},
data::{
self, ActivityKind, AuthorOf, Channel, Content, Create, Follows, Id, ObjectKind, Profile,
},
Context,
};
/// A reference to a post node in the data store.
#[derive(Clone, Copy, Debug)]
pub struct Post {
    /// The key identifying the post in the data store.
    pub key: Key,
}
impl From<&str> for Content {
fn from(value: &str) -> Self {
value.to_string().into()
}
}
impl From<String> for Content {
    /// Treat a bare string as the post body, with no content warning attached.
    fn from(value: String) -> Self {
        let content = Some(value);
        Content { warning: None, content }
    }
}
/// A piece of content posted by someone.
///
/// A fully "hydrated" view of a post, ready for rendering.
#[derive(Clone, Debug)]
pub struct PostData {
    /// The post's internal ID.
    pub id: Post,
    /// The actual post contents.
    pub content: Content,
    /// Metadata about the post's author.
    pub author: Author,
}
/// Information about a [`Post`]'s author.
#[derive(Clone, Debug)]
pub struct Author {
    /// The identifier of the author.
    pub id: Key,
    /// The name to display along with the post.
    pub display_name: String,
    /// An informal identifier for a particular author (e.g. `@alice`).
    pub handle: String,
}
/// An ordered list of [`Post`]s for viewing.
#[derive(Debug)]
pub struct Timeline {
    /// The timeline entries, in the order they should be displayed.
    items: Vec<Item>,
}
/// Discrete events that can be displayed to a user as part of a timeline.
///
/// Currently only posts; single-variant by design, with room to grow.
#[derive(Debug)]
enum Item {
    Post(PostData),
}
impl Item {
    /// Get the timeline item if it is a [`Post`].
    pub fn as_post(&self) -> Option<&PostData> {
        // `Item` currently has a single variant, so this pattern is irrefutable.
        let Item::Post(post) = self;
        Some(post)
    }
}
impl Timeline {
/// Get all the posts in the timeline.
pub fn posts(&self) -> impl Iterator<Item = &PostData> {
self.items.iter().filter_map(|x| x.as_post())
}
}
/// Gets at most `limit` of the posts known to the instance that were inserted within `time_range`.
///
/// Each post is joined with its (first) author's profile so the result can be
/// rendered directly.
///
/// # Errors
///
/// Fails with [`StoreError::Missing`] if a post has no author — see the note below.
pub fn fetch_timeline(
    db: &Store,
    time_range: impl RangeBounds<DateTime<Utc>>,
    limit: Option<usize>,
) -> Result<Timeline, StoreError> {
    let posts = db.run(|tx| {
        // Get all post content entries (the argument passed here is a range of chrono datetimes).
        let iter = tx.range::<Content>(time_range);
        // `Left`/`Right` let the two differently-typed iterators share one type.
        let iter = match limit {
            Some(n) => Left(iter.take(n)),
            None => Right(iter),
        };
        // Then, we're gonna map each of them to their author, and get the profile information needed to
        // render the post (mostly display name and handle).
        iter.bind_results(|(key, content)| try {
            // Take the first author. There is nothing stopping a post from having multiple authors, but
            // let's take it one step at a time.
            let (author, Some(Profile { display_name, account_name, .. })) = tx
                .join_on(|a: AuthorOf| a.author, tx.incoming(key))?
                .swap_remove(0)
            else {
                // We expect all posts to have at least one author, so we should complain if there is one
                // that doesn't (for now). For robustness, the `.collect()` down there should be replaced
                // with a strategy where we log a warning instead of failing, but in the current state of
                // the project, failing fast is a good thing.
                return Err(StoreError::Missing);
            };
            Item::Post(PostData {
                id: Post { key },
                author: Author {
                    id: author,
                    handle: format!("@{account_name}"),
                    // Fall back to the account name when no display name is set.
                    display_name: display_name.unwrap_or(account_name.0),
                },
                content,
            })
        })
        .collect()
    })?;
    Ok(Timeline { items: posts })
}
/// Create a new post entity.
///
/// Persists the contents, credits `author` with the post, and assigns it a
/// fresh ActivityPub id derived from the instance URL.
pub fn create_local_post(
    cx: &Context,
    author: Key,
    content: impl Into<Content>,
) -> crate::Result<Post> {
    let content = content.into();
    cx.run(|tx| {
        let key = Key::gen();
        // Local stuff
        mixin_post(tx, key, author, content)?;
        // Federation stuff
        let id = Id(cx.mk_url(key));
        tx.add_alias(key, id.clone())?;
        tx.add_mixin(key, data::Object {
            kind: ObjectKind::Notelike("Note".to_string()),
            local: true,
            id,
        })?;
        Ok(Post { key })
    })
}
/// Create a local record for a remote `Note`.
///
/// Assumes all objects referenced already exist.
///
/// NOTE(review): panics when the note's author is unknown; callers (e.g. the
/// ingest path) must resolve the author first. Consider returning an error.
#[tracing::instrument(skip(cx))]
pub fn create_post_from_note(cx: &Context, note: Note) -> crate::Result<Post> {
    cx.run(|tx| {
        let Some(author) = tx.lookup(Id(note.author))? else {
            panic!("needed author to already exist")
        };
        let key = Key::gen();
        // Alias the new node to the note's remote ActivityPub id.
        tx.add_alias(key, Id(note.id.clone()))?;
        tx.create(AuthorOf { object: key, author })?;
        tx.add_mixin(key, Content {
            content: note.content,
            warning: note.summary,
        })?;
        tx.add_mixin(key, data::Object {
            kind: ObjectKind::Notelike(note.kind),
            id: Id(note.id),
            local: false,
        })?;
        Ok(Post { key })
    })
}
/// Federate a local post: persist a `Create` activity for it and deliver that
/// activity to the inbox of every follower of the post's author.
///
/// # Panics
///
/// Panics if the post has no [`AuthorOf`] relation.
#[tracing::instrument(skip(cx))]
pub async fn federate_post(cx: &Context, post: Post) -> crate::Result<()> {
    // Obtain all the data we need to construct our activity.
    let (Content { content, warning }, url, author, signing_key, followers) = cx.run(|tx| try {
        let Some(AuthorOf { author, .. }) = tx.incoming(post.key).next().transpose()? else {
            panic!("can't federate post without author: {post:?}")
        };
        let signing_key = get_signing_key(tx, Actor { key: author })?;
        let (c, data::Object { id, .. }) = tx.get_mixin_many(post.key)?;
        let targets = tx.join_on::<Channel, _>(|a| a.follower, tx.incoming::<Follows>(author))?;
        (c, id, author, signing_key, targets)
    })?;
    let activity_key = Key::gen();
    // Insert a create activity into the database so we can serve it later.
    cx.run(|tx| try {
        let id = Id(cx.mk_url(activity_key));
        tx.add_alias(activity_key, id.clone())?;
        tx.add_mixin(activity_key, data::Object {
            kind: ObjectKind::Activity(ActivityKind::Create),
            local: true,
            id,
        })?;
        tx.create(Create {
            id: activity_key,
            actor: author,
            object: post.key,
        })?;
    })?;
    // Construct an ActivityPub message to send.
    let activity = Activity {
        id: cx.mk_url(activity_key),
        actor: signing_key.owner.clone(),
        object: Box::new(Object::Note(Note {
            id: url.to_string(),
            kind: "Note".to_string(),
            author: cx.mk_url(author),
            summary: warning,
            content,
        })),
        kind: "Create".to_string(),
    };
    // Deliver to each follower that has a known inbox channel.
    // Fixed: removed the hard-coded test inbox that was FIXME'd as debug residue.
    for inbox in followers.into_iter().filter_map(|(_, c)| c.map(|t| t.inbox)) {
        // NOTE(review): delivery failures are silently discarded here; consider
        // logging or retrying failed deliveries.
        fetch::deliver(&signing_key, &activity, &inbox).await;
    }
    Ok(())
}
/// Add a post's mixins and predicates to an existing `node`.
///
/// Bumps the author's `post_count`, attaches the [`Content`] mixin, and records
/// the [`AuthorOf`] relation. Returns the same `node` key for convenience.
pub fn mixin_post(
    tx: &Transaction<'_>,
    node: Key,
    author: Key,
    content: impl Into<Content>,
) -> Result<Key, StoreError> {
    tx.update::<Profile>(author, |mut profile| {
        profile.post_count += 1;
        profile
    })?;
    tx.add_mixin(node, content.into())?;
    tx.create(AuthorOf { author, object: node })?;
    Ok(node)
}
pub fn list_posts_by_author(db: &Store, author: Key) -> Result<Vec<(Key, Content)>, StoreError> {
db.run(|tx| {
let posts = tx
.join_on(|a: AuthorOf| a.object, tx.outgoing(author))?
.into_iter()
.filter_map(|(k, opt)| try { (k, opt?) })
.collect();
Ok(posts)
})
}

View file

@ -12,3 +12,5 @@ derive_more = "*"
bincode = "2.0.0-rc.3"
chrono = "*"
tempfile = "*"
macro = { path = "../macro" }
either = "*"

View file

@ -1,17 +1,115 @@
//! Alternative keys.
/// Derive an implementation of [`Alias`].
pub use r#macro::Alias;
use derive_more::{Display, From};
use super::{
types::{AliasSpec, DataType},
Batch, Store, Transaction,
};
use crate::{Key, Result};
use crate::Space;
/// An alternative unique identifier for a node.
pub trait Alias: DataType<Type = AliasSpec> + From<String> + AsRef<str> {}
/// An alternative unique key for a vertex.
pub trait Alias: ToString + From<String> {
const SPACE: (Space, Space);
impl Transaction<'_> {
    /// Look up the key associated with the alias.
    pub fn lookup<A>(&self, alias: A) -> Result<Option<Key>>
    where
        A: Alias,
    {
        op::lookup::<A>(self, alias.as_ref())
    }
    /// Get the alias associated with the `node`.
    pub fn get_alias<A>(&self, node: Key) -> Result<Option<A>>
    where
        A: Alias,
    {
        op::get_alias(self, node)
    }
    /// Add an alias to `node`.
    pub fn add_alias<A>(&self, node: Key, alias: A) -> Result<()>
    where
        A: Alias,
    {
        op::add_alias::<A>(self, node, alias.as_ref())
    }
    /// Check whether `node` has an alias of type `A` defined for it.
    pub fn has_alias<A>(&self, node: Key) -> Result<bool>
    where
        A: Alias,
    {
        op::has_alias::<A>(self, node)
    }
}
#[derive(Display, From)]
pub struct Username(pub String);
impl Alias for Username {
const SPACE: (Space, Space) = (Space("username/l"), Space("username/r"));
impl Store {
    /// Look up the key associated with the alias.
    pub fn lookup<A>(&self, alias: A) -> Result<Option<Key>>
    where
        A: Alias,
    {
        op::lookup::<A>(self, alias.as_ref())
    }
    /// Get the alias associated with the `node`.
    pub fn get_alias<A>(&self, node: Key) -> Result<Option<A>>
    where
        A: Alias,
    {
        op::get_alias(self, node)
    }
    /// Check whether `node` has an alias of type `A` defined for it.
    pub fn has_alias<A>(&self, node: Key) -> Result<bool>
    where
        A: Alias,
    {
        op::has_alias::<A>(self, node)
    }
}
impl Batch {
    /// Add an alias to `node`.
    ///
    /// # Warning
    ///
    /// This will *not* fail if the key already has a alias of this type, and in fact *it will cause fundamental inconsistency*
    /// if the alias already exists. Don't call this function unless you know that neither `node` nor `alias` exist yet.
    pub fn put_alias<A>(&mut self, node: Key, alias: A)
    where
        A: Alias,
    {
        // TODO: consistency *could* be checked by manually iterating over the transaction using `WriteBatch::iterate`
        op::add_alias::<A>(self, node, alias.as_ref()).unwrap();
    }
}
mod op {
    //! Raw keyspace operations backing the alias APIs above.
    use crate::{internal::*, Alias, Key, Result, OK};
    /// Resolve `alias` to the node key it maps to, if any.
    pub fn lookup<A: Alias>(cx: &impl Query, alias: &str) -> Result<Option<Key>> {
        // Fixed: replaced a manual `match` over the inner `Option` with `Option::map`
        // (clippy::manual_map).
        cx.open(A::SPEC.keyspace)
            .get(alias)
            .map(|k| k.map(|x| Key::from_slice(x.as_ref())))
    }
    /// Check whether `node` has an alias of type `A`.
    pub fn has_alias<A: Alias>(cx: &impl Query, node: Key) -> Result<bool> {
        cx.open(A::SPEC.reversed).has(node)
    }
    /// Record the node <-> alias mapping in both directions.
    pub fn add_alias<A: Alias>(cx: &impl Write, node: Key, alias: &str) -> Result<()> {
        cx.open(A::SPEC.keyspace).set(alias, node)?;
        cx.open(A::SPEC.reversed).set(node, alias)?;
        OK
    }
    /// Fetch the alias of type `A` associated with `node`, if any.
    pub fn get_alias<A: Alias>(cx: &impl Query, node: Key) -> Result<Option<A>> {
        let buf = cx.open(A::SPEC.reversed).get(node)?;
        Ok(buf.map(decode))
    }
    /// Decode a raw byte buffer into an alias value (lossy UTF-8).
    fn decode<T>(data: impl AsRef<[u8]>) -> T
    where
        T: From<String>,
    {
        T::from(String::from_utf8_lossy(data.as_ref()).into_owned())
    }
}

View file

@ -1,79 +1,607 @@
//! Relations between nodes.
//! Directed edges, both parallel and simple.
//!
//! This module's main exports are [`Arrow`], and the two kinds of arrows: [`Basic`] and [`Multi`].
//!
//! Querying information about arrows can be done using the APIs exposed by [`Store`] and [`Transaction`],
//! and manipulating them can likewise be done from within the context of a `Transaction`.
//!
//! The arrow API is designed to aggressively minimize boilerplate for defining arrow types, and uses a
//! few tricks to do with associated constants and types to make it all work nicely.
//!
//! # Terminology
//!
//! An arrow is a part of a graph. Graphs consist of *nodes* (also called *vertices*) and *edges*. Nodes
//! can be seen as "things", and edges as connections between those things, defined by the two nodes that
//! they connect (which are called the *endpoints* of the edge).
//!
//! These edges can be directed or undirected. The difference is that undirected edges are identified by
//! an unordered pair of their endpoints, whereas directed edges (also called **arrows**), are identified
//! by an ordered pair, where one of the endpoints is the *tail* (or *origin* in the code/docs here) and
//! the other is the *head* (usually called *target* here).
//!
//! # Arrow kinds
//!
//! Arrows can be either [`Basic`] or [`Multi`]. The main difference is that basic arrows are defined
//! solely by which two nodes they connect, which means that their representation and certain operations
//! are more efficient. The trade-off is that they cannot capture more complex information than "this
//! edge exists".
//!
//! For some use cases (for example, predicates) this is sufficient, but other use cases require multiple,
//! individually identifiable and manipulatable parallel edges. Here, the trade-off is that while they
//! are much more expressive, and can be labeled by associating [mixins] with the arrow's identity key,
//! they incur more space overhead, and most operations on them are more expensive compared to basic
//! edges.
//!
//! Most arrow operations work on either kind of edge. Some signatures reference [`Arrow::Kind`], which
//! is either of the `Multi` or `Basic` types mentioned before. Because parallel arrows need to be
//! discernable from each other, each of them also has an `identity` key, in addition to listing the two
//! edges they connect.
//!
//! [mixins]: super::Mixin
#![allow(private_interfaces)]
use bincode::{Decode, Encode};
pub use self::kinds::{Basic, Multi};
use super::{
types::{ArrowSpec, DataType},
Batch, Store, Transaction,
};
use crate::{internal::Context as _, util::IterExt as _, Key, Result};
use crate::Space;
/// A directed edge.
///
/// See the [module docs][self] for an introduction.
pub trait Arrow: DataType<Type = ArrowSpec> + From<Self::Kind> + Into<Self::Kind> {
    /// The representation of this arrow, which also determines whether parallel edges are allowed.
    ///
    /// Defaults to [`Basic`]: at most one arrow per `(origin, target)` pair. Use [`Multi`] to
    /// permit multiple identifiable parallel arrows between the same two nodes.
    type Kind: ArrowKind = Basic;
}
pub mod multi {
//! Managing multiedges.
//!
//! Unlike regular [`Arrow`]s, which don't have an identity (they are identified by the two nodes that
//! they connect), multiarrows can have their own [`Key`]. This allows one to have multiple arrows in
//! the same direction connecting the same two vertices, which isn't possible with normal arrows.
//!
//! Multiarrows can also be treated as if they were vertices, if their identity (`Key`) is registered as
//! one.
//!
//! This comes with a trade-off, though, specifically in both space and complexity. A multi-arrow also
//! can't have a label, like a typical arrow.
/// Parameterizing arrows so we can distinguish between kinds of arrows.
///
/// This lets us present a common API for certain arrow-related operations while also leveraging some
/// specialization. Essentially, from a type parameter which implements [`Arrow`], we can tell both at
/// the type level and at the value level whether that arrow is a multi-arrow or not.
pub trait ArrowKind {
    /// Whether this kind of arrow should be represented using the specialized representation for edges
    /// that are allowed to be parallel.
    const IS_MULTI: bool;
    /// Construct an arrow from a buffer containing a correctly-oriented arrow.
    ///
    /// Each arrow is stored twice, once "correctly", and once "reversed". This allows us to efficiently
    /// list both the outgoing and incoming edges for any particular vertex by using a prefix scan on the
    /// [`BY_ORIGIN`][ArrowSpec::by_origin] and [`BY_TARGET`][ArrowSpec::by_target] keyspaces respectively.
    ///
    /// The buffer passed to this function will start with 16 bytes origin, followed by 16 bytes target.
    /// For basic arrows, that's it, but for multiarrows there is an additional 16 bytes of "identity",
    /// which is needed to discriminate between multiple parallel edges.
    ///
    /// # Failure
    ///
    /// This method must panic if `buf` is not the expected size (32 bytes for basic arrows, 48 bytes for
    /// multi arrows). The responsibility for ensuring that `buf` is correctly oriented lies with the
    /// caller lest the result is incorrect, but passing an incorrectly oriented arrow is not a memory
    /// safety issue, so this function is safe.
    fn dec(buf: &[u8]) -> Self;
    /// Encode an arrow's key origin-first and target-first.
    fn enc(self) -> (impl AsRef<[u8]>, impl AsRef<[u8]>);
    #[doc(hidden)]
    /// Convert this arrow into its raw, untagged-union form. Internal implementation detail;
    /// which field of the union is live is determined by [`Self::IS_MULTI`].
    fn raw(&self) -> Raw;
}
use crate::{Key, Result, Tag, Transaction};
/// Untagged union over the two arrow representations.
///
/// Which field is live is decided by the arrow's [`ArrowKind::IS_MULTI`] flag; readers must
/// check it before accessing a field (see the `unsafe` read in the arrow ops). Both variants
/// are plain-old-data (`Copy`), so there are no drop concerns.
union Raw {
    multi: Multi,
    basic: Basic,
}
pub fn insert<A>(tx: &Transaction<'_>, origin: Key, target: Key) -> Result<Key>
impl Store {
/// Check whether there exists any arrow of type `A` that points from `origin` to `target`.
pub fn exists<A>(&self, origin: Key, target: Key) -> Result<bool>
where
A: MultiArrow,
A: Arrow,
{
let key = Key::gen();
tx.quiver(A::TYPE).insert(origin, target, key)?;
Ok(key)
op::exists::<A>(self, origin, target)
}
/// Get all arrows of type `A` that point at `target`.
pub fn incoming<'a, A>(&'a self, target: Key) -> impl Iterator<Item = Result<A>> + 'a
where
A: Arrow + 'a,
{
op::incoming::<A>(self, target).map_ok(A::from)
}
/// Get all arrows of type `A` that point away from `origin`.
pub fn outgoing<'a, A>(&'a self, origin: Key) -> impl Iterator<Item = Result<A>> + 'a
where
A: Arrow + 'a,
{
op::outgoing::<A>(self, origin).map_ok(A::from)
}
/// List all arrows between `a` and `b`, in either direction.
pub fn between<'a, A>(&'a self, a: Key, b: Key) -> impl Iterator<Item = Result<A>> + 'a
where
A: Arrow<Kind = Multi> + 'a,
{
op::between::<A>(self, a, b).map_ok(A::from)
}
/// Construct the arrow from its identifier.
pub fn get_arrow<A>(&self, key: Key) -> Result<Option<A>>
where
A: Arrow<Kind = Multi>,
{
let arrow = self
.open(crate::types::MULTIEDGE_HEADERS)
.get(key)?
.map(|v| Key::split(v.as_ref()))
.map(|(origin, target)| A::from(Multi { origin, target, identity: key }));
Ok(arrow)
}
}
impl Transaction<'_> {
    /// Check whether any arrow of type `A` points from `origin` to `target`.
    ///
    /// This only tells you whether there is *any* such arrow, not how many (in the case of
    /// parallel edges).
    pub fn exists<A>(&self, origin: Key, target: Key) -> Result<bool>
    where
        A: Arrow,
    {
        op::exists::<A>(self, origin, target)
    }
    /// Get all arrows of type `A` that point at `target`.
    pub fn incoming<'a, A>(&'a self, target: Key) -> impl Iterator<Item = Result<A>> + 'a
    where
        A: Arrow + 'a,
    {
        let raw = op::incoming::<A>(self, target);
        raw.map_ok(A::from)
    }
    /// Get all arrows of type `A` that point away from `origin`.
    pub fn outgoing<'a, A>(&'a self, origin: Key) -> impl Iterator<Item = Result<A>> + 'a
    where
        A: Arrow + 'a,
    {
        let raw = op::outgoing::<A>(self, origin);
        raw.map_ok(A::from)
    }
    /// Create a new arrow of type `A`.
    ///
    /// This operation supports both [`Multi`] and [`Basic`] arrows.
    ///
    /// # Example
    ///
    /// The following snippet creates an arrow between `origin` and `target`.
    ///
    /// ```rust
    /// # fn main () -> store::Result<()> {
    /// use store::{Arrow, Key};
    ///
    /// #[derive(Arrow)]
    /// struct MyArrow { origin: Key, target: Key }
    ///
    /// # let schema = store::types::Schema::new().has::<MyArrow>();
    /// # store::Store::test(schema, |db| {
    /// let origin = Key::gen();
    /// let target = Key::gen();
    ///
    /// db.run(|tx| {
    ///     tx.create(MyArrow { origin, target })
    /// })?;
    ///
    /// assert!(db.exists::<MyArrow>(origin, target)?);
    /// # store::OK })
    /// # }
    /// ```
    pub fn create<A>(&self, arrow: A) -> Result<()>
    where
        A: Arrow,
    {
        let kind: A::Kind = arrow.into();
        op::create::<A>(self, kind)
    }
    /// Delete all edges of type `A` from `origin` to `target`.
    ///
    /// It is not an error for this function not to delete anything.
    pub fn delete_all<A>(&self, origin: Key, target: Key) -> Result<()>
    where
        A: Arrow,
    {
        op::delete_all::<A>(self, origin, target)
    }
    /// Delete a specific arrow.
    pub fn delete_one<A>(&self, arrow: A) -> Result<()>
    where
        A: Arrow,
    {
        let kind: A::Kind = arrow.into();
        op::delete_one::<A>(self, kind)
    }
    /// List all arrows between `a` and `b`, in either direction.
    pub fn between<'a, A>(&'a self, a: Key, b: Key) -> impl Iterator<Item = Result<A>> + 'a
    where
        A: Arrow<Kind = Multi> + 'a,
    {
        let raw = op::between::<A>(self, a, b);
        raw.map_ok(A::from)
    }
    /// Construct the arrow from its identifier.
    pub fn get_arrow<A>(&self, key: Key) -> Result<Option<A>>
    where
        A: Arrow<Kind = Multi>,
    {
        let Some(header) = self.open(crate::types::MULTIEDGE_HEADERS).get(key)? else {
            return Ok(None);
        };
        let (origin, target) = Key::split(header.as_ref());
        Ok(Some(A::from(Multi { origin, target, identity: key })))
    }
}
impl Batch {
    /// Create an arrow. See [`Transaction::create`].
    pub fn create<A>(&mut self, arrow: A)
    where
        A: Arrow,
    {
        let kind: A::Kind = arrow.into();
        op::create::<A>(self, kind)
            .expect("no errors expected to occur during batch operation")
    }
    /// Delete a specific arrow. See [`Transaction::delete_one`].
    pub fn delete_one<A>(&mut self, arrow: A)
    where
        A: Arrow,
    {
        let kind: A::Kind = arrow.into();
        op::delete_one::<A>(self, kind)
            .expect("no errors expected to occur during batch operation")
    }
}
mod op {
//! Implementations of arrow operations.
use super::*;
use crate::{internal::*, types::MULTIEDGE_HEADERS, Key, Result, OK};
/// Check whether there exists at least one arrow of type `A` from `origin` to `target`.
pub fn exists<A>(cx: &impl Query, origin: Key, target: Key) -> Result<bool>
where
A: Arrow,
{
if A::Kind::IS_MULTI {
// In the case of a multi-edge, at least one result from the prefix scan
// indicates that there is at least one edge.
cx.open(A::SPEC.by_origin)
.scan(origin.fuse(target))
.next()
.transpose()
.map(|o| o.is_some())
} else {
cx.open(A::SPEC.by_origin).has(origin.fuse(target))
}
}
pub fn list_incoming<'db, A>(
tx: &'db Transaction<'db>,
/// List incoming arrows relative to `target`.
pub fn incoming<'db, A>(
cx: &'db impl Query,
target: Key,
) -> impl Iterator<Item = Result<(Key, Key)>> + 'db
) -> impl Iterator<Item = Result<A::Kind>> + 'db
where
A: MultiArrow,
A: Arrow,
A::Kind: 'db,
{
tx.quiver(A::TYPE).list_incoming(target)
// In the `by_target` keyspace, for either kind of arrow the layout is such that the target is
// the prefix, so we pick that keyspace to more efficiently list all arrows that target the key.
cx.open(A::SPEC.by_target)
.scan(target)
.map_ok(|(mut k, _)| {
// Arrows from `by_target` are oriented target-first, while the decoder function requires
// that the buffer is oriented origin-first. Regardless of whether `..32` covers the prefix
// or the whole slice, swapping the two keys always gives us the ordering expected by the
// decoding function.
let (t, o) = k[..32].split_at_mut(16);
t.swap_with_slice(o);
A::Kind::dec(&k)
})
}
pub trait MultiArrow {
const TYPE: Tag;
/// List outgoing arrows relative to `origin`.
pub fn outgoing<'db, A>(
cx: &'db impl Query,
origin: Key,
) -> impl Iterator<Item = Result<A::Kind>> + 'db
where
A: Arrow,
A::Kind: 'db,
{
cx.open(A::SPEC.by_origin)
.scan(origin)
.map_ok(|(ref k, _)| A::Kind::dec(k))
}
/// Get all arrows between the two endpoints (in either direction)
pub fn between<'db, A>(
cx: &'db impl Query,
origin: Key,
target: Key,
) -> impl Iterator<Item = Result<A::Kind>> + 'db
where
A: Arrow,
A::Kind: 'db,
{
let ks = cx.open(A::SPEC.by_origin);
ks.scan(origin.fuse(target))
.chain(ks.scan(target.fuse(origin)))
.map_ok(|(ref k, _)| A::Kind::dec(k))
}
    /// Create a new arrow.
    ///
    /// Every arrow is written twice — once origin-first and once target-first (see
    /// [`ArrowKind::enc`]) — so both incoming and outgoing prefix scans work. For multiarrows,
    /// a "header" entry mapping the identity key to the `(origin, target)` pair is also
    /// written, allowing the arrow to be reconstructed from its key alone.
    pub fn create<A>(cx: &impl Write, arrow: A::Kind) -> Result<()>
    where
        A: Arrow,
    {
        if A::Kind::IS_MULTI {
            // SAFETY: `IS_MULTI` is true only for the `Multi` kind, whose `raw()`
            // implementation initializes the union's `multi` field, so reading
            // `multi` here is sound.
            let Multi { identity, origin, target } = unsafe { arrow.raw().multi };
            cx.open(MULTIEDGE_HEADERS)
                .set(identity, origin.fuse(target))?;
        }
        let (by_origin, by_target) = arrow.enc();
        // Values are empty: all information lives in the keys themselves.
        cx.open(A::SPEC.by_origin).set(by_origin, b"")?;
        cx.open(A::SPEC.by_target).set(by_target, b"")?;
        OK
    }
/// Delete all arrows from `origin` to `target`.
///
/// TODO: Remove the query requirement (depends on range delete being available).
pub fn delete_all<A>(cx: &(impl Write + Query), origin: Key, target: Key) -> Result<()>
where
A: Arrow,
{
let by_origin = cx.open(A::SPEC.by_origin);
let by_target = cx.open(A::SPEC.by_target);
Ok(if A::Kind::IS_MULTI {
let headers = cx.open(MULTIEDGE_HEADERS);
// TODO: optimize this implementation using range deletes.
// Unfortunately, range deletes are not available in transactional backends.
for key in by_origin.scan(origin.fuse(target)).keys() {
let key = Multi::decode(key?.as_ref());
by_origin.del(key.encode())?;
by_target.del(key.swap().encode())?;
headers.del(key.identity)?;
}
} else {
by_origin.del(origin.fuse(target))?;
by_target.del(target.fuse(origin))?;
})
}
/// Delete a specific arrow, if it exists. Doesn't error if the arrow does *not* exist.
pub fn delete_one<A>(cx: &impl Write, arrow: A::Kind) -> Result<()>
where
A: Arrow,
{
let (by_origin, by_target) = arrow.enc();
cx.open(A::SPEC.by_origin).del(by_origin)?;
cx.open(A::SPEC.by_target).del(by_target)?;
OK
}
}
/// A directed edge between two vertices.
pub trait Arrow: Encode + Decode {
const SPACE: (Space, Space);
}
/// Which way an arrow is pointing when viewed from a particular vertex.
pub enum Direction {
Incoming,
Outgoing,
}
/// The node this arrow points away from is the "author" of the node the arrow points to.
#[derive(Encode, Decode)]
pub struct AuthorOf;
impl Arrow for AuthorOf {
const SPACE: (Space, Space) = (Space("created-by/l"), Space("created-by/r"));
}
/// The origin of this arrow has follow requested the target.
#[derive(Encode, Decode)]
pub struct FollowRequested;
impl Arrow for FollowRequested {
const SPACE: (Space, Space) = (Space("pending-fr/l"), Space("pending-fr/r"));
}
/// The origin "follows" the target.
#[derive(Encode, Decode)]
pub struct Follows;
impl Arrow for Follows {
const SPACE: (Space, Space) = (Space("follows/l"), Space("follows/r"));
/// Types representing the different kinds of arrows.
mod kinds {
use super::ArrowKind;
use crate::Key;
impl ArrowKind for Multi {
const IS_MULTI: bool = true;
fn dec(buf: &[u8]) -> Self {
Multi::decode(buf)
}
fn enc(self) -> (impl AsRef<[u8]>, impl AsRef<[u8]>) {
(self.encode(), self.swap().encode())
}
fn raw(&self) -> super::Raw {
super::Raw { multi: *self }
}
}
impl ArrowKind for Basic {
const IS_MULTI: bool = false;
fn dec(buf: &[u8]) -> Self {
Basic::decode(buf)
}
fn enc(self) -> (impl AsRef<[u8]>, impl AsRef<[u8]>) {
(self.encode(), self.reverse().encode())
}
fn raw(&self) -> super::Raw {
super::Raw { basic: *self }
}
}
/// A multi-edge is an edge with an identity, which allows multiple parallel edges to exist
/// between two vertices.
#[derive(Clone, Copy)]
pub struct Multi {
/// The node that this edge points away from.
pub origin: Key,
/// The node that this edge points towards.
pub target: Key,
/// The discriminator of this particular edge, which distinguishes it from all other edges that
/// connect `origin` and `target`, and indeed from every other edge or node in the graph.
pub identity: Key,
}
    impl Multi {
        /// Decode a multiarrow key from an origin-first encoded buffer. If the buffer is not correctly
        /// oriented, the results will be wrong; the arrow will be oriented *away* from the target and
        /// *at* the origin, instead of the other way around.
        ///
        /// # Orientation
        ///
        /// In this context, *correctly oriented* means that it is laid out in *origin-first* order,
        /// like this (where `o`, `t` and `i` represent bytes):
        ///
        /// ```text
        /// oooooooooooooooo tttttttttttttttt iiiiiiiiiiiiiiii
        /// |--------------| |--------------| |--------------|
        ///      origin           target          identity
        ///       ..16           16..32            32..
        /// ```
        ///
        /// In a *reverse oriented* buffer, the origin and target parts are swapped, so the target is
        /// the prefix, followed by the origin, and then the identity. This is also called *target-first*
        /// encoding in this documentation.
        ///
        /// # Silent failure
        ///
        /// There is no way to detect whether the ordering is correct from just the buffer, so the caller
        /// must ensure that the order is correct. If you have a target-first encoded buffer, you have
        /// to swap the two keys before passing it into this function, or this function will give you an
        /// edge that does not exist (since a multiedge can only point in one direction).
        ///
        /// Safety-wise, this isn't an issue, so it does not warrant marking this function as `unsafe`.
        ///
        /// # Panics
        ///
        /// This function panics if `buf` is not exactly 48 bytes long.
        pub fn decode(buf: &[u8]) -> Multi {
            Multi {
                origin: Key::from_slice(&buf[..16]),
                target: Key::from_slice(&buf[16..32]),
                identity: Key::from_slice(&buf[32..]),
            }
        }
        /// Encode an arrow in *origin-first order*. See the docs of [`Multi::decode`] for an explanation
        /// of the difference between origin-first encoding and target-first encoding.
        pub fn encode(self) -> [u8; 48] {
            let mut key = [0; 48];
            key[..16].copy_from_slice(&self.origin.0);
            key[16..32].copy_from_slice(&self.target.0);
            key[32..].copy_from_slice(&self.identity.0);
            key
        }
        /// Swap the origin and target of this arrow, while leaving the identity the same.
        /// Used to produce the target-first encoding of an arrow.
        pub(super) fn swap(self) -> Multi {
            Multi {
                origin: self.target,
                target: self.origin,
                ..self
            }
        }
    }
    /// A normal directed edge. Duplicates are not allowed.
    ///
    /// This kind of arrow is useful for modeling predicates and simple relationships.
    #[derive(Clone, Copy)]
    pub struct Basic {
        /// The node this edge points away from.
        pub origin: Key,
        /// The node this edge points towards.
        pub target: Key,
    }
impl Basic {
/// Get the inverse of this arrow (an arrow that connects the same two nodes, but pointing in the
/// other direction).
pub fn reverse(self) -> Basic {
Basic {
origin: self.target,
target: self.origin,
}
}
/// Encode `self` in origin-first order. See [`Multi::decode`] for docs on ordering.
pub fn encode(self) -> [u8; 32] {
self.origin.fuse(self.target)
}
/// Decode a basic edge from a buffer laid out origin-first. See [`Multi::decode`] for more information
/// about key encoding.
///
/// # Panics
///
/// Panics if `buf` is not exactly 32 bytes long.
pub fn decode(buf: &[u8]) -> Basic {
let (origin, target) = Key::split(buf);
Basic { origin, target }
}
}
}
/// Derive [`Arrow`] for a struct.
///
/// This will generate the required [`Into`] and [`From`] impls, as well as an [`Arrow`](trait@Arrow) impl and
/// a [`DataType`] impl with the namespaces derived from the name of the struct. The macro works on structs with
/// specific fields, or newtypes of any arrow kind.
///
/// # Attributes
///
/// The `origin`, `target` and `identity` attributes are used on fields of type [`Key`], and they are used
/// to map the arrow's type to an [`ArrowKind`]. The `#[origin]` annotation isn't needed if the struct contains
/// a field named `origin`. Ditto with `target` and `identity`.
///
/// If there is no `identity` defined, the `ArrowKind` will be [`Basic`]. If an `identity` is defined, the kind
/// will be [`Multi`].
///
/// # Examples
///
/// Generates a [`Basic`] arrow called `my-arrow`.
///
/// ```
/// use store::{Key, Arrow, types::Schema};
///
/// #[derive(Arrow)]
/// struct MyArrow { origin: Key, target: Key }
///
/// // This will fail to compile if the type doesn't implement `Arrow` correctly
/// Schema::new().has::<MyArrow>();
/// ```
///
/// Newtypes of either arrow kind are supported.
///
/// ```
/// use store::{Key, arrow::{Basic, Multi, Arrow}};
///
/// /// The origin has requested to follow the target.
/// ///
/// /// Note: there may be more than one follow request between any two actors.
/// #[derive(Arrow)]
/// struct FollowRequest(Multi);
///
/// /// A relation between two actors meaning that the origin follows the target.
/// #[derive(Arrow)]
/// struct Follows(Basic);
///
/// /// Users can follow each other.
/// struct User(Key);
///
/// impl User {
/// /// Make `self` follow `other`.
/// pub fn follows(self, other: User) -> Follows {
/// Follows(Basic { origin: self.0, target: other.0 })
/// }
/// }
/// ```
///
/// Generates a [`Multi`] arrow called `my-multi-arrow`, mapping the multiarrow's discriminator to the struct's
/// `unique` field.
///
/// ```
/// use store::{Key, Arrow};
///
/// #[derive(Arrow)]
/// struct MyMultiArrow {
/// pub origin: Key,
/// pub target: Key,
/// #[identity]
/// pub unique: Key,
/// }
/// ```
///
/// The macro automatically adds `From` and `Into` implementations:
///
/// ```
/// use store::{Key, Arrow, arrow::Basic};
///
/// #[derive(Arrow)]
/// struct MyArrow { origin: Key, target: Key }
///
/// let origin = Key::gen();
/// let target = Key::gen();
///
/// let edge: Basic = MyArrow { origin, target }.into();
///
/// assert_eq!(origin, edge.origin);
/// assert_eq!(target, edge.target);
/// ```
pub use r#macro::Arrow;

306
lib/store/src/internal.rs Normal file
View file

@ -0,0 +1,306 @@
//! Provides a nice hashmap-esque interface for manipulating entries in the store's backend.
use std::sync::Arc;
use rocksdb::{BoundColumnFamily, IteratorMode};
pub use self::cx::{Context, Query, Write};
use crate::{util::IterExt as _, Result, StoreError};
/// An internal interface to a specific keyspace that exposes basic hashmap-esque operations
/// on that keyspace, generic over whether the source of the data is a [`Transaction`] or a
/// [`Store`].
pub struct Keyspace<'db, C> {
    /// The read/write context (store, transaction, or batch) backing this keyspace.
    pub(super) context: &'db C,
    /// Handle to the rocksdb column family this keyspace operates on.
    pub(super) cf: Arc<BoundColumnFamily<'db>>,
}
impl<'db, C> Keyspace<'db, C>
where
    C: Query,
{
    /// Fetch a value from the keyspace.
    pub fn get(&self, key: impl AsRef<[u8]>) -> Result<Option<impl AsRef<[u8]> + 'db>> {
        self.context.get_pinned(&self.cf, key)
    }
    /// Test whether a key exists.
    pub fn has(&self, key: impl AsRef<[u8]>) -> Result<bool> {
        Ok(self.get(key)?.is_some())
    }
    /// Execute a prefix scan.
    pub fn scan(
        &self,
        prefix: impl AsRef<[u8]> + 'db,
    ) -> impl Iterator<Item = Result<(Box<[u8]>, Box<[u8]>)>> + 'db {
        let wanted = prefix.as_ref().to_vec();
        self.context
            .prefix_iterator(&self.cf, prefix.as_ref())
            // The backend's prefix iterator may "overshoot" past the prefixed range;
            // cut the stream off at the first key that no longer carries the prefix.
            .take_while(move |entry| {
                if let Ok((key, _)) = entry {
                    key.starts_with(&wanted)
                } else {
                    true
                }
            })
            .map_err(StoreError::Internal)
    }
    /// List all pairs in the keyspace.
    pub fn list(&self) -> impl Iterator<Item = Result<(Box<[u8]>, Box<[u8]>)>> + 'db {
        let all = self.context.full_iterator(&self.cf, IteratorMode::Start);
        all.map_err(StoreError::Internal)
    }
    /// Execute a range scan over `lower..upper`.
    pub fn range<const N: usize>(
        &self,
        lower: [u8; N],
        upper: [u8; N],
    ) -> impl Iterator<Item = Result<(Box<[u8]>, Box<[u8]>)>> + 'db {
        // TODO: use a seek op to make this more efficient
        self.context
            .full_iterator(&self.cf, IteratorMode::Start)
            // Fast-forward past everything below the lower bound.
            .skip_while(move |entry| matches!(entry, Ok((key, _)) if key.as_ref() < &lower))
            // Stop once the upper bound is reached (errors pass through).
            .take_while(move |entry| match entry {
                Ok((key, _)) => key.as_ref() < &upper,
                _ => true,
            })
            .map_err(StoreError::Internal)
    }
    /// Join all the keys to their values in this keyspace.
    ///
    /// This may be optimized compared to many random point lookups.
    pub fn join(
        &self,
        keys: impl IntoIterator<Item = impl AsRef<[u8]>>,
    ) -> Vec<Result<Option<Vec<u8>>>> {
        let keyed = keys.into_iter().map(|key| (&self.cf, key));
        self.context.multi_get(keyed)
    }
}
impl<C> Keyspace<'_, C>
where
    C: Write,
{
    /// Set the given `key` to the `value`, overwriting it if there was already a value there.
    pub fn set(&self, key: impl AsRef<[u8]>, val: impl AsRef<[u8]>) -> Result<()> {
        self.context.put(&self.cf, key, val)
    }
    /// Drop the value if it exists. Deleting a nonexistent key is not an error.
    pub fn del(&self, key: impl AsRef<[u8]>) -> Result<()> {
        self.context.delete(&self.cf, key)
    }
}
mod cx {
//! Contexts for doing reads, writes or both to the database.
//!
//! The traits in this module map abstract calls to their methods on the [rocksdb] objects.
use rocksdb::{
AsColumnFamilyRef, DBAccess, DBIteratorWithThreadMode, DBPinnableSlice, IteratorMode,
};
use super::Keyspace;
use crate::{util::IterExt as _, Backend, Batch, Result, Store, StoreError, Transaction, OK};
/// A context for executing database operations.
pub trait Context {
/// Open the keyspace identified by `cf`.
fn open<'cx>(&'cx self, cf: impl AsRef<str>) -> Keyspace<'cx, Self>
where
Self: Sized;
}
/// A context in which one can read from the data store.
///
/// Specifically, this maps calls to either the transaction or the store's internals without us having
/// to implement methods for *both* transactions and the store.
pub trait Query: Context {
type Backend: DBAccess;
fn get_pinned<'a>(
&'a self,
cf: &impl AsColumnFamilyRef,
key: impl AsRef<[u8]>,
) -> Result<Option<DBPinnableSlice<'a>>>;
fn prefix_iterator<'a>(
&'a self,
cf: &impl AsColumnFamilyRef,
prefix: &[u8],
) -> DBIteratorWithThreadMode<'a, Self::Backend>;
fn full_iterator<'a>(
&'a self,
cf: &impl AsColumnFamilyRef,
mode: IteratorMode<'a>,
) -> DBIteratorWithThreadMode<'a, Self::Backend>;
/// Optimized multi-point lookup.
fn multi_get<'a, C: AsColumnFamilyRef + 'a>(
&'a self,
keys: impl IntoIterator<Item = (&'a C, impl AsRef<[u8]>)>,
) -> Vec<Result<Option<Vec<u8>>>>;
}
/// A context in which one can read from and modify the data store.
pub trait Write: Context {
fn delete(&self, cf: &impl AsColumnFamilyRef, key: impl AsRef<[u8]>) -> Result<()>;
fn put(
&self,
cf: &impl AsColumnFamilyRef,
key: impl AsRef<[u8]>,
val: impl AsRef<[u8]>,
) -> Result<()>;
}
impl Context for Store {
fn open<'cx>(&'cx self, cf: impl AsRef<str>) -> Keyspace<'cx, Self> {
let name = cf.as_ref();
let Some(cf) = self.inner.cf_handle(name) else {
panic!("unregistered keyspace {name}! is it in the schema?")
};
Keyspace { context: &self, cf }
}
}
impl Query for Store {
type Backend = Backend;
fn get_pinned<'a>(
&'a self,
cf: &impl AsColumnFamilyRef,
key: impl AsRef<[u8]>,
) -> Result<Option<DBPinnableSlice<'a>>> {
self.inner
.get_pinned_cf(cf, key)
.map_err(StoreError::Internal)
}
fn prefix_iterator<'a>(
&'a self,
cf: &impl AsColumnFamilyRef,
prefix: &[u8],
) -> DBIteratorWithThreadMode<'a, Backend> {
self.inner.prefix_iterator_cf(cf, prefix)
}
fn full_iterator<'a>(
&'a self,
cf: &impl AsColumnFamilyRef,
mode: IteratorMode<'a>,
) -> DBIteratorWithThreadMode<'a, Self::Backend> {
self.inner.full_iterator_cf(cf, mode)
}
fn multi_get<'a, C: AsColumnFamilyRef + 'a>(
&'a self,
keys: impl IntoIterator<Item = (&'a C, impl AsRef<[u8]>)>,
) -> Vec<Result<Option<Vec<u8>>>> {
self.inner
.multi_get_cf(keys)
.into_iter()
.map_err(StoreError::Internal)
.collect()
}
}
impl Context for Transaction<'_> {
fn open<'cx>(&'cx self, cf: impl AsRef<str>) -> Keyspace<'cx, Self> {
let name = cf.as_ref();
let Some(cf) = self.store.inner.cf_handle(name) else {
panic!("unregistered keyspace {name}! is it in the schema?")
};
Keyspace { context: &self, cf }
}
}
impl<'db> Query for Transaction<'db> {
type Backend = rocksdb::Transaction<'db, Backend>;
fn get_pinned<'a>(
&'a self,
cf: &impl AsColumnFamilyRef,
key: impl AsRef<[u8]>,
) -> Result<Option<DBPinnableSlice<'a>>> {
self.inner
.get_pinned_cf(cf, key)
.map_err(StoreError::Internal)
}
fn prefix_iterator<'a>(
&'a self,
cf: &impl AsColumnFamilyRef,
prefix: &[u8],
) -> DBIteratorWithThreadMode<'a, Self::Backend> {
self.inner.prefix_iterator_cf(cf, prefix)
}
fn full_iterator<'a>(
&'a self,
cf: &impl AsColumnFamilyRef,
mode: IteratorMode<'a>,
) -> DBIteratorWithThreadMode<'a, Self::Backend> {
self.inner.full_iterator_cf(cf, mode)
}
fn multi_get<'a, C: AsColumnFamilyRef + 'a>(
&'a self,
keys: impl IntoIterator<Item = (&'a C, impl AsRef<[u8]>)>,
) -> Vec<Result<Option<Vec<u8>>>> {
self.inner
.multi_get_cf(keys)
.into_iter()
.map_err(StoreError::Internal)
.collect()
}
}
impl Write for Transaction<'_> {
fn delete(&self, cf: &impl AsColumnFamilyRef, key: impl AsRef<[u8]>) -> Result<()> {
self.inner.delete_cf(cf, key).map_err(StoreError::Internal)
}
fn put(
&self,
cf: &impl AsColumnFamilyRef,
key: impl AsRef<[u8]>,
val: impl AsRef<[u8]>,
) -> Result<()> {
self.inner
.put_cf(cf, key, val)
.map_err(StoreError::Internal)
}
}
impl Context for Batch {
fn open<'cx>(&'cx self, cf: impl AsRef<str>) -> Keyspace<'cx, Self>
where
Self: Sized,
{
let name = cf.as_ref();
let Some(cf) = self.store.inner.cf_handle(name) else {
panic!("unregistered keyspace {name}! is it in the schema?")
};
Keyspace { context: &self, cf }
}
}
impl Write for Batch {
fn delete(&self, cf: &impl AsColumnFamilyRef, key: impl AsRef<[u8]>) -> Result<()> {
self.inner.borrow_mut().delete_cf(cf, key);
OK
}
fn put(
&self,
cf: &impl AsColumnFamilyRef,
key: impl AsRef<[u8]>,
val: impl AsRef<[u8]>,
) -> Result<()> {
self.inner.borrow_mut().put_cf(cf, key, val);
OK
}
}
}

View file

@ -1,37 +1,25 @@
use std::fmt::{Debug, Display};
use std::{
fmt::{Debug, Display},
str::FromStr,
};
use chrono::{DateTime, Utc};
use ulid::Ulid;
use crate::{Alias, Error, Result, Transaction};
use crate::StoreError;
/// A unique identifier for vertices in the database.
#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Key(pub(crate) [u8; 16]);
impl Display for Key {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Display::fmt(&Ulid::from_bytes(self.0), f)
}
}
impl Debug for Key {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Key({})", Ulid::from_bytes(self.0))
}
}
impl Key {
/// Generate a new node identifier.
pub fn gen() -> Key {
Key(ulid::Ulid::new().to_bytes())
}
pub(crate) fn from_slice(buf: &[u8]) -> Key {
let mut key = [0; 16];
key.copy_from_slice(&buf);
Key(key)
}
/// Get the time at which this key was generated.
pub fn timestamp(self) -> DateTime<Utc> {
let ms = Ulid::from_bytes(self.0).timestamp_ms();
let ms = self.to_ulid().timestamp_ms();
DateTime::from_timestamp_millis(ms as i64).unwrap()
}
/// Join two keys together.
@ -41,11 +29,25 @@ impl Key {
buf[16..].copy_from_slice(&other.0);
buf
}
pub(crate) fn from_slice(buf: &[u8]) -> Key {
let mut key = [0; 16];
key.copy_from_slice(&buf);
Key(key)
}
pub(crate) fn split(buf: &[u8]) -> (Key, Key) {
let tail = Key::from_slice(&buf[..16]);
let head = Key::from_slice(&buf[16..]);
(tail, head)
}
// TODO: This doesn't belong here lmao
pub(crate) fn range(ts: DateTime<Utc>) -> ([u8; 16], [u8; 16]) {
let min = Ulid::from_parts(ts.timestamp_millis() as u64, u128::MIN).to_bytes();
let max = Ulid::from_parts(ts.timestamp_millis() as u64, u128::MAX).to_bytes();
(min, max)
}
fn to_ulid(self) -> Ulid {
Ulid::from_bytes(self.0)
}
}
impl AsRef<[u8]> for Key {
@ -54,45 +56,24 @@ impl AsRef<[u8]> for Key {
}
}
/// Anything that can be used to reference a vertex, both "normal" [keys](Key)
/// and [aliases](Alias).
///
/// In general, using a key directly is going to be more efficient than using
/// an alias, because it incurs less lookups.
pub trait Keylike: Sized {
/// Translate the thing to a [`Key`].
///
/// This function should return [`Error::Missing`] if the key cannot be located.
fn translate(self, tx: &Transaction<'_>) -> Result<Key>;
/// Translate, and check whether the key is actually registered.
///
/// This function should return [`Error::Undefined`] if the key does not *really*
/// exist. It should return [`Error::Missing`] if the key can't be found.
fn checked_translate(self, tx: &Transaction<'_>) -> Result<Key> {
let key = self.translate(tx)?;
if !tx.is_registered(key)? {
Err(Error::Undefined { key })
} else {
Ok(key)
}
impl FromStr for Key {
    type Err = crate::StoreError;
    /// Parse a key from its canonical ULID string representation.
    ///
    /// # Errors
    ///
    /// Returns [`StoreError::BadKey`] if `s` is not a valid ULID.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        s.parse::<Ulid>()
            .map(|x| Key(x.to_bytes()))
            // Pass the variant constructor directly (clippy: redundant_closure).
            .map_err(StoreError::BadKey)
    }
}
impl Keylike for Key {
fn translate(self, _: &Transaction<'_>) -> Result<Key> {
Ok(self)
impl Display for Key {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Display::fmt(&self.to_ulid(), f)
}
}
impl<A> Keylike for A
where
A: Alias,
{
fn translate(self, tx: &Transaction<'_>) -> Result<Key> {
tx.lookup_alias(self)
impl Debug for Key {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Key({})", Ulid::from_bytes(self.0))
}
}
/// A type tag identifying a vertex.
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash)]
pub struct Tag(pub u8);

View file

@ -1,78 +1,113 @@
#![feature(iterator_try_collect)]
//! The data store abstractions used by the ActivityPuppy project.
#![feature(iterator_try_collect, associated_type_defaults)]
//! Data persistence for the ActivityPuppy social media server built on top of [rocksdb].
//!
//! Persistence in a puppy server is handled by this component, which implements a directed graph
//! inspired datastore on top of the [rocksdb] key-value store.
//! # Overview
//!
//! The workflow for manipulating stuff in the store is to open a [`Store`], and then to call
//! its [`transaction`](Store::transaction) method. This method takes a function that, given
//! a [`Transaction`], returns a result with some value. The `Transaction` object contains some
//! useful CRUD methods. Returning an `Ok` commits the transaction and returning `Err` rolls it
//! back.
//! The design of the data store's abstractions is heavily inspired by graph theory. The idea is to encourage
//! composition and loose coupling by segmenting all data associated with a node into [mixins][Mixin], and
//! modeling relations and predicates between nodes as [arrows][Arrow]. In additions, the key identifying a
//! node can be [aliased][Alias] by a string newtype, which must be unique within the namespace of that alias.
//!
//! This component is specialized to puppy's storage needs, and probably won't be much use unless
//! you're writing something that interfaces with puppy.
//! The API is optimized for reducing boilerplate and legibility at the call site.
//!
//! There are three interfaces to the store: the read-only [`Store`], the write-only [`Batch`] and the [`Transaction`],
//! which allows both reads and writes.
use std::{path::Path, sync::Arc};
use std::{cell::RefCell, future::Future, path::Path, sync::Arc};
use derive_more::From;
use rocksdb::{MultiThreaded, Options, TransactionDBOptions};
type Backend = rocksdb::TransactionDB<MultiThreaded>;
use derive_more::{From, Display};
use rocksdb::{Options, TransactionDBOptions, WriteBatchWithTransaction};
use types::Schema;
mod alias;
mod internal;
mod key;
mod transaction;
pub use key::{Key, Keylike, Tag};
pub use transaction::Transaction;
pub use {alias::Alias, arrow::Arrow, mixin::Mixin};
pub mod alias;
pub mod arrow;
pub mod mixin;
pub mod util;
/// A shorthand for committing a [`Transaction`] (because I think `Ok(())` is ugly).
pub const OK: Result<()> = Ok(());
/// Master list of all column family names in use.
const SPACES: &[&'static str] = &[
"registry",
"username/l",
"username/r",
"follows/l",
"follows/r",
"profile",
"content",
"created-by/l",
"created-by/r",
"pending-fr/l",
"pending-fr/r",
"multi:id-map",
"multi:index/l",
"multi:index/r",
#[cfg(test)]
"test-arrow/l",
#[cfg(test)]
"test-arrow/r",
];
/// The handle to the data store.
///
/// This type can be cloned freely.
#[derive(Clone)]
pub struct Store {
inner: Arc<Backend>,
}
mod mixin;
/// The name of the puppy data store inside the state directory.
///
/// The final on-disk location is `<state_dir>/<STORE_NAME>`.
const STORE_NAME: &str = "main-store";
/// Open a [`Store`]. Creates one if it doesn't exist yet at the state directory path.
///
/// Free-function convenience wrapper around [`Store::open`].
pub fn open(state_dir: impl AsRef<Path>, schema: Schema) -> Result<Store> {
    Store::open(state_dir, schema)
}
pub use {alias::Alias, arrow::Arrow, key::Key, mixin::Mixin};
pub mod arrow;
pub mod types;
pub mod util;
/// The main interface to the data persistence engine.
///
/// This type provides reading capabilities, but does not expose APIs for manipulating data in the store. For
/// that, you must [run][Store::run] a [`Transaction`] or [apply][Store::apply] a [`Batch`].
///
/// Cloning is cheap: every clone shares the same underlying database handle
/// through the `Arc`.
#[derive(Clone)]
pub struct Store {
    // TODO: maybe switch to `OptimisticTransactionDB` because it has `batched_multi_get_cf`, which may be useful
    // if we end up doing lots of point lookups. alternatively, maybe we don't need *transactions* altogether, and
    // we can get away with write batches and snapshots. the main problem with transactions is that it doesn't let
    // us do range deletes, which affects the efficiency of multiarrow deletion.
    //
    // a switch to write batches is feasible if we end up not doing reads and writes in the same transaction.
    inner: Arc<Backend>,
}
/// Hosts APIs for manipulating the data store.
///
/// You can access these APIs from the body of the closure passed to [`Store::run`].
///
/// Pairs the underlying rocksdb transaction with a borrow of the owning
/// [`Store`], so a transaction can never outlive the store it belongs to.
pub struct Transaction<'db> {
    inner: rocksdb::Transaction<'db, Backend>,
    store: &'db Store,
}
/// A set of writes that are to be executed atomically.
///
/// The writes are buffered in memory and become visible in the store only when
/// the batch is passed to [`Store::apply`].
pub struct Batch {
    // NOTE(review): the `RefCell` suggests write APIs are meant to take `&self`,
    // but `put_mixin` currently takes `&mut self` — confirm which is intended.
    inner: RefCell<WriteBatchWithTransaction<true>>,
    store: Store,
}
impl Store {
/// Open a data store in the given `state_dir`.
/// Run a [transaction][Transaction].
///
/// If the data store does not exist yet, it will be created.
pub fn open(state_dir: impl AsRef<Path>) -> Result<Store> {
/// In a transaction, either all writes succeed, or the transaction is aborted and the changes are not
/// recorded. Changes made inside a transaction can be read from within that transaction before they are
/// committed.
///
/// If the closure passed to `run` returns an error, the transaction is rolled back, and otherwise the
/// changes are committed.
/// Run a [transaction][Transaction].
///
/// In a transaction, either all writes succeed, or the transaction is aborted and the changes are not
/// recorded. Changes made inside a transaction can be read from within that transaction before they are
/// committed.
///
/// If the closure passed to `run` returns an error, the transaction is rolled back, and otherwise the
/// changes are committed.
///
/// # Errors
///
/// Propagates the closure's error unchanged; a backend failure while
/// committing or rolling back is surfaced as [`StoreError::Internal`].
pub fn run<T, E>(&self, f: impl FnOnce(&Transaction<'_>) -> Result<T, E>) -> Result<T, E>
where
    E: From<StoreError>,
{
    let tx = Transaction {
        inner: self.inner.transaction(),
        // `self` is already a `&Store`; `&self` would borrow a second time.
        store: self,
    };
    let result = f(&tx);
    // Roll back on error, commit on success — either backend call may itself fail.
    let finish = if result.is_err() {
        tx.inner.rollback()
    } else {
        tx.inner.commit()
    };
    if let Err(e) = finish {
        return Err(E::from(StoreError::Internal(e)));
    }
    result
}
/// Apply a batch of changes atomically.
///
/// Hands the buffered write batch to the backend in a single write call.
pub fn apply(&self, batch: Batch) -> Result<()> {
    let writes = batch.inner.into_inner();
    self.inner.write(writes)?;
    OK
}
/// Construct a [`Batch`].
///
/// The batch starts out empty; queue writes on it and then commit them all at
/// once with [`Store::apply`].
pub fn batch(&self) -> Batch {
    Batch {
        inner: RefCell::new(WriteBatchWithTransaction::default()),
        store: self.clone(),
    }
}
/// Open the data store in `state_dir`, and create one if it doesn't exist yet.
pub fn open(state_dir: impl AsRef<Path>, schema: Schema) -> Result<Store> {
let mut db_opts = Options::default();
db_opts.create_if_missing(true);
db_opts.create_missing_column_families(true);
@ -81,64 +116,31 @@ impl Store {
&db_opts,
&tx_opts,
state_dir.as_ref().join(STORE_NAME),
SPACES,
schema.0,
)?);
Ok(Store { inner })
}
/// Construct a temporary store, for testing. This store gets erased after `f` is done.
pub fn with_tmp<T, E>(f: impl FnOnce(Store) -> Result<T, E>) -> Result<T, E>
where
E: From<Error>,
{
let tmp_dir = tempfile::tempdir().expect("couldn't create tempdir");
f(Store::open(tmp_dir)?)
}
/// Delete the whole store.
///
/// **This deletes all data in the store**. Do not run this unless you want to delete all the state of the instance.
/// Delete the main data store in `state_dir` if it exists.
pub fn nuke(state_dir: impl AsRef<Path>) -> Result<()> {
Backend::destroy(&Options::default(), state_dir.as_ref().join(STORE_NAME))
.map_err(Error::from)
.map_err(StoreError::Internal)
}
/// Get the value of mixin `M` for `key`.
pub fn lookup<M>(&self, key: impl Keylike) -> Result<(Key, M)>
where
M: Mixin,
{
self.transaction(|tx| tx.lookup(key))
}
/// Get the key associated with a given [alias][Alias].
pub fn translate<A>(&self, s: impl ToString) -> Result<Key>
where
A: Alias,
{
self.transaction(|tx| tx.lookup_alias(A::from(s.to_string())))
}
/// Quickly test whether a particular [arrow][Arrow] exists.
pub fn exists<A>(&self, arrow: (Key, Key)) -> Result<bool>
where
A: Arrow,
{
self.transaction(|tx| tx.exists::<A>(arrow))
/// Open a store that lives until `f` returns, for testing.
///
/// The store is backed by a fresh temporary directory, which is deleted again
/// after `f` finishes.
pub fn test<T>(schema: Schema, f: impl FnOnce(Store) -> T) -> T {
    let tmp_dir = tempfile::tempdir().expect("couldn't create tempdir");
    // Pass only the *path* so the `TempDir` guard stays alive (and the directory
    // stays on disk) until `f` is done; moving `tmp_dir` into `open` would drop
    // it — deleting the directory — as soon as `open` returned. Also note that
    // `expect` does not interpolate `{…}` placeholders, so the failure message
    // is built with `panic!` instead.
    let store = Store::open(tmp_dir.path(), schema).unwrap_or_else(|e| {
        panic!(
            "failed to open temporary data store in {:?}: {e}",
            tmp_dir.path()
        )
    });
    f(store)
}
}
/// An isolated keyspace.
///
/// Wraps the static name of a backend column family.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Space(&'static str);

impl AsRef<str> for Space {
    /// Borrow the keyspace name.
    fn as_ref(&self) -> &str {
        // The wrapped value is already a `&'static str`; `&self.0` would create
        // a needless `&&str` (clippy: `needless_borrow`).
        self.0
    }
}
/// A shorthand for committing a [`Transaction`] (because I think `Ok(())` is ugly).
pub const OK: Result<()> = Ok(());
/// Results from this component.
pub type Result<T, E = Error> = std::result::Result<T, E>;
pub type Result<T, E = StoreError> = std::result::Result<T, E>;
/// Errors from the data store.
#[derive(From, Debug)]
pub enum Error {
#[derive(From, Display, Debug)]
pub enum StoreError {
/// The requested value was expected to exist in a particular keyspace, but does not actually
/// exist there. This can occur on updates for example.
Missing,
@ -149,8 +151,12 @@ pub enum Error {
/// Returned if there is a conflict; for example, if the uniqueness property of an alias would
/// be violated by inserting one.
Conflict,
/// A node key couldn't be decoded.
BadKey(ulid::DecodeError),
/// Signals a failure related to the data store's backend.
Internal(rocksdb::Error),
Encoding(bincode::error::EncodeError),
Decoding(bincode::error::DecodeError),
}
/// The concrete rocksdb database type backing the store: a multi-threaded
/// transactional database.
type Backend = rocksdb::TransactionDB<rocksdb::MultiThreaded>;

View file

@ -1,35 +1,328 @@
//! Modules of information.
use std::ops::RangeBounds;
use bincode::{Decode, Encode};
use chrono::{DateTime, Utc};
use crate::Space;
use super::{
types::{DataType, MixinSpec},
Batch, Store, Transaction,
};
use crate::{util::IterExt as _, Key, Result, StoreError};
/// A simple piece of data associated with a vertex.
pub trait Mixin: Encode + Decode {
const SPACE: Space;
/// Mixins are the simplest pieces of data in the store.
pub trait Mixin: DataType<Type = MixinSpec> + Encode + Decode {}
/// Derive a [`Mixin`] implementation.
///
/// In addition to deriving `Mixin`, you will need to derive or implement [`Encode`]
/// and [`Decode`].
pub use r#macro::Mixin;
impl Store {
    /// Get the mixin of type `M` associated with `node`, or `None` if the node
    /// has no such record.
    pub fn get_mixin<M>(&self, node: Key) -> Result<Option<M>>
    where
        M: Mixin,
    {
        op::get_mixin(self, node)
    }
    /// Check if `node` has a mixin `M`.
    pub fn has_mixin<M>(&self, node: Key) -> Result<bool>
    where
        M: Mixin,
    {
        op::has_mixin::<M>(self, node)
    }
    /// Get all `M`s where the key's timestamp is within the `range`.
    ///
    /// Keys embed their creation time (they are ULIDs), which is what makes a
    /// time-bounded range scan possible.
    pub fn range<M>(
        &self,
        range: impl RangeBounds<DateTime<Utc>>,
    ) -> impl Iterator<Item = Result<(Key, M)>> + '_
    where
        M: Mixin,
    {
        op::get_range(self, range)
    }
    /// Think "LEFT JOIN". In goes an iterator over keys, out come all the associated results.
    ///
    /// Keys without an `M` appear in the output paired with `None`.
    pub fn join_on<M>(
        &self,
        iter: impl IntoIterator<Item = Result<Key>>,
    ) -> Result<Vec<(Key, Option<M>)>>
    where
        M: Mixin,
    {
        op::join_on(self, iter)
    }
    /// Get multiple mixins associated with the same key.
    ///
    /// `T` is a tuple of mixin types; see [`GetMany`].
    pub fn get_mixin_many<T>(&self, key: Key) -> Result<T>
    where
        T: GetMany,
    {
        T::get(self, key)
    }
}
/// Information needed to render a social media profile.
#[derive(Encode, Decode)]
pub struct Profile {
pub post_count: usize,
pub account_name: String,
pub display_name: Option<String>,
pub about_string: Option<String>,
pub about_fields: Vec<(String, String)>,
impl Transaction<'_> {
    /// Apply an update function to the mixin `M` of `node`.
    ///
    /// # Errors
    ///
    /// - [`StoreError::Missing`]: if `node` does not have a mixin of this type.
    pub fn update<M>(&self, node: Key, update: impl FnOnce(M) -> M) -> Result<()>
    where
        M: Mixin,
    {
        op::update(self, node, update)
    }
    /// Get the mixin of the specified type associated with `node`.
    pub fn get_mixin<M>(&self, node: Key) -> Result<Option<M>>
    where
        M: Mixin,
    {
        op::get_mixin(self, node)
    }
    /// Add a mixin to `node`.
    ///
    /// # Errors
    ///
    /// - [`StoreError::Conflict`]: if `node` already has a mixin of type `M`.
    pub fn add_mixin<M>(&self, node: Key, mixin: M) -> Result<()>
    where
        M: Mixin,
    {
        // Read-before-write gives "insert if absent" semantics within this transaction.
        if op::has_mixin::<M>(self, node)? {
            return Err(StoreError::Conflict);
        } else {
            op::add_mixin::<M>(self, node, mixin)
        }
    }
    /// Check whether `node` has an `M` defined for it.
    pub fn has_mixin<M>(&self, node: Key) -> Result<bool>
    where
        M: Mixin,
    {
        op::has_mixin::<M>(self, node)
    }
    /// Get all `M`s where the key's timestamp is within the `range`.
    pub fn range<M>(
        &self,
        range: impl RangeBounds<DateTime<Utc>>,
    ) -> impl Iterator<Item = Result<(Key, M)>> + '_
    where
        M: Mixin,
    {
        op::get_range(self, range)
    }
    /// Think "LEFT JOIN". In goes an iterator over keys, out come all the associated results.
    ///
    /// `f` projects each item of `iter` onto the key to join on.
    pub fn join_on<M, T>(
        &self,
        f: impl Fn(T) -> Key,
        iter: impl IntoIterator<Item = Result<T>>,
    ) -> Result<Vec<(Key, Option<M>)>>
    where
        M: Mixin,
    {
        op::join_on(self, iter.into_iter().map_ok(f))
    }
    /// Get multiple mixins associated with the same key.
    ///
    /// `T` is a tuple of mixin types; see [`GetMany`].
    pub fn get_mixin_many<T>(&self, key: Key) -> Result<T>
    where
        T: GetMany,
    {
        T::get(self, key)
    }
}
impl Mixin for Profile {
const SPACE: Space = Space("profile");
impl Batch {
/// Add a mixin to the `node`.
///
/// **Note**: unlike [`Transaction::add_mixin`], this will *not* return an error if the key already has a mixin
/// of this type. This *should* not cause inconsistency.
pub fn put_mixin<M>(&mut self, node: Key, mixin: M)
where
    M: Mixin,
{
    // NOTE(review): in a batch the only failure path of `op::add_mixin` is a
    // bincode encoding error, which this `unwrap` turns into a panic — confirm
    // that encoding a `Mixin` is considered infallible.
    op::add_mixin(self, node, mixin).unwrap()
}
}
/// Contents of a post.
#[derive(Encode, Decode)]
pub struct Content {
pub content: Option<String>,
pub summary: Option<String>,
/// Getting tuples of stuff.
///
/// Implemented for tuples of [`Mixin`] types, so several mixins of the same key
/// can be fetched through a single multi-get against the backend.
pub trait GetMany: Sized {
    #[doc(hidden)]
    fn get(cx: &impl crate::internal::Query, key: Key) -> Result<Self>;
}
impl Mixin for Content {
const SPACE: Space = Space("content");
mod op {
use std::ops::{Bound, RangeBounds};
use chrono::{DateTime, TimeDelta, Utc};
use either::Either;
use super::Mixin;
use crate::{internal::*, util::IterExt as _, Key, Result, StoreError};
impl<A, B> super::GetMany for (A, B)
where
    A: Mixin,
    B: Mixin,
{
    /// Fetch both mixins of `key` with one multi-get.
    ///
    /// # Errors
    ///
    /// Returns [`StoreError::Missing`] if `key` lacks either mixin.
    fn get(cx: &impl Query, key: Key) -> Result<Self> {
        let ksps = [cx.open(A::SPEC.keyspace).cf, cx.open(B::SPEC.keyspace).cf];
        let mut vec = cx.multi_get(ksps.iter().map(|c| (&*c, key)));
        // Results come back in query order, so pop from the back. A `None` slot
        // means the key has no mixin of that type: report `Missing` instead of
        // panicking via `unwrap`.
        let b = vec.pop().ok_or(StoreError::Missing)??.ok_or(StoreError::Missing)?;
        let a = vec.pop().ok_or(StoreError::Missing)??.ok_or(StoreError::Missing)?;
        Ok((decode(a)?, decode(b)?))
    }
}
impl<A, B, C> super::GetMany for (A, B, C)
where
    A: Mixin,
    B: Mixin,
    C: Mixin,
{
    /// Fetch all three mixins of `key` with one multi-get.
    ///
    /// # Errors
    ///
    /// Returns [`StoreError::Missing`] if `key` lacks any of the mixins.
    fn get(cx: &impl Query, key: Key) -> Result<Self> {
        let ksps = [
            cx.open(A::SPEC.keyspace).cf,
            cx.open(B::SPEC.keyspace).cf,
            cx.open(C::SPEC.keyspace).cf,
        ];
        let mut vec = cx.multi_get(ksps.iter().map(|c| (&*c, key)));
        // Pop in reverse query order; a `None` slot means the mixin is absent,
        // which is a `Missing` error rather than a panic.
        let c = vec.pop().ok_or(StoreError::Missing)??.ok_or(StoreError::Missing)?;
        let b = vec.pop().ok_or(StoreError::Missing)??.ok_or(StoreError::Missing)?;
        let a = vec.pop().ok_or(StoreError::Missing)??.ok_or(StoreError::Missing)?;
        Ok((decode(a)?, decode(b)?, decode(c)?))
    }
}
impl<A, B, C, D> super::GetMany for (A, B, C, D)
where
    A: Mixin,
    B: Mixin,
    C: Mixin,
    D: Mixin,
{
    /// Fetch all four mixins of `key` with one multi-get.
    ///
    /// # Errors
    ///
    /// Returns [`StoreError::Missing`] if `key` lacks any of the mixins.
    fn get(cx: &impl Query, key: Key) -> Result<Self> {
        let ksps = [
            cx.open(A::SPEC.keyspace).cf,
            cx.open(B::SPEC.keyspace).cf,
            cx.open(C::SPEC.keyspace).cf,
            cx.open(D::SPEC.keyspace).cf,
        ];
        let mut vec = cx.multi_get(ksps.iter().map(|c| (&*c, key)));
        // Pop in reverse query order; a `None` slot means the mixin is absent,
        // which is a `Missing` error rather than a panic.
        let d = vec.pop().ok_or(StoreError::Missing)??.ok_or(StoreError::Missing)?;
        let c = vec.pop().ok_or(StoreError::Missing)??.ok_or(StoreError::Missing)?;
        let b = vec.pop().ok_or(StoreError::Missing)??.ok_or(StoreError::Missing)?;
        let a = vec.pop().ok_or(StoreError::Missing)??.ok_or(StoreError::Missing)?;
        Ok((decode(a)?, decode(b)?, decode(c)?, decode(d)?))
    }
}
/// Read-modify-write the mixin `M` of `node`.
///
/// # Errors
///
/// Returns [`StoreError::Missing`] if `node` has no `M` record.
pub fn update<M>(
    cx: &(impl Query + Write),
    node: Key,
    update: impl FnOnce(M) -> M,
) -> Result<()>
where
    M: Mixin,
{
    // TODO: implement in terms of a merge operator instead of separate query and write ops.
    // this would let us remove the `Query` bound, which would in turn let us update from within
    // a batch.
    //
    // See https://github.com/facebook/rocksdb/wiki/Merge-Operator
    //
    // It looks like rocksdb allows you to specify a merge operator per column family.[^1]
    // This means we can construct our column families with a merge operator that knows how to encode and decode mixins.
    //
    // [^1]: https://github.com/facebook/rocksdb/blob/9d37408f9af15c7a1ae42f9b94d06b27d98a011a/include/rocksdb/options.h#L128
    let tree = cx.open(M::SPEC.keyspace);
    match tree.get(node.as_ref())? {
        None => Err(StoreError::Missing),
        Some(buf) => {
            // Decode the current value, apply the caller's update, re-encode, overwrite.
            let new = decode(buf).map(update).and_then(encode)?;
            tree.set(node, new)
        }
    }
}
pub fn get_mixin<M: Mixin>(cx: &impl Query, node: Key) -> Result<Option<M>> {
cx.open(M::SPEC.keyspace).get(node)?.map(decode).transpose()
}
/// Encode `mixin` and store it under `node` in `M`'s keyspace.
pub fn add_mixin<M: Mixin>(cx: &impl Write, node: Key, mixin: M) -> Result<()> {
    let buf = encode(mixin)?;
    cx.open(M::SPEC.keyspace).set(node, buf)
}
/// Test whether `node` has an `M` record, without decoding it.
pub fn has_mixin<M: Mixin>(cx: &impl Query, node: Key) -> Result<bool> {
    let tree = cx.open(M::SPEC.keyspace);
    tree.has(node)
}
/// Iterate all `(Key, M)` pairs whose key timestamp lies within `range`.
///
/// A fully unbounded range degrades to a plain full scan of the keyspace.
pub fn get_range<M: Mixin>(
    cx: &impl Query,
    range: impl RangeBounds<DateTime<Utc>>,
) -> impl Iterator<Item = Result<(Key, M)>> + '_ {
    // TODO: Test this thoroughly
    // Key timestamps have millisecond resolution, so an *excluded* bound is
    // converted to an *included* one by stepping a single millisecond inward.
    const MS: TimeDelta = TimeDelta::milliseconds(1);
    let iter = match (range.start_bound(), range.end_bound()) {
        (Bound::Unbounded, Bound::Unbounded) => Either::Left(cx.open(M::SPEC.keyspace).list()),
        (min, max) => {
            let lower = match min {
                Bound::Unbounded => [u8::MIN; 16],
                Bound::Included(inc) => Key::range(*inc).0,
                Bound::Excluded(exc) => Key::range(*exc + MS).0,
            };
            let upper = match max {
                Bound::Unbounded => [u8::MAX; 16],
                Bound::Included(inc) => Key::range(*inc).1,
                Bound::Excluded(exc) => Key::range(*exc - MS).1,
            };
            Either::Right(cx.open(M::SPEC.keyspace).range(lower, upper))
        }
    };
    iter.bind_results(|(k, v)| {
        let key = Key::from_slice(k.as_ref());
        let val = decode(v)?;
        Ok((key, val))
    })
}
/// Think "LEFT JOIN": for every key yielded by `iter`, fetch the `M` belonging
/// to it, pairing keys without one with `None`.
///
/// Short-circuits on the first error produced by `iter` itself.
pub fn join_on<M>(
    cx: &impl Query,
    iter: impl IntoIterator<Item = Result<Key>>,
) -> Result<Vec<(Key, Option<M>)>>
where
    M: Mixin,
{
    // Materialize the keys first; the multi-get needs to borrow them all at once.
    let keys: Vec<Key> = iter.into_iter().try_collect()?;
    cx.open(M::SPEC.keyspace)
        .join(keys.iter())
        .into_iter()
        .zip(keys)
        .map(|(opt, key)| {
            let Some(buf) = opt? else {
                return Ok((key, None));
            };
            let val = decode(buf)?;
            Ok((key, Some(val)))
        })
        .try_collect()
}
/// Serialize `data` using bincode's standard configuration.
pub(super) fn encode(data: impl bincode::Encode) -> Result<Vec<u8>> {
    bincode::encode_to_vec(data, bincode::config::standard()).map_err(StoreError::Encoding)
}
/// Deserialize a `T` from `data` using bincode's standard configuration.
///
/// The number of bytes consumed is ignored; only the decoded value is returned.
pub(super) fn decode<T>(data: impl AsRef<[u8]>) -> Result<T>
where
    T: bincode::Decode,
{
    let (value, _bytes_read) =
        bincode::decode_from_slice(data.as_ref(), bincode::config::standard())
            .map_err(StoreError::Decoding)?;
    Ok(value)
}
}

View file

@ -1,432 +0,0 @@
use std::{collections::HashMap, sync::Arc};
use bincode::{Decode, Encode};
use rocksdb::{BoundColumnFamily, IteratorMode};
use crate::{
arrow::Direction, key::Tag, util::IterExt as _, Alias, Arrow, Backend, Error, Key, Keylike,
Mixin, Result, Store, OK, SPACES,
};
impl Store {
/// Initiate a transaction.
///
/// If the result is an error, the transaction is rolled back, and otherwise the transaction
/// is committed.
pub fn transaction<T>(&self, f: impl FnOnce(&Transaction<'_>) -> Result<T>) -> Result<T> {
// Load all the column family handles, because they can't be accessed through the
// `rocksdb::Transaction` struct, only the `TransactionDB`.
let spaces = SPACES
.into_iter()
.map(|name| (*name, self.inner.cf_handle(name).unwrap()))
.collect();
let tx = Transaction {
inner: self.inner.transaction(),
spaces,
};
let result = f(&tx);
if result.is_err() {
tx.inner.rollback()?;
} else {
tx.inner.commit()?;
}
result
}
/// Check whether a key exists in the registry,
pub fn is_registered(&self, key: Key) -> Result<bool> {
let cf = self
.inner
.cf_handle("registry")
.expect("failed to open registry");
self.inner
.get_pinned_cf(&cf, key)
.map(|opt| opt.is_some())
.map_err(Error::Internal)
}
}
/// A database transaction, in which either each action succeeds, or everything fails
/// together.
///
/// The transaction struct is the interface for quering and manipulating persisted content.
pub struct Transaction<'db> {
inner: rocksdb::Transaction<'db, Backend>,
spaces: HashMap<&'static str, Arc<BoundColumnFamily<'db>>>,
}
/// Methods for manipulating the registry.
///
/// Before you can manipulate a vertex, its needs to be registered.
impl Transaction<'_> {
/// Register a new vertex.
pub fn create_vertex(&self, key: Key, tag: Tag) -> Result<()> {
self.with("registry").set(key, [tag.0])
}
/// Delete a vertex from the registry.
pub fn delete_vertex(&self, key: Key) -> Result<()> {
// TODO: also make this delete all related data?
self.with("registry").del(key)
}
/// Check whether a vertex is registered in the database.
pub fn is_registered(&self, key: Key) -> Result<bool> {
self.with("registry").has(key)
}
}
/// Methods for manipulating mixins.
///
/// For each implementor of [`Mixin`], a vertex can have at most one record of that type
/// associated with it.
impl Transaction<'_> {
/// Query the store for a value associated with the vertex `key` identifies.
///
/// Using a [`Key`] is more efficient than using an alias.
pub fn lookup<M>(&self, key: impl Keylike) -> Result<(Key, M)>
where
M: Mixin,
{
// Checked translate isn't needed, we'll complain if we can't find the data.
let canonicalized_key = key.translate(&self)?;
let raw = self.with(M::SPACE).get(canonicalized_key)?;
let value = decode(raw.as_ref())?;
Ok((canonicalized_key, value))
}
/// Associate a new mixin value with the key.
///
/// # Errors
///
/// - `Error::Conflict` if a mixin of this type is already associated with the vertex
/// - `Error::Undefined` if `key` is not in the registry.
pub fn insert<M>(&self, key: impl Keylike, data: M) -> Result<()>
where
M: Mixin,
{
let key = key.checked_translate(&self)?;
let data = encode(data)?;
let ns = self.with(M::SPACE);
// Check for conflicts. Fail if the key already exists, otherwise set the key
// to the given value.
if ns.has(key)? {
Err(Error::Conflict)
} else {
ns.set(key, data)
}
}
/// Apply an update function to the mixin identified by the key.
///
/// # Errors
///
/// - `Error::Undefined` if the `key` is not registered
/// - `Error::Missing` if `key` does not exist in the keyspace associated with `M`
pub fn update<M>(&self, key: impl Keylike, f: impl FnOnce(Key, M) -> Result<M>) -> Result<()>
where
M: Mixin,
{
let key = key.checked_translate(self)?;
let (key, old) = self.lookup::<M>(key)?;
let new = f(key, old).and_then(encode)?;
self.with(M::SPACE).set(key, new)
}
/// Remove the mixin from the vertex `key` refers to.
///
/// Doesn't complain if the value does not exist in the expected keyspace.
pub fn remove<M>(&self, key: impl Keylike) -> Result<Option<M>>
where
M: Mixin,
{
// Checked translate isn't needed because we don't care if the key is bogus.
let canonical_key = key.translate(self)?;
let ns = self.with(M::SPACE);
match ns.pop(canonical_key) {
Ok(Some(val)) => decode(&val).map(Some),
Ok(None) => Ok(None),
Err(err) => Err(err),
}
}
/// List all key-value pairs for mixins of type `M`.
pub fn list<M>(&self) -> impl Iterator<Item = Result<(Key, M)>> + '_
where
M: Mixin,
{
self.with(M::SPACE).list().bind_results(|(k, v)| {
let v = decode(v.as_ref())?;
let k = Key::from_slice(k.as_ref());
Ok((k, v))
})
}
}
/// Methods for interacting with [aliases][Alias], which are unique alternate keys.
impl Transaction<'_> {
/// Look up the key that the given alias maps to.
///
/// If the key was deleted, but the alias wasn't properly cleaned up,
pub fn lookup_alias<A>(&self, alias: A) -> Result<Key>
where
A: Alias,
{
let (l, _) = A::SPACE;
let raw = self.with(l).get(alias.to_string())?;
Ok(Key::from_slice(raw.as_ref()))
}
/// Create a new alias of type `A` for the given [`Key`].
///
/// If the alias already exists, this function returns `Conflict`.
pub fn insert_alias<A>(&self, key: Key, alias: A) -> Result<()>
where
A: Alias,
{
let (l, r) = A::SPACE;
let alias = alias.to_string();
if self.with(l).has(&alias)? {
return Err(Error::Conflict);
}
self.with(l).set(&alias, key)?;
self.with(r).set(key, &alias)?;
OK
}
/// Delete the alias of type `A` that points to `key`.
pub fn remove_alias<A>(&self, key: Key) -> Result<()>
where
A: Alias,
{
let (l, r) = A::SPACE;
// First, pop the reverse mapping, which will give us the encoded
// key for the normal mapping. If it doesn't exist, don't delete
// the normal mapping.
if let Some(alias) = self.with(r).pop(key)? {
self.with(l).pop(alias)?;
}
OK
}
}
impl Transaction<'_> {
/// Find an arrow of type `A` with the given `tail` and `head`.
pub fn lookup_arrow<A>(&self, (tail, head): (Key, Key)) -> Result<Option<A>>
where
A: Arrow,
{
let (l, _) = A::SPACE;
match self.with(l).get(tail.fuse(head)) {
Ok(raw) => decode(raw.as_ref()).map(Some),
Err(Error::Missing) => Ok(None),
Err(err) => Err(err),
}
}
/// Create a new arrow of type `A` and associate the label with it.
///
/// # Errors
///
/// - `Error::Undefined` if either key is not registered
pub fn insert_arrow<A>(&self, (tail, head): (Key, Key), label: A) -> Result<()>
where
A: Arrow,
{
if !self.is_registered(tail)? {
return Err(Error::Undefined { key: tail });
}
if !self.is_registered(head)? {
return Err(Error::Undefined { key: head });
}
let (l, r) = A::SPACE;
let label = encode(label)?;
self.with(l).set(tail.fuse(head), &label)?;
self.with(r).set(head.fuse(tail), &label)?;
OK
}
/// Delete an arrow from the data store.
pub fn remove_arrow<A>(&self, (tail, head): (Key, Key)) -> Result<()>
where
A: Arrow,
{
self.with(A::SPACE.0).del(tail.fuse(head))?;
self.with(A::SPACE.1).del(head.fuse(tail))?;
OK
}
/// Check whether an arrow exists.
pub fn exists<A>(&self, (tail, head): (Key, Key)) -> Result<bool>
where
A: Arrow,
{
self.with(A::SPACE.0).has(tail.fuse(head))
}
/// Get all arrows of type `A` "pointing at" `key`.
pub fn list_incoming<A>(&self, key: impl Keylike) -> impl Iterator<Item = Result<(Key, A)>> + '_
where
A: Arrow,
{
self.list_arrows_where(Direction::Incoming, key)
}
/// Get all arrows of type `A` "pointing away from" `key`.
pub fn list_outgoing<A>(&self, key: impl Keylike) -> impl Iterator<Item = Result<(Key, A)>> + '_
where
A: Arrow,
{
self.list_arrows_where(Direction::Outgoing, key)
}
/// Get all arrows of type `A`.
pub fn list_arrows<A>(&self) -> impl Iterator<Item = Result<(Key, A, Key)>> + '_
where
A: Arrow,
{
self.with(A::SPACE.0).list().bind_results(|(k, v)| {
let (tail, head) = Key::split(k.as_ref());
decode(v.as_ref()).map(|label| (tail, label, head))
})
}
/// Select arrows with the given direction relative to the given key.
fn list_arrows_where<A>(
&self,
direction: Direction,
key: impl Keylike,
) -> impl Iterator<Item = Result<(Key, A)>> + '_
where
A: Arrow,
{
// Keys in space 0 are arranged with the tail at the start, and the ones in space 1
// are arranged with the head at the start. This allows us to efficiently prefix scan
// regardless of the direction, at the cost of increased space usage.
let space = match direction {
Direction::Outgoing => A::SPACE.0,
Direction::Incoming => A::SPACE.1,
};
let key = key.translate(&self).unwrap();
#[cfg(test)]
eprintln!("scanning {} using prefix {key}", space.0);
self.with(space).scan(key).bind_results(|(k, v)| {
// Because we're prefix scanning on the first half of the key, we only want to
// get the second here.
let (_this, other) = Key::split(k.as_ref());
#[cfg(test)]
eprintln!(" found {_this}:{other}");
decode(v.as_ref()).map(|label| (other, label))
})
}
pub(crate) fn quiver(&self, tag: Tag) -> Quiver<'_> {
Quiver { tag, tx: &self }
}
}
impl Transaction<'_> {
/// Use a keyspace.
fn with(&self, name: impl AsRef<str>) -> Keyspace<'_> {
Keyspace {
cf: self.spaces[name.as_ref()].clone(),
tx: &self,
}
}
}
/// Provides the basic API for a keyspace/column family.
struct Keyspace<'db> {
tx: &'db Transaction<'db>,
cf: Arc<BoundColumnFamily<'db>>,
}
impl<'db> Keyspace<'db> {
/// Retrieve a value from the database. Returns `Missing` if the key does not exist.
fn get(&self, key: impl AsRef<[u8]>) -> Result<impl AsRef<[u8]> + 'db> {
self.tx
.inner
.get_pinned_cf(&self.cf, key)
.map_err(Error::Internal)
.and_then(|opt| opt.ok_or(Error::Missing))
}
/// Set the value at `key` to `val`.
fn set(&self, key: impl AsRef<[u8]>, val: impl AsRef<[u8]>) -> Result<()> {
self.tx
.inner
.put_cf(&self.cf, key, val)
.map_err(Error::Internal)
}
/// Delete the key-value pair identified by `key`.
fn del(&self, key: impl AsRef<[u8]>) -> Result<()> {
self.tx.inner.delete_cf(&self.cf, &key)?;
OK
}
/// Remove the key and associated value from the keyspace, and return its previous value.
fn pop(&self, key: impl AsRef<[u8]>) -> Result<Option<Vec<u8>>> {
let old = self.tx.inner.get_for_update_cf(&self.cf, &key, true)?;
self.del(key)?;
Ok(old)
}
/// Check whether the key exists in the keyspace.
fn has(&self, key: impl AsRef<[u8]>) -> Result<bool> {
self.tx
.inner
.get_pinned_cf(&self.cf, key)
.map_err(Error::Internal)
.map(|opt| opt.is_some())
}
/// Execute a prefix scan.
fn scan(
&self,
prefix: impl AsRef<[u8]> + 'db,
) -> impl Iterator<Item = Result<(impl AsRef<[u8]> + 'static, impl AsRef<[u8]> + 'static)>> + 'db
{
let t = prefix.as_ref().to_vec();
self.tx
.inner
.prefix_iterator_cf(&self.cf, prefix.as_ref())
// The prefix iterator may "overshoot". This makes it stop when it reaches
// the end of the range that has the prefix.
.take_while(move |r| match r {
Ok((ref k, _)) => k.starts_with(&t),
_ => true,
})
.map_err(Error::Internal)
}
/// Show all items in the entire keyspace.
fn list(
&self,
) -> impl Iterator<Item = Result<(impl AsRef<[u8]> + 'static, impl AsRef<[u8]> + 'static)>> + 'db
{
self.tx
.inner
.full_iterator_cf(&self.cf, IteratorMode::Start)
.map_err(Error::Internal)
}
}
/// The quiver allows one to manipulate all parallel edges tagged with a particular type.
pub struct Quiver<'db> {
tx: &'db Transaction<'db>,
tag: Tag,
}
impl<'db> Quiver<'db> {
pub fn insert(&self, origin: Key, target: Key, identity: Key) -> Result<()> {
let fused = origin.fuse(target);
self.tx.with("multi:id-map").set(identity, fused)?;
let mut triple = [0; 48];
triple[..32].copy_from_slice(&fused);
triple[32..].copy_from_slice(identity.as_ref());
self.tx.with("multi:index/l").set(triple, b"")?;
triple[..32].rotate_left(16);
self.tx.with("multi:index/r").set(triple, b"")?;
OK
}
pub fn list_incoming(&self, target: Key) -> impl Iterator<Item = Result<(Key, Key)>> + 'db {
self.tx
.with("multi:index/r")
.scan(target)
.map_ok(|(k, _)| Key::split(&k.as_ref()[16..]))
}
}
fn encode(data: impl Encode) -> Result<Vec<u8>> {
bincode::encode_to_vec(data, bincode::config::standard()).map_err(Error::Encoding)
}
fn decode<T>(data: &[u8]) -> Result<T>
where
T: Decode,
{
bincode::decode_from_slice(data, bincode::config::standard())
.map_err(Error::Decoding)
.map(|(v, _)| v)
}
#[cfg(test)]
mod tests;

View file

@ -1,256 +0,0 @@
use super::*;
use crate::Space;
#[derive(Encode, Decode)]
struct TestArrow;
impl Arrow for TestArrow {
const SPACE: (Space, Space) = (Space("test-arrow/l"), Space("test-arrow/r"));
}
const TEST_TAG: Tag = Tag(69);
macro_rules! keygen {
{ $($name:ident)* } => {
$(
let $name = Key::gen();
eprintln!(concat!(stringify!($name), "={}"), $name);
)*
}
}
fn with_test_arrow(f: impl Fn(Key, Key, &Transaction<'_>, usize) -> Result<()>) -> Result<()> {
Store::with_tmp(|db| {
// Run these tests 128 times because misuse of prefix iterator may cause weird,
// obscure bugs :3
//
// Also, because we don't wipe the store between test runs, we have more chances
// to discover weird bugs that we wouldn't catch if there was only a single run.
Ok(for n in 0..128 {
eprintln!("--- run {n} ---");
db.transaction(|tx| {
keygen!(target origin);
tx.create_vertex(target, TEST_TAG)?;
tx.create_vertex(origin, TEST_TAG)?;
tx.insert_arrow((origin, target), TestArrow)?;
let l: Vec<String> = tx
.with("test-arrow/l")
.list()
.map_ok(|(k, _)| Key::split(k.as_ref()))
.map_ok(|(a, b)| format!("({a}, {b})"))
.try_collect()?;
eprintln!("test-arrow/l = {l:#?}");
let r: Vec<String> = tx
.with("test-arrow/r")
.list()
.map_ok(|(k, _)| Key::split(k.as_ref()))
.map_ok(|(a, b)| format!("({a}, {b})"))
.try_collect()?;
eprintln!("test-arrow/r = {r:#?}");
f(origin, target, &tx, n)
})?;
eprintln!("--- end run {n} ---");
})
})
}
#[test]
fn target_incoming() -> Result<()> {
with_test_arrow(|origin, target, tx, _| {
let ti: Vec<_> = tx.list_incoming::<TestArrow>(target).keys().try_collect()?;
eprintln!("target.incoming = {ti:#?}");
assert!(ti.contains(&origin), "origin ∈ target.incoming");
assert!(!ti.contains(&target), "target ∉ target.incoming");
OK
})
}
#[test]
fn target_outgoing() -> Result<()> {
with_test_arrow(|origin, target, tx, _| {
let to: Vec<_> = tx.list_outgoing::<TestArrow>(target).keys().try_collect()?;
eprintln!("target.outgoing = {to:#?}");
assert!(!to.contains(&target), "target ∉ target.outgoing");
assert!(!to.contains(&origin), "origin ∉ target.outgoing");
OK
})
}
#[test]
fn origin_incoming() -> Result<()> {
with_test_arrow(|origin, target, tx, _| {
let oi: Vec<_> = tx.list_incoming::<TestArrow>(origin).keys().try_collect()?;
eprintln!("origin.incoming = {oi:#?}");
assert!(!oi.contains(&origin), "origin ∉ origin.incoming");
assert!(!oi.contains(&target), "target ∉ origin.incoming");
OK
})
}
#[test]
fn origin_outgoing() -> Result<()> {
    with_test_arrow(|origin, target, tx, _| {
        // The single inserted arrow points origin -> target, and nothing else.
        let oo: Vec<_> = tx
            .list_outgoing::<TestArrow>(origin)
            .keys()
            .try_collect()?;
        eprintln!("origin.outgoing = {oo:#?}");
        assert!(oo.contains(&target), "target ∈ origin.outgoing");
        assert!(!oo.contains(&origin), "origin ∉ origin.outgoing");
        OK
    })
}
// One origin fanning out to 128 distinct targets.
#[test]
fn fanout() -> Result<()> {
    let targets: [Key; 128] = std::array::from_fn(|_| Key::gen());
    let origin = Key::gen();
    Store::with_tmp(|db| {
        db.transaction(|tx| {
            tx.create_vertex(origin, TEST_TAG)?;
            for &target in &targets {
                tx.create_vertex(target, TEST_TAG)?;
                tx.insert_arrow((origin, target), TestArrow)?;
            }
            let outgoing: Vec<_> = tx.list_outgoing::<TestArrow>(origin).keys().try_collect()?;
            for &target in &targets {
                // Every target is reachable from the origin …
                assert!(
                    outgoing.contains(&target),
                    "∀ t ∈ targets: t ∈ origin.outgoing"
                );
                // … and each target sees exactly the origin as incoming.
                let incoming: Vec<_> =
                    tx.list_incoming::<TestArrow>(target).keys().try_collect()?;
                assert!(
                    incoming == vec![origin],
                    "∀ t ∈ targets: t.incoming = {{origin}}"
                );
            }
            OK
        })
    })
}
// 128 distinct origins all pointing at a single target.
#[test]
fn fanin() -> Result<()> {
    let origins: [Key; 128] = std::array::from_fn(|_| Key::gen());
    let target = Key::gen();
    Store::with_tmp(|db| {
        db.transaction(|tx| {
            tx.create_vertex(target, TEST_TAG)?;
            for &origin in &origins {
                tx.create_vertex(origin, TEST_TAG)?;
                tx.insert_arrow((origin, target), TestArrow)?;
            }
            let incoming: Vec<_> = tx.list_incoming::<TestArrow>(target).keys().try_collect()?;
            for &origin in &origins {
                let outgoing: Vec<_> =
                    tx.list_outgoing::<TestArrow>(origin).keys().try_collect()?;
                // The target sees every origin …
                assert!(incoming.contains(&origin), "∀ o ∈ origins: o ∈ target.incoming");
                // … and each origin points at exactly the target.
                assert!(
                    outgoing == vec![target],
                    "∀ o ∈ origins: o.outgoing = {{target}}"
                );
            }
            OK
        })
    })
}
// Builds a complete bipartite graph (32 origins × 32 targets) and checks that
// the incoming and outgoing indices agree with the inserted arrow set in both
// directions, and that no arrows leak into the wrong index.
#[test]
fn distinct_many_to_many() -> Result<()> {
    let origins: [Key; 32] = std::array::from_fn(|_| Key::gen());
    let targets: [Key; 32] = std::array::from_fn(|_| Key::gen());
    Store::with_tmp(|db| {
        db.transaction(|tx| {
            // Register every endpoint, then insert an arrow from each origin
            // to each target.
            for t in targets {
                tx.create_vertex(t, TEST_TAG)?;
            }
            for o in origins {
                tx.create_vertex(o, TEST_TAG)?;
                for t in targets {
                    tx.insert_arrow((o, t), TestArrow)?;
                }
            }
            // Map each target to the keys listed as its incoming edges.
            let ti: HashMap<Key, Vec<Key>> = targets
                .into_iter()
                .map(|t| {
                    tx.list_incoming::<TestArrow>(t)
                        .keys()
                        .try_collect()
                        .map(|v: Vec<_>| (t, v))
                })
                .collect::<Result<_>>()?;
            // For each origin point, there must be a target that has it as "incoming".
            assert!(
                origins
                    .into_iter()
                    .all(|o| { targets.into_iter().any(|t| { ti[&t].contains(&o) }) }),
                "∀ o ∈ origins: ∃ t ∈ targets: o ∈ t.incoming"
            );
            // Each target has each origin as incoming.
            assert!(
                origins
                    .into_iter()
                    .all(|o| { targets.into_iter().all(|t| { ti[&t].contains(&o) }) }),
                "∀ o ∈ origins: ∀ t ∈ targets: o ∈ t.incoming"
            );
            // Map each target to its *outgoing* edges, which must all be empty.
            let to: HashMap<Key, Vec<Key>> = targets
                .into_iter()
                .map(|t| {
                    tx.list_outgoing::<TestArrow>(t)
                        .keys()
                        .try_collect()
                        .map(|v: Vec<_>| (t, v))
                })
                .collect::<Result<_>>()?;
            // Our arrows point only from origins to targets, and there's a bug if there
            // exists a target such that its outgoing set is non-empty.
            assert!(
                !targets.into_iter().any(|t| !to[&t].is_empty()),
                "∄ t ∈ targets: t.outgoing ≠ ∅"
            );
            // Map each origin to its outgoing edges.
            let oo: HashMap<Key, Vec<Key>> = origins
                .into_iter()
                .map(|o| {
                    tx.list_outgoing::<TestArrow>(o)
                        .keys()
                        .try_collect()
                        .map(|v: Vec<_>| (o, v))
                })
                .collect::<Result<_>>()?;
            // Each origin has each target as outgoing.
            assert!(
                origins
                    .into_iter()
                    .all(|o| targets.into_iter().all(|t| oo[&o].contains(&t))),
                "∀ o ∈ origins: ∀ t ∈ targets: t ∈ o.outgoing"
            );
            OK
        })
    })
}

178
lib/store/src/types.rs Normal file
View file

@ -0,0 +1,178 @@
//! Defining a [`Schema`].
//!
//! There is a lot of complicated machinery here to make it so that you have to write very little code to
//! define new types. Basically, if you want to define a thing to store, you need to implement the trait
//! for it (e.g. [`Arrow`]), and also implement [`DataType`], where you create a specification describing which
//! namespaces store records of that type.
//!
//! Then, when you construct a new `Store`, you need to pass in a [`Schema`], or the database won't be able
//! to operate on the types.
//!
//! [`Arrow`]: super::Arrow
use std::collections::HashSet;
use derive_more::Display;
/// The namespace where all vertices must be registered.
///
/// Pre-registered in every [`Schema`] by [`Schema::new`].
pub(crate) const NODE_HEADERS: Keyspace = Keyspace("header:node");
/// The namespace where multiedge identities are mapped to endpoints.
///
/// Pre-registered in every [`Schema`] by [`Schema::new`].
pub(crate) const MULTIEDGE_HEADERS: Keyspace = Keyspace("header:multiedge");
/// A specification of all user-defined namespaces.
///
/// Constructed with [`Schema::new`] (which pre-registers the built-in header namespaces) and
/// extended with [`Schema::has`] or [`Schema::add`].
///
/// # Example
///
/// The below example correctly defines a [basic arrow] and demonstrates its use by inserting one and then
/// testing whether it exists. If the appropriate keyspaces are not known to the store, this will panic.
///
/// ```rust
/// use store::{ arrow::Arrow, types::Schema, Store, Key, OK };
///
/// // Each kind of value has a derive macro.
/// #[derive(Arrow)]
/// struct MyArrow { origin: Key, target: Key }
///
/// fn main() -> store::Result<()> {
///     // Here, we make sure that the namespaces used for `MyArrow` are known.
///     let schema = Schema::new()
///         .has::<MyArrow>();
///
///     let result = Store::test(schema, |db| {
///         let origin = Key::gen();
///         let target = Key::gen();
///
///         let mut changes = db.batch();
///         changes.create(MyArrow { origin, target });
///         db.apply(changes)?;
///
///         db.exists::<MyArrow>(origin, target)
///     })?;
///
///     assert!(result);
///     OK
/// }
/// ```
///
/// [basic arrow]: crate::arrow::Basic
pub struct Schema(pub(crate) HashSet<Keyspace>);
impl Schema {
/// Construct a new empty schema.
pub fn new() -> Schema {
Schema(HashSet::from_iter([NODE_HEADERS, MULTIEDGE_HEADERS]))
}
/// Add the component to the schema.
pub fn has<C>(mut self) -> Schema
where
C: DataType,
{
self.add(C::SPEC);
self
}
/// Add a spec to the schema by mutable reference.
pub fn add(&mut self, spec: impl TypeSpec) -> &mut Schema {
spec.register(&mut self.0);
self
}
}
/// The name of a keyspace.
///
/// Specifically, this is the name of a RocksDB column family.
///
/// Wraps a `&'static str`, so it is cheap to copy, compare and hash.
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Display)]
pub struct Keyspace(pub &'static str);
impl AsRef<str> for Keyspace {
    // Lets a `Keyspace` be handed directly to APIs that take column family
    // names as plain strings.
    fn as_ref(&self) -> &str {
        self.0
    }
}
/// A *thing* that is stored in the database, either an [arrow](ArrowSpec), an [alias](AliasSpec), or a
/// [mixin](MixinSpec).
///
/// All namespaces must be unique, and added to the [`Schema`].
pub trait DataType {
    /// The kind of specification describing this data type's storage layout:
    /// one of [`ArrowSpec`], [`AliasSpec`] or [`MixinSpec`].
    type Type: TypeSpec;
    /// The namespaces in which records of this type are stored; registered
    /// with a [`Schema`] via [`Schema::has`].
    const SPEC: Self::Type;
}
/// The specification for an [`Arrow`](crate::Arrow).
///
/// The listed namespaces must be unique among all other namespaces.
///
/// The two keyspaces hold the same edges under opposite key orderings, so an
/// arrow can be looked up by prefix from either endpoint.
#[derive(Clone, Copy)]
pub struct ArrowSpec {
    /// The keyspace where edge keys are ordered `(origin, target)`.
    pub by_origin: Keyspace,
    /// The keyspace where edge keys are ordered `(target, origin)`.
    pub by_target: Keyspace,
}
/// A specification for the namespaces needed to store an [`Alias`][crate::Alias].
///
/// Aliases are indexed in both directions, hence the two keyspaces.
#[derive(Clone, Copy)]
pub struct AliasSpec {
    /// The alias -> key mapping table.
    pub keyspace: Keyspace,
    /// The key -> alias mapping table.
    pub reversed: Keyspace,
}
/// A specification for the namespace needed to store a mixin.
///
/// Unlike arrows and aliases, a mixin needs only a single table.
#[derive(Clone, Copy)]
pub struct MixinSpec {
    /// The key -> mixin mapping table.
    pub keyspace: Keyspace,
}
/// Describes how to add a [`DataType`] to a [`Schema`].
pub trait TypeSpec {
    /// Register the namespaces.
    ///
    /// Implementations panic if one of their namespaces is already present in
    /// `set`, because every keyspace name must be unique across the schema.
    fn register(&self, set: &mut HashSet<Keyspace>);
}
// TODO: better error messages.
impl TypeSpec for ArrowSpec {
    /// Register both arrow keyspaces, panicking with a descriptive message if
    /// either name is already claimed by another data type.
    fn register(&self, set: &mut HashSet<Keyspace>) {
        // `insert` returns `false` when the value was already present, which
        // means two specs claim the same keyspace name.
        assert!(
            set.insert(self.by_origin),
            "keyspace `{}` is already registered; cannot reuse it as Arrow::BY_ORIGIN \
             (keyspace names must be unique within a schema)",
            self.by_origin
        );
        assert!(
            set.insert(self.by_target),
            "keyspace `{}` is already registered; cannot reuse it as Arrow::BY_TARGET \
             (keyspace names must be unique within a schema)",
            self.by_target
        );
    }
}
impl TypeSpec for AliasSpec {
    /// Register both alias keyspaces, panicking with a descriptive message if
    /// either name is already claimed by another data type.
    fn register(&self, set: &mut HashSet<Keyspace>) {
        // `insert` returns `false` when the value was already present, which
        // means two specs claim the same keyspace name.
        assert!(
            set.insert(self.keyspace),
            "keyspace `{}` is already registered; cannot reuse it as Alias::KEYSPACE \
             (keyspace names must be unique within a schema)",
            self.keyspace
        );
        assert!(
            set.insert(self.reversed),
            "keyspace `{}` is already registered; cannot reuse it as Alias::REVERSED \
             (keyspace names must be unique within a schema)",
            self.reversed
        );
    }
}
impl TypeSpec for MixinSpec {
    /// Register the mixin keyspace, panicking with a descriptive message if
    /// the name is already claimed by another data type.
    fn register(&self, set: &mut HashSet<Keyspace>) {
        // `insert` returns `false` when the value was already present, which
        // means two specs claim the same keyspace name.
        assert!(
            set.insert(self.keyspace),
            "keyspace `{}` is already registered; cannot reuse it as Mixin::KEYSPACE \
             (keyspace names must be unique within a schema)",
            self.keyspace
        );
    }
}

View file

@ -46,6 +46,36 @@ pub trait IterExt: Iterator + Sized {
{
self.next().ok_or(e)?
}
/// `filter_map` meets `and_then`.
///
/// Applies `f` to every `Ok` item: `Ok(None)` results are dropped from the
/// stream, `Ok(Some(o))` is yielded as `Ok(o)`, and errors — whether from the
/// input or produced by `f` — are passed through as `Err`.
fn filter_bind_results<'a, I, O, E>(
    self,
    mut f: impl FnMut(I) -> Result<Option<O>, E> + 'a,
) -> impl Iterator<Item = Result<O, E>> + 'a
where
    Self: Iterator<Item = Result<I, E>> + 'a,
{
    self.filter_map(move |item| match item {
        Ok(input) => f(input).transpose(),
        Err(error) => Some(Err(error)),
    })
}
/// Like [`Iterator::find`].
///
/// Note that `Err` items are skipped rather than returned: the result is
/// `Ok(None)` when no `Ok` item satisfies `f`.
fn find_ok<'a, I, E>(mut self, mut f: impl FnMut(&I) -> bool) -> Result<Option<I>, E>
where
    Self: Iterator<Item = Result<I, E>> + 'a,
{
    loop {
        match self.next() {
            None => return Ok(None),
            Some(Ok(value)) if f(&value) => return Ok(Some(value)),
            Some(_) => continue,
        }
    }
}
}
/// Blanket implementation: every [`Iterator`] gets the [`IterExt`] extension methods for free.
impl<I> IterExt for I where I: Iterator {}
/// Curried tuple constructor: `key(k)` yields a closure that pairs its
/// argument up as the *second* element, producing `(k, v)`.
pub fn key<K, V>(key: K) -> impl FnOnce(V) -> (K, V) {
    move |second| (key, second)
}
/// Curried function for creating a tuple, where the first argument is the
/// *second* tuple element, and the next argument is the *first* tuple element.
/// (Mirror image of [`key`]; the previous doc comment was a copy-paste of
/// `key`'s and described the argument order incorrectly.)
pub fn val<K, V>(val: V) -> impl FnOnce(K) -> (K, V) {
    move |key| (key, val)
}

View file

@ -1,4 +1,6 @@
unstable_features = true
overflow_delimited_expr = true
group_imports = "StdExternalCrate"
use_field_init_shorthand = true
reorder_modules = false
reorder_imports = false
struct_lit_width = 30