From cccc7bbfeff0586fadc44ebf4cbbd2f1c8e9974e Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Thu, 1 Aug 2024 15:27:33 +0100 Subject: [PATCH 01/56] Typos/formatting --- src/Equinox.CosmosStore.Prometheus/CosmosStorePrometheus.fs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Equinox.CosmosStore.Prometheus/CosmosStorePrometheus.fs b/src/Equinox.CosmosStore.Prometheus/CosmosStorePrometheus.fs index 2808bbde3..7eb31da95 100644 --- a/src/Equinox.CosmosStore.Prometheus/CosmosStorePrometheus.fs +++ b/src/Equinox.CosmosStore.Prometheus/CosmosStorePrometheus.fs @@ -112,6 +112,7 @@ type LogSink(customTags: seq) = | Op (Operation.Tip304, m) -> observeTip ("R", "query", "tip", "ok", "304") m | Op (Operation.Query, m) -> observe ("R", "query", "query", "ok") m | QueryRes (_direction, m) -> observeRes ("R", "query", "queryPage") m + | Op (Operation.Index, m) -> observe ("R", "linq", "query", "ok") m | Op (Operation.Write, m) -> observe ("W", "transact", "sync", "ok") m | Op (Operation.Conflict, m) -> observe ("W", "transact", "conflict", "conflict") m | Op (Operation.Resync, m) -> observe ("W", "transact", "resync", "conflict") m From 5893044bc97b8016fa07ad528bfb86d1e46df9f6 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Thu, 28 Mar 2024 00:30:16 +0000 Subject: [PATCH 02/56] feat(CosmosStore.Linq): Indexed Queries --- README.md | 4 +- samples/Store/Integration/LogIntegration.fs | 1 + src/Equinox.CosmosStore/CosmosStore.fs | 11 +- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 157 ++++++++++++++++++ .../Equinox.CosmosStore.fsproj | 1 + .../CosmosFixturesInfrastructure.fs | 3 + tools/Equinox.Tool/Program.fs | 18 +- 7 files changed, 184 insertions(+), 11 deletions(-) create mode 100644 src/Equinox.CosmosStore/CosmosStoreLinq.fs diff --git a/README.md b/README.md index 0a70f4e82..fb33ef2cc 100644 --- a/README.md +++ b/README.md @@ -390,7 +390,7 @@ While Equinox is implemented in F#, and F# is a great fit for writing event-sour # use a wild card (LIKE) for the stream name eqx query -cl '$Us%' -un Snapshotted cosmos -d db -c $EQUINOX_COSMOS_VIEWS -b 100000 - # > Querying Default: SELECT c.u, c.p, c._etag FROM c WHERE c.p LIKE "$Us%" AND EXISTS (SELECT VALUE u FROM u IN c.u WHERE u.c = "Snapshotted") {} + # > Querying Default: SELECT c._etag, c.u[0].d, c.p FROM c WHERE c.p LIKE "$Us%" AND EXISTS (SELECT VALUE u FROM u IN c.u WHERE u.c = "Snapshotted") {} # > Page 7166s, 7166u, 0e 320.58RU 3.9s {} # > Page 1608s, 1608u, 0e 68.59RU 0.9s {} # > TOTALS 1c, 8774s, 389.17RU 4.7s {} @@ -403,7 +403,7 @@ While Equinox is implemented in F#, and F# is a great fit for writing event-sour # add criteria filtering based on an Uncompressed Unfold eqx query -cn '$User' -un EmailIndex -uc 'u.d.email = "a@b.com"' cosmos -d db -c $EQUINOX_COSMOS_VIEWS -b 100000 - # > Querying Default: SELECT c.u, c.p, c._etag FROM c WHERE c.p LIKE "$User-%" AND EXISTS (SELECT VALUE u FROM u IN c.u WHERE u.c = "EmailIndex" AND u.d.email = "a@b.com") {} + # > Querying Default: SELECT c._etag, c.u[0].d, c.p FROM c WHERE c.p LIKE "$User-%" AND EXISTS (SELECT VALUE u FROM u IN c.u WHERE u.c = "EmailIndex" AND u.d.email = "a@b.com") {} # > Page 0s, 0u, 0e 2.8RU 0.7s {} # > TOTALS 0c, 0s, 2.80RU 0.7s {} # 👈 only 2.8RU if nothing is returned diff --git a/samples/Store/Integration/LogIntegration.fs b/samples/Store/Integration/LogIntegration.fs index 4adbab382..a791db975 100644 --- a/samples/Store/Integration/LogIntegration.fs +++ b/samples/Store/Integration/LogIntegration.fs @@ -38,6 +38,7 @@ module EquinoxCosmosInterop = | 
Log.Metric.Query (Direction.Backward,c,m) -> "CosmosQueryB", m, Some c, m.ru | Log.Metric.QueryResponse (Direction.Forward,m) -> "CosmosResponseF", m, None, m.ru | Log.Metric.QueryResponse (Direction.Backward,m) -> "CosmosResponseB", m, None, m.ru + | Log.Metric.Index m -> "CosmosLinq", m, None, m.ru | Log.Metric.SyncSuccess m -> "CosmosSync200", m, None, m.ru | Log.Metric.SyncConflict m -> "CosmosSync409", m, None, m.ru | Log.Metric.SyncResync m -> "CosmosSyncResync", m, None, m.ru diff --git a/src/Equinox.CosmosStore/CosmosStore.fs b/src/Equinox.CosmosStore/CosmosStore.fs index abc42dd8f..18ed7bf13 100644 --- a/src/Equinox.CosmosStore/CosmosStore.fs +++ b/src/Equinox.CosmosStore/CosmosStore.fs @@ -228,6 +228,8 @@ module Log = | Delete of Measurement /// Trimmed the Tip | Trim of Measurement + /// Queried via the Index + | Index of Measurement let [] (|MetricEvent|_|) (logEvent: Serilog.Events.LogEvent): Metric voption = let mutable p = Unchecked.defaultof<_> logEvent.Properties.TryGetValue(PropertyTag, &p) |> ignore @@ -256,7 +258,7 @@ module Log = let internal eventLen (x: #IEventData<_>) = EncodedBody.jsonUtf8Bytes x.Data + EncodedBody.jsonUtf8Bytes x.Meta + 80 let internal batchLen = Seq.sumBy eventLen [] - type Operation = Tip | Tip404 | Tip304 | Query | Write | Resync | Conflict | Prune | Delete | Trim + type Operation = Tip | Tip404 | Tip304 | Query | Index | Write | Resync | Conflict | Prune | Delete | Trim let (|Op|QueryRes|PruneRes|) = function | Metric.Tip s -> Op (Operation.Tip, s) | Metric.TipNotFound s -> Op (Operation.Tip404, s) @@ -265,6 +267,8 @@ module Log = | Metric.Query (_, _, s) -> Op (Operation.Query, s) | Metric.QueryResponse (direction, s) -> QueryRes (direction, s) + | Metric.Index s -> Op (Operation.Index, s) + | Metric.SyncSuccess s -> Op (Operation.Write, s) | Metric.SyncResync s -> Op (Operation.Resync, s) | Metric.SyncConflict s -> Op (Operation.Conflict, s) @@ -299,6 +303,7 @@ module Log = let epoch = System.Diagnostics.Stopwatch.StartNew() member val internal Tip = Counters() with get, set member val internal Read = Counters() with get, set + member val internal Index = Counters() with get, set member val internal Write = Counters() with get, set member val internal Resync = Counters() with get, set member val internal Conflict = Counters() with get, set @@ -326,6 +331,7 @@ module Log = epoch.Tip.Ingest m | Op (Operation.Query, BucketMsRu m) -> epoch.Read.Ingest m | QueryRes (_direction, _) -> () + | Op (Operation.Index, BucketMsRu m) -> epoch.Index.Ingest m | Op (Operation.Write, BucketMsRu m) -> epoch.Write.Ingest m | Op (Operation.Conflict, BucketMsRu m) -> epoch.Conflict.Ingest m | Op (Operation.Resync, BucketMsRu m) -> epoch.Resync.Ingest m @@ -342,13 +348,14 @@ module Log = let stats = [|nameof res.Tip, res.Tip nameof res.Read, res.Read + nameof res.Index, res.Index nameof res.Write, res.Write nameof res.Resync, res.Resync nameof res.Conflict, res.Conflict nameof res.Prune, res.Prune nameof res.Delete, res.Delete nameof res.Trim, res.Trim |] - let isRead = function nameof res.Tip | nameof res.Read | nameof res.Prune -> true | _ -> false + let isRead = function nameof res.Tip | nameof res.Read | nameof res.Index | nameof res.Prune -> true | _ -> false let buckets = stats |> Seq.collect (fun (_n, stat) -> stat.Buckets) |> Seq.distinct |> Seq.sort |> Seq.toArray if Array.isEmpty buckets then () else diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs new file mode 100644 index 000000000..108ba5854 --- 
/dev/null +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -0,0 +1,157 @@ +namespace Equinox.CosmosStore.Linq + +open FSharp.Control +open Serilog +open System +open System.Linq +open System.Linq.Expressions + +module Internal = + + open Microsoft.Azure.Cosmos + let private taskEnum (iterator: FeedIterator<'T>) = taskSeq { + while iterator.HasMoreResults do + let! response = iterator.ReadNextAsync() + let m = response.Diagnostics.GetQueryMetrics().CumulativeMetrics + yield struct (response.Diagnostics.GetClientElapsedTime(), response.RequestCharge, response.Resource, + int m.RetrievedDocumentCount, int m.RetrievedDocumentSize, int m.OutputDocumentSize) } + let inline miB x = float x / 1024. / 1024. + let enum<'T, 'R> (desc: string) (container: Container) (parse: 'T -> 'R) (queryDefinition: QueryDefinition) = taskSeq { + if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.enum {desc} {query}", desc, queryDefinition.QueryText) + let sw = System.Diagnostics.Stopwatch.StartNew() + let iterator = container.GetItemQueryIterator<'T>(queryDefinition) + let mutable responses, items, totalRtt, totalRu, totalRdc, totalRds, totalOds = 0, 0, TimeSpan.Zero, 0., 0, 0, 0 + try for rtt, rc, response, rdc, rds, ods in taskEnum iterator do + responses <- responses + 1 + totalRdc <- totalRdc + rdc + totalRds <- totalRds + rds + totalOds <- totalOds + ods + totalRu <- totalRu + rc + totalRtt <- totalRtt + rtt + for item in response do + items <- items + 1 + yield parse item + finally Log.Information("CosmosStoreQuery.enum {desc} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", + desc, items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, sw.ElapsedMilliseconds) } + + open Microsoft.Azure.Cosmos.Linq + let tryScalar<'T, 'R> desc container (query: IQueryable<'T>) (parse: 'T -> 'R): Async<'R option> = + query.Take(1).ToQueryDefinition() |> enum<'T, 'R> desc container parse |> TaskSeq.tryHead |> Async.AwaitTask + let page<'T0, 'T1, 'R> desc container (pageSize: int) pageIndex (query: IQueryable<'T0>) (parse: 'T1 -> 'R): Async<'R[]> = + query.Skip(pageIndex * pageSize).Take(pageSize).ToQueryDefinition() |> enum desc container parse |> TaskSeq.toArrayAsync |> Async.AwaitTask + let count (desc: string) (query: IQueryable<'T>): System.Threading.Tasks.Task = task { + if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.count {desc} {query}", desc, query.ToQueryDefinition().QueryText) + let sw = System.Diagnostics.Stopwatch.StartNew() + let! 
(r: Response) = query.CountAsync() + let m = r.Diagnostics.GetQueryMetrics().CumulativeMetrics + Log.Information("CosmosStoreQuery.count {desc} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", + desc, r.Resource, m.RetrievedDocumentCount, miB m.RetrievedDocumentSize, miB m.OutputDocumentSize, r.RequestCharge, sw.ElapsedMilliseconds) + return r.Resource } + +module Expressions = + + module Expression = + let replace find replace = + { new ExpressionVisitor() with + override _.Visit node = + if node = find then replace + else base.Visit node } + // https://stackoverflow.com/a/8829845/11635 + let compose (selector: Expression>) (predicate: Expression>) = + let param = Expression.Parameter(typeof<'T>, "x") + let prop = (replace selector.Parameters[0] param).Visit(selector.Body) + let body = (replace predicate.Parameters[0] prop).Visit(predicate.Body) + Expression.Lambda>(body, param) + + type IQueryable<'T> with + member source.OrderBy(indexSelector: Expression>, propertyName: string, descending) = + let indexSortProperty = Expression.PropertyOrField(indexSelector.Body, propertyName) + let keySelector = Expression.Lambda(indexSortProperty, indexSelector.Parameters[0]) + let call = Expression.Call( + typeof, + (if descending then "OrderByDescending" else "OrderBy"), + [| typeof<'T>; indexSortProperty.Type |], + source.Expression, + keySelector) + source.Provider.CreateQuery<'T>(call) + +type Query<'T, 'P0, 'P1, 'R>(container, description, query: IQueryable<'T>, render: Expressions.Expression>, hydrate: 'P1 -> 'R) = + member _.CountAsync() = query |> Internal.count description + member _.HydratePage(pageSize, pageIndex): Async<'R[]> = + let items = query.Select(render) + Internal.page<'P0, 'P1, 'R> description container pageSize pageIndex items hydrate + member x.ItemsAndCount(pageSize, pageIndex) = async { + let countQ = x.CountAsync() // start in parallel + let! items = x.HydratePage(pageSize, pageIndex) + let! 
count = countQ |> Async.AwaitTask + return items, count } + +module Index = + + [] + type Item<'I> = + { p: string + _etag: string + u: Unfold<'I> ResizeArray } + and [] + Unfold<'I> = + { c: string + d: 'I } + + let inline prefix categoryName = $"%s{categoryName}-" + let queryCategory<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName: IQueryable> = + let prefix = prefix categoryName + container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix)) + let queryIndex<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName caseName: IQueryable> = + let prefix = prefix categoryName + container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix) && d.u[0].c = caseName) + + let tryStreamName desc container (query: IQueryable>): Async = + Internal.tryScalar desc container (query.Select(fun x -> x.p)) FsCodec.StreamName.Internal.trust + + // We want to generate a projection statement of the shape: VALUE {"sn": root["p"], "snap": root["u"][0].["d"]} + // However the Cosmos SDK does not support F# (or C#) records yet https://github.com/Azure/azure-cosmos-dotnet-v3/issues/3728 + // F#'s LINQ support cannot translate parameterless constructor invocations in a Lambda well; + // the best native workaround without Expression Manipulation is/was https://stackoverflow.com/a/78206722/11635 + // In C#, you can generate an Expression that works with the Cosmos SDK via `.Select(x => new { sn = x.p, snap = x.u[0].d })` + // This hack is based on https://stackoverflow.com/a/73506241/11635 + type SnAndSnap<'I>() = + member val sn: FsCodec.StreamName = Unchecked.defaultof<_> with get, set + [)>] + member val snap: 'I = Unchecked.defaultof<_> with get, set + static member FromIndexQuery(snapExpression: Expression, 'I>>) = + let param = Expression.Parameter(typeof>, "x") + let targetType = typeof> + let snMember = targetType.GetMember(nameof Unchecked.defaultof>.sn)[0] + let snapMember = targetType.GetMember(nameof Unchecked.defaultof>.snap)[0] + Expression.Lambda, SnAndSnap<'I>>>( + Expression.MemberInit( + Expression.New(targetType.GetConstructor [||]), + [| Expression.Bind(snMember, Expression.PropertyOrField(param, nameof Unchecked.defaultof>.p)) :> MemberBinding + Expression.Bind(snapMember, (Expressions.Expression.replace snapExpression.Parameters[0] param).Visit(snapExpression.Body)) |]), + [| param |]) + +/// Enables querying based on an Index stored +[] +type IndexContext<'I>(container, categoryName, caseName) = + + member val Description = $"{categoryName}/{caseName}" with get, set + + /// Fetches a base Queryable that's filtered based on the `categoryName` and `caseName` + /// NOTE this is relatively expensive to compute a Count on, compared to `CategoryQueryable` + member _.Queryable(): IQueryable> = Index.queryIndex<'I> container categoryName caseName + + /// Fetches a base Queryable that's filtered only on the `categoryName` + member _.CategoryQueryable(): IQueryable> = Index.queryCategory<'I> container categoryName + + /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria + member x.TryStreamNameWhere(criteria: Expression, bool>>): Async = + Index.tryStreamName x.Description container (x.Queryable().Where(criteria)) + + /// Query the items, `Select()`ing as type `P` per the `render` function. 
Items are parsed from the results via the `hydrate` function + member x.Query<'P0, 'P1, 'R>(query: IQueryable>, render: Expression, 'P0>>, hydrate: 'P1 -> 'R) = + Query, 'P0, 'P1, 'R>(container, x.Description, query, render, hydrate) + + /// Query the items, grabbing the Stream name and the Snapshot; The StreamName and the (Decompressed if applicable) Snapshot are passed to `hydrate` + member x.QueryStreamNameAndSnapshot(query, renderSnapshot, hydrate) = + x.Query, Index.SnAndSnap, 'R>(query, Index.SnAndSnap<'I>.FromIndexQuery renderSnapshot, hydrate) diff --git a/src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj b/src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj index e3848e9e5..b57cea590 100644 --- a/src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj +++ b/src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj @@ -13,6 +13,7 @@ + diff --git a/tests/Equinox.CosmosStore.Integration/CosmosFixturesInfrastructure.fs b/tests/Equinox.CosmosStore.Integration/CosmosFixturesInfrastructure.fs index cdf23059d..eefeba99a 100644 --- a/tests/Equinox.CosmosStore.Integration/CosmosFixturesInfrastructure.fs +++ b/tests/Equinox.CosmosStore.Integration/CosmosFixturesInfrastructure.fs @@ -33,6 +33,7 @@ module SerilogHelpers = type EqxAct = | Tip | TipNotFound | TipNotModified | ResponseForward | ResponseBackward + | Index | QueryForward | QueryBackward | Append | Resync | Conflict | PruneResponse | Delete | Trim | Prune @@ -50,6 +51,7 @@ module SerilogHelpers = | Metric.SyncAppend _ | Metric.SyncCalve _ -> EqxAct.Append | Metric.SyncAppendConflict _ | Metric.SyncCalveConflict _ -> EqxAct.Conflict #else + | Metric.Index _ -> EqxAct.Index | Metric.SyncSuccess _ -> EqxAct.Append | Metric.SyncResync _ -> EqxAct.Resync | Metric.SyncConflict _ -> EqxAct.Conflict @@ -75,6 +77,7 @@ module SerilogHelpers = | Metric.SyncAppend s | Metric.SyncCalve s | Metric.SyncCalveConflict s | Metric.SyncAppendConflict s -> Write s #else + | Metric.Index s -> Response s // Stubbed out for now | Metric.SyncSuccess s | Metric.SyncConflict s -> Write s | Metric.SyncResync s -> Resync s diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 64d15e589..cd0513142 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -136,15 +136,17 @@ and [] QueryParameters = | CategoryLike _ -> "Specify category name to match against `p` as a Cosmos LIKE expression (with `%` as wildcard, e.g. `$UserServices-%`)." | UnfoldName _ -> "Specify unfold Name to match against `u.c`, e.g. `Snapshotted`" | UnfoldCriteria _ -> "Specify constraints on Unfold (reference unfold fields via `u.d.`, top level fields via `c.`), e.g. `u.d.name = \"TenantName1\"`." - | Mode _ -> "readOnly: Only read `u`nfolds, not `_etag`.\n" + - "readWithStream: Read `u`nfolds and `p` (stream name), but not `_etag`.\n" + - "default: Retrieve full data (p, u, _etag). <- Default for normal queries\n" + + | Mode _ -> "default: `_etag` plus snapwithStream (_etag, p, u[0].d). <- Default for normal queries\n" + + "snaponly: Only read `u[0].d`\n" + + "snapwithstream: Read `u[0].d` and `p` (stream name), but not `_etag`.\n" + + "readonly: Only read `u`nfolds, not `_etag`.\n" + + "readwithstream: Read `u`nfolds and `p` (stream name), but not `_etag`.\n" + "raw: Read all Items(documents) in full. <- Default when Output File specified\n" | File _ -> "Export full retrieved JSON to file. NOTE this switches the default mode to `Raw`" | Pretty -> "Render the JSON indented over multiple lines" | Console -> "Also emit the JSON to the console. 
Default: Gather statistics (but only write to a File if specified)" | Cosmos _ -> "Parameters for CosmosDB." -and [] Mode = ReadOnly | ReadWithStream | Default | Raw +and [] Mode = Default | SnapOnly | SnapWithStream | ReadOnly | ReadWithStream | Raw and [] Criteria = SingleStream of string | CatName of string | CatLike of string | Unfiltered and QueryArguments(p: ParseResults) = member val Mode = p.GetResult(QueryParameters.Mode, if p.Contains QueryParameters.File then Mode.Raw else Mode.Default) @@ -353,7 +355,7 @@ let prettySerdes = lazy FsCodec.SystemTextJson.Serdes(FsCodec.SystemTextJson.Opt module CosmosQuery = - let inline miB x = float x / 1024. / 1024. + let inline miB x = Equinox.CosmosStore.Linq.Internal.miB x let private unixEpoch = DateTime.UnixEpoch type System.Text.Json.JsonElement with member x.Utf8ByteCount = if x.ValueKind = System.Text.Json.JsonValueKind.Null then 0 else x.GetRawText() |> System.Text.Encoding.UTF8.GetByteCount @@ -374,9 +376,11 @@ module CosmosQuery = | Criteria.Unfiltered -> warnOnUnfiltered (); "1=1" let selectedFields = match a.Mode with - | Mode.ReadOnly -> "c.u" + | Mode.Default -> "c._etag, c.p, c.u[0].d" + | Mode.SnapOnly -> "c.u[0].d" + | Mode.SnapWithStream -> "c.p, c.u[0].d" + | Mode.ReadOnly -> "c.p, c.u" | Mode.ReadWithStream -> "c.p, c.u" - | Mode.Default -> "c.p, c.u, c._etag" | Mode.Raw -> "*" let unfoldFilter = let exists cond = $"EXISTS (SELECT VALUE u FROM u IN c.u WHERE {cond})" From 4e7a42879f76e7fc05b67675a16e0a3b96cc3da1 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Thu, 28 Mar 2024 01:06:26 +0000 Subject: [PATCH 03/56] CL --- CHANGELOG.md | 1 + tools/Equinox.Tool/Program.fs | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0cbe982f..6a5f5afe7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The `Unreleased` section name is replaced by the expected version of next releas - `Equinox.CosmosStore.EventsContext.Sync`: Support syncing of unfolds [#460](https://github.com/jet/equinox/pull/460) - `eqx stats`: `-O`, `-N` flags extract oldest and newest `_ts` within a store [#459](https://github.com/jet/equinox/pull/459) - `eqx`: `-Q` flag omits timestamps from console output logging [#459](https://github.com/jet/equinox/pull/459) +- `Equinox.CosmosStore.Linq`: Add LINQ querying support for Indexed `u`nfolds (`AccessStrategy.Custom`+`CosmosStoreCategory.shouldCompress`) [#450](https://github.com/jet/equinox/pull/450) ### Changed diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index cd0513142..705e6cf03 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -370,18 +370,18 @@ module CosmosQuery = Log.Write(lel, "No StreamName or CategoryName/CategoryLike specified - Unfold Criteria better be unambiguous") let partitionKeyCriteria = match a.Criteria with - | Criteria.SingleStream sn -> $"c.p = \"{sn}\"" - | Criteria.CatName n -> $"c.p LIKE \"{n}-%%\"" - | Criteria.CatLike pat -> $"c.p LIKE \"{pat}\"" - | Criteria.Unfiltered -> warnOnUnfiltered (); "1=1" + | Criteria.SingleStream sn -> $"c.p = \"{sn}\"" + | Criteria.CatName n -> $"c.p LIKE \"{n}-%%\"" + | Criteria.CatLike pat -> $"c.p LIKE \"{pat}\"" + | Criteria.Unfiltered -> warnOnUnfiltered (); "1=1" let selectedFields = match a.Mode with - | Mode.Default -> "c._etag, c.p, c.u[0].d" - | Mode.SnapOnly -> "c.u[0].d" - | Mode.SnapWithStream -> "c.p, c.u[0].d" - | Mode.ReadOnly -> "c.p, c.u" - | Mode.ReadWithStream -> "c.p, c.u" - | Mode.Raw -> "*" 
+ | Mode.Default -> "c._etag, c.p, c.u[0].d" + | Mode.SnapOnly -> "c.u[0].d" + | Mode.SnapWithStream -> "c.p, c.u[0].d" + | Mode.ReadOnly -> "c.u" // TOCONSIDER remove; adjust TryLoad/TryHydrateTip + | Mode.ReadWithStream -> "c.p, c.u" // TOCONSIDER remove; adjust TryLoad/TryHydrateTip + | Mode.Raw -> "*" let unfoldFilter = let exists cond = $"EXISTS (SELECT VALUE u FROM u IN c.u WHERE {cond})" match [| match a.UnfoldName with None -> () | Some un -> $"u.c = \"{un}\"" From 1e48f21ffa8ba928ba18e8693d971de6ef48d221 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Thu, 28 Mar 2024 01:12:08 +0000 Subject: [PATCH 04/56] Dependency tweaks --- .../Equinox.CosmosStore.Prometheus.fsproj | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Equinox.CosmosStore.Prometheus/Equinox.CosmosStore.Prometheus.fsproj b/src/Equinox.CosmosStore.Prometheus/Equinox.CosmosStore.Prometheus.fsproj index 057fa2ab3..28eacfe32 100644 --- a/src/Equinox.CosmosStore.Prometheus/Equinox.CosmosStore.Prometheus.fsproj +++ b/src/Equinox.CosmosStore.Prometheus/Equinox.CosmosStore.Prometheus.fsproj @@ -9,8 +9,10 @@ - - + + + + From 8ad198c46f7ee6a63e502c8844cf6ef41f6ee67e Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Thu, 28 Mar 2024 01:16:22 +0000 Subject: [PATCH 05/56] Tweak example queries --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fb33ef2cc..de908ea92 100644 --- a/README.md +++ b/README.md @@ -390,7 +390,7 @@ While Equinox is implemented in F#, and F# is a great fit for writing event-sour # use a wild card (LIKE) for the stream name eqx query -cl '$Us%' -un Snapshotted cosmos -d db -c $EQUINOX_COSMOS_VIEWS -b 100000 - # > Querying Default: SELECT c._etag, c.u[0].d, c.p FROM c WHERE c.p LIKE "$Us%" AND EXISTS (SELECT VALUE u FROM u IN c.u WHERE u.c = "Snapshotted") {} + # > Querying Default: SELECT c.p, c._etag, c.u[0].d FROM c WHERE c.p LIKE "$Us%" AND EXISTS (SELECT VALUE u FROM u IN c.u WHERE u.c = "Snapshotted") {} # > Page 7166s, 7166u, 0e 320.58RU 3.9s {} # > Page 1608s, 1608u, 0e 68.59RU 0.9s {} # > TOTALS 1c, 8774s, 389.17RU 4.7s {} @@ -403,7 +403,7 @@ While Equinox is implemented in F#, and F# is a great fit for writing event-sour # add criteria filtering based on an Uncompressed Unfold eqx query -cn '$User' -un EmailIndex -uc 'u.d.email = "a@b.com"' cosmos -d db -c $EQUINOX_COSMOS_VIEWS -b 100000 - # > Querying Default: SELECT c._etag, c.u[0].d, c.p FROM c WHERE c.p LIKE "$User-%" AND EXISTS (SELECT VALUE u FROM u IN c.u WHERE u.c = "EmailIndex" AND u.d.email = "a@b.com") {} + # > Querying Default: SELECT c.p, c._etag, c.u[0].d FROM c WHERE c.p LIKE "$User-%" AND EXISTS (SELECT VALUE u FROM u IN c.u WHERE u.c = "EmailIndex" AND u.d.email = "a@b.com") {} # > Page 0s, 0u, 0e 2.8RU 0.7s {} # > TOTALS 0c, 0s, 2.80RU 0.7s {} # 👈 only 2.8RU if nothing is returned From 29ec5c4b101b81b35b7d571c38e52104e979a52a Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Thu, 28 Mar 2024 02:17:00 +0000 Subject: [PATCH 06/56] 4.1.0-alpha.1 --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a5f5afe7..b7b2c80ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ The `Unreleased` section name is replaced by the expected version of next releas ## [Unreleased] + +## 4.1.0-alpha.1 - 2024-03-28 + ### Added - `Equinox.CosmosStore`: Roundtrip `D` and `M` encoding values as per `DynamoStore`, enabling more extensive control of compression 
[#472](https://github.com/jet/equinox/pull/472) From d80ddf9dd44fcb9d621c01c75ceb55b559b372f5 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Fri, 29 Mar 2024 09:55:20 +0000 Subject: [PATCH 07/56] Add missing Disposal --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 108ba5854..83b80c61b 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -19,7 +19,7 @@ module Internal = let enum<'T, 'R> (desc: string) (container: Container) (parse: 'T -> 'R) (queryDefinition: QueryDefinition) = taskSeq { if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.enum {desc} {query}", desc, queryDefinition.QueryText) let sw = System.Diagnostics.Stopwatch.StartNew() - let iterator = container.GetItemQueryIterator<'T>(queryDefinition) + use iterator = container.GetItemQueryIterator<'T>(queryDefinition) let mutable responses, items, totalRtt, totalRu, totalRdc, totalRds, totalOds = 0, 0, TimeSpan.Zero, 0., 0, 0, 0 try for rtt, rc, response, rdc, rds, ods in taskEnum iterator do responses <- responses + 1 From 7298b12ba9a018a976fe8661ceb9d9140a6d88df Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Sat, 30 Mar 2024 03:20:57 +0000 Subject: [PATCH 08/56] Polish --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 138 +++++++++++++++------ 1 file changed, 99 insertions(+), 39 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 83b80c61b..1924d2317 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -1,27 +1,29 @@ namespace Equinox.CosmosStore.Linq +open Equinox.Core.Infrastructure open FSharp.Control open Serilog open System +open System.ComponentModel open System.Linq open System.Linq.Expressions module Internal = open Microsoft.Azure.Cosmos - let private taskEnum (iterator: FeedIterator<'T>) = taskSeq { + [] // In case of emergency, use this, but log an issue so we can understand why + let enum_ (iterator: FeedIterator<'T>) = taskSeq { while iterator.HasMoreResults do let! response = iterator.ReadNextAsync() let m = response.Diagnostics.GetQueryMetrics().CumulativeMetrics yield struct (response.Diagnostics.GetClientElapsedTime(), response.RequestCharge, response.Resource, int m.RetrievedDocumentCount, int m.RetrievedDocumentSize, int m.OutputDocumentSize) } let inline miB x = float x / 1024. / 1024. 
- let enum<'T, 'R> (desc: string) (container: Container) (parse: 'T -> 'R) (queryDefinition: QueryDefinition) = taskSeq { - if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.enum {desc} {query}", desc, queryDefinition.QueryText) + let taskEnum<'T> (desc: string) (iterator: FeedIterator<'T>) = taskSeq { let sw = System.Diagnostics.Stopwatch.StartNew() - use iterator = container.GetItemQueryIterator<'T>(queryDefinition) + use _ = iterator let mutable responses, items, totalRtt, totalRu, totalRdc, totalRds, totalOds = 0, 0, TimeSpan.Zero, 0., 0, 0, 0 - try for rtt, rc, response, rdc, rds, ods in taskEnum iterator do + try for rtt, rc, response, rdc, rds, ods in enum_ iterator do responses <- responses + 1 totalRdc <- totalRdc + rdc totalRds <- totalRds + rds @@ -30,23 +32,68 @@ module Internal = totalRtt <- totalRtt + rtt for item in response do items <- items + 1 - yield parse item + yield item finally Log.Information("CosmosStoreQuery.enum {desc} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", desc, items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, sw.ElapsedMilliseconds) } + (* Query preparation *) + + /// Generates a TOP 1 SQL query + let top1 (query: IQueryable<'T>) = + query.Take(1) + /// Generates an `OFFSET skip LIMIT take` Cosmos SQL query + /// NOTE: such a query gets more expensive the more your Skip traverses, so use with care + /// NOTE: (continuation tokens are the key to more linear costs) + let offsetLimit (skip: int, take: int) (query: IQueryable<'T>) = + query.Skip(skip).Take(take) + + (* IAsyncEnumerable aka TaskSeq wrapping *) + open Microsoft.Azure.Cosmos.Linq - let tryScalar<'T, 'R> desc container (query: IQueryable<'T>) (parse: 'T -> 'R): Async<'R option> = - query.Take(1).ToQueryDefinition() |> enum<'T, 'R> desc container parse |> TaskSeq.tryHead |> Async.AwaitTask - let page<'T0, 'T1, 'R> desc container (pageSize: int) pageIndex (query: IQueryable<'T0>) (parse: 'T1 -> 'R): Async<'R[]> = - query.Skip(pageIndex * pageSize).Take(pageSize).ToQueryDefinition() |> enum desc container parse |> TaskSeq.toArrayAsync |> Async.AwaitTask - let count (desc: string) (query: IQueryable<'T>): System.Threading.Tasks.Task = task { - if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.count {desc} {query}", desc, query.ToQueryDefinition().QueryText) + /// Runs a query that renders 'T, Hydrating the results as 'R (can be the same types but e.g. you might want to map an object to a JsonElement etc) + let enum<'T, 'R> desc (container: Container) (query: IQueryable<'T>): TaskSeq<'R> = + let queryDefinition = query.ToQueryDefinition() + if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.query {desc} {query}", desc, queryDefinition.QueryText) + container.GetItemQueryIterator<'R>(queryDefinition) |> taskEnum desc + + (* Scalar call dispatch *) + + /// Runs one of the typical Cosmos SDK extensions, e.g. CountAsync, logging the costs + let exec (desc: string) (query: IQueryable<'T>) run render: System.Threading.Tasks.Task<'R> = task { + if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.exec {desc} {query}", desc, query.ToQueryDefinition().QueryText) let sw = System.Diagnostics.Stopwatch.StartNew() - let! 
(r: Response) = query.CountAsync() - let m = r.Diagnostics.GetQueryMetrics().CumulativeMetrics - Log.Information("CosmosStoreQuery.count {desc} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", - desc, r.Resource, m.RetrievedDocumentCount, miB m.RetrievedDocumentSize, miB m.OutputDocumentSize, r.RequestCharge, sw.ElapsedMilliseconds) - return r.Resource } + let! (rsp: Response<'R>) = run query + let res = rsp.Resource + let summary = render res + let m = rsp.Diagnostics.GetQueryMetrics().CumulativeMetrics + Log.Information("CosmosStoreQuery.count {res} {desc} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", + desc, summary, m.RetrievedDocumentCount, miB m.RetrievedDocumentSize, miB m.OutputDocumentSize, rsp.RequestCharge, sw.ElapsedMilliseconds) + return res } + /// Run's query.CountAsync, with instrumentation equivalent to what query provides + let countAsync desc (query: IQueryable<'T>) ct = + exec desc query (_.CountAsync(ct)) id + + let tryHeadAsync<'T, 'R> desc (container: Container) (query: IQueryable<'T>) (_ct: CancellationToken) = + let queryDefinition = (top1 query).ToQueryDefinition() + if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.tryScalar {desc} {query}", desc, queryDefinition.QueryText) + container.GetItemQueryIterator<'R>(queryDefinition) |> taskEnum desc |> TaskSeq.tryHead + + /// Encapsulates an Indexed query expression + type Queryable<'T, 'P, 'R>(container, description, query: IQueryable<'T>, render: Expressions.Expression>) = + member _.Enum<'M>(query, hydrate: 'R -> 'M): TaskSeq<'M> = enum<'P, 'R> description container query |> TaskSeq.map hydrate + member _.CountAsync ct: System.Threading.Tasks.Task = + countAsync description query ct + member _.Count(): Async = + countAsync description query System.Threading.CancellationToken.None |> Async.AwaitTask + member x.Fetch<'M>(hydrate): TaskSeq<'M> = + x.Enum<'M>(query.Select render, hydrate) + member x.FetchPage<'M>(skip, take, hydrate): TaskSeq<'M> = + x.Enum<'M>(query.Select render |> offsetLimit (skip, take), hydrate) + (* In case of emergency, use these. but log an issue so we can understand why *) + [] member val Container = container + [] member val Description = description + [] member val Query = query + [] member val Render = render module Expressions = @@ -75,17 +122,18 @@ module Expressions = keySelector) source.Provider.CreateQuery<'T>(call) -type Query<'T, 'P0, 'P1, 'R>(container, description, query: IQueryable<'T>, render: Expressions.Expression>, hydrate: 'P1 -> 'R) = - member _.CountAsync() = query |> Internal.count description - member _.HydratePage(pageSize, pageIndex): Async<'R[]> = - let items = query.Select(render) - Internal.page<'P0, 'P1, 'R> description container pageSize pageIndex items hydrate - member x.ItemsAndCount(pageSize, pageIndex) = async { - let countQ = x.CountAsync() // start in parallel - let! items = x.HydratePage(pageSize, pageIndex) - let! 
count = countQ |> Async.AwaitTask - return items, count } +type Queryable<'T, 'P, 'R, 'M>(inner: Internal.Queryable<'T, 'P, 'R>, hydrate: 'R -> 'M) = + member val Inner = inner + member _.CountAsync = inner.CountAsync + member _.Count(): Async = inner.Count() + member _.Fetch(): TaskSeq<'M> = inner.Fetch hydrate + member _.FetchPage(skip, take): TaskSeq<'M> = inner.FetchPage(skip, take, hydrate) +module Queryable = + + let map<'T, 'P, 'R, 'M> (hydrate: 'R -> 'M) (inner: Internal.Queryable<'T, 'P, 'R>) = Queryable<'T, 'P, 'R, 'M>(inner, hydrate) + +/// Helpers for Querying and Projecting results based on relevant aspects of Equinox.CosmosStore's storage schema module Index = [] @@ -99,15 +147,15 @@ module Index = d: 'I } let inline prefix categoryName = $"%s{categoryName}-" - let queryCategory<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName: IQueryable> = + let byCategoryNameOnly<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName: IQueryable> = let prefix = prefix categoryName container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix)) - let queryIndex<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName caseName: IQueryable> = + let byCaseName<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName caseName: IQueryable> = let prefix = prefix categoryName container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix) && d.u[0].c = caseName) - let tryStreamName desc container (query: IQueryable>): Async = - Internal.tryScalar desc container (query.Select(fun x -> x.p)) FsCodec.StreamName.Internal.trust + let tryGetStreamNameAsync description container (query: IQueryable>) = + Internal.tryHeadAsync description container (query.Select(fun x -> x.p)) // We want to generate a projection statement of the shape: VALUE {"sn": root["p"], "snap": root["u"][0].["d"]} // However the Cosmos SDK does not support F# (or C#) records yet https://github.com/Azure/azure-cosmos-dotnet-v3/issues/3728 @@ -131,6 +179,15 @@ module Index = Expression.Bind(snapMember, (Expressions.Expression.replace snapExpression.Parameters[0] param).Visit(snapExpression.Body)) |]), [| param |]) + (* In case of emergency a) use this b) log an issue as to why you had to; can't think of a good reason *) + [] + let streamNameAndSnapshot_<'I, 'R> description container renderSnapshot (query: IQueryable>) = + Internal.Queryable, SnAndSnap<'I>, SnAndSnap<'R>>(container, description, query, SnAndSnap<'I>.FromIndexQuery renderSnapshot) + + /// Query the items, returning the Stream name and the Snapshot as a JsonElement (Decompressed if applicable) + let streamNameAndSnapshot<'I> description container renderSnapshot (query: IQueryable>) = + streamNameAndSnapshot_<'I, System.Text.Json.JsonElement> description container renderSnapshot query + /// Enables querying based on an Index stored [] type IndexContext<'I>(container, categoryName, caseName) = @@ -139,19 +196,22 @@ type IndexContext<'I>(container, categoryName, caseName) = /// Fetches a base Queryable that's filtered based on the `categoryName` and `caseName` /// NOTE this is relatively expensive to compute a Count on, compared to `CategoryQueryable` - member _.Queryable(): IQueryable> = Index.queryIndex<'I> container categoryName caseName + member _.ByCaseName(): IQueryable> = + Index.byCaseName<'I> container categoryName caseName /// Fetches a base Queryable that's filtered only on the `categoryName` - member _.CategoryQueryable(): IQueryable> = Index.queryCategory<'I> container categoryName + member _.ByCategory(): 
IQueryable> = + Index.byCategoryNameOnly<'I> container categoryName /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria - member x.TryStreamNameWhere(criteria: Expression, bool>>): Async = - Index.tryStreamName x.Description container (x.Queryable().Where(criteria)) + member x.TryGetStreamNameWhereAsync(criteria: Expression, bool>>, ct) = + Index.tryGetStreamNameAsync x.Description container (x.ByCategory().Where(criteria)) ct - /// Query the items, `Select()`ing as type `P` per the `render` function. Items are parsed from the results via the `hydrate` function - member x.Query<'P0, 'P1, 'R>(query: IQueryable>, render: Expression, 'P0>>, hydrate: 'P1 -> 'R) = - Query, 'P0, 'P1, 'R>(container, x.Description, query, render, hydrate) + /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria + member x.TryGetStreamNameWhere(criteria: Expression, bool>>): Async = + (fun ct -> x.TryGetStreamNameWhereAsync(criteria, ct)) |> Async.call /// Query the items, grabbing the Stream name and the Snapshot; The StreamName and the (Decompressed if applicable) Snapshot are passed to `hydrate` member x.QueryStreamNameAndSnapshot(query, renderSnapshot, hydrate) = - x.Query, Index.SnAndSnap, 'R>(query, Index.SnAndSnap<'I>.FromIndexQuery renderSnapshot, hydrate) + Index.streamNameAndSnapshot<'I> x.Description container renderSnapshot query + |> Queryable.map hydrate From 55f68b28793ebd651da83beb374e6025a7ad1c44 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Sat, 30 Mar 2024 22:48:45 +0000 Subject: [PATCH 09/56] Simplify --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 175 ++++++++++----------- 1 file changed, 79 insertions(+), 96 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 1924d2317..12f22acbc 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -1,16 +1,62 @@ namespace Equinox.CosmosStore.Linq open Equinox.Core.Infrastructure -open FSharp.Control open Serilog open System +open System.Collections.Generic open System.ComponentModel open System.Linq open System.Linq.Expressions +// We want to generate a projection statement of the shape: VALUE {"sn": root["p"], "snap": root["u"][0].["d"]} +// However the Cosmos SDK does not support F# (or C#) records yet https://github.com/Azure/azure-cosmos-dotnet-v3/issues/3728 +// F#'s LINQ support cannot translate parameterless constructor invocations in a Lambda well; +// the best native workaround without Expression Manipulation is/was https://stackoverflow.com/a/78206722/11635 +// In C#, you can generate an Expression that works with the Cosmos SDK via `.Select(x => new { sn = x.p, snap = x.u[0].d })` +// This hack is based on https://stackoverflow.com/a/73506241/11635 +type SnAndSnap<'I>() = + member val sn: FsCodec.StreamName = Unchecked.defaultof<_> with get, set + [)>] + member val snap: 'I = Unchecked.defaultof<_> with get, set + module Internal = + module Expression = + let replace find replace = + { new ExpressionVisitor() with + override _.Visit node = + if node = find then replace + else base.Visit node } + // https://stackoverflow.com/a/8829845/11635 + let compose (selector: Expression>) (predicate: Expression>) = + let param = Expression.Parameter(typeof<'T>, "x") + let prop = (replace selector.Parameters[0] param).Visit(selector.Body) + let body = (replace predicate.Parameters[0] prop).Visit(predicate.Body) + Expression.Lambda>(body, param) + let orderBy (source: 
IQueryable<'T>) (indexSelector: Expression>) (propertyName: string) descending = + let indexSortProperty = Expression.PropertyOrField(indexSelector.Body, propertyName) + let keySelector = Expression.Lambda(indexSortProperty, indexSelector.Parameters[0]) + let call = Expression.Call( + typeof, + (if descending then "OrderByDescending" else "OrderBy"), + [| typeof<'T>; indexSortProperty.Type |], + source.Expression, + keySelector) + source.Provider.CreateQuery<'T>(call) + let createSnAndSnapFromItemQuery<'T, 'I>(snExpression: Expression -> MemberExpression, snapExpression: Expression>) = + let param = Expression.Parameter(typeof<'T>, "x") + let targetType = typeof> + let snMember = targetType.GetMember(nameof Unchecked.defaultof>.sn)[0] + let snapMember = targetType.GetMember(nameof Unchecked.defaultof>.snap)[0] + Expression.Lambda>>( + Expression.MemberInit( + Expression.New(targetType.GetConstructor [||]), + [| Expression.Bind(snMember, snExpression param) :> MemberBinding + Expression.Bind(snapMember, (replace snapExpression.Parameters[0] param).Visit(snapExpression.Body)) |]), + [| param |]) + open Microsoft.Azure.Cosmos + open FSharp.Control // taskSeq [] // In case of emergency, use this, but log an issue so we can understand why let enum_ (iterator: FeedIterator<'T>) = taskSeq { while iterator.HasMoreResults do @@ -50,11 +96,11 @@ module Internal = (* IAsyncEnumerable aka TaskSeq wrapping *) open Microsoft.Azure.Cosmos.Linq - /// Runs a query that renders 'T, Hydrating the results as 'R (can be the same types but e.g. you might want to map an object to a JsonElement etc) - let enum<'T, 'R> desc (container: Container) (query: IQueryable<'T>): TaskSeq<'R> = + /// Runs a query that renders 'T, Hydrating the results as 'P (can be the same types but e.g. you might want to map an object to a JsonElement etc) + let enum<'T, 'P> desc (container: Container) (query: IQueryable<'T>): IAsyncEnumerable<'P> = let queryDefinition = query.ToQueryDefinition() if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.query {desc} {query}", desc, queryDefinition.QueryText) - container.GetItemQueryIterator<'R>(queryDefinition) |> taskEnum desc + container.GetItemQueryIterator<'P>(queryDefinition) |> taskEnum<'P> desc (* Scalar call dispatch *) @@ -76,62 +122,17 @@ module Internal = let tryHeadAsync<'T, 'R> desc (container: Container) (query: IQueryable<'T>) (_ct: CancellationToken) = let queryDefinition = (top1 query).ToQueryDefinition() if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.tryScalar {desc} {query}", desc, queryDefinition.QueryText) - container.GetItemQueryIterator<'R>(queryDefinition) |> taskEnum desc |> TaskSeq.tryHead - - /// Encapsulates an Indexed query expression - type Queryable<'T, 'P, 'R>(container, description, query: IQueryable<'T>, render: Expressions.Expression>) = - member _.Enum<'M>(query, hydrate: 'R -> 'M): TaskSeq<'M> = enum<'P, 'R> description container query |> TaskSeq.map hydrate - member _.CountAsync ct: System.Threading.Tasks.Task = - countAsync description query ct - member _.Count(): Async = - countAsync description query System.Threading.CancellationToken.None |> Async.AwaitTask - member x.Fetch<'M>(hydrate): TaskSeq<'M> = - x.Enum<'M>(query.Select render, hydrate) - member x.FetchPage<'M>(skip, take, hydrate): TaskSeq<'M> = - x.Enum<'M>(query.Select render |> offsetLimit (skip, take), hydrate) - (* In case of emergency, use these. 
but log an issue so we can understand why *) - [] member val Container = container - [] member val Description = description - [] member val Query = query - [] member val Render = render - -module Expressions = - - module Expression = - let replace find replace = - { new ExpressionVisitor() with - override _.Visit node = - if node = find then replace - else base.Visit node } - // https://stackoverflow.com/a/8829845/11635 - let compose (selector: Expression>) (predicate: Expression>) = - let param = Expression.Parameter(typeof<'T>, "x") - let prop = (replace selector.Parameters[0] param).Visit(selector.Body) - let body = (replace predicate.Parameters[0] prop).Visit(predicate.Body) - Expression.Lambda>(body, param) - - type IQueryable<'T> with - member source.OrderBy(indexSelector: Expression>, propertyName: string, descending) = - let indexSortProperty = Expression.PropertyOrField(indexSelector.Body, propertyName) - let keySelector = Expression.Lambda(indexSortProperty, indexSelector.Parameters[0]) - let call = Expression.Call( - typeof, - (if descending then "OrderByDescending" else "OrderBy"), - [| typeof<'T>; indexSortProperty.Type |], - source.Expression, - keySelector) - source.Provider.CreateQuery<'T>(call) - -type Queryable<'T, 'P, 'R, 'M>(inner: Internal.Queryable<'T, 'P, 'R>, hydrate: 'R -> 'M) = - member val Inner = inner - member _.CountAsync = inner.CountAsync - member _.Count(): Async = inner.Count() - member _.Fetch(): TaskSeq<'M> = inner.Fetch hydrate - member _.FetchPage(skip, take): TaskSeq<'M> = inner.FetchPage(skip, take, hydrate) - -module Queryable = - - let map<'T, 'P, 'R, 'M> (hydrate: 'R -> 'M) (inner: Internal.Queryable<'T, 'P, 'R>) = Queryable<'T, 'P, 'R, 'M>(inner, hydrate) + container.GetItemQueryIterator<'R>(queryDefinition) |> taskEnum desc |> FSharp.Control.TaskSeq.tryHead + + type Projection<'T, 'M>(query, description, container, enum: IQueryable<'T> -> IAsyncEnumerable<'M>) = + static member Create<'P>(query, description, container, hydrate: 'P -> 'M) = + Projection<'T, 'M>(query, description, container, enum<'T, 'P> description container >> TaskSeq.map hydrate) + member _.Enum: IAsyncEnumerable<'M> = query |> enum + member x.EnumPage(skip, take): IAsyncEnumerable<'M> = query |> offsetLimit (skip, take) |> enum + member _.CountAsync: CancellationToken -> Task = query |> countAsync description + [] member val Query: IQueryable<'T> = query + [] member val Description: string = description + [] member val Container: Container = container /// Helpers for Querying and Projecting results based on relevant aspects of Equinox.CosmosStore's storage schema module Index = @@ -141,8 +142,7 @@ module Index = { p: string _etag: string u: Unfold<'I> ResizeArray } - and [] - Unfold<'I> = + and [] Unfold<'I> = { c: string d: 'I } @@ -157,42 +157,25 @@ module Index = let tryGetStreamNameAsync description container (query: IQueryable>) = Internal.tryHeadAsync description container (query.Select(fun x -> x.p)) - // We want to generate a projection statement of the shape: VALUE {"sn": root["p"], "snap": root["u"][0].["d"]} - // However the Cosmos SDK does not support F# (or C#) records yet https://github.com/Azure/azure-cosmos-dotnet-v3/issues/3728 - // F#'s LINQ support cannot translate parameterless constructor invocations in a Lambda well; - // the best native workaround without Expression Manipulation is/was https://stackoverflow.com/a/78206722/11635 - // In C#, you can generate an Expression that works with the Cosmos SDK via `.Select(x => new { sn = x.p, snap = x.u[0].d 
})` - // This hack is based on https://stackoverflow.com/a/73506241/11635 - type SnAndSnap<'I>() = - member val sn: FsCodec.StreamName = Unchecked.defaultof<_> with get, set - [)>] - member val snap: 'I = Unchecked.defaultof<_> with get, set - static member FromIndexQuery(snapExpression: Expression, 'I>>) = - let param = Expression.Parameter(typeof>, "x") - let targetType = typeof> - let snMember = targetType.GetMember(nameof Unchecked.defaultof>.sn)[0] - let snapMember = targetType.GetMember(nameof Unchecked.defaultof>.snap)[0] - Expression.Lambda, SnAndSnap<'I>>>( - Expression.MemberInit( - Expression.New(targetType.GetConstructor [||]), - [| Expression.Bind(snMember, Expression.PropertyOrField(param, nameof Unchecked.defaultof>.p)) :> MemberBinding - Expression.Bind(snapMember, (Expressions.Expression.replace snapExpression.Parameters[0] param).Visit(snapExpression.Body)) |]), - [| param |]) - - (* In case of emergency a) use this b) log an issue as to why you had to; can't think of a good reason *) - [] - let streamNameAndSnapshot_<'I, 'R> description container renderSnapshot (query: IQueryable>) = - Internal.Queryable, SnAndSnap<'I>, SnAndSnap<'R>>(container, description, query, SnAndSnap<'I>.FromIndexQuery renderSnapshot) - /// Query the items, returning the Stream name and the Snapshot as a JsonElement (Decompressed if applicable) - let streamNameAndSnapshot<'I> description container renderSnapshot (query: IQueryable>) = - streamNameAndSnapshot_<'I, System.Text.Json.JsonElement> description container renderSnapshot query + let projectStreamNameAndSnapshot<'I> snapExpression: Expression, SnAndSnap<'I>>> = + // a very ugly workaround for not being able to write query.Select,Internal.SnAndSnap<'I>>(fun x -> { p = x.p; snap = x.u[0].d }) + let pExpression item = Expression.PropertyOrField(item, nameof Unchecked.defaultof>.p) + Internal.Expression.createSnAndSnapFromItemQuery, 'I>(pExpression, snapExpression) + +type Query<'T, 'M>(inner: Internal.Projection<'T, 'M>) = + member _.Enum: IAsyncEnumerable<'M> = inner.Enum + member _.EnumPage(skip, take): IAsyncEnumerable<'M> = inner.EnumPage(skip, take) + member _.CountAsync ct: Task = inner.CountAsync ct + member _.Count(): Async = inner.CountAsync |> Async.call + [] member val Inner = inner /// Enables querying based on an Index stored [] type IndexContext<'I>(container, categoryName, caseName) = member val Description = $"{categoryName}/{caseName}" with get, set + member val Container = container /// Fetches a base Queryable that's filtered based on the `categoryName` and `caseName` /// NOTE this is relatively expensive to compute a Count on, compared to `CategoryQueryable` @@ -204,14 +187,14 @@ type IndexContext<'I>(container, categoryName, caseName) = Index.byCategoryNameOnly<'I> container categoryName /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria - member x.TryGetStreamNameWhereAsync(criteria: Expression, bool>>, ct) = + member x.TryGetStreamNameWhereAsync(criteria: Expressions.Expression, bool>>, ct) = Index.tryGetStreamNameAsync x.Description container (x.ByCategory().Where(criteria)) ct /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria - member x.TryGetStreamNameWhere(criteria: Expression, bool>>): Async = + member x.TryGetStreamNameWhere(criteria: Expressions.Expression, bool>>): Async = (fun ct -> x.TryGetStreamNameWhereAsync(criteria, ct)) |> Async.call /// Query the items, grabbing the Stream name and the Snapshot; The StreamName and the (Decompressed if 
applicable) Snapshot are passed to `hydrate` - member x.QueryStreamNameAndSnapshot(query, renderSnapshot, hydrate) = - Index.streamNameAndSnapshot<'I> x.Description container renderSnapshot query - |> Queryable.map hydrate + member x.QueryStreamNameAndSnapshot(query: IQueryable>, selectBody: Expression, 'I>>, + hydrate: SnAndSnap -> 'M) = + Internal.Projection.Create(query.Select(Index.projectStreamNameAndSnapshot<'I> selectBody), x.Description, container, hydrate) From a0f7fdec6b8b4b97523d2bb2e8fa5ef062dffe47 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Sat, 30 Mar 2024 23:07:33 +0000 Subject: [PATCH 10/56] Comments --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 12f22acbc..a820dbc3f 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -147,13 +147,19 @@ module Index = d: 'I } let inline prefix categoryName = $"%s{categoryName}-" + /// The cheapest search basis; the categoryName is a prefix of the `p` partition field + /// Depending on how much more selective the caseName is, `byCaseName` may be a better choice + /// (but e.g. if the ration is 1:1 then no point having additional criteria) let byCategoryNameOnly<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName: IQueryable> = let prefix = prefix categoryName container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix)) + // Searches based on the prefix of the `p` field, but also checking the `c` of the relevant unfold is correct + // A good idea if that'll be significantly cheaper due to better selectivity let byCaseName<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName caseName: IQueryable> = let prefix = prefix categoryName container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix) && d.u[0].c = caseName) + /// Returns the StreamName (from the `p` field) for a 0/1 item query; only the TOP 1 item is returned let tryGetStreamNameAsync description container (query: IQueryable>) = Internal.tryHeadAsync description container (query.Select(fun x -> x.p)) @@ -163,6 +169,7 @@ module Index = let pExpression item = Expression.PropertyOrField(item, nameof Unchecked.defaultof>.p) Internal.Expression.createSnAndSnapFromItemQuery, 'I>(pExpression, snapExpression) +/// Represents a query projecting information values from an Index and/or Snapshots with a view to rendering the items and/or a count type Query<'T, 'M>(inner: Internal.Projection<'T, 'M>) = member _.Enum: IAsyncEnumerable<'M> = inner.Enum member _.EnumPage(skip, take): IAsyncEnumerable<'M> = inner.EnumPage(skip, take) @@ -170,7 +177,7 @@ type Query<'T, 'M>(inner: Internal.Projection<'T, 'M>) = member _.Count(): Async = inner.CountAsync |> Async.call [] member val Inner = inner -/// Enables querying based on an Index stored +/// Enables querying based on uncompressed Indexed values stored as secondary unfolds alongside the snapshot [] type IndexContext<'I>(container, categoryName, caseName) = @@ -188,7 +195,7 @@ type IndexContext<'I>(container, categoryName, caseName) = /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria member x.TryGetStreamNameWhereAsync(criteria: Expressions.Expression, bool>>, ct) = - Index.tryGetStreamNameAsync x.Description container (x.ByCategory().Where(criteria)) ct + Index.tryGetStreamNameAsync x.Description container (x.ByCategory().Where criteria) ct 
/// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria member x.TryGetStreamNameWhere(criteria: Expressions.Expression, bool>>): Async = From ecd191d02a82d756bd8a485d25daff8629d4f32c Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Sun, 31 Mar 2024 02:57:09 +0100 Subject: [PATCH 11/56] Tweaks --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index a820dbc3f..90c0d8b1b 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -125,8 +125,7 @@ module Internal = container.GetItemQueryIterator<'R>(queryDefinition) |> taskEnum desc |> FSharp.Control.TaskSeq.tryHead type Projection<'T, 'M>(query, description, container, enum: IQueryable<'T> -> IAsyncEnumerable<'M>) = - static member Create<'P>(query, description, container, hydrate: 'P -> 'M) = - Projection<'T, 'M>(query, description, container, enum<'T, 'P> description container >> TaskSeq.map hydrate) + static member Create<'P>(q, d, c, hydrate: 'P -> 'M) = Projection<'T, 'M>(q, d, c, enum<'T, 'P> d c >> TaskSeq.map hydrate) member _.Enum: IAsyncEnumerable<'M> = query |> enum member x.EnumPage(skip, take): IAsyncEnumerable<'M> = query |> offsetLimit (skip, take) |> enum member _.CountAsync: CancellationToken -> Task = query |> countAsync description @@ -173,7 +172,7 @@ module Index = type Query<'T, 'M>(inner: Internal.Projection<'T, 'M>) = member _.Enum: IAsyncEnumerable<'M> = inner.Enum member _.EnumPage(skip, take): IAsyncEnumerable<'M> = inner.EnumPage(skip, take) - member _.CountAsync ct: Task = inner.CountAsync ct + member _.CountAsync(ct: CancellationToken): Task = inner.CountAsync ct member _.Count(): Async = inner.CountAsync |> Async.call [] member val Inner = inner @@ -204,4 +203,4 @@ type IndexContext<'I>(container, categoryName, caseName) = /// Query the items, grabbing the Stream name and the Snapshot; The StreamName and the (Decompressed if applicable) Snapshot are passed to `hydrate` member x.QueryStreamNameAndSnapshot(query: IQueryable>, selectBody: Expression, 'I>>, hydrate: SnAndSnap -> 'M) = - Internal.Projection.Create(query.Select(Index.projectStreamNameAndSnapshot<'I> selectBody), x.Description, container, hydrate) + Internal.Projection.Create(query.Select(Index.projectStreamNameAndSnapshot<'I> selectBody), x.Description, container, hydrate) |> Query From 9c52bfbea8d1b080c0baffca5085717ccb3fa674 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Sun, 31 Mar 2024 19:36:38 +0100 Subject: [PATCH 12/56] Move extensions back to type --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 234 ++++++++++----------- 1 file changed, 114 insertions(+), 120 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 90c0d8b1b..17546963e 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -1,13 +1,110 @@ namespace Equinox.CosmosStore.Linq open Equinox.Core.Infrastructure +open FSharp.Control // taskSeq open Serilog open System -open System.Collections.Generic open System.ComponentModel open System.Linq open System.Linq.Expressions +/// Generic Expression Tree manipulation helpers / Cosmos SDK LINQ support incompleteness workarounds +type [] QueryExtensions = + static member Replace(find, replace) = // https://stackoverflow.com/a/8829845/11635 + { new ExpressionVisitor() with + override 
_.Visit node = + if node = find then replace + else base.Visit node } + static member Compose(selector: Expression>, predicate: Expression>) = + let param = Expression.Parameter(typeof<'T>, "x") + let prop = QueryExtensions.Replace(selector.Parameters[0], param).Visit(selector.Body) + let body = QueryExtensions.Replace(predicate.Parameters[0], prop).Visit(predicate.Body) + Expression.Lambda>(body, param) + [] + static member Where(source: IQueryable<'T>, indexSelector: Expression>, indexPredicate: Expression>): IQueryable<'T> = + source.Where(QueryExtensions.Compose(indexSelector, indexPredicate)) + [] + static member OrderBy(source: IQueryable<'T>, indexSelector: Expression>, propertyName: string, descending) = + let indexSortProperty = Expression.PropertyOrField(indexSelector.Body, propertyName) + let keySelector = Expression.Lambda(indexSortProperty, indexSelector.Parameters[0]) + let call = Expression.Call( + typeof, + (if descending then "OrderByDescending" else "OrderBy"), + [| typeof<'T>; indexSortProperty.Type |], + source.Expression, + keySelector) + source.Provider.CreateQuery<'T>(call) + +module Internal = + open Microsoft.Azure.Cosmos + open Microsoft.Azure.Cosmos.Linq + let inline miB x = float x / 1024. / 1024. + module Query = + /// Generates an `OFFSET skip LIMIT take` Cosmos SQL query + /// NOTE: such a query gets more expensive the more your Skip traverses, so use with care + /// NOTE: (continuation tokens are the key to more linear costs) + let offsetLimit (skip: int, take: int) (query: IQueryable<'T>) = + query.Skip(skip).Take(take) + let [] enum_ (iterator: FeedIterator<'T>) = taskSeq { + while iterator.HasMoreResults do + let! response = iterator.ReadNextAsync() + let m = response.Diagnostics.GetQueryMetrics().CumulativeMetrics + yield struct (response.Diagnostics.GetClientElapsedTime(), response.RequestCharge, response.Resource, + int m.RetrievedDocumentCount, int m.RetrievedDocumentSize, int m.OutputDocumentSize) } + let toAsyncEnum<'T> (desc: string) (iterator: FeedIterator<'T>) = taskSeq { + let sw = System.Diagnostics.Stopwatch.StartNew() + use _ = iterator + let mutable responses, items, totalRtt, totalRu, totalRdc, totalRds, totalOds = 0, 0, TimeSpan.Zero, 0., 0, 0, 0 + try for rtt, rc, response, rdc, rds, ods in enum_ iterator do + responses <- responses + 1 + totalRdc <- totalRdc + rdc + totalRds <- totalRds + rds + totalOds <- totalOds + ods + totalRu <- totalRu + rc + totalRtt <- totalRtt + rtt + for item in response do + items <- items + 1 + yield item + finally Log.Information("CosmosStoreQuery.enum {desc} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", + desc, items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, sw.ElapsedMilliseconds) } + /// Runs a query that renders 'T, Hydrating the results as 'P (can be the same types but e.g. you might want to map an object to a JsonElement etc) + let enum<'T, 'P> desc (container: Container) (query: IQueryable<'T>): TaskSeq<'P> = + let queryDefinition = query.ToQueryDefinition() + if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.query {desc} {query}", desc, queryDefinition.QueryText) + container.GetItemQueryIterator<'P>(queryDefinition) |> toAsyncEnum<'P> desc + module AggregateOp = + /// Runs one of the typical Cosmos SDK extensions, e.g. 
CountAsync, logging the costs + let exec (desc: string) (query: IQueryable<'T>) run render: System.Threading.Tasks.Task<'R> = task { + if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.exec {desc} {query}", desc, query.ToQueryDefinition().QueryText) + let sw = System.Diagnostics.Stopwatch.StartNew() + let! (rsp: Response<'R>) = run query + let res = rsp.Resource + let summary = render res + let m = rsp.Diagnostics.GetQueryMetrics().CumulativeMetrics + Log.Information("CosmosStoreQuery.count {res} {desc} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", + desc, summary, m.RetrievedDocumentCount, miB m.RetrievedDocumentSize, miB m.OutputDocumentSize, rsp.RequestCharge, sw.ElapsedMilliseconds) + return res } + /// Runs query.CountAsync, with instrumentation equivalent to what query provides + let countAsync desc (query: IQueryable<'T>) ct = + exec desc query (_.CountAsync(ct)) id + module Scalar = + /// Generates a TOP 1 SQL query + let top1 (query: IQueryable<'T>) = + query.Take(1) + /// Handles a query that's expected to yield 0 or 1 result item + let tryHeadAsync<'T, 'R> desc (container: Container) (query: IQueryable<'T>) (_ct: CancellationToken): Task<'R option> = + let queryDefinition = (top1 query).ToQueryDefinition() + if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.tryScalar {desc} {query}", desc, queryDefinition.QueryText) + container.GetItemQueryIterator<'R>(queryDefinition) |> Query.toAsyncEnum desc |> TaskSeq.tryHead + type Projection<'T, 'M>(query, description, container, enum: IQueryable<'T> -> TaskSeq<'M>) = + static member Create<'P>(q, d, c, hydrate: 'P -> 'M) = Projection<'T, 'M>(q, d, c, Query.enum<'T, 'P> d c >> TaskSeq.map hydrate) + member _.Enum: TaskSeq<'M> = query |> enum + member x.EnumPage(skip, take): TaskSeq<'M> = query |> Query.offsetLimit (skip, take) |> enum + member _.CountAsync: CancellationToken -> Task = query |> AggregateOp.countAsync description + [] member val Query: IQueryable<'T> = query + [] member val Description: string = description + [] member val Container: Container = container + // We want to generate a projection statement of the shape: VALUE {"sn": root["p"], "snap": root["u"][0].["d"]} // However the Cosmos SDK does not support F# (or C#) records yet https://github.com/Azure/azure-cosmos-dotnet-v3/issues/3728 // F#'s LINQ support cannot translate parameterless constructor invocations in a Lambda well; @@ -18,120 +115,17 @@ type SnAndSnap<'I>() = member val sn: FsCodec.StreamName = Unchecked.defaultof<_> with get, set [)>] member val snap: 'I = Unchecked.defaultof<_> with get, set - -module Internal = - - module Expression = - let replace find replace = - { new ExpressionVisitor() with - override _.Visit node = - if node = find then replace - else base.Visit node } - // https://stackoverflow.com/a/8829845/11635 - let compose (selector: Expression>) (predicate: Expression>) = - let param = Expression.Parameter(typeof<'T>, "x") - let prop = (replace selector.Parameters[0] param).Visit(selector.Body) - let body = (replace predicate.Parameters[0] prop).Visit(predicate.Body) - Expression.Lambda>(body, param) - let orderBy (source: IQueryable<'T>) (indexSelector: Expression>) (propertyName: string) descending = - let indexSortProperty = Expression.PropertyOrField(indexSelector.Body, propertyName) - let keySelector = Expression.Lambda(indexSortProperty, indexSelector.Parameters[0]) - let call = Expression.Call( - typeof, - (if descending then "OrderByDescending" else 
"OrderBy"), - [| typeof<'T>; indexSortProperty.Type |], - source.Expression, - keySelector) - source.Provider.CreateQuery<'T>(call) - let createSnAndSnapFromItemQuery<'T, 'I>(snExpression: Expression -> MemberExpression, snapExpression: Expression>) = - let param = Expression.Parameter(typeof<'T>, "x") - let targetType = typeof> - let snMember = targetType.GetMember(nameof Unchecked.defaultof>.sn)[0] - let snapMember = targetType.GetMember(nameof Unchecked.defaultof>.snap)[0] - Expression.Lambda>>( - Expression.MemberInit( - Expression.New(targetType.GetConstructor [||]), - [| Expression.Bind(snMember, snExpression param) :> MemberBinding - Expression.Bind(snapMember, (replace snapExpression.Parameters[0] param).Visit(snapExpression.Body)) |]), - [| param |]) - - open Microsoft.Azure.Cosmos - open FSharp.Control // taskSeq - [] // In case of emergency, use this, but log an issue so we can understand why - let enum_ (iterator: FeedIterator<'T>) = taskSeq { - while iterator.HasMoreResults do - let! response = iterator.ReadNextAsync() - let m = response.Diagnostics.GetQueryMetrics().CumulativeMetrics - yield struct (response.Diagnostics.GetClientElapsedTime(), response.RequestCharge, response.Resource, - int m.RetrievedDocumentCount, int m.RetrievedDocumentSize, int m.OutputDocumentSize) } - let inline miB x = float x / 1024. / 1024. - let taskEnum<'T> (desc: string) (iterator: FeedIterator<'T>) = taskSeq { - let sw = System.Diagnostics.Stopwatch.StartNew() - use _ = iterator - let mutable responses, items, totalRtt, totalRu, totalRdc, totalRds, totalOds = 0, 0, TimeSpan.Zero, 0., 0, 0, 0 - try for rtt, rc, response, rdc, rds, ods in enum_ iterator do - responses <- responses + 1 - totalRdc <- totalRdc + rdc - totalRds <- totalRds + rds - totalOds <- totalOds + ods - totalRu <- totalRu + rc - totalRtt <- totalRtt + rtt - for item in response do - items <- items + 1 - yield item - finally Log.Information("CosmosStoreQuery.enum {desc} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", - desc, items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, sw.ElapsedMilliseconds) } - - (* Query preparation *) - - /// Generates a TOP 1 SQL query - let top1 (query: IQueryable<'T>) = - query.Take(1) - /// Generates an `OFFSET skip LIMIT take` Cosmos SQL query - /// NOTE: such a query gets more expensive the more your Skip traverses, so use with care - /// NOTE: (continuation tokens are the key to more linear costs) - let offsetLimit (skip: int, take: int) (query: IQueryable<'T>) = - query.Skip(skip).Take(take) - - (* IAsyncEnumerable aka TaskSeq wrapping *) - - open Microsoft.Azure.Cosmos.Linq - /// Runs a query that renders 'T, Hydrating the results as 'P (can be the same types but e.g. you might want to map an object to a JsonElement etc) - let enum<'T, 'P> desc (container: Container) (query: IQueryable<'T>): IAsyncEnumerable<'P> = - let queryDefinition = query.ToQueryDefinition() - if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.query {desc} {query}", desc, queryDefinition.QueryText) - container.GetItemQueryIterator<'P>(queryDefinition) |> taskEnum<'P> desc - - (* Scalar call dispatch *) - - /// Runs one of the typical Cosmos SDK extensions, e.g. 
CountAsync, logging the costs - let exec (desc: string) (query: IQueryable<'T>) run render: System.Threading.Tasks.Task<'R> = task { - if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.exec {desc} {query}", desc, query.ToQueryDefinition().QueryText) - let sw = System.Diagnostics.Stopwatch.StartNew() - let! (rsp: Response<'R>) = run query - let res = rsp.Resource - let summary = render res - let m = rsp.Diagnostics.GetQueryMetrics().CumulativeMetrics - Log.Information("CosmosStoreQuery.count {res} {desc} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", - desc, summary, m.RetrievedDocumentCount, miB m.RetrievedDocumentSize, miB m.OutputDocumentSize, rsp.RequestCharge, sw.ElapsedMilliseconds) - return res } - /// Run's query.CountAsync, with instrumentation equivalent to what query provides - let countAsync desc (query: IQueryable<'T>) ct = - exec desc query (_.CountAsync(ct)) id - - let tryHeadAsync<'T, 'R> desc (container: Container) (query: IQueryable<'T>) (_ct: CancellationToken) = - let queryDefinition = (top1 query).ToQueryDefinition() - if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.tryScalar {desc} {query}", desc, queryDefinition.QueryText) - container.GetItemQueryIterator<'R>(queryDefinition) |> taskEnum desc |> FSharp.Control.TaskSeq.tryHead - - type Projection<'T, 'M>(query, description, container, enum: IQueryable<'T> -> IAsyncEnumerable<'M>) = - static member Create<'P>(q, d, c, hydrate: 'P -> 'M) = Projection<'T, 'M>(q, d, c, enum<'T, 'P> d c >> TaskSeq.map hydrate) - member _.Enum: IAsyncEnumerable<'M> = query |> enum - member x.EnumPage(skip, take): IAsyncEnumerable<'M> = query |> offsetLimit (skip, take) |> enum - member _.CountAsync: CancellationToken -> Task = query |> countAsync description - [] member val Query: IQueryable<'T> = query - [] member val Description: string = description - [] member val Container: Container = container + static member CreateItemQueryLambda<'T>(snExpression: Expression -> MemberExpression, snapExpression: Expression>) = + let param = Expression.Parameter(typeof<'T>, "x") + let targetType = typeof> + let snMember = targetType.GetMember(nameof Unchecked.defaultof>.sn)[0] + let snapMember = targetType.GetMember(nameof Unchecked.defaultof>.snap)[0] + Expression.Lambda>>( + Expression.MemberInit( + Expression.New(targetType.GetConstructor [||]), + [| Expression.Bind(snMember, snExpression param) :> MemberBinding + Expression.Bind(snapMember, QueryExtensions.Replace(snapExpression.Parameters[0], param).Visit(snapExpression.Body)) |]), + [| param |]) /// Helpers for Querying and Projecting results based on relevant aspects of Equinox.CosmosStore's storage schema module Index = @@ -159,19 +153,19 @@ module Index = container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix) && d.u[0].c = caseName) /// Returns the StreamName (from the `p` field) for a 0/1 item query; only the TOP 1 item is returned - let tryGetStreamNameAsync description container (query: IQueryable>) = - Internal.tryHeadAsync description container (query.Select(fun x -> x.p)) + let tryGetStreamNameAsync description container (query: IQueryable>) ct = + Internal.Scalar.tryHeadAsync description container (query.Select(fun x -> x.p)) ct /// Query the items, returning the Stream name and the Snapshot as a JsonElement (Decompressed if applicable) let projectStreamNameAndSnapshot<'I> snapExpression: Expression, SnAndSnap<'I>>> = // a very ugly workaround for not being able to write 
query.Select,Internal.SnAndSnap<'I>>(fun x -> { p = x.p; snap = x.u[0].d }) let pExpression item = Expression.PropertyOrField(item, nameof Unchecked.defaultof>.p) - Internal.Expression.createSnAndSnapFromItemQuery, 'I>(pExpression, snapExpression) + SnAndSnap.CreateItemQueryLambda(pExpression, snapExpression) /// Represents a query projecting information values from an Index and/or Snapshots with a view to rendering the items and/or a count type Query<'T, 'M>(inner: Internal.Projection<'T, 'M>) = - member _.Enum: IAsyncEnumerable<'M> = inner.Enum - member _.EnumPage(skip, take): IAsyncEnumerable<'M> = inner.EnumPage(skip, take) + member _.Enum: TaskSeq<'M> = inner.Enum + member _.EnumPage(skip, take): TaskSeq<'M> = inner.EnumPage(skip, take) member _.CountAsync(ct: CancellationToken): Task = inner.CountAsync ct member _.Count(): Async = inner.CountAsync |> Async.call [] member val Inner = inner From 575ef4e627e44db74c1a17d060ebc46e88586d10 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Thu, 4 Apr 2024 12:23:14 +0100 Subject: [PATCH 13/56] Predicate finessing, log fixes --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 46 ++++++++++++++++------ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 17546963e..7131c0c56 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -10,19 +10,13 @@ open System.Linq.Expressions /// Generic Expression Tree manipulation helpers / Cosmos SDK LINQ support incompleteness workarounds type [] QueryExtensions = - static member Replace(find, replace) = // https://stackoverflow.com/a/8829845/11635 + static member Replace(find, replace) = { new ExpressionVisitor() with override _.Visit node = if node = find then replace else base.Visit node } - static member Compose(selector: Expression>, predicate: Expression>) = - let param = Expression.Parameter(typeof<'T>, "x") - let prop = QueryExtensions.Replace(selector.Parameters[0], param).Visit(selector.Body) - let body = QueryExtensions.Replace(predicate.Parameters[0], prop).Visit(predicate.Body) - Expression.Lambda>(body, param) [] - static member Where(source: IQueryable<'T>, indexSelector: Expression>, indexPredicate: Expression>): IQueryable<'T> = - source.Where(QueryExtensions.Compose(indexSelector, indexPredicate)) + static member Replace(x: Expression, find, replace) = QueryExtensions.Replace(find, replace).Visit(x) [] static member OrderBy(source: IQueryable<'T>, indexSelector: Expression>, propertyName: string, descending) = let indexSortProperty = Expression.PropertyOrField(indexSelector.Body, propertyName) @@ -35,6 +29,29 @@ type [] QueryExtensions = keySelector) source.Provider.CreateQuery<'T>(call) +/// Predicate manipulation helpers +type [] Predicate = + /// F# maps `fun` expressions to Expression trees, only when the target is a `member` arg + /// See https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/linq-to-sql for the list of supported constructs + static member Create<'T> expr: Expression> = expr + [] // https://stackoverflow.com/a/8829845/11635 + static member Compose(selector: Expression>, projector: Expression>): Expression> = + let param = Expression.Parameter(typeof<'T>, "x") + let prop = selector.Body.Replace(selector.Parameters[0], param) + let body = projector.Body.Replace(projector.Parameters[0], prop) + Expression.Lambda>(body, param) + [] // https://stackoverflow.com/a/22569086/11635 + static member And<'T>(l: 
Expression>, r: Expression>) = + let rBody = r.Body.Replace(r.Parameters[0], l.Parameters[0]) + Expression.Lambda>(Expression.AndAlso(l.Body, rBody), l.Parameters) + [] // https://stackoverflow.com/a/22569086/11635 + static member Or<'T>(l: Expression>, r: Expression>) = + let rBody = r.Body.Replace(r.Parameters[0], l.Parameters[0]) + Expression.Lambda>(Expression.OrElse(l.Body, rBody), l.Parameters) + [] + static member Where(source: IQueryable<'T>, indexSelector: Expression>, indexPredicate: Expression>): IQueryable<'T> = + source.Where(indexSelector.Compose indexPredicate) + module Internal = open Microsoft.Azure.Cosmos open Microsoft.Azure.Cosmos.Linq @@ -66,7 +83,7 @@ module Internal = items <- items + 1 yield item finally Log.Information("CosmosStoreQuery.enum {desc} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", - desc, items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, sw.ElapsedMilliseconds) } + desc, items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, sw.ElapsedMilliseconds) } /// Runs a query that renders 'T, Hydrating the results as 'P (can be the same types but e.g. you might want to map an object to a JsonElement etc) let enum<'T, 'P> desc (container: Container) (query: IQueryable<'T>): TaskSeq<'P> = let queryDefinition = query.ToQueryDefinition() @@ -75,13 +92,13 @@ module Internal = module AggregateOp = /// Runs one of the typical Cosmos SDK extensions, e.g. CountAsync, logging the costs let exec (desc: string) (query: IQueryable<'T>) run render: System.Threading.Tasks.Task<'R> = task { - if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.exec {desc} {query}", desc, query.ToQueryDefinition().QueryText) + if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.count {desc} {query}", desc, query.ToQueryDefinition().QueryText) let sw = System.Diagnostics.Stopwatch.StartNew() let! 
(rsp: Response<'R>) = run query let res = rsp.Resource let summary = render res let m = rsp.Diagnostics.GetQueryMetrics().CumulativeMetrics - Log.Information("CosmosStoreQuery.count {res} {desc} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", + Log.Information("CosmosStoreQuery.count {desc} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", desc, summary, m.RetrievedDocumentCount, miB m.RetrievedDocumentSize, miB m.OutputDocumentSize, rsp.RequestCharge, sw.ElapsedMilliseconds) return res } /// Runs query.CountAsync, with instrumentation equivalent to what query provides @@ -124,7 +141,7 @@ type SnAndSnap<'I>() = Expression.MemberInit( Expression.New(targetType.GetConstructor [||]), [| Expression.Bind(snMember, snExpression param) :> MemberBinding - Expression.Bind(snapMember, QueryExtensions.Replace(snapExpression.Parameters[0], param).Visit(snapExpression.Body)) |]), + Expression.Bind(snapMember, snapExpression.Body.Replace(snapExpression.Parameters[0], param)) |]), [| param |]) /// Helpers for Querying and Projecting results based on relevant aspects of Equinox.CosmosStore's storage schema @@ -177,6 +194,11 @@ type IndexContext<'I>(container, categoryName, caseName) = member val Description = $"{categoryName}/{caseName}" with get, set member val Container = container + /// Helper to make F# consumption code more terse (the F# compiler generates Expression trees only when a function is passed to a `member`) + /// Example: `i.Predicate(fun e -> e.name = name)` + /// See https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/linq-to-sql for the list of supported constructs + member _.Predicate expr: Expression> = expr + /// Fetches a base Queryable that's filtered based on the `categoryName` and `caseName` /// NOTE this is relatively expensive to compute a Count on, compared to `CategoryQueryable` member _.ByCaseName(): IQueryable> = From 4d83e8cf0d8df830573dcd064500b1aecd558974 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Thu, 4 Apr 2024 15:39:52 +0100 Subject: [PATCH 14/56] RU Format tweak --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 7131c0c56..9389bd8a5 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -82,7 +82,7 @@ module Internal = for item in response do items <- items + 1 yield item - finally Log.Information("CosmosStoreQuery.enum {desc} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", + finally Log.Information("CosmosStoreQuery.enum {desc} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc:f2} RU {latency} ms", desc, items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, sw.ElapsedMilliseconds) } /// Runs a query that renders 'T, Hydrating the results as 'P (can be the same types but e.g. 
you might want to map an object to a JsonElement etc) let enum<'T, 'P> desc (container: Container) (query: IQueryable<'T>): TaskSeq<'P> = From 6320e99849522e4edc6b81417b9fd28064657324 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Fri, 5 Apr 2024 12:03:02 +0100 Subject: [PATCH 15/56] Add OrderByLambda; Explicate OrderByPropertyName --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 44 +++++++++++++--------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 9389bd8a5..453b90f3a 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -7,6 +7,7 @@ open System open System.ComponentModel open System.Linq open System.Linq.Expressions +open System.Runtime.CompilerServices /// Generic Expression Tree manipulation helpers / Cosmos SDK LINQ support incompleteness workarounds type [] QueryExtensions = @@ -15,40 +16,47 @@ type [] QueryExtensions = override _.Visit node = if node = find then replace else base.Visit node } - [] + [] static member Replace(x: Expression, find, replace) = QueryExtensions.Replace(find, replace).Visit(x) - [] - static member OrderBy(source: IQueryable<'T>, indexSelector: Expression>, propertyName: string, descending) = - let indexSortProperty = Expression.PropertyOrField(indexSelector.Body, propertyName) - let keySelector = Expression.Lambda(indexSortProperty, indexSelector.Parameters[0]) + [] // https://stackoverflow.com/a/8829845/11635 + static member Compose(selector: Expression>, projector: Expression>): Expression> = + let param = Expression.Parameter(typeof<'T>, "x") + let prop = selector.Body.Replace(selector.Parameters[0], param) + let body = projector.Body.Replace(projector.Parameters[0], prop) + Expression.Lambda>(body, param) + [] + static member OrderBy(source: IQueryable<'T>, indexSelector: Expression>, keySelector: Expression>, descending) = + QueryExtensions.OrderByLambda<'T>(source, indexSelector.Compose keySelector, descending) + [] // https://stackoverflow.com/a/233505/11635 + static member OrderByPropertyName(source: IQueryable<'T>, indexSelector: Expression>, propertyName: string, descending) = + let indexProperty = Expression.PropertyOrField(indexSelector.Body, propertyName) + let delegateType = typedefof>.MakeGenericType(typeof<'T>, indexProperty.Type) + let keySelector = Expression.Lambda(delegateType, indexProperty, indexSelector.Parameters[0]) + QueryExtensions.OrderByLambda(source, keySelector, descending) + // NOTE not an extension method as OrderByPropertyName and OrderBy represent the as-strongly-typed-as-possible top level use cases + // NOTE no support for a `comparison` arg is warranted as CosmosDB only handles direct scalar prop expressions, https://stackoverflow.com/a/69268191/11635 + static member OrderByLambda<'T>(source: IQueryable<'T>, keySelector: LambdaExpression, descending) = let call = Expression.Call( typeof, (if descending then "OrderByDescending" else "OrderBy"), - [| typeof<'T>; indexSortProperty.Type |], - source.Expression, - keySelector) - source.Provider.CreateQuery<'T>(call) + [| typeof<'T>; keySelector.ReturnType |], + [| source.Expression; keySelector |]) + source.Provider.CreateQuery<'T>(call) :?> IOrderedQueryable<'T> /// Predicate manipulation helpers type [] Predicate = /// F# maps `fun` expressions to Expression trees, only when the target is a `member` arg /// See https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/linq-to-sql for the list of 
supported constructs static member Create<'T> expr: Expression> = expr - [] // https://stackoverflow.com/a/8829845/11635 - static member Compose(selector: Expression>, projector: Expression>): Expression> = - let param = Expression.Parameter(typeof<'T>, "x") - let prop = selector.Body.Replace(selector.Parameters[0], param) - let body = projector.Body.Replace(projector.Parameters[0], prop) - Expression.Lambda>(body, param) - [] // https://stackoverflow.com/a/22569086/11635 + [] // https://stackoverflow.com/a/22569086/11635 static member And<'T>(l: Expression>, r: Expression>) = let rBody = r.Body.Replace(r.Parameters[0], l.Parameters[0]) Expression.Lambda>(Expression.AndAlso(l.Body, rBody), l.Parameters) - [] // https://stackoverflow.com/a/22569086/11635 + [] // https://stackoverflow.com/a/22569086/11635 static member Or<'T>(l: Expression>, r: Expression>) = let rBody = r.Body.Replace(r.Parameters[0], l.Parameters[0]) Expression.Lambda>(Expression.OrElse(l.Body, rBody), l.Parameters) - [] + [] static member Where(source: IQueryable<'T>, indexSelector: Expression>, indexPredicate: Expression>): IQueryable<'T> = source.Where(indexSelector.Compose indexPredicate) From 6d8da9311565046fd07d1fef71944eadc17df3fc Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Mon, 8 Apr 2024 23:23:58 +0100 Subject: [PATCH 16/56] deps: Update Min Equinox ver to 4.0.2 --- src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj b/src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj index b57cea590..213d78b61 100644 --- a/src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj +++ b/src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj @@ -18,7 +18,7 @@ - + From 446c77a8a89498a28cf61fcfee3508238f45a89b Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Tue, 9 Apr 2024 00:44:16 +0100 Subject: [PATCH 17/56] Add metrics wiring --- src/Equinox.CosmosStore/CosmosStore.fs | 2 +- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 67 +++++++++++++--------- 2 files changed, 40 insertions(+), 29 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStore.fs b/src/Equinox.CosmosStore/CosmosStore.fs index 18ed7bf13..7ab557851 100644 --- a/src/Equinox.CosmosStore/CosmosStore.fs +++ b/src/Equinox.CosmosStore/CosmosStore.fs @@ -228,7 +228,7 @@ module Log = | Delete of Measurement /// Trimmed the Tip | Trim of Measurement - /// Queried via the Index + /// Queried via the Index; count=-1 -> aggregate operation | Index of Measurement let [] (|MetricEvent|_|) (logEvent: Serilog.Events.LogEvent): Metric voption = let mutable p = Unchecked.defaultof<_> diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 453b90f3a..1fcf6fbc8 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -1,6 +1,7 @@ namespace Equinox.CosmosStore.Linq -open Equinox.Core.Infrastructure +open Equinox.Core +open Equinox.CosmosStore.Core // Log, JsonCompressedBase64Converter open FSharp.Control // taskSeq open Serilog open System @@ -76,8 +77,8 @@ module Internal = let m = response.Diagnostics.GetQueryMetrics().CumulativeMetrics yield struct (response.Diagnostics.GetClientElapsedTime(), response.RequestCharge, response.Resource, int m.RetrievedDocumentCount, int m.RetrievedDocumentSize, int m.OutputDocumentSize) } - let toAsyncEnum<'T> (desc: string) (iterator: FeedIterator<'T>) = taskSeq { - let sw = System.Diagnostics.Stopwatch.StartNew() + let [] 
toAsyncEnum<'T> log (container: Container) cat (iterator: FeedIterator<'T>) = taskSeq { + let startTicks = System.Diagnostics.Stopwatch.GetTimestamp() use _ = iterator let mutable responses, items, totalRtt, totalRu, totalRdc, totalRds, totalOds = 0, 0, TimeSpan.Zero, 0., 0, 0, 0 try for rtt, rc, response, rdc, rds, ods in enum_ iterator do @@ -90,44 +91,53 @@ module Internal = for item in response do items <- items + 1 yield item - finally Log.Information("CosmosStoreQuery.enum {desc} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc:f2} RU {latency} ms", - desc, items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, sw.ElapsedMilliseconds) } + finally + let interval = StopwatchInterval(startTicks, System.Diagnostics.Stopwatch.GetTimestamp()) + let log = let evt = Log.Metric.Index { database = container.Database.Id; container = container.Id; stream = cat + FsCodec.StreamName.Category.SeparatorStr + interval = interval; bytes = totalOds; count = items; ru = totalRu } in log |> Log.event evt + log.Information("EqxCosmos {action:l} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc:f2} RU {latency} ms", + "Index", items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, interval.ElapsedMilliseconds) } /// Runs a query that renders 'T, Hydrating the results as 'P (can be the same types but e.g. you might want to map an object to a JsonElement etc) - let enum<'T, 'P> desc (container: Container) (query: IQueryable<'T>): TaskSeq<'P> = + let enum<'T, 'P> (log: ILogger) (container: Container) cat (query: IQueryable<'T>): TaskSeq<'P> = let queryDefinition = query.ToQueryDefinition() - if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.query {desc} {query}", desc, queryDefinition.QueryText) - container.GetItemQueryIterator<'P>(queryDefinition) |> toAsyncEnum<'P> desc + if log.IsEnabled Serilog.Events.LogEventLevel.Debug then log.Debug("CosmosStoreQuery.query {cat} {query}", cat, queryDefinition.QueryText) + container.GetItemQueryIterator<'P>(queryDefinition) |> toAsyncEnum<'P> log container cat module AggregateOp = /// Runs one of the typical Cosmos SDK extensions, e.g. CountAsync, logging the costs - let exec (desc: string) (query: IQueryable<'T>) run render: System.Threading.Tasks.Task<'R> = task { - if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.count {desc} {query}", desc, query.ToQueryDefinition().QueryText) - let sw = System.Diagnostics.Stopwatch.StartNew() + let [] exec (log: ILogger) (container: Container) (op: string) (cat: string) (query: IQueryable<'T>) run render: System.Threading.Tasks.Task<'R> = task { + let startTicks = System.Diagnostics.Stopwatch.GetTimestamp() let! 
(rsp: Response<'R>) = run query let res = rsp.Resource let summary = render res let m = rsp.Diagnostics.GetQueryMetrics().CumulativeMetrics - Log.Information("CosmosStoreQuery.count {desc} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", - desc, summary, m.RetrievedDocumentCount, miB m.RetrievedDocumentSize, miB m.OutputDocumentSize, rsp.RequestCharge, sw.ElapsedMilliseconds) + let interval = StopwatchInterval(startTicks, System.Diagnostics.Stopwatch.GetTimestamp()) + let totalOds, totalRu = m.OutputDocumentSize, rsp.RequestCharge + let log = let evt = Log.Metric.Index { database = container.Database.Id; container = container.Id; stream = cat + FsCodec.StreamName.Category.SeparatorStr + interval = interval; bytes = int totalOds; count = -1; ru = totalRu } in log |> Log.event evt + log.Information("EqxCosmos {action:l} {cat} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", + op, cat, summary, m.RetrievedDocumentCount, miB m.RetrievedDocumentSize, miB totalOds, totalRu, interval.ElapsedMilliseconds) return res } /// Runs query.CountAsync, with instrumentation equivalent to what query provides - let countAsync desc (query: IQueryable<'T>) ct = - exec desc query (_.CountAsync(ct)) id + let countAsync (log: ILogger) container cat (query: IQueryable<'T>) ct = + if log.IsEnabled Serilog.Events.LogEventLevel.Debug then log.Debug("CosmosStoreQuery.count {cat} {query}", cat, query.ToQueryDefinition().QueryText) + exec log container "count" cat query (_.CountAsync(ct)) id module Scalar = /// Generates a TOP 1 SQL query let top1 (query: IQueryable<'T>) = query.Take(1) /// Handles a query that's expected to yield 0 or 1 result item - let tryHeadAsync<'T, 'R> desc (container: Container) (query: IQueryable<'T>) (_ct: CancellationToken): Task<'R option> = + let tryHeadAsync<'T, 'R> (log: ILogger) (container: Container) cat (query: IQueryable<'T>) (_ct: CancellationToken): Task<'R option> = let queryDefinition = (top1 query).ToQueryDefinition() - if Log.IsEnabled Serilog.Events.LogEventLevel.Debug then Log.Debug("CosmosStoreQuery.tryScalar {desc} {query}", desc, queryDefinition.QueryText) - container.GetItemQueryIterator<'R>(queryDefinition) |> Query.toAsyncEnum desc |> TaskSeq.tryHead - type Projection<'T, 'M>(query, description, container, enum: IQueryable<'T> -> TaskSeq<'M>) = - static member Create<'P>(q, d, c, hydrate: 'P -> 'M) = Projection<'T, 'M>(q, d, c, Query.enum<'T, 'P> d c >> TaskSeq.map hydrate) + if log.IsEnabled Serilog.Events.LogEventLevel.Debug then log.Debug("CosmosStoreQuery.tryScalar {cat} {query}", queryDefinition.QueryText) + container.GetItemQueryIterator<'R>(queryDefinition) |> Query.toAsyncEnum log container cat |> TaskSeq.tryHead + type Projection<'T, 'M>(query, category, container, enum: IQueryable<'T> -> TaskSeq<'M>, count: IQueryable<'T> -> CancellationToken -> Task) = + static member Create<'P>(q, cat, c, log, hydrate: 'P -> 'M) = + Projection<'T, 'M>(q, cat, c, Query.enum<'T, 'P> log c cat >> TaskSeq.map hydrate, AggregateOp.countAsync log c cat) member _.Enum: TaskSeq<'M> = query |> enum member x.EnumPage(skip, take): TaskSeq<'M> = query |> Query.offsetLimit (skip, take) |> enum - member _.CountAsync: CancellationToken -> Task = query |> AggregateOp.countAsync description + member _.CountAsync: CancellationToken -> Task = query |> count [] member val Query: IQueryable<'T> = query - [] member val Description: string = description + [] member val Category: string = category [] member val Container: Container = container // We want to generate a 
projection statement of the shape: VALUE {"sn": root["p"], "snap": root["u"][0].["d"]} @@ -138,7 +148,7 @@ module Internal = // This hack is based on https://stackoverflow.com/a/73506241/11635 type SnAndSnap<'I>() = member val sn: FsCodec.StreamName = Unchecked.defaultof<_> with get, set - [)>] + [)>] member val snap: 'I = Unchecked.defaultof<_> with get, set static member CreateItemQueryLambda<'T>(snExpression: Expression -> MemberExpression, snapExpression: Expression>) = let param = Expression.Parameter(typeof<'T>, "x") @@ -178,8 +188,8 @@ module Index = container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix) && d.u[0].c = caseName) /// Returns the StreamName (from the `p` field) for a 0/1 item query; only the TOP 1 item is returned - let tryGetStreamNameAsync description container (query: IQueryable>) ct = - Internal.Scalar.tryHeadAsync description container (query.Select(fun x -> x.p)) ct + let tryGetStreamNameAsync log cat container (query: IQueryable>) ct = + Internal.Scalar.tryHeadAsync log cat container (query.Select(fun x -> x.p)) ct /// Query the items, returning the Stream name and the Snapshot as a JsonElement (Decompressed if applicable) let projectStreamNameAndSnapshot<'I> snapExpression: Expression, SnAndSnap<'I>>> = @@ -197,8 +207,9 @@ type Query<'T, 'M>(inner: Internal.Projection<'T, 'M>) = /// Enables querying based on uncompressed Indexed values stored as secondary unfolds alongside the snapshot [] -type IndexContext<'I>(container, categoryName, caseName) = +type IndexContext<'I>(container, categoryName, caseName, log) = + member val Log = defaultArg log Log.Logger member val Description = $"{categoryName}/{caseName}" with get, set member val Container = container @@ -218,7 +229,7 @@ type IndexContext<'I>(container, categoryName, caseName) = /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria member x.TryGetStreamNameWhereAsync(criteria: Expressions.Expression, bool>>, ct) = - Index.tryGetStreamNameAsync x.Description container (x.ByCategory().Where criteria) ct + Index.tryGetStreamNameAsync x.Log container categoryName (x.ByCategory().Where criteria) ct /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria member x.TryGetStreamNameWhere(criteria: Expressions.Expression, bool>>): Async = @@ -227,4 +238,4 @@ type IndexContext<'I>(container, categoryName, caseName) = /// Query the items, grabbing the Stream name and the Snapshot; The StreamName and the (Decompressed if applicable) Snapshot are passed to `hydrate` member x.QueryStreamNameAndSnapshot(query: IQueryable>, selectBody: Expression, 'I>>, hydrate: SnAndSnap -> 'M) = - Internal.Projection.Create(query.Select(Index.projectStreamNameAndSnapshot<'I> selectBody), x.Description, container, hydrate) |> Query + Internal.Projection.Create(query.Select(Index.projectStreamNameAndSnapshot<'I> selectBody), categoryName, container, x.Log, hydrate) |> Query From fdad5da215d81a0f6d3207685d9e3b7145f5bb62 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Tue, 9 Apr 2024 00:44:26 +0100 Subject: [PATCH 18/56] Release 4.1.0-alpha.6 --- CHANGELOG.md | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7b2c80ef..33d4c73bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ The `Unreleased` section name is replaced by the expected version of next releas ## [Unreleased] -## 4.1.0-alpha.1 - 2024-03-28 +## 4.1.0 - 2024/5 ### Added diff --git a/README.md b/README.md index 
de908ea92..3bcf739f5 100644 --- a/README.md +++ b/README.md @@ -170,7 +170,7 @@ The components within this repository are delivered as multi-targeted Nuget pack - `Equinox.Core` [![NuGet](https://img.shields.io/nuget/v/Equinox.Core.svg)](https://www.nuget.org/packages/Equinox.Core/): Hosts generic utility types frequently useful alongside Equinox: [`TaskCell`](https://github.com/jet/equinox/blob/master/src/Equinox.Core/TaskCell.fs#L36), [`Batcher`, `BatcherCache`, `BatcherDictionary`](https://github.com/jet/equinox/blob/master/src/Equinox.Core/Batching.fs#L44). ([depends](https://www.fuget.org/packages/Equinox.Core) on `System.Runtime.Caching`) - `Equinox.MemoryStore` [![MemoryStore NuGet](https://img.shields.io/nuget/v/Equinox.MemoryStore.svg)](https://www.nuget.org/packages/Equinox.MemoryStore/): In-memory store for integration testing/performance base-lining/providing out-of-the-box zero dependency storage for examples. ([depends](https://www.fuget.org/packages/Equinox.MemoryStore) on `Equinox`) -- `Equinox.CosmosStore` [![CosmosStore NuGet](https://img.shields.io/nuget/v/Equinox.CosmosStore.svg)](https://www.nuget.org/packages/Equinox.CosmosStore/): Azure CosmosDB Adapter with integrated 'unfolds' feature, facilitating optimal read performance in terms of latency and RU costs, instrumented to meet Jet's production monitoring requirements. ([depends](https://www.fuget.org/packages/Equinox.CosmosStore) on `Equinox`, `Equinox`, `Microsoft.Azure.Cosmos` >= `3.43.1`, `System.Text.Json`, `FSharp.Control.TaskSeq`) +- `Equinox.CosmosStore` [![CosmosStore NuGet](https://img.shields.io/nuget/v/Equinox.CosmosStore.svg)](https://www.nuget.org/packages/Equinox.CosmosStore/): Azure CosmosDB Adapter with integrated 'unfolds' feature, facilitating optimal read performance in terms of latency and RU costs, instrumented to meet Jet's production monitoring requirements. ([depends](https://www.fuget.org/packages/Equinox.CosmosStore) on `Equinox` v `4.0.2`, `Equinox`, `Microsoft.Azure.Cosmos` >= `3.43.1`, `System.Text.Json`, `FSharp.Control.TaskSeq`) - `Equinox.CosmosStore.Prometheus` [![CosmosStore.Prometheus NuGet](https://img.shields.io/nuget/v/Equinox.CosmosStore.Prometheus.svg)](https://www.nuget.org/packages/Equinox.CosmosStore.Prometheus/): Integration package providing a `Serilog.Core.ILogEventSink` that extracts detailed metrics information attached to the `LogEvent`s and feeds them to the `prometheus-net`'s `Prometheus.Metrics` static instance. ([depends](https://www.fuget.org/packages/Equinox.CosmosStore.Prometheus) on `Equinox.CosmosStore`, `prometheus-net >= 3.6.0`) - `Equinox.DynamoStore` [![DynamoStore NuGet](https://img.shields.io/nuget/v/Equinox.DynamoStore.svg)](https://www.nuget.org/packages/Equinox.DynamoStore/): Amazon DynamoDB Adapter with integrated 'unfolds' feature, facilitating optimal read performance in terms of latency and RC costs, patterned after `Equinox.CosmosStore`. ([depends](https://www.fuget.org/packages/Equinox.DynamoStore) on `Equinox`, `FSharp.AWS.DynamoDB` >= `0.12.0-beta`, `FSharp.Control.TaskSeq`) - `Equinox.DynamoStore.Prometheus` [![DynamoStore.Prometheus NuGet](https://img.shields.io/nuget/v/Equinox.DynamoStore.Prometheus.svg)](https://www.nuget.org/packages/Equinox.DynamoStore.Prometheus/): Integration package providing a `Serilog.Core.ILogEventSink` that extracts detailed metrics information attached to the `LogEvent`s and feeds them to the `prometheus-net`'s `Prometheus.Metrics` static instance. 
([depends](https://www.fuget.org/packages/Equinox.CosmosStore.Prometheus) on `Equinox.DynamoStore`, `prometheus-net >= 3.6.0`) From 08b9abb51516a27cd8093fc949d2c06f2135ff25 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Tue, 9 Apr 2024 01:49:08 +0100 Subject: [PATCH 19/56] fix log arg --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 1fcf6fbc8..7b8b4bf25 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -209,7 +209,7 @@ type Query<'T, 'M>(inner: Internal.Projection<'T, 'M>) = [] type IndexContext<'I>(container, categoryName, caseName, log) = - member val Log = defaultArg log Log.Logger + member val Log = log member val Description = $"{categoryName}/{caseName}" with get, set member val Container = container From 70b83f7a3a66a777bcf1eb9dbe4dc87757f96034 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Mon, 22 Apr 2024 11:58:09 +0100 Subject: [PATCH 20/56] Add queryLogLevel 4.1.0-alpha.7 --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 57 +++++++++++++--------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 7b8b4bf25..f586ad16c 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -97,11 +97,14 @@ module Internal = interval = interval; bytes = totalOds; count = items; ru = totalRu } in log |> Log.event evt log.Information("EqxCosmos {action:l} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc:f2} RU {latency} ms", "Index", items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, interval.ElapsedMilliseconds) } + /// Runs a query that can by hydrated as 'T + let enum<'T> (log: ILogger) (container: Container) cat (queryDefinition: QueryDefinition): TaskSeq<'T> = + container.GetItemQueryIterator<'T>(queryDefinition) |> toAsyncEnum<'T> log container cat /// Runs a query that renders 'T, Hydrating the results as 'P (can be the same types but e.g. you might want to map an object to a JsonElement etc) - let enum<'T, 'P> (log: ILogger) (container: Container) cat (query: IQueryable<'T>): TaskSeq<'P> = + let enumAs<'T, 'P> (log: ILogger) (container: Container) cat logLevel (query: IQueryable<'T>): TaskSeq<'P> = let queryDefinition = query.ToQueryDefinition() - if log.IsEnabled Serilog.Events.LogEventLevel.Debug then log.Debug("CosmosStoreQuery.query {cat} {query}", cat, queryDefinition.QueryText) - container.GetItemQueryIterator<'P>(queryDefinition) |> toAsyncEnum<'P> log container cat + if log.IsEnabled logLevel then log.Write(logLevel, "CosmosStoreQuery.query {cat} {query}", cat, queryDefinition.QueryText) + enum<'P> log container cat queryDefinition module AggregateOp = /// Runs one of the typical Cosmos SDK extensions, e.g. 
CountAsync, logging the costs let [] exec (log: ILogger) (container: Container) (op: string) (cat: string) (query: IQueryable<'T>) run render: System.Threading.Tasks.Task<'R> = task { @@ -118,21 +121,21 @@ module Internal = op, cat, summary, m.RetrievedDocumentCount, miB m.RetrievedDocumentSize, miB totalOds, totalRu, interval.ElapsedMilliseconds) return res } /// Runs query.CountAsync, with instrumentation equivalent to what query provides - let countAsync (log: ILogger) container cat (query: IQueryable<'T>) ct = - if log.IsEnabled Serilog.Events.LogEventLevel.Debug then log.Debug("CosmosStoreQuery.count {cat} {query}", cat, query.ToQueryDefinition().QueryText) + let countAsync (log: ILogger) container cat logLevel (query: IQueryable<'T>) ct = + if log.IsEnabled logLevel then log.Write(logLevel, "CosmosStoreQuery.count {cat} {query}", cat, query.ToQueryDefinition().QueryText) exec log container "count" cat query (_.CountAsync(ct)) id module Scalar = /// Generates a TOP 1 SQL query let top1 (query: IQueryable<'T>) = query.Take(1) /// Handles a query that's expected to yield 0 or 1 result item - let tryHeadAsync<'T, 'R> (log: ILogger) (container: Container) cat (query: IQueryable<'T>) (_ct: CancellationToken): Task<'R option> = + let tryHeadAsync<'T, 'R> (log: ILogger) (container: Container) cat logLevel (query: IQueryable<'T>) (_ct: CancellationToken): Task<'R option> = let queryDefinition = (top1 query).ToQueryDefinition() - if log.IsEnabled Serilog.Events.LogEventLevel.Debug then log.Debug("CosmosStoreQuery.tryScalar {cat} {query}", queryDefinition.QueryText) + if log.IsEnabled logLevel then log.Write(logLevel, "CosmosStoreQuery.tryScalar {cat} {query}", queryDefinition.QueryText) container.GetItemQueryIterator<'R>(queryDefinition) |> Query.toAsyncEnum log container cat |> TaskSeq.tryHead type Projection<'T, 'M>(query, category, container, enum: IQueryable<'T> -> TaskSeq<'M>, count: IQueryable<'T> -> CancellationToken -> Task) = - static member Create<'P>(q, cat, c, log, hydrate: 'P -> 'M) = - Projection<'T, 'M>(q, cat, c, Query.enum<'T, 'P> log c cat >> TaskSeq.map hydrate, AggregateOp.countAsync log c cat) + static member Create<'P>(q, cat, c, log, hydrate: 'P -> 'M, logLevel) = + Projection<'T, 'M>(q, cat, c, Query.enumAs<'T, 'P> log c cat logLevel >> TaskSeq.map hydrate, AggregateOp.countAsync log c cat logLevel) member _.Enum: TaskSeq<'M> = query |> enum member x.EnumPage(skip, take): TaskSeq<'M> = query |> Query.offsetLimit (skip, take) |> enum member _.CountAsync: CancellationToken -> Task = query |> count @@ -162,6 +165,14 @@ type SnAndSnap<'I>() = Expression.Bind(snapMember, snapExpression.Body.Replace(snapExpression.Parameters[0], param)) |]), [| param |]) +/// Represents a query projecting information values from an Index and/or Snapshots with a view to rendering the items and/or a count +type Query<'T, 'M>(inner: Internal.Projection<'T, 'M>) = + member _.Enum: TaskSeq<'M> = inner.Enum + member _.EnumPage(skip, take): TaskSeq<'M> = inner.EnumPage(skip, take) + member _.CountAsync(ct: CancellationToken): Task = inner.CountAsync ct + member _.Count(): Async = inner.CountAsync |> Async.call + [] member val Inner = inner + /// Helpers for Querying and Projecting results based on relevant aspects of Equinox.CosmosStore's storage schema module Index = @@ -188,8 +199,8 @@ module Index = container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix) && d.u[0].c = caseName) /// Returns the StreamName (from the `p` field) for a 0/1 item query; only the TOP 1 item is returned - 
let tryGetStreamNameAsync log cat container (query: IQueryable>) ct = - Internal.Scalar.tryHeadAsync log cat container (query.Select(fun x -> x.p)) ct + let tryGetStreamNameAsync log cat logLevel container (query: IQueryable>) ct = + Internal.Scalar.tryHeadAsync log cat logLevel container (query.Select(fun x -> x.p)) ct /// Query the items, returning the Stream name and the Snapshot as a JsonElement (Decompressed if applicable) let projectStreamNameAndSnapshot<'I> snapExpression: Expression, SnAndSnap<'I>>> = @@ -197,18 +208,14 @@ module Index = let pExpression item = Expression.PropertyOrField(item, nameof Unchecked.defaultof>.p) SnAndSnap.CreateItemQueryLambda(pExpression, snapExpression) -/// Represents a query projecting information values from an Index and/or Snapshots with a view to rendering the items and/or a count -type Query<'T, 'M>(inner: Internal.Projection<'T, 'M>) = - member _.Enum: TaskSeq<'M> = inner.Enum - member _.EnumPage(skip, take): TaskSeq<'M> = inner.EnumPage(skip, take) - member _.CountAsync(ct: CancellationToken): Task = inner.CountAsync ct - member _.Count(): Async = inner.CountAsync |> Async.call - [] member val Inner = inner + let createSnAndSnapshotQuery<'I, 'M> log container cat logLevel (hydrate: SnAndSnap -> 'M) (query: IQueryable>) = + Internal.Projection.Create(query, cat, container, log, hydrate, logLevel) |> Query, 'M> /// Enables querying based on uncompressed Indexed values stored as secondary unfolds alongside the snapshot [] -type IndexContext<'I>(container, categoryName, caseName, log) = +type IndexContext<'I>(container, categoryName, caseName, log, []?queryLogLevel) = + let queryLogLevel = defaultArg queryLogLevel Serilog.Events.LogEventLevel.Debug member val Log = log member val Description = $"{categoryName}/{caseName}" with get, set member val Container = container @@ -228,8 +235,9 @@ type IndexContext<'I>(container, categoryName, caseName, log) = Index.byCategoryNameOnly<'I> container categoryName /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria - member x.TryGetStreamNameWhereAsync(criteria: Expressions.Expression, bool>>, ct) = - Index.tryGetStreamNameAsync x.Log container categoryName (x.ByCategory().Where criteria) ct + member x.TryGetStreamNameWhereAsync(criteria: Expressions.Expression, bool>>, ct, [] ?logLevel) = + let logLevel = defaultArg logLevel queryLogLevel + Index.tryGetStreamNameAsync x.Log container categoryName logLevel (x.ByCategory().Where criteria) ct /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria member x.TryGetStreamNameWhere(criteria: Expressions.Expression, bool>>): Async = @@ -237,5 +245,8 @@ type IndexContext<'I>(container, categoryName, caseName, log) = /// Query the items, grabbing the Stream name and the Snapshot; The StreamName and the (Decompressed if applicable) Snapshot are passed to `hydrate` member x.QueryStreamNameAndSnapshot(query: IQueryable>, selectBody: Expression, 'I>>, - hydrate: SnAndSnap -> 'M) = - Internal.Projection.Create(query.Select(Index.projectStreamNameAndSnapshot<'I> selectBody), categoryName, container, x.Log, hydrate) |> Query + hydrate: SnAndSnap -> 'M, + [] ?logLevel): Query, 'M> = + let logLevel = defaultArg logLevel queryLogLevel + query.Select(Index.projectStreamNameAndSnapshot<'I> selectBody) + |> Index.createSnAndSnapshotQuery x.Log container categoryName logLevel hydrate From 532102cae5d72c90d3707e4e2e4f62bff391c3b0 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Tue, 23 Apr 2024 11:27:42 +0100 Subject: 
[PATCH 21/56] Format latency as n0 per rest of system --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index f586ad16c..2e879a066 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -95,7 +95,7 @@ module Internal = let interval = StopwatchInterval(startTicks, System.Diagnostics.Stopwatch.GetTimestamp()) let log = let evt = Log.Metric.Index { database = container.Database.Id; container = container.Id; stream = cat + FsCodec.StreamName.Category.SeparatorStr interval = interval; bytes = totalOds; count = items; ru = totalRu } in log |> Log.event evt - log.Information("EqxCosmos {action:l} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc:f2} RU {latency} ms", + log.Information("EqxCosmos {action:l} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc:f2} RU {lat:n0} ms", "Index", items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, interval.ElapsedMilliseconds) } /// Runs a query that can by hydrated as 'T let enum<'T> (log: ILogger) (container: Container) cat (queryDefinition: QueryDefinition): TaskSeq<'T> = @@ -117,7 +117,7 @@ module Internal = let totalOds, totalRu = m.OutputDocumentSize, rsp.RequestCharge let log = let evt = Log.Metric.Index { database = container.Database.Id; container = container.Id; stream = cat + FsCodec.StreamName.Category.SeparatorStr interval = interval; bytes = int totalOds; count = -1; ru = totalRu } in log |> Log.event evt - log.Information("EqxCosmos {action:l} {cat} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {latency} ms", + log.Information("EqxCosmos {action:l} {cat} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {lat:n0} ms", op, cat, summary, m.RetrievedDocumentCount, miB m.RetrievedDocumentSize, miB totalOds, totalRu, interval.ElapsedMilliseconds) return res } /// Runs query.CountAsync, with instrumentation equivalent to what query provides From 497c56229dd2cfa021f9fc6e3eb36022e746324b Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 8 May 2024 15:27:38 +0100 Subject: [PATCH 22/56] Release 4.1.0-alpha.9 --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 2e879a066..e56ac83a4 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -250,3 +250,5 @@ type IndexContext<'I>(container, categoryName, caseName, log, []?quer let logLevel = defaultArg logLevel queryLogLevel query.Select(Index.projectStreamNameAndSnapshot<'I> selectBody) |> Index.createSnAndSnapshotQuery x.Log container categoryName logLevel hydrate + +// TODO remove this! 
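For orientation between patches: a minimal consumption sketch of the `IndexContext` surface built up in the preceding patches. The `UserIndex` record, the container/logger handles and the literal values are hypothetical placeholders; only members shown in the diffs above (`ByCaseName`, `TryGetStreamNameWhere`, `QueryStreamNameAndSnapshot`, `EnumPage`, `Count`) are used, and because the generic annotations are abbreviated in this rendering, the lambda parameter types are assumptions inferred from the workaround comments in the patches rather than definitive signatures.

```fsharp
open FSharp.Control // TaskSeq

// Hypothetical shape of the uncompressed unfold body indexed alongside the snapshot
type UserIndex = { email: string; name: string }

let sketch (container: Microsoft.Azure.Cosmos.Container) (log: Serilog.ILogger) = async {
    // Scope queries to the '$User' category, to streams whose Tip carries an 'EmailIndex' unfold
    let index = Equinox.CosmosStore.Linq.IndexContext<UserIndex>(container, "$User", "EmailIndex", log)
    // TOP 1 query: resolve the StreamName of the stream whose indexed email matches
    // (the lambda becomes an Expression tree because the member arg is typed as Expression<Func<_, _>>)
    let! sn = index.TryGetStreamNameWhere(fun i -> i.u[0].d.email = "a@b.com")
    // Project StreamName + (decompressed) snapshot for every stream in the case; selectBody is assumed
    // to range over the indexed item, per the `fun x -> { p = x.p; snap = x.u[0].d }` workaround comment
    let query = index.QueryStreamNameAndSnapshot(index.ByCaseName(), (fun item -> item.u[0].d), (fun x -> x.sn, x.snap))
    // Page through the matching items, then take a count of the full result set
    let! page = query.EnumPage(0, 20) |> TaskSeq.toListAsync |> Async.AwaitTask
    let! total = query.Count()
    return sn, page, total }
```

The paging and count calls above would go through the instrumented `OFFSET/LIMIT` and `CountAsync` paths added earlier in the series, so both the RU charge and the output document size surface in the `EqxCosmos Index` log events and the `Log.Metric.Index` metrics introduced by the metrics-wiring patch.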
From 1af541b5795e1d1f27947a6726a5bbafd815572a Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Fri, 24 May 2024 17:13:16 +0100 Subject: [PATCH 23/56] Tidy --- tools/Equinox.Tool/Program.fs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 705e6cf03..38a1c67da 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -356,14 +356,13 @@ let prettySerdes = lazy FsCodec.SystemTextJson.Serdes(FsCodec.SystemTextJson.Opt module CosmosQuery = let inline miB x = Equinox.CosmosStore.Linq.Internal.miB x - let private unixEpoch = DateTime.UnixEpoch type System.Text.Json.JsonElement with member x.Utf8ByteCount = if x.ValueKind = System.Text.Json.JsonValueKind.Null then 0 else x.GetRawText() |> System.Text.Encoding.UTF8.GetByteCount type System.Text.Json.JsonDocument with member x.Cast<'T>() = System.Text.Json.JsonSerializer.Deserialize<'T>(x.RootElement) member x.Timestamp = let ok, p = x.RootElement.TryGetProperty("_ts") - if ok then p.GetDouble() |> unixEpoch.AddSeconds |> Some else None + if ok then p.GetDouble() |> DateTime.UnixEpoch.AddSeconds |> Some else None let private composeSql (a: QueryArguments) = let inline warnOnUnfiltered () = let lel = if a.Mode = Mode.Raw then LogEventLevel.Debug elif a.Filepath = None then LogEventLevel.Warning else LogEventLevel.Information From 4dddeddd5e5f26bfa0bc3bb26b99a778302a9da5 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Tue, 4 Jun 2024 13:44:18 +0100 Subject: [PATCH 24/56] Consolidate log/enum --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 96 +++++++++++----------- tools/Equinox.Tool/Program.fs | 40 +++++---- 2 files changed, 68 insertions(+), 68 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index e56ac83a4..846c0cf3a 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -77,7 +77,8 @@ module Internal = let m = response.Diagnostics.GetQueryMetrics().CumulativeMetrics yield struct (response.Diagnostics.GetClientElapsedTime(), response.RequestCharge, response.Resource, int m.RetrievedDocumentCount, int m.RetrievedDocumentSize, int m.OutputDocumentSize) } - let [] toAsyncEnum<'T> log (container: Container) cat (iterator: FeedIterator<'T>) = taskSeq { + /// Runs a query that can be hydrated as 'T + let enum<'T> (log: ILogger) (container: Container) cat (iterator: FeedIterator<'T>) = taskSeq { let startTicks = System.Diagnostics.Stopwatch.GetTimestamp() use _ = iterator let mutable responses, items, totalRtt, totalRu, totalRdc, totalRds, totalOds = 0, 0, TimeSpan.Zero, 0., 0, 0, 0 @@ -97,14 +98,11 @@ module Internal = interval = interval; bytes = totalOds; count = items; ru = totalRu } in log |> Log.event evt log.Information("EqxCosmos {action:l} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc:f2} RU {lat:n0} ms", "Index", items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, interval.ElapsedMilliseconds) } - /// Runs a query that can by hydrated as 'T - let enum<'T> (log: ILogger) (container: Container) cat (queryDefinition: QueryDefinition): TaskSeq<'T> = - container.GetItemQueryIterator<'T>(queryDefinition) |> toAsyncEnum<'T> log container cat /// Runs a query that renders 'T, Hydrating the results as 'P (can be the same types but e.g. 
you might want to map an object to a JsonElement etc) let enumAs<'T, 'P> (log: ILogger) (container: Container) cat logLevel (query: IQueryable<'T>): TaskSeq<'P> = let queryDefinition = query.ToQueryDefinition() if log.IsEnabled logLevel then log.Write(logLevel, "CosmosStoreQuery.query {cat} {query}", cat, queryDefinition.QueryText) - enum<'P> log container cat queryDefinition + container.GetItemQueryIterator<'P> queryDefinition |> enum log container cat module AggregateOp = /// Runs one of the typical Cosmos SDK extensions, e.g. CountAsync, logging the costs let [] exec (log: ILogger) (container: Container) (op: string) (cat: string) (query: IQueryable<'T>) run render: System.Threading.Tasks.Task<'R> = task { @@ -132,7 +130,7 @@ module Internal = let tryHeadAsync<'T, 'R> (log: ILogger) (container: Container) cat logLevel (query: IQueryable<'T>) (_ct: CancellationToken): Task<'R option> = let queryDefinition = (top1 query).ToQueryDefinition() if log.IsEnabled logLevel then log.Write(logLevel, "CosmosStoreQuery.tryScalar {cat} {query}", queryDefinition.QueryText) - container.GetItemQueryIterator<'R>(queryDefinition) |> Query.toAsyncEnum log container cat |> TaskSeq.tryHead + container.GetItemQueryIterator<'R> queryDefinition |> Query.enum log container cat |> TaskSeq.tryHead type Projection<'T, 'M>(query, category, container, enum: IQueryable<'T> -> TaskSeq<'M>, count: IQueryable<'T> -> CancellationToken -> Task) = static member Create<'P>(q, cat, c, log, hydrate: 'P -> 'M, logLevel) = Projection<'T, 'M>(q, cat, c, Query.enumAs<'T, 'P> log c cat logLevel >> TaskSeq.map hydrate, AggregateOp.countAsync log c cat logLevel) @@ -143,26 +141,30 @@ module Internal = [] member val Category: string = category [] member val Container: Container = container -// We want to generate a projection statement of the shape: VALUE {"sn": root["p"], "snap": root["u"][0].["d"]} +// We want to generate a projection statement of the shape: VALUE {"sn": root["p"], "d": root["u"][0].["d"], "D": root["u"][0].["D"]} // However the Cosmos SDK does not support F# (or C#) records yet https://github.com/Azure/azure-cosmos-dotnet-v3/issues/3728 // F#'s LINQ support cannot translate parameterless constructor invocations in a Lambda well; // the best native workaround without Expression Manipulation is/was https://stackoverflow.com/a/78206722/11635 -// In C#, you can generate an Expression that works with the Cosmos SDK via `.Select(x => new { sn = x.p, snap = x.u[0].d })` +// In C#, you can generate an Expression that works with the Cosmos SDK via `.Select(x => new { sn = x.p, d = x.u[0].d, D = x.u[0].D })` // This hack is based on https://stackoverflow.com/a/73506241/11635 -type SnAndSnap<'I>() = +type SnAndSnap() = member val sn: FsCodec.StreamName = Unchecked.defaultof<_> with get, set - [)>] - member val snap: 'I = Unchecked.defaultof<_> with get, set - static member CreateItemQueryLambda<'T>(snExpression: Expression -> MemberExpression, snapExpression: Expression>) = + member val d: System.Text.Json.JsonElement = Unchecked.defaultof<_> with get, set + member val D: int = Unchecked.defaultof<_> with get, set + static member CreateItemQueryLambda<'T, 'U>( + snExpression: Expression -> MemberExpression, + uExpression: Expression>) = let param = Expression.Parameter(typeof<'T>, "x") - let targetType = typeof> - let snMember = targetType.GetMember(nameof Unchecked.defaultof>.sn)[0] - let snapMember = targetType.GetMember(nameof Unchecked.defaultof>.snap)[0] - Expression.Lambda>>( + let targetType = typeof + let snMember = 
targetType.GetMember(nameof Unchecked.defaultof.sn)[0] + let dMember = targetType.GetMember(nameof Unchecked.defaultof.d)[0] + let formatMember = targetType.GetMember(nameof Unchecked.defaultof.D)[0] + Expression.Lambda>( Expression.MemberInit( Expression.New(targetType.GetConstructor [||]), [| Expression.Bind(snMember, snExpression param) :> MemberBinding - Expression.Bind(snapMember, snapExpression.Body.Replace(snapExpression.Parameters[0], param)) |]), + Expression.Bind(dMember, uExpression.Body.Replace(uExpression.Parameters[0], param)) + Expression.Bind(formatMember, uExpression.Body.Replace(uExpression.Parameters[0], param)) |]), [| param |]) /// Represents a query projecting information values from an Index and/or Snapshots with a view to rendering the items and/or a count @@ -177,39 +179,41 @@ type Query<'T, 'M>(inner: Internal.Projection<'T, 'M>) = module Index = [] - type Item<'I> = + type Item = { p: string _etag: string - u: Unfold<'I> ResizeArray } - and [] Unfold<'I> = + u: Unfold ResizeArray } + and [] Unfold = { c: string - d: 'I } + d: System.Text.Json.JsonElement + D: int } let inline prefix categoryName = $"%s{categoryName}-" /// The cheapest search basis; the categoryName is a prefix of the `p` partition field /// Depending on how much more selective the caseName is, `byCaseName` may be a better choice /// (but e.g. if the ration is 1:1 then no point having additional criteria) - let byCategoryNameOnly<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName: IQueryable> = + let byCategoryNameOnly<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName: IQueryable = let prefix = prefix categoryName - container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix)) + container.GetItemLinqQueryable().Where(fun d -> d.p.StartsWith(prefix)) // Searches based on the prefix of the `p` field, but also checking the `c` of the relevant unfold is correct // A good idea if that'll be significantly cheaper due to better selectivity - let byCaseName<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName caseName: IQueryable> = + let byCaseName<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName caseName: IQueryable = let prefix = prefix categoryName - container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix) && d.u[0].c = caseName) + container.GetItemLinqQueryable().Where(fun d -> d.p.StartsWith(prefix) && d.u[0].c = caseName) /// Returns the StreamName (from the `p` field) for a 0/1 item query; only the TOP 1 item is returned - let tryGetStreamNameAsync log cat logLevel container (query: IQueryable>) ct = + let tryGetStreamNameAsync log cat logLevel container (query: IQueryable) ct = Internal.Scalar.tryHeadAsync log cat logLevel container (query.Select(fun x -> x.p)) ct - /// Query the items, returning the Stream name and the Snapshot as a JsonElement (Decompressed if applicable) - let projectStreamNameAndSnapshot<'I> snapExpression: Expression, SnAndSnap<'I>>> = - // a very ugly workaround for not being able to write query.Select,Internal.SnAndSnap<'I>>(fun x -> { p = x.p; snap = x.u[0].d }) - let pExpression item = Expression.PropertyOrField(item, nameof Unchecked.defaultof>.p) - SnAndSnap.CreateItemQueryLambda(pExpression, snapExpression) + // /// Query the items, returning the Stream name and the Snapshot as a JsonElement (Decompressed if applicable) + // let projectStreamNameAndSnapshot<'I> uExpression: Expression> = + // // a very ugly workaround for not being able to write query.Select(fun x -> { p = x.p; d = x.u[0].d; D 
= x.u[0].D }) + // let pExpression item = Expression.PropertyOrField(item, nameof Unchecked.defaultof.p) + // let uItem name item = Expression.PropertyOrField(uExpression, name) + // SnAndSnap.CreateItemQueryLambda(pExpression, uExpression, uItem (nameof Unchecked.defaultof.d), uItem (nameof Unchecked.defaultof.D)) - let createSnAndSnapshotQuery<'I, 'M> log container cat logLevel (hydrate: SnAndSnap -> 'M) (query: IQueryable>) = - Internal.Projection.Create(query, cat, container, log, hydrate, logLevel) |> Query, 'M> + let createSnAndSnapshotQuery<'M> log container cat logLevel (hydrate: SnAndSnap -> 'M) (query: IQueryable) = + Internal.Projection.Create(query, cat, container, log, hydrate, logLevel) |> Query /// Enables querying based on uncompressed Indexed values stored as secondary unfolds alongside the snapshot [] @@ -227,28 +231,26 @@ type IndexContext<'I>(container, categoryName, caseName, log, []?quer /// Fetches a base Queryable that's filtered based on the `categoryName` and `caseName` /// NOTE this is relatively expensive to compute a Count on, compared to `CategoryQueryable` - member _.ByCaseName(): IQueryable> = + member _.ByCaseName(): IQueryable = Index.byCaseName<'I> container categoryName caseName /// Fetches a base Queryable that's filtered only on the `categoryName` - member _.ByCategory(): IQueryable> = - Index.byCategoryNameOnly<'I> container categoryName + member _.ByCategory(): IQueryable = + Index.byCategoryNameOnly container categoryName /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria - member x.TryGetStreamNameWhereAsync(criteria: Expressions.Expression, bool>>, ct, [] ?logLevel) = + member x.TryGetStreamNameWhereAsync(criteria: Expressions.Expression>, ct, [] ?logLevel) = let logLevel = defaultArg logLevel queryLogLevel Index.tryGetStreamNameAsync x.Log container categoryName logLevel (x.ByCategory().Where criteria) ct /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria - member x.TryGetStreamNameWhere(criteria: Expressions.Expression, bool>>): Async = + member x.TryGetStreamNameWhere(criteria: Expressions.Expression>): Async = (fun ct -> x.TryGetStreamNameWhereAsync(criteria, ct)) |> Async.call - /// Query the items, grabbing the Stream name and the Snapshot; The StreamName and the (Decompressed if applicable) Snapshot are passed to `hydrate` - member x.QueryStreamNameAndSnapshot(query: IQueryable>, selectBody: Expression, 'I>>, - hydrate: SnAndSnap -> 'M, - [] ?logLevel): Query, 'M> = - let logLevel = defaultArg logLevel queryLogLevel - query.Select(Index.projectStreamNameAndSnapshot<'I> selectBody) - |> Index.createSnAndSnapshotQuery x.Log container categoryName logLevel hydrate - -// TODO remove this! 
+ // /// Query the items, grabbing the Stream name and the Snapshot; The StreamName and the (Decompressed if applicable) Snapshot are passed to `hydrate` + // member x.QueryStreamNameAndSnapshot(query: IQueryable, selectBody: Expression>, + // hydrate: SnAndSnap -> 'M, + // [] ?logLevel): Query = + // let logLevel = defaultArg logLevel queryLogLevel + // query.Select(Index.projectStreamNameAndSnapshot selectBody) + // |> Index.createSnAndSnapshotQuery x.Log container categoryName logLevel hydrate diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 38a1c67da..c6ae037e0 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -355,6 +355,7 @@ let prettySerdes = lazy FsCodec.SystemTextJson.Serdes(FsCodec.SystemTextJson.Opt module CosmosQuery = + open FSharp.Control let inline miB x = Equinox.CosmosStore.Linq.Internal.miB x type System.Text.Json.JsonElement with member x.Utf8ByteCount = if x.ValueKind = System.Text.Json.JsonValueKind.Null then 0 else x.GetRawText() |> System.Text.Encoding.UTF8.GetByteCount @@ -389,44 +390,41 @@ module CosmosQuery = | [| x |] -> x |> exists | xs -> String.Join(" AND ", xs) |> exists $"SELECT {selectedFields} FROM c WHERE {partitionKeyCriteria} AND {unfoldFilter} ORDER BY c.i" - let private makeQuery (a: QueryArguments) = - let sql = composeSql a - Log.Information("Querying {mode}: {q}", a.Mode, sql) - let storeConfig = a.ConfigureStore(Log.Logger) - let container = match storeConfig with Store.Config.Cosmos (cc, _, _) -> cc.Container | _ -> failwith "Query requires Cosmos" - let opts = Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = a.CosmosArgs.QueryMaxItems) - container.GetItemQueryIterator(sql, requestOptions = opts) - let run (a: QueryArguments) = async { + let run (a: QueryArguments) = task { let sw, sw2 = System.Diagnostics.Stopwatch(), System.Diagnostics.Stopwatch.StartNew() let serdes = if a.Pretty then prettySerdes.Value else FsCodec.SystemTextJson.Serdes.Default let maybeFileStream = a.Filepath |> Option.map (fun p -> Log.Information("Dumping {mode} content to {path}", a.Mode, System.IO.FileInfo(p).FullName) System.IO.File.Create p) // Silently truncate if it exists, makes sense for typical usage - use query = makeQuery a let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() let mutable accI, accE, accU, accRus, accBytesRead = 0L, 0L, 0L, 0., 0L - try while query.HasMoreResults do - sw.Restart() - let! 
page = query.ReadNextAsync(CancellationToken.None) |> Async.AwaitTaskCorrect - let pageSize = page.Resource |> Seq.sumBy _.RootElement.Utf8ByteCount - let newestAge = page.Resource |> Seq.choose _.Timestamp |> Seq.tryLast |> Option.map (fun ts -> ts - DateTime.UtcNow) - let items = [| for x in page.Resource -> x.Cast() |] + + let storeConfig, queryOpts = a.ConfigureStore(Log.Logger), Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = a.CosmosArgs.QueryMaxItems) + let container = match storeConfig with Store.Config.Cosmos (cc, _, _) -> cc.Container | _ -> failwith "Query requires Cosmos" + let sql = composeSql a + Log.Information("Querying {mode}: {q}", a.Mode, sql) + let qd = Microsoft.Azure.Cosmos.QueryDefinition sql + let qi = container.GetItemQueryIterator(qd, requestOptions = queryOpts) + try for rtt, rc, items, rdc, rds, ods in Equinox.CosmosStore.Linq.Internal.Query.enum_ qi do + let newestAge = items |> Seq.choose _.Timestamp |> Seq.tryLast |> Option.map (fun ts -> ts - DateTime.UtcNow) + let items = [| for x in items -> x.Cast() |] let inline arrayLen x = if isNull x then 0 else Array.length x pageStreams.Clear(); for x in items do if x.p <> null && pageStreams.Add x.p then accStreams.Add x.p |> ignore let pageI, pageE, pageU = items.Length, items |> Seq.sumBy (_.e >> arrayLen), items |> Seq.sumBy (_.u >> arrayLen) - Log.Information("Page {count}i {streams}s {es}e {us}u {ru}RU {s:N1}s {mib:N1}MiB age {age:dddd\.hh\:mm\:ss}", - pageI, pageStreams.Count, pageE, pageU, page.RequestCharge, sw.Elapsed.TotalSeconds, miB pageSize, Option.toNullable newestAge) + Log.Information("Page {rdc}>{count}i {streams}s {es}e {us}u {rc:f2} RU {s:N1}s {rds:f2}>{ods:f2} MiB age {age:dddd\.hh\:mm\:ss}", + rdc, pageI, pageStreams.Count, pageE, pageU, rc, rtt, miB rds, miB ods, Option.toNullable newestAge) maybeFileStream |> Option.iter (fun stream -> - for x in page.Resource do + for x in items do serdes.SerializeToStream(x, stream) stream.WriteByte(byte '\n')) if a.TeeConsole then - page.Resource |> Seq.iter (serdes.Serialize >> Console.WriteLine) + items |> Seq.iter (serdes.Serialize >> Console.WriteLine) accI <- accI + int64 pageI; accE <- accE + int64 pageE; accU <- accU + int64 pageU - accRus <- accRus + page.RequestCharge; accBytesRead <- accBytesRead + int64 pageSize + accRus <- accRus + rc; accBytesRead <- accBytesRead + int64 ods + sw.Restart() finally let fileSize = maybeFileStream |> Option.map _.Position |> Option.defaultValue 0 maybeFileStream |> Option.iter _.Close() // Before we log so time includes flush time and no confusion @@ -548,7 +546,7 @@ type Arguments(p: ParseResults) = | InitAws a -> do! DynamoInit.table Log.Logger a | InitSql a -> do! SqlInit.databaseOrSchema Log.Logger a | Dump a -> do! Dump.run (Log.Logger, verboseConsole, maybeSeq) a - | Query a -> do! CosmosQuery.run (QueryArguments a) + | Query a -> do! CosmosQuery.run (QueryArguments a) |> Async.AwaitTaskCorrect | Stats a -> do! 
CosmosStats.run (Log.Logger, verboseConsole, maybeSeq) a | LoadTest a -> let n = p.GetResult(LogFile, fun () -> p.ProgramName + ".log") let reportFilename = System.IO.FileInfo(n).FullName From 75e61bc8fc297a65c3176d71f5c74441dbabaadc Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Tue, 4 Jun 2024 14:14:31 +0100 Subject: [PATCH 25/56] Tidy --- tools/Equinox.Tool/Program.fs | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index c6ae037e0..63bb3a9ab 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -356,6 +356,7 @@ let prettySerdes = lazy FsCodec.SystemTextJson.Serdes(FsCodec.SystemTextJson.Opt module CosmosQuery = open FSharp.Control + open Equinox.CosmosStore.Linq.Internal let inline miB x = Equinox.CosmosStore.Linq.Internal.miB x type System.Text.Json.JsonElement with member x.Utf8ByteCount = if x.ValueKind = System.Text.Json.JsonValueKind.Null then 0 else x.GetRawText() |> System.Text.Encoding.UTF8.GetByteCount @@ -390,23 +391,21 @@ module CosmosQuery = | [| x |] -> x |> exists | xs -> String.Join(" AND ", xs) |> exists $"SELECT {selectedFields} FROM c WHERE {partitionKeyCriteria} AND {unfoldFilter} ORDER BY c.i" + let private queryDef (a: QueryArguments) = + let sql = composeSql a + Log.Information("Querying {mode}: {q}", a.Mode, sql) + Microsoft.Azure.Cosmos.QueryDefinition sql let run (a: QueryArguments) = task { - let sw, sw2 = System.Diagnostics.Stopwatch(), System.Diagnostics.Stopwatch.StartNew() + let sw = System.Diagnostics.Stopwatch.StartNew() let serdes = if a.Pretty then prettySerdes.Value else FsCodec.SystemTextJson.Serdes.Default let maybeFileStream = a.Filepath |> Option.map (fun p -> Log.Information("Dumping {mode} content to {path}", a.Mode, System.IO.FileInfo(p).FullName) System.IO.File.Create p) // Silently truncate if it exists, makes sense for typical usage - + let storeConfig, qo = a.ConfigureStore(Log.Logger), Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = a.CosmosArgs.QueryMaxItems) + let container = match storeConfig with Store.Config.Cosmos (cc, _, _) -> cc.Container | _ -> failwith "Query requires Cosmos" let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() let mutable accI, accE, accU, accRus, accBytesRead = 0L, 0L, 0L, 0., 0L - - let storeConfig, queryOpts = a.ConfigureStore(Log.Logger), Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = a.CosmosArgs.QueryMaxItems) - let container = match storeConfig with Store.Config.Cosmos (cc, _, _) -> cc.Container | _ -> failwith "Query requires Cosmos" - let sql = composeSql a - Log.Information("Querying {mode}: {q}", a.Mode, sql) - let qd = Microsoft.Azure.Cosmos.QueryDefinition sql - let qi = container.GetItemQueryIterator(qd, requestOptions = queryOpts) - try for rtt, rc, items, rdc, rds, ods in Equinox.CosmosStore.Linq.Internal.Query.enum_ qi do + try for rtt, rc, items, rdc, rds, ods in container.GetItemQueryIterator(queryDef a, requestOptions = qo) |> Query.enum_ do let newestAge = items |> Seq.choose _.Timestamp |> Seq.tryLast |> Option.map (fun ts -> ts - DateTime.UtcNow) let items = [| for x in items -> x.Cast() |] let inline arrayLen x = if isNull x then 0 else Array.length x @@ -414,24 +413,21 @@ module CosmosQuery = let pageI, pageE, pageU = items.Length, items |> Seq.sumBy (_.e >> arrayLen), items |> Seq.sumBy (_.u >> arrayLen) Log.Information("Page {rdc}>{count}i {streams}s {es}e {us}u {rc:f2} RU 
{s:N1}s {rds:f2}>{ods:f2} MiB age {age:dddd\.hh\:mm\:ss}", rdc, pageI, pageStreams.Count, pageE, pageU, rc, rtt, miB rds, miB ods, Option.toNullable newestAge) - maybeFileStream |> Option.iter (fun stream -> for x in items do serdes.SerializeToStream(x, stream) stream.WriteByte(byte '\n')) if a.TeeConsole then items |> Seq.iter (serdes.Serialize >> Console.WriteLine) - accI <- accI + int64 pageI; accE <- accE + int64 pageE; accU <- accU + int64 pageU accRus <- accRus + rc; accBytesRead <- accBytesRead + int64 ods - sw.Restart() finally let fileSize = maybeFileStream |> Option.map _.Position |> Option.defaultValue 0 maybeFileStream |> Option.iter _.Close() // Before we log so time includes flush time and no confusion let categoryName = FsCodec.StreamName.parse >> FsCodec.StreamName.split >> fun struct (cn, _sid) -> cn let accCategories = accStreams |> Seq.map categoryName |> Seq.distinct |> Seq.length Log.Information("TOTALS {cats}c {streams:N0}s {count:N0}i {es:N0}e {us:N0}u {ru:N2}RU R/W {rmib:N1}/{wmib:N1}MiB {s:N1}s", - accCategories, accStreams.Count, accI, accE, accU, accRus, miB accBytesRead, miB fileSize, sw2.Elapsed.TotalSeconds) } + accCategories, accStreams.Count, accI, accE, accU, accRus, miB accBytesRead, miB fileSize, sw.Elapsed.TotalSeconds) } module DynamoInit = From 6186487a448dffadc7b7948e11d29e6980138082 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Tue, 4 Jun 2024 15:50:55 +0100 Subject: [PATCH 26/56] Log polish --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 36 ++++++++++++---------- tools/Equinox.Tool/Program.fs | 23 +++++++------- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 846c0cf3a..4fff31d81 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -71,18 +71,17 @@ module Internal = /// NOTE: (continuation tokens are the key to more linear costs) let offsetLimit (skip: int, take: int) (query: IQueryable<'T>) = query.Skip(skip).Take(take) - let [] enum_ (iterator: FeedIterator<'T>) = taskSeq { + let [] enum__ (iterator: FeedIterator<'T>) = taskSeq { while iterator.HasMoreResults do let! 
response = iterator.ReadNextAsync() let m = response.Diagnostics.GetQueryMetrics().CumulativeMetrics yield struct (response.Diagnostics.GetClientElapsedTime(), response.RequestCharge, response.Resource, int m.RetrievedDocumentCount, int m.RetrievedDocumentSize, int m.OutputDocumentSize) } - /// Runs a query that can be hydrated as 'T - let enum<'T> (log: ILogger) (container: Container) cat (iterator: FeedIterator<'T>) = taskSeq { + let enum_<'T> (log: ILogger) (container: Container) (action: string) cat logLevel (iterator: FeedIterator<'T>) = taskSeq { let startTicks = System.Diagnostics.Stopwatch.GetTimestamp() use _ = iterator let mutable responses, items, totalRtt, totalRu, totalRdc, totalRds, totalOds = 0, 0, TimeSpan.Zero, 0., 0, 0, 0 - try for rtt, rc, response, rdc, rds, ods in enum_ iterator do + try for rtt, rc, response, rdc, rds, ods in enum__ iterator do responses <- responses + 1 totalRdc <- totalRdc + rdc totalRds <- totalRds + rds @@ -94,10 +93,13 @@ module Internal = yield item finally let interval = StopwatchInterval(startTicks, System.Diagnostics.Stopwatch.GetTimestamp()) - let log = let evt = Log.Metric.Index { database = container.Database.Id; container = container.Id; stream = cat + FsCodec.StreamName.Category.SeparatorStr + let log = if cat = null then log else + let evt = Log.Metric.Index { database = container.Database.Id; container = container.Id; stream = cat + FsCodec.StreamName.Category.SeparatorStr interval = interval; bytes = totalOds; count = items; ru = totalRu } in log |> Log.event evt - log.Information("EqxCosmos {action:l} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc:f2} RU {lat:n0} ms", - "Index", items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, interval.ElapsedMilliseconds) } + log.Write(logLevel, "EqxCosmos {action:l} {count} ({trips}r {totalRtt:f0}ms; {rdc}i {rds:f2}>{ods:f2} MiB) {rc:f2} RU {lat:f0} ms", + action, items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, interval.ElapsedMilliseconds) } + /// Runs a query that can be hydrated as 'T + let enum log container cat = enum_ log container "Index" cat Events.LogEventLevel.Information /// Runs a query that renders 'T, Hydrating the results as 'P (can be the same types but e.g. 
you might want to map an object to a JsonElement etc) let enumAs<'T, 'P> (log: ILogger) (container: Container) cat logLevel (query: IQueryable<'T>): TaskSeq<'P> = let queryDefinition = query.ToQueryDefinition() @@ -115,7 +117,7 @@ module Internal = let totalOds, totalRu = m.OutputDocumentSize, rsp.RequestCharge let log = let evt = Log.Metric.Index { database = container.Database.Id; container = container.Id; stream = cat + FsCodec.StreamName.Category.SeparatorStr interval = interval; bytes = int totalOds; count = -1; ru = totalRu } in log |> Log.event evt - log.Information("EqxCosmos {action:l} {cat} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {lat:n0} ms", + log.Information("EqxCosmos {action:l} {cat} {count} ({rdc}i {rds:f2}>{ods:f2} MiB) {rc} RU {lat:f0} ms", op, cat, summary, m.RetrievedDocumentCount, miB m.RetrievedDocumentSize, miB totalOds, totalRu, interval.ElapsedMilliseconds) return res } /// Runs query.CountAsync, with instrumentation equivalent to what query provides @@ -129,8 +131,8 @@ module Internal = /// Handles a query that's expected to yield 0 or 1 result item let tryHeadAsync<'T, 'R> (log: ILogger) (container: Container) cat logLevel (query: IQueryable<'T>) (_ct: CancellationToken): Task<'R option> = let queryDefinition = (top1 query).ToQueryDefinition() - if log.IsEnabled logLevel then log.Write(logLevel, "CosmosStoreQuery.tryScalar {cat} {query}", queryDefinition.QueryText) - container.GetItemQueryIterator<'R> queryDefinition |> Query.enum log container cat |> TaskSeq.tryHead + if log.IsEnabled logLevel then log.Write(logLevel, "CosmosStoreQuery.tryScalar {cat} {query}", cat, queryDefinition.QueryText) + container.GetItemQueryIterator<'R> queryDefinition |> Query.enum_ log container "Scalar" cat logLevel |> TaskSeq.tryHead type Projection<'T, 'M>(query, category, container, enum: IQueryable<'T> -> TaskSeq<'M>, count: IQueryable<'T> -> CancellationToken -> Task) = static member Create<'P>(q, cat, c, log, hydrate: 'P -> 'M, logLevel) = Projection<'T, 'M>(q, cat, c, Query.enumAs<'T, 'P> log c cat logLevel >> TaskSeq.map hydrate, AggregateOp.countAsync log c cat logLevel) @@ -247,10 +249,10 @@ type IndexContext<'I>(container, categoryName, caseName, log, []?quer member x.TryGetStreamNameWhere(criteria: Expressions.Expression>): Async = (fun ct -> x.TryGetStreamNameWhereAsync(criteria, ct)) |> Async.call - // /// Query the items, grabbing the Stream name and the Snapshot; The StreamName and the (Decompressed if applicable) Snapshot are passed to `hydrate` - // member x.QueryStreamNameAndSnapshot(query: IQueryable, selectBody: Expression>, - // hydrate: SnAndSnap -> 'M, - // [] ?logLevel): Query = - // let logLevel = defaultArg logLevel queryLogLevel - // query.Select(Index.projectStreamNameAndSnapshot selectBody) - // |> Index.createSnAndSnapshotQuery x.Log container categoryName logLevel hydrate + /// Query the items, grabbing the Stream name and the Snapshot; The StreamName and the (Decompressed if applicable) Snapshot are passed to `hydrate` + member x.QueryStreamNameAndSnapshot(query: IQueryable>, selectBody: Expression, 'I>>, + hydrate: SnAndSnap -> 'M, + [] ?logLevel): Query, 'M> = + let logLevel = defaultArg logLevel queryLogLevel + query.Select(Index.projectStreamNameAndSnapshot<'I> selectBody) + |> Index.createSnAndSnapshotQuery x.Log container categoryName logLevel hydrate diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 63bb3a9ab..e8972cd52 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ 
-306,10 +306,8 @@ module CosmosInit = module CosmosStats = - type Microsoft.Azure.Cosmos.Container with // NB DO NOT CONSIDER PROMULGATING THIS HACK - member container.QueryValue<'T>(sqlQuery : string) = task { - let! (res: Microsoft.Azure.Cosmos.FeedResponse<'T>) = container.GetItemQueryIterator<'T>(sqlQuery).ReadNextAsync() - return res |> Seq.exactlyOne } + open Equinox.CosmosStore.Linq.Internal + open FSharp.Control let run (log : ILogger, _verboseConsole, _maybeSeq) (p : ParseResults) = match p.GetSubCommand() with | StatsParameters.Cosmos sp -> @@ -329,8 +327,9 @@ module CosmosStats = let render = if log.IsEnabled LogEventLevel.Debug then snd else fst log.Information("Computing {measures} ({mode})", Seq.map render ops, (if inParallel then "in parallel" else "serially")) ops |> Seq.map (fun (name, sql) -> async { - log.Debug("Running query: {sql}", sql) - let res = container.QueryValue(sql) |> Async.AwaitTaskCorrect |> Async.RunSynchronously + let! res = Microsoft.Azure.Cosmos.QueryDefinition sql + |> container.GetItemQueryIterator + |> Query.enum_ log container "Stat" null LogEventLevel.Debug |> TaskSeq.head |> Async.AwaitTaskCorrect match name with | "Oldest" | "Newest" -> log.Information("{stat,-10}: {result,13} ({d:u})", name, res, DateTime.UnixEpoch.AddSeconds(float res)) | _ -> log.Information("{stat,-10}: {result,13:N0}", name, res) }) @@ -355,8 +354,8 @@ let prettySerdes = lazy FsCodec.SystemTextJson.Serdes(FsCodec.SystemTextJson.Opt module CosmosQuery = - open FSharp.Control open Equinox.CosmosStore.Linq.Internal + open FSharp.Control let inline miB x = Equinox.CosmosStore.Linq.Internal.miB x type System.Text.Json.JsonElement with member x.Utf8ByteCount = if x.ValueKind = System.Text.Json.JsonValueKind.Null then 0 else x.GetRawText() |> System.Text.Encoding.UTF8.GetByteCount @@ -405,14 +404,14 @@ module CosmosQuery = let container = match storeConfig with Store.Config.Cosmos (cc, _, _) -> cc.Container | _ -> failwith "Query requires Cosmos" let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() let mutable accI, accE, accU, accRus, accBytesRead = 0L, 0L, 0L, 0., 0L - try for rtt, rc, items, rdc, rds, ods in container.GetItemQueryIterator(queryDef a, requestOptions = qo) |> Query.enum_ do + try for rtt, rc, items, rdc, rds, ods in container.GetItemQueryIterator(queryDef a, requestOptions = qo) |> Query.enum__ do let newestAge = items |> Seq.choose _.Timestamp |> Seq.tryLast |> Option.map (fun ts -> ts - DateTime.UtcNow) let items = [| for x in items -> x.Cast() |] let inline arrayLen x = if isNull x then 0 else Array.length x pageStreams.Clear(); for x in items do if x.p <> null && pageStreams.Add x.p then accStreams.Add x.p |> ignore let pageI, pageE, pageU = items.Length, items |> Seq.sumBy (_.e >> arrayLen), items |> Seq.sumBy (_.u >> arrayLen) - Log.Information("Page {rdc}>{count}i {streams}s {es}e {us}u {rc:f2} RU {s:N1}s {rds:f2}>{ods:f2} MiB age {age:dddd\.hh\:mm\:ss}", - rdc, pageI, pageStreams.Count, pageE, pageU, rc, rtt, miB rds, miB ods, Option.toNullable newestAge) + Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2} MiB{rc,7:f2} RU{s,5:N1} s age {age:dddd\.hh\:mm\:ss}", + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, Option.toNullable newestAge) maybeFileStream |> Option.iter (fun stream -> for x in items do serdes.SerializeToStream(x, stream) @@ -426,8 +425,8 @@ module CosmosQuery = maybeFileStream |> Option.iter _.Close() // Before we log 
so time includes flush time and no confusion let categoryName = FsCodec.StreamName.parse >> FsCodec.StreamName.split >> fun struct (cn, _sid) -> cn let accCategories = accStreams |> Seq.map categoryName |> Seq.distinct |> Seq.length - Log.Information("TOTALS {cats}c {streams:N0}s {count:N0}i {es:N0}e {us:N0}u {ru:N2}RU R/W {rmib:N1}/{wmib:N1}MiB {s:N1}s", - accCategories, accStreams.Count, accI, accE, accU, accRus, miB accBytesRead, miB fileSize, sw.Elapsed.TotalSeconds) } + Log.Information("TOTALS {cats}c {streams:N0}s {count:N0}i {es:N0}e {us:N0}u R/W {rmib:N1}/{wmib:N1}MiB {ru:N2}RU {s:N1}s", + accCategories, accStreams.Count, accI, accE, accU, miB accBytesRead, miB fileSize, accRus, sw.Elapsed.TotalSeconds) } module DynamoInit = From ca1fc2756049be0e8a87b4f5cd6f15621e56ec7a Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 5 Jun 2024 14:11:59 +0100 Subject: [PATCH 27/56] feat: eqx top --- tools/Equinox.Tool/Program.fs | 141 ++++++++++++++++++++++++++++++---- 1 file changed, 128 insertions(+), 13 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index e8972cd52..bb0a0759a 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -30,6 +30,7 @@ type Parameters = | [] InitSql of ParseResults | [] Stats of ParseResults | [] Query of ParseResults + | [] Top of ParseResults interface IArgParserTemplate with member a.Usage = a |> function | Quiet -> "Omit timestamps from log output" @@ -44,6 +45,7 @@ type Parameters = | InitSql _ -> "Initialize Database Schema (supports `mssql`/`mysql`/`postgres` SqlStreamStore stores)." | Stats _ -> "inspect store to determine numbers of streams/documents/events and/or config (supports `cosmos` and `dynamo` stores)." | Query _ -> "Load/Summarise streams based on Cosmos SQL Queries (supports `cosmos` only)." + | Top _ -> "Scan to determine top categories and streams (supports `cosmos` only)." and [] InitParameters = | [] Rus of int | [] Autoscale @@ -168,9 +170,42 @@ and QueryArguments(p: ParseResults) = member x.Connect() = match Store.Cosmos.config Log.Logger (None, true) x.CosmosArgs with | Store.Config.Cosmos (cc, _, _) -> cc.Container | _ -> p.Raise "Query requires Cosmos" - member x.ConfigureStore(log: ILogger) = - let storeConfig = None, true - Store.Cosmos.config log storeConfig x.CosmosArgs +and [] TopParameters = + | [] StreamName of string + | [] CategoryName of string + | [] CategoryLike of string + | [] Streams + | [] Limit of int + | [] Cosmos of ParseResults + interface IArgParserTemplate with + member a.Usage = a |> function + | StreamName _ -> "Specify stream name to match against `p`, e.g. `$UserServices-f7c1ce63389a45bdbea1cccebb1b3c8a`." + | CategoryName _ -> "Specify category name to match against `p`, e.g. `$UserServices`." + | CategoryLike _ -> "Specify category name to match against `p` as a Cosmos LIKE expression (with `%` as wildcard, e.g. `$UserServices-%`." + | Streams -> "Stream level stats" + | Limit _ -> "Number of items to limit output to" + | Cosmos _ -> "Parameters for CosmosDB." 
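// Annotation, not part of the patch: the StreamName/CategoryName/CategoryLike options above feed the
// Criteria cases, which composeSql further down in this patch renders as a partition-key predicate over
// the `p` (stream name) field. A standalone sketch of that mapping, via a hypothetical criteriaSql helper
// and assuming the Criteria union defined earlier in Program.fs:
let criteriaSql = function
    | Criteria.SingleStream sn -> $"c.p = \"{sn}\""
    | Criteria.CatName n -> $"c.p LIKE \"{n}-%%\""   // %% renders as a literal % in an interpolated string
    | Criteria.CatLike pat -> $"c.p LIKE \"{pat}\""
    | Criteria.Unfiltered -> "1=1"
// criteriaSql (Criteria.CatName "$UserServices") evaluates to: c.p LIKE "$UserServices-%"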
+and TopArguments(p: ParseResults) = + member val Criteria = + match p.TryGetResult StreamName, p.TryGetResult CategoryName, p.TryGetResult CategoryLike with + | Some sn, None, None -> Criteria.SingleStream sn + | Some _, Some _, _ + | Some _, _, Some _ -> p.Raise "StreamName and CategoryLike/CategoryName mutually exclusive" + | None, Some cn, None -> Criteria.CatName cn + | None, None, Some cl -> Criteria.CatLike cl + | None, None, None -> Criteria.Unfiltered + | None, Some _, Some _ -> p.Raise "CategoryLike and CategoryName are mutually exclusive" + member val CosmosArgs = p.GetResult TopParameters.Cosmos |> Store.Cosmos.Arguments + member val StreamLevel = p.Contains TopParameters.Streams + member val Count = p.GetResult(TopParameters.Limit, 100) + member x.Connect() = match Store.Cosmos.config Log.Logger (None, true) x.CosmosArgs with + | Store.Config.Cosmos (cc, _, _) -> cc.Container + | _ -> failwith "Top requires Cosmos" + member x.Execute(sql) = let container = x.Connect() + let qd = Microsoft.Azure.Cosmos.QueryDefinition sql + let qo = Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = x.CosmosArgs.QueryMaxItems) + container.GetItemQueryIterator(qd, requestOptions = qo) + and [] DumpParameters = | [] Stream of FsCodec.StreamName | [] Correlation @@ -400,18 +435,20 @@ module CosmosQuery = let maybeFileStream = a.Filepath |> Option.map (fun p -> Log.Information("Dumping {mode} content to {path}", a.Mode, System.IO.FileInfo(p).FullName) System.IO.File.Create p) // Silently truncate if it exists, makes sense for typical usage - let storeConfig, qo = a.ConfigureStore(Log.Logger), Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = a.CosmosArgs.QueryMaxItems) - let container = match storeConfig with Store.Config.Cosmos (cc, _, _) -> cc.Container | _ -> failwith "Query requires Cosmos" + let qo = Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = a.CosmosArgs.QueryMaxItems) + let container = a.Connect() let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() let mutable accI, accE, accU, accRus, accBytesRead = 0L, 0L, 0L, 0., 0L - try for rtt, rc, items, rdc, rds, ods in container.GetItemQueryIterator(queryDef a, requestOptions = qo) |> Query.enum__ do - let newestAge = items |> Seq.choose _.Timestamp |> Seq.tryLast |> Option.map (fun ts -> ts - DateTime.UtcNow) - let items = [| for x in items -> x.Cast() |] + let it = container.GetItemQueryIterator(queryDef a, requestOptions = qo) + try for rtt, rc, items, rdc, rds, ods in it |> Query.enum__ do + let mutable newestTs = None + let items = [| for x in items -> newestTs <- max newestTs x.Timestamp + x.Cast() |] let inline arrayLen x = if isNull x then 0 else Array.length x pageStreams.Clear(); for x in items do if x.p <> null && pageStreams.Add x.p then accStreams.Add x.p |> ignore let pageI, pageE, pageU = items.Length, items |> Seq.sumBy (_.e >> arrayLen), items |> Seq.sumBy (_.u >> arrayLen) - Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2} MiB{rc,7:f2} RU{s,5:N1} s age {age:dddd\.hh\:mm\:ss}", - rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, Option.toNullable newestAge) + Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}MiB{rc,7:f2}RU{s,5:N1}s age {age:dddd\.hh\:mm\:ss}", + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, newestTs.Value - DateTime.UtcNow) maybeFileStream |> Option.iter (fun stream -> for x in items do 
serdes.SerializeToStream(x, stream) @@ -424,9 +461,86 @@ module CosmosQuery = let fileSize = maybeFileStream |> Option.map _.Position |> Option.defaultValue 0 maybeFileStream |> Option.iter _.Close() // Before we log so time includes flush time and no confusion let categoryName = FsCodec.StreamName.parse >> FsCodec.StreamName.split >> fun struct (cn, _sid) -> cn - let accCategories = accStreams |> Seq.map categoryName |> Seq.distinct |> Seq.length - Log.Information("TOTALS {cats}c {streams:N0}s {count:N0}i {es:N0}e {us:N0}u R/W {rmib:N1}/{wmib:N1}MiB {ru:N2}RU {s:N1}s", - accCategories, accStreams.Count, accI, accE, accU, miB accBytesRead, miB fileSize, accRus, sw.Elapsed.TotalSeconds) } + let accCategories = System.Collections.Generic.HashSet(accStreams |> Seq.map categoryName).Count + Log.Information("TOTALS {count:N0}i {cats}c {streams:N0}s {es:N0}e {us:N0}u R/W {rmib:N1}/{wmib:N1}MiB {ru:N2}RU {s:N1}s", + accI, accCategories, accStreams.Count, accE, accU, miB accBytesRead, miB fileSize, accRus, sw.Elapsed.TotalSeconds) } + +module CosmosTop = + + open Equinox.CosmosStore.Linq.Internal + open FSharp.Control + + let cosmosTimeStamp (x: System.Text.Json.JsonElement) = x.GetProperty("_ts").GetDouble() |> DateTime.UnixEpoch.AddSeconds + let tryParseEquinoxBatch (x: System.Text.Json.JsonElement) = + let tryProp (id: string): ValueOption = + let mutable p = Unchecked.defaultof<_> + if x.TryGetProperty(id, &p) then ValueSome p else ValueNone + match tryProp "p" with + | ValueSome (je: System.Text.Json.JsonElement) when je.ValueKind = System.Text.Json.JsonValueKind.String -> + ValueSome struct (je.GetString() |> FsCodec.StreamName.parse, tryProp "e", tryProp "u") + | _ -> ValueNone + + let private composeSql (a: TopArguments) = + let partitionKeyCriteria = + match a.Criteria with + | Criteria.SingleStream sn -> $"c.p = \"{sn}\"" + | Criteria.CatName n -> $"c.p LIKE \"{n}-%%\"" + | Criteria.CatLike pat -> $"c.p LIKE \"{pat}\"" + | Criteria.Unfiltered -> "1=1" + $"SELECT * FROM c WHERE {partitionKeyCriteria}" + let arrayLen = function ValueNone -> 0 | ValueSome (x: System.Text.Json.JsonElement) -> x.GetArrayLength() + let scratch = new System.IO.MemoryStream() + let utf8Size (x: System.Text.Json.JsonElement) = + scratch.Position <- 0L + System.Text.Json.JsonSerializer.Serialize(scratch, x) + scratch.Position + [] + type Stat = + { key: string; count: int; events: int; unfolds: int; bytes: int64; eBytes: int64; uBytes: int64 } + static member Create(key, d: System.Text.Json.JsonElement, e: System.Text.Json.JsonElement voption, u: System.Text.Json.JsonElement voption) = + let eb = match e with ValueSome x -> utf8Size x | ValueNone -> 0 + let ub = match u with ValueSome x -> utf8Size x | ValueNone -> 0 + { key = key; count = 1; events = arrayLen e; unfolds = arrayLen u + bytes = utf8Size d; eBytes = eb; uBytes = ub } + member x.Merge y = + { key = x.key; count = x.count + y.count; events = x.events + y.events; unfolds = x.unfolds + y.unfolds + bytes = x.bytes + y.bytes; eBytes = x.eBytes + y.eBytes; uBytes = x.uBytes + y.uBytes } + override x.GetHashCode() = StringComparer.Ordinal.GetHashCode x.key + override x.Equals y = match y with :? 
Stat as y -> StringComparer.Ordinal.Equals(x.key, y.key) | _ -> false + let run (a: TopArguments) = task { + let sw = System.Diagnostics.Stopwatch.StartNew() + let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() + let mutable accI, accE, accU, accRus, accRds, accOds, accBytes = 0L, 0L, 0L, 0., 0L, 0L, 0L + let s = System.Collections.Generic.HashSet() + let categoryName = FsCodec.StreamName.split >> fun struct (cn, _sid) -> cn + let g = if a.StreamLevel then FsCodec.StreamName.toString else categoryName + try for rtt, rc, items, rdc, rds, ods in a.Execute(composeSql a) |> Query.enum__ do + let mutable pageI, pageE, pageU, pageB, newestTs = 0, 0, 0, 0L, DateTime.MinValue + for x in items do + newestTs <- max newestTs (cosmosTimeStamp x) + match tryParseEquinoxBatch x with + | ValueNone -> failwith $"Could not parse document:\n{prettySerdes.Value.Serialize x}" + | ValueSome (sn, e, u) -> + if pageStreams.Add sn then accStreams.Add sn |> ignore + let x = Stat.Create(g sn, x, e, u) + let mutable v = Unchecked.defaultof<_> + if s.TryGetValue(x, &v) then s.Remove x |> ignore; s.Add(v.Merge x) |> ignore + else s.Add x |> ignore + pageI <- pageI + 1; pageE <- pageE + x.events; pageU <- pageU + x.unfolds; pageB <- pageB + x.bytes + Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}>{jds,4:f2}MiB{rc,7:f2}RU{s,5:N1}s age {age:dddd\.hh\:mm\:ss}", + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, miB pageB, rc, rtt.TotalSeconds, newestTs - DateTime.UtcNow) + pageStreams.Clear() + accI <- accI + int64 pageI; accE <- accE + int64 pageE; accU <- accU + int64 pageU + accRus <- accRus + rc; accRds <- accRds + int64 rds; accOds <- accOds + int64 ods; accBytes <- accBytes + pageB + finally + + let accCats = System.Collections.Generic.HashSet(accStreams |> Seq.map categoryName).Count + Log.Information("TOTALS {count:N0}i {cats}c {streams:N0}s {es:N0}e {us:N0}u {tmib:N1}MiB Read {rmib:N1}>{omib:N1} {ru:N2}RU {s:N1}s", + accI, accCats, accStreams.Count, accE, accU, miB accBytes, miB accRds, miB accOds, accRus, sw.Elapsed.TotalSeconds) + + for x in s |> Seq.sortByDescending _.bytes |> Seq.truncate a.Count do + Log.Information("{key,-20}:{count,7}i {mib,6:N1}MiB E{events,7} {emib,7:N1} U{unfolds,7} {umib,6:N1}", + x.key, x.count, miB x.bytes, x.events, miB x.eBytes, x.unfolds, miB x.uBytes) } module DynamoInit = @@ -542,6 +656,7 @@ type Arguments(p: ParseResults) = | InitSql a -> do! SqlInit.databaseOrSchema Log.Logger a | Dump a -> do! Dump.run (Log.Logger, verboseConsole, maybeSeq) a | Query a -> do! CosmosQuery.run (QueryArguments a) |> Async.AwaitTaskCorrect + | Top a -> do! CosmosTop.run (TopArguments a) |> Async.AwaitTaskCorrect | Stats a -> do! 
CosmosStats.run (Log.Logger, verboseConsole, maybeSeq) a | LoadTest a -> let n = p.GetResult(LogFile, fun () -> p.ProgramName + ".log") let reportFilename = System.IO.FileInfo(n).FullName From 373d089599eb5ae985f341f0d2c295ee540d49f1 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Fri, 7 Jun 2024 16:17:02 +0100 Subject: [PATCH 28/56] Sorting, C+C, D+M, unsorted --- .../CosmosStoreSerialization.fs | 0 tools/Equinox.Tool/Program.fs | 143 +++++++++++------- 2 files changed, 92 insertions(+), 51 deletions(-) create mode 100644 src/Equinox.CosmosStore/CosmosStoreSerialization.fs diff --git a/src/Equinox.CosmosStore/CosmosStoreSerialization.fs b/src/Equinox.CosmosStore/CosmosStoreSerialization.fs new file mode 100644 index 000000000..e69de29bb diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index bb0a0759a..61358ca0a 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -175,7 +175,9 @@ and [] TopParameters = | [] CategoryName of string | [] CategoryLike of string | [] Streams - | [] Limit of int + | [] TsOrder + | [] Limit of int + | [] Sort of Order | [] Cosmos of ParseResults interface IArgParserTemplate with member a.Usage = a |> function @@ -183,8 +185,11 @@ and [] TopParameters = | CategoryName _ -> "Specify category name to match against `p`, e.g. `$UserServices`." | CategoryLike _ -> "Specify category name to match against `p` as a Cosmos LIKE expression (with `%` as wildcard, e.g. `$UserServices-%`." | Streams -> "Stream level stats" - | Limit _ -> "Number of items to limit output to" + | TsOrder -> "Retrieve data in `_ts` ORDER (generally has significant RU impact). Default: Use continuation tokens" + | Sort _ -> "Sort order for results" + | Limit _ -> "Number of categories to limit output to (Streams limit is 10x the category limit). Default: 100" | Cosmos _ -> "Parameters for CosmosDB." 
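// Editorial sketch, not part of the patch: both versions of the Stat record in this series override
// GetHashCode/Equals over `key` alone, so a plain HashSet can serve as a keyed accumulator
// (TryGetValue, then Remove and re-Add the merged value). A simplified, self-contained illustration,
// with a hypothetical KeyedStat record standing in for the real Stat:
[<CustomEquality; NoComparison>]
type KeyedStat =
    { key: string; count: int }
    override x.GetHashCode() = System.StringComparer.Ordinal.GetHashCode x.key
    override x.Equals y = match y with :? KeyedStat as y -> System.StringComparer.Ordinal.Equals(x.key, y.key) | _ -> false
let ingest (acc: System.Collections.Generic.HashSet<KeyedStat>) (x: KeyedStat) =
    let mutable current = Unchecked.defaultof<KeyedStat>
    if acc.TryGetValue(x, &current) then
        acc.Remove x |> ignore
        acc.Add({ current with count = current.count + x.count }) |> ignore
    else acc.Add x |> ignore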
+and Order = Name | Items | Events | Unfolds | Size | EventSize | UnfoldSize | InflateSize | CorrCauseSize and TopArguments(p: ParseResults) = member val Criteria = match p.TryGetResult StreamName, p.TryGetResult CategoryName, p.TryGetResult CategoryLike with @@ -196,8 +201,11 @@ and TopArguments(p: ParseResults) = | None, None, None -> Criteria.Unfiltered | None, Some _, Some _ -> p.Raise "CategoryLike and CategoryName are mutually exclusive" member val CosmosArgs = p.GetResult TopParameters.Cosmos |> Store.Cosmos.Arguments - member val StreamLevel = p.Contains TopParameters.Streams - member val Count = p.GetResult(TopParameters.Limit, 100) + member val StreamLevel = p.Contains Streams + member val Count = p.GetResult(Limit, 100) + member val TsOrder = p.Contains TsOrder + member val Order = p.GetResult(Sort, Order.Size) + member x.StreamCount = p.GetResult(Limit, x.Count * 10) member x.Connect() = match Store.Cosmos.config Log.Logger (None, true) x.CosmosArgs with | Store.Config.Cosmos (cc, _, _) -> cc.Container | _ -> failwith "Top requires Cosmos" @@ -424,7 +432,7 @@ module CosmosQuery = | [||] -> "1=1" | [| x |] -> x |> exists | xs -> String.Join(" AND ", xs) |> exists - $"SELECT {selectedFields} FROM c WHERE {partitionKeyCriteria} AND {unfoldFilter} ORDER BY c.i" + $"SELECT {selectedFields} FROM c WHERE {partitionKeyCriteria} AND {unfoldFilter}" let private queryDef (a: QueryArguments) = let sql = composeSql a Log.Information("Querying {mode}: {q}", a.Mode, sql) @@ -469,17 +477,52 @@ module CosmosTop = open Equinox.CosmosStore.Linq.Internal open FSharp.Control - - let cosmosTimeStamp (x: System.Text.Json.JsonElement) = x.GetProperty("_ts").GetDouble() |> DateTime.UnixEpoch.AddSeconds - let tryParseEquinoxBatch (x: System.Text.Json.JsonElement) = - let tryProp (id: string): ValueOption = + open System.Text.Json + module private Parser = + let scratch = new System.IO.MemoryStream() + let inline utf8Size (x: JsonElement) = + scratch.Position <- 0L + JsonSerializer.Serialize(scratch, x) + scratch.Position + let inline inflatedUtf8Size x = + scratch.Position <- 0L + if Equinox.CosmosStore.Core.JsonElement.tryInflateTo scratch x then scratch.Position + else utf8Size x + let inline tryProp (x: JsonElement) (id: string): ValueOption = let mutable p = Unchecked.defaultof<_> if x.TryGetProperty(id, &p) then ValueSome p else ValueNone - match tryProp "p" with - | ValueSome (je: System.Text.Json.JsonElement) when je.ValueKind = System.Text.Json.JsonValueKind.String -> - ValueSome struct (je.GetString() |> FsCodec.StreamName.parse, tryProp "e", tryProp "u") - | _ -> ValueNone - + // using the length as a decent proxy for UTF-8 length of corr/causation; if you have messy data in there, you'll have bigger problems to worry about + let inline stringLen x = match x with ValueSome (x: JsonElement) when x.ValueKind <> JsonValueKind.Null -> x.GetString().Length | _ -> 0 + let _e = Unchecked.defaultof // Or Unfold - both share field names + let inline ciSize (x: JsonElement) = + (struct (0, 0L), x.EnumerateArray()) + ||> Seq.fold (fun struct (c, i) x -> + let inline infSize x = match x with ValueSome x -> inflatedUtf8Size x | ValueNone -> 0 + struct (c + (tryProp x (nameof _e.correlationId) |> stringLen) + (tryProp x (nameof _e.causationId) |> stringLen), + i + (tryProp x (nameof _e.d) |> infSize) + (tryProp x (nameof _e.m) |> infSize))) + let _t = Unchecked.defaultof + let inline tryEquinoxStreamName x = + match tryProp x (nameof _t.p) with + | ValueSome (je: JsonElement) when je.ValueKind = 
JsonValueKind.String -> + je.GetString() |> FsCodec.StreamName.parse |> FsCodec.StreamName.toString |> ValueSome + | _ -> ValueNone + let private tryParseEventOrUnfold = function + | ValueNone -> struct (0, 0L, struct (0, 0L)) + | ValueSome (x: JsonElement) -> x.GetArrayLength(), utf8Size x, ciSize x + [] + type Stat = + { key: string; count: int; events: int; unfolds: int; bytes: int64; eBytes: int64; uBytes: int64; cBytes: int64; iBytes: int64 } + member x.Merge y = + { key = x.key; count = x.count + y.count; events = x.events + y.events; unfolds = x.unfolds + y.unfolds; bytes = x.bytes + y.bytes + eBytes = x.eBytes + y.eBytes; uBytes = x.uBytes + y.uBytes; cBytes = x.cBytes + y.cBytes; iBytes = x.iBytes + y.iBytes } + override x.GetHashCode() = StringComparer.Ordinal.GetHashCode x.key + override x.Equals y = match y with :? Stat as y -> StringComparer.Ordinal.Equals(x.key, y.key) | _ -> false + static Create(key, x: JsonElement) = + let struct (e, eb, struct (ec, ei)) = tryProp x (nameof _t.e) |> tryParseEventOrUnfold + let struct (u, ub, struct (uc, ui)) = tryProp x (nameof _t.u) |> tryParseEventOrUnfold + { key = key; count = 1; events = e; unfolds = u + bytes = utf8Size x; eBytes = eb; uBytes = ub; cBytes = int64 (ec + uc); iBytes = ei + ui } + let [] OrderByTs = " ORDER BY c._ts" let private composeSql (a: TopArguments) = let partitionKeyCriteria = match a.Criteria with @@ -487,60 +530,58 @@ module CosmosTop = | Criteria.CatName n -> $"c.p LIKE \"{n}-%%\"" | Criteria.CatLike pat -> $"c.p LIKE \"{pat}\"" | Criteria.Unfiltered -> "1=1" - $"SELECT * FROM c WHERE {partitionKeyCriteria}" - let arrayLen = function ValueNone -> 0 | ValueSome (x: System.Text.Json.JsonElement) -> x.GetArrayLength() - let scratch = new System.IO.MemoryStream() - let utf8Size (x: System.Text.Json.JsonElement) = - scratch.Position <- 0L - System.Text.Json.JsonSerializer.Serialize(scratch, x) - scratch.Position - [] - type Stat = - { key: string; count: int; events: int; unfolds: int; bytes: int64; eBytes: int64; uBytes: int64 } - static member Create(key, d: System.Text.Json.JsonElement, e: System.Text.Json.JsonElement voption, u: System.Text.Json.JsonElement voption) = - let eb = match e with ValueSome x -> utf8Size x | ValueNone -> 0 - let ub = match u with ValueSome x -> utf8Size x | ValueNone -> 0 - { key = key; count = 1; events = arrayLen e; unfolds = arrayLen u - bytes = utf8Size d; eBytes = eb; uBytes = ub } - member x.Merge y = - { key = x.key; count = x.count + y.count; events = x.events + y.events; unfolds = x.unfolds + y.unfolds - bytes = x.bytes + y.bytes; eBytes = x.eBytes + y.eBytes; uBytes = x.uBytes + y.uBytes } - override x.GetHashCode() = StringComparer.Ordinal.GetHashCode x.key - override x.Equals y = match y with :? 
Stat as y -> StringComparer.Ordinal.Equals(x.key, y.key) | _ -> false + $"SELECT * FROM c WHERE {partitionKeyCriteria}{if a.TsOrder then OrderByTs else null}" + let inline cosmosTimeStamp (x: JsonElement) = x.GetProperty("_ts").GetDouble() |> DateTime.UnixEpoch.AddSeconds let run (a: TopArguments) = task { let sw = System.Diagnostics.Stopwatch.StartNew() let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() let mutable accI, accE, accU, accRus, accRds, accOds, accBytes = 0L, 0L, 0L, 0., 0L, 0L, 0L let s = System.Collections.Generic.HashSet() - let categoryName = FsCodec.StreamName.split >> fun struct (cn, _sid) -> cn - let g = if a.StreamLevel then FsCodec.StreamName.toString else categoryName + let categoryName = FsCodec.StreamName.Internal.trust >> FsCodec.StreamName.split >> fun struct (cn, _sid) -> cn + let group = if a.StreamLevel then id else categoryName try for rtt, rc, items, rdc, rds, ods in a.Execute(composeSql a) |> Query.enum__ do - let mutable pageI, pageE, pageU, pageB, newestTs = 0, 0, 0, 0L, DateTime.MinValue + let mutable pageI, pageE, pageU, pageB, pageCc, pageDm, newestTs, sw = 0, 0, 0, 0L, 0L, 0L, DateTime.MinValue, System.Diagnostics.Stopwatch.StartNew() for x in items do newestTs <- max newestTs (cosmosTimeStamp x) - match tryParseEquinoxBatch x with + match Parser.tryEquinoxStreamName x with | ValueNone -> failwith $"Could not parse document:\n{prettySerdes.Value.Serialize x}" - | ValueSome (sn, e, u) -> - if pageStreams.Add sn then accStreams.Add sn |> ignore - let x = Stat.Create(g sn, x, e, u) + | ValueSome sn -> + if pageStreams.Add sn && not a.StreamLevel then accStreams.Add sn |> ignore + let x = Parser.Stat.Create(group sn, x) let mutable v = Unchecked.defaultof<_> if s.TryGetValue(x, &v) then s.Remove x |> ignore; s.Add(v.Merge x) |> ignore else s.Add x |> ignore - pageI <- pageI + 1; pageE <- pageE + x.events; pageU <- pageU + x.unfolds; pageB <- pageB + x.bytes - Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}>{jds,4:f2}MiB{rc,7:f2}RU{s,5:N1}s age {age:dddd\.hh\:mm\:ss}", - rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, miB pageB, rc, rtt.TotalSeconds, newestTs - DateTime.UtcNow) + pageI <- pageI + 1; pageE <- pageE + x.events; pageU <- pageU + x.unfolds; pageB <- pageB + x.bytes; pageCc <- pageCc + x.cBytes; pageDm <- pageDm + x.iBytes + Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}>{jds,4:f2}MiB{rc,7:f2}RU{s,5:N1}s D+M{im,4:f1} C+C{cm,5:f2} {ms,3}ms age {age:dddd\.hh\:mm\:ss}", + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, miB pageB, rc, rtt.TotalSeconds, miB pageDm, miB pageCc, sw.ElapsedMilliseconds, newestTs - DateTime.UtcNow) pageStreams.Clear() accI <- accI + int64 pageI; accE <- accE + int64 pageE; accU <- accU + int64 pageU accRus <- accRus + rc; accRds <- accRds + int64 rds; accOds <- accOds + int64 ods; accBytes <- accBytes + pageB finally - let accCats = System.Collections.Generic.HashSet(accStreams |> Seq.map categoryName).Count - Log.Information("TOTALS {count:N0}i {cats}c {streams:N0}s {es:N0}e {us:N0}u {tmib:N1}MiB Read {rmib:N1}>{omib:N1} {ru:N2}RU {s:N1}s", - accI, accCats, accStreams.Count, accE, accU, miB accBytes, miB accRds, miB accOds, accRus, sw.Elapsed.TotalSeconds) - - for x in s |> Seq.sortByDescending _.bytes |> Seq.truncate a.Count do - Log.Information("{key,-20}:{count,7}i {mib,6:N1}MiB E{events,7} {emib,7:N1} U{unfolds,7} {umib,6:N1}", - x.key, x.count, miB 
x.bytes, x.events, miB x.eBytes, x.unfolds, miB x.uBytes) } + let accCats = (if a.StreamLevel then s |> Seq.map _.key else accStreams) |> Seq.map group |> System.Collections.Generic.HashSet |> _.Count + let accStreams = if a.StreamLevel then s.Count else accStreams.Count + let iBytes, cBytes = s |> Seq.sumBy _.iBytes, s |> Seq.sumBy _.cBytes + let giB x = miB x / 1024. + Log.Information("TOTALS {count:N0}i {cats}c {streams:N0}s {es:N0}e {us:N0}u read {rg:f1}GiB output {og:f1}GiB JSON {tg:f1}GiB D+M(inflated) {ig:f1}GiB C+C {cm:f2}MiB {ru:N2}RU {s:N1}s", + accI, accCats, accStreams, accE, accU, giB accRds, giB accOds, giB accBytes, giB iBytes, miB cBytes, accRus, sw.Elapsed.TotalSeconds) + let sort: Parser.Stat seq -> Parser.Stat seq = a.Order |> function + | Order.Name -> Seq.sortBy _.key + | Order.Size -> Seq.sortByDescending _.bytes + | Order.Items -> Seq.sortByDescending _.count + | Order.Events -> Seq.sortByDescending _.events + | Order.Unfolds -> Seq.sortByDescending _.unfolds + | Order.EventSize -> Seq.sortByDescending _.eBytes + | Order.UnfoldSize -> Seq.sortByDescending _.uBytes + | Order.InflateSize -> Seq.sortByDescending _.iBytes + | Order.CorrCauseSize -> Seq.sortByDescending _.cBytes + let render (x: Parser.Stat) = + Log.Information("{count,7}i {tm,6:N2}MiB E{events,7} {em,7:N1} U{unfolds,7} {um,6:N1} D+M{dm,6:N1} C+C{cm,5:N1} {key}", + x.count, miB x.bytes, x.events, miB x.eBytes, x.unfolds, miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) + if a.StreamLevel then + let collapsed = s |> Seq.groupBy (_.key >> categoryName) |> Seq.map (fun (cat, xs) -> { (xs |> Seq.reduce _.Merge) with key = cat }) + sort collapsed |> Seq.truncate a.Count |> Seq.iter render + sort s |> Seq.truncate (if a.StreamLevel then a.StreamCount else a.Count) |> Seq.iter render } module DynamoInit = From 2ed3beab355d0bae3cdb3b210b6ac41cbf6ac6ae Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Sat, 8 Jun 2024 00:16:40 +0100 Subject: [PATCH 29/56] Tidy --- tools/Equinox.Tool/Program.fs | 85 +++++++++++++++++------------------ 1 file changed, 41 insertions(+), 44 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 61358ca0a..5a0ef96a3 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -149,7 +149,13 @@ and [] QueryParameters = | Console -> "Also emit the JSON to the console. Default: Gather statistics (but only write to a File if specified)" | Cosmos _ -> "Parameters for CosmosDB." 
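// Illustrative note, not part of the patch: Cosmos sets the `_ts` system property on every document to
// its last-modified time in Unix epoch seconds; the JsonElement.Timestamp extension added later in this
// patch decodes it as sketched below (hypothetical fromCosmosTs helper), which is what feeds the
// per-page `age` figures in the Page log lines:
let fromCosmosTs (tsSeconds: float) = System.DateTime.UnixEpoch.AddSeconds tsSeconds // UTC DateTime
// fromCosmosTs 0.0 = 1970-01-01T00:00:00Z (DateTime.UnixEpoch itself)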
and [] Mode = Default | SnapOnly | SnapWithStream | ReadOnly | ReadWithStream | Raw -and [] Criteria = SingleStream of string | CatName of string | CatLike of string | Unfiltered +and [] Criteria = + | SingleStream of string | CatName of string | CatLike of string | Unfiltered + member x.Sql = x |> function + | Criteria.SingleStream sn -> $"c.p = \"{sn}\"" + | Criteria.CatName n -> $"c.p LIKE \"{n}-%%\"" + | Criteria.CatLike pat -> $"c.p LIKE \"{pat}\"" + | Criteria.Unfiltered -> "1=1" and QueryArguments(p: ParseResults) = member val Mode = p.GetResult(QueryParameters.Mode, if p.Contains QueryParameters.File then Mode.Raw else Mode.Default) member val Pretty = p.Contains QueryParameters.Pretty @@ -395,6 +401,12 @@ module CosmosStats = let prettySerdes = lazy FsCodec.SystemTextJson.Serdes(FsCodec.SystemTextJson.Options.Create(indent = true)) +type System.Text.Json.JsonElement with + member x.Timestamp = x.GetProperty("_ts").GetDouble() |> DateTime.UnixEpoch.AddSeconds + member x.TryProp(name: string) = let mutable p = Unchecked.defaultof<_> in if x.TryGetProperty(name, &p) then ValueSome p else ValueNone +module StreamName = + let categoryName = FsCodec.StreamName.parse >> FsCodec.StreamName.split >> fun struct (cn, _sid) -> cn + module CosmosQuery = open Equinox.CosmosStore.Linq.Internal @@ -408,15 +420,11 @@ module CosmosQuery = let ok, p = x.RootElement.TryGetProperty("_ts") if ok then p.GetDouble() |> DateTime.UnixEpoch.AddSeconds |> Some else None let private composeSql (a: QueryArguments) = - let inline warnOnUnfiltered () = + match a.Criteria with + | Criteria.Unfiltered -> let lel = if a.Mode = Mode.Raw then LogEventLevel.Debug elif a.Filepath = None then LogEventLevel.Warning else LogEventLevel.Information Log.Write(lel, "No StreamName or CategoryName/CategoryLike specified - Unfold Criteria better be unambiguous") - let partitionKeyCriteria = - match a.Criteria with - | Criteria.SingleStream sn -> $"c.p = \"{sn}\"" - | Criteria.CatName n -> $"c.p LIKE \"{n}-%%\"" - | Criteria.CatLike pat -> $"c.p LIKE \"{pat}\"" - | Criteria.Unfiltered -> warnOnUnfiltered (); "1=1" + | _ -> () let selectedFields = match a.Mode with | Mode.Default -> "c._etag, c.p, c.u[0].d" @@ -432,7 +440,7 @@ module CosmosQuery = | [||] -> "1=1" | [| x |] -> x |> exists | xs -> String.Join(" AND ", xs) |> exists - $"SELECT {selectedFields} FROM c WHERE {partitionKeyCriteria} AND {unfoldFilter}" + $"SELECT {selectedFields} FROM c WHERE {a.Criteria.Sql} AND {unfoldFilter}" let private queryDef (a: QueryArguments) = let sql = composeSql a Log.Information("Querying {mode}: {q}", a.Mode, sql) @@ -449,14 +457,14 @@ module CosmosQuery = let mutable accI, accE, accU, accRus, accBytesRead = 0L, 0L, 0L, 0., 0L let it = container.GetItemQueryIterator(queryDef a, requestOptions = qo) try for rtt, rc, items, rdc, rds, ods in it |> Query.enum__ do - let mutable newestTs = None - let items = [| for x in items -> newestTs <- max newestTs x.Timestamp - x.Cast() |] + let mutable newestTs = DateTime.MinValue + let items = [| for x in items -> newestTs <- max newestTs x.RootElement.Timestamp + System.Text.Json.JsonSerializer.Deserialize(x.RootElement) |] let inline arrayLen x = if isNull x then 0 else Array.length x pageStreams.Clear(); for x in items do if x.p <> null && pageStreams.Add x.p then accStreams.Add x.p |> ignore let pageI, pageE, pageU = items.Length, items |> Seq.sumBy (_.e >> arrayLen), items |> Seq.sumBy (_.u >> arrayLen) 
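// Editorial aside (not part of the patch): the age reported in the page log below relies on Cosmos
// DB's `_ts` system property being seconds since the Unix epoch, hence the conversion used above:
//     DateTime.UnixEpoch.AddSeconds 1717138092.   // ~2024-05-31T06:48:12Z
// the logged age is then simply DateTime.UtcNow minus the newest such timestamp seen on the page.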
Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}MiB{rc,7:f2}RU{s,5:N1}s age {age:dddd\.hh\:mm\:ss}", - rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, newestTs.Value - DateTime.UtcNow) + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, DateTime.UtcNow - newestTs) maybeFileStream |> Option.iter (fun stream -> for x in items do serdes.SerializeToStream(x, stream) @@ -468,8 +476,7 @@ module CosmosQuery = finally let fileSize = maybeFileStream |> Option.map _.Position |> Option.defaultValue 0 maybeFileStream |> Option.iter _.Close() // Before we log so time includes flush time and no confusion - let categoryName = FsCodec.StreamName.parse >> FsCodec.StreamName.split >> fun struct (cn, _sid) -> cn - let accCategories = System.Collections.Generic.HashSet(accStreams |> Seq.map categoryName).Count + let accCategories = System.Collections.Generic.HashSet(accStreams |> Seq.map StreamName.categoryName).Count Log.Information("TOTALS {count:N0}i {cats}c {streams:N0}s {es:N0}e {us:N0}u R/W {rmib:N1}/{wmib:N1}MiB {ru:N2}RU {s:N1}s", accI, accCategories, accStreams.Count, accE, accU, miB accBytesRead, miB fileSize, accRus, sw.Elapsed.TotalSeconds) } @@ -478,6 +485,7 @@ module CosmosTop = open Equinox.CosmosStore.Linq.Internal open FSharp.Control open System.Text.Json + module private Parser = let scratch = new System.IO.MemoryStream() let inline utf8Size (x: JsonElement) = @@ -488,9 +496,6 @@ module CosmosTop = scratch.Position <- 0L if Equinox.CosmosStore.Core.JsonElement.tryInflateTo scratch x then scratch.Position else utf8Size x - let inline tryProp (x: JsonElement) (id: string): ValueOption = - let mutable p = Unchecked.defaultof<_> - if x.TryGetProperty(id, &p) then ValueSome p else ValueNone // using the length as a decent proxy for UTF-8 length of corr/causation; if you have messy data in there, you'll have bigger problems to worry about let inline stringLen x = match x with ValueSome (x: JsonElement) when x.ValueKind <> JsonValueKind.Null -> x.GetString().Length | _ -> 0 let _e = Unchecked.defaultof // Or Unfold - both share field names @@ -498,11 +503,11 @@ module CosmosTop = (struct (0, 0L), x.EnumerateArray()) ||> Seq.fold (fun struct (c, i) x -> let inline infSize x = match x with ValueSome x -> inflatedUtf8Size x | ValueNone -> 0 - struct (c + (tryProp x (nameof _e.correlationId) |> stringLen) + (tryProp x (nameof _e.causationId) |> stringLen), - i + (tryProp x (nameof _e.d) |> infSize) + (tryProp x (nameof _e.m) |> infSize))) + struct (c + (x.TryProp(nameof _e.correlationId) |> stringLen) + (x.TryProp(nameof _e.causationId) |> stringLen), + i + (x.TryProp(nameof _e.d) |> infSize) + (x.TryProp(nameof _e.m) |> infSize))) let _t = Unchecked.defaultof - let inline tryEquinoxStreamName x = - match tryProp x (nameof _t.p) with + let inline tryEquinoxStreamName (x: JsonElement) = + match x.TryProp(nameof _t.p) with | ValueSome (je: JsonElement) when je.ValueKind = JsonValueKind.String -> je.GetString() |> FsCodec.StreamName.parse |> FsCodec.StreamName.toString |> ValueSome | _ -> ValueNone @@ -518,42 +523,33 @@ module CosmosTop = override x.GetHashCode() = StringComparer.Ordinal.GetHashCode x.key override x.Equals y = match y with :? 
Stat as y -> StringComparer.Ordinal.Equals(x.key, y.key) | _ -> false static Create(key, x: JsonElement) = - let struct (e, eb, struct (ec, ei)) = tryProp x (nameof _t.e) |> tryParseEventOrUnfold - let struct (u, ub, struct (uc, ui)) = tryProp x (nameof _t.u) |> tryParseEventOrUnfold + let struct (e, eb, struct (ec, ei)) = x.TryProp(nameof _t.e) |> tryParseEventOrUnfold + let struct (u, ub, struct (uc, ui)) = x.TryProp(nameof _t.u) |> tryParseEventOrUnfold { key = key; count = 1; events = e; unfolds = u bytes = utf8Size x; eBytes = eb; uBytes = ub; cBytes = int64 (ec + uc); iBytes = ei + ui } let [] OrderByTs = " ORDER BY c._ts" - let private composeSql (a: TopArguments) = - let partitionKeyCriteria = - match a.Criteria with - | Criteria.SingleStream sn -> $"c.p = \"{sn}\"" - | Criteria.CatName n -> $"c.p LIKE \"{n}-%%\"" - | Criteria.CatLike pat -> $"c.p LIKE \"{pat}\"" - | Criteria.Unfiltered -> "1=1" - $"SELECT * FROM c WHERE {partitionKeyCriteria}{if a.TsOrder then OrderByTs else null}" - let inline cosmosTimeStamp (x: JsonElement) = x.GetProperty("_ts").GetDouble() |> DateTime.UnixEpoch.AddSeconds + let private sql (a: TopArguments) = $"SELECT * FROM c WHERE {a.Criteria.Sql}{if a.TsOrder then OrderByTs else null}" let run (a: TopArguments) = task { let sw = System.Diagnostics.Stopwatch.StartNew() let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() let mutable accI, accE, accU, accRus, accRds, accOds, accBytes = 0L, 0L, 0L, 0., 0L, 0L, 0L let s = System.Collections.Generic.HashSet() - let categoryName = FsCodec.StreamName.Internal.trust >> FsCodec.StreamName.split >> fun struct (cn, _sid) -> cn - let group = if a.StreamLevel then id else categoryName - try for rtt, rc, items, rdc, rds, ods in a.Execute(composeSql a) |> Query.enum__ do + let group = if a.StreamLevel then id else StreamName.categoryName + try for rtt, rc, items, rdc, rds, ods in a.Execute(sql a) |> Query.enum__ do let mutable pageI, pageE, pageU, pageB, pageCc, pageDm, newestTs, sw = 0, 0, 0, 0L, 0L, 0L, DateTime.MinValue, System.Diagnostics.Stopwatch.StartNew() for x in items do - newestTs <- max newestTs (cosmosTimeStamp x) + newestTs <- max newestTs x.Timestamp match Parser.tryEquinoxStreamName x with | ValueNone -> failwith $"Could not parse document:\n{prettySerdes.Value.Serialize x}" | ValueSome sn -> if pageStreams.Add sn && not a.StreamLevel then accStreams.Add sn |> ignore let x = Parser.Stat.Create(group sn, x) let mutable v = Unchecked.defaultof<_> - if s.TryGetValue(x, &v) then s.Remove x |> ignore; s.Add(v.Merge x) |> ignore - else s.Add x |> ignore - pageI <- pageI + 1; pageE <- pageE + x.events; pageU <- pageU + x.unfolds; pageB <- pageB + x.bytes; pageCc <- pageCc + x.cBytes; pageDm <- pageDm + x.iBytes - Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}>{jds,4:f2}MiB{rc,7:f2}RU{s,5:N1}s D+M{im,4:f1} C+C{cm,5:f2} {ms,3}ms age {age:dddd\.hh\:mm\:ss}", - rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, miB pageB, rc, rtt.TotalSeconds, miB pageDm, miB pageCc, sw.ElapsedMilliseconds, newestTs - DateTime.UtcNow) + s.Add(if s.TryGetValue(x, &v) then s.Remove x |> ignore; v.Merge x else x) |> ignore + pageI <- pageI + 1; pageE <- pageE + x.events; pageU <- pageU + x.unfolds + pageB <- pageB + x.bytes; pageCc <- pageCc + x.cBytes; pageDm <- pageDm + x.iBytes + Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}<{jds,4:f2}MiB{rc,7:f2}RU{s,5:N1}s D+M{im,4:f1} C+C{cm,5:f2} 
{ms,3}ms age {age:dddd\.hh\:mm\:ss}", + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, miB pageB, rc, rtt.TotalSeconds, miB pageDm, miB pageCc, sw.ElapsedMilliseconds, DateTime.UtcNow - newestTs) pageStreams.Clear() accI <- accI + int64 pageI; accE <- accE + int64 pageE; accU <- accU + int64 pageU accRus <- accRus + rc; accRds <- accRds + int64 rds; accOds <- accOds + int64 ods; accBytes <- accBytes + pageB @@ -562,9 +558,10 @@ module CosmosTop = let accCats = (if a.StreamLevel then s |> Seq.map _.key else accStreams) |> Seq.map group |> System.Collections.Generic.HashSet |> _.Count let accStreams = if a.StreamLevel then s.Count else accStreams.Count let iBytes, cBytes = s |> Seq.sumBy _.iBytes, s |> Seq.sumBy _.cBytes - let giB x = miB x / 1024. + let inline giB x = miB x / 1024. Log.Information("TOTALS {count:N0}i {cats}c {streams:N0}s {es:N0}e {us:N0}u read {rg:f1}GiB output {og:f1}GiB JSON {tg:f1}GiB D+M(inflated) {ig:f1}GiB C+C {cm:f2}MiB {ru:N2}RU {s:N1}s", accI, accCats, accStreams, accE, accU, giB accRds, giB accOds, giB accBytes, giB iBytes, miB cBytes, accRus, sw.Elapsed.TotalSeconds) + let sort: Parser.Stat seq -> Parser.Stat seq = a.Order |> function | Order.Name -> Seq.sortBy _.key | Order.Size -> Seq.sortByDescending _.bytes @@ -579,7 +576,7 @@ module CosmosTop = Log.Information("{count,7}i {tm,6:N2}MiB E{events,7} {em,7:N1} U{unfolds,7} {um,6:N1} D+M{dm,6:N1} C+C{cm,5:N1} {key}", x.count, miB x.bytes, x.events, miB x.eBytes, x.unfolds, miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) if a.StreamLevel then - let collapsed = s |> Seq.groupBy (_.key >> categoryName) |> Seq.map (fun (cat, xs) -> { (xs |> Seq.reduce _.Merge) with key = cat }) + let collapsed = s |> Seq.groupBy (_.key >> StreamName.categoryName) |> Seq.map (fun (cat, xs) -> { (xs |> Seq.reduce _.Merge) with key = cat }) sort collapsed |> Seq.truncate a.Count |> Seq.iter render sort s |> Seq.truncate (if a.StreamLevel then a.StreamCount else a.Count) |> Seq.iter render } From 0aac7922349b04855e67d70ebff99f0c3473874a Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Sat, 8 Jun 2024 02:14:04 +0100 Subject: [PATCH 30/56] CL --- CHANGELOG.md | 1 + tools/Equinox.Tool/Program.fs | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 33d4c73bc..f085928bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ The `Unreleased` section name is replaced by the expected version of next releas - `eqx stats`: `-O`, `-N` flags extract oldest and newest `_ts` within a store [#459](https://github.com/jet/equinox/pull/459) - `eqx`: `-Q` flag omits timestamps from console output logging [#459](https://github.com/jet/equinox/pull/459) - `Equinox.CosmosStore.Linq`: Add LINQ querying support for Indexed `u`nfolds (`AccessStrategy.Custom`+`CosmosStoreCategory.shouldCompress`) [#450](https://github.com/jet/equinox/pull/450) +- `eqx top`: Support for analyzing space usage for event and view containers by category and/or stream [#450](https://github.com/jet/equinox/pull/450) ### Changed diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 5a0ef96a3..583b5ac89 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -488,21 +488,21 @@ module CosmosTop = module private Parser = let scratch = new System.IO.MemoryStream() - let inline utf8Size (x: JsonElement) = + let utf8Size (x: JsonElement) = scratch.Position <- 0L JsonSerializer.Serialize(scratch, x) scratch.Position - let inline inflatedUtf8Size x = 
+ let inflatedUtf8Size x = scratch.Position <- 0L if Equinox.CosmosStore.Core.JsonElement.tryInflateTo scratch x then scratch.Position else utf8Size x + let infSize = function ValueSome x -> inflatedUtf8Size x | ValueNone -> 0 // using the length as a decent proxy for UTF-8 length of corr/causation; if you have messy data in there, you'll have bigger problems to worry about let inline stringLen x = match x with ValueSome (x: JsonElement) when x.ValueKind <> JsonValueKind.Null -> x.GetString().Length | _ -> 0 let _e = Unchecked.defaultof // Or Unfold - both share field names - let inline ciSize (x: JsonElement) = + let dmcSize (x: JsonElement) = (struct (0, 0L), x.EnumerateArray()) ||> Seq.fold (fun struct (c, i) x -> - let inline infSize x = match x with ValueSome x -> inflatedUtf8Size x | ValueNone -> 0 struct (c + (x.TryProp(nameof _e.correlationId) |> stringLen) + (x.TryProp(nameof _e.causationId) |> stringLen), i + (x.TryProp(nameof _e.d) |> infSize) + (x.TryProp(nameof _e.m) |> infSize))) let _t = Unchecked.defaultof @@ -513,7 +513,7 @@ module CosmosTop = | _ -> ValueNone let private tryParseEventOrUnfold = function | ValueNone -> struct (0, 0L, struct (0, 0L)) - | ValueSome (x: JsonElement) -> x.GetArrayLength(), utf8Size x, ciSize x + | ValueSome (x: JsonElement) -> x.GetArrayLength(), utf8Size x, dmcSize x [] type Stat = { key: string; count: int; events: int; unfolds: int; bytes: int64; eBytes: int64; uBytes: int64; cBytes: int64; iBytes: int64 } @@ -522,7 +522,7 @@ module CosmosTop = eBytes = x.eBytes + y.eBytes; uBytes = x.uBytes + y.uBytes; cBytes = x.cBytes + y.cBytes; iBytes = x.iBytes + y.iBytes } override x.GetHashCode() = StringComparer.Ordinal.GetHashCode x.key override x.Equals y = match y with :? Stat as y -> StringComparer.Ordinal.Equals(x.key, y.key) | _ -> false - static Create(key, x: JsonElement) = + static member Create(key, x: JsonElement) = let struct (e, eb, struct (ec, ei)) = x.TryProp(nameof _t.e) |> tryParseEventOrUnfold let struct (u, ub, struct (uc, ui)) = x.TryProp(nameof _t.u) |> tryParseEventOrUnfold { key = key; count = 1; events = e; unfolds = u @@ -558,8 +558,8 @@ module CosmosTop = let accCats = (if a.StreamLevel then s |> Seq.map _.key else accStreams) |> Seq.map group |> System.Collections.Generic.HashSet |> _.Count let accStreams = if a.StreamLevel then s.Count else accStreams.Count let iBytes, cBytes = s |> Seq.sumBy _.iBytes, s |> Seq.sumBy _.cBytes - let inline giB x = miB x / 1024. - Log.Information("TOTALS {count:N0}i {cats}c {streams:N0}s {es:N0}e {us:N0}u read {rg:f1}GiB output {og:f1}GiB JSON {tg:f1}GiB D+M(inflated) {ig:f1}GiB C+C {cm:f2}MiB {ru:N2}RU {s:N1}s", + let giB x = miB x / 1024. 
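// Editorial aside (not part of the patch): in Streams (stream-level) mode `s` already holds one Stat
// per stream, so the category/stream totals here are derived from it rather than from accStreams
// (which is only fed when grouping by category); giB just layers a further /1024 onto the miB helper.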
+ Log.Information("TOTALS {count:N0}i {cats:N0}c {streams:N0}s {es:N0}e {us:N0}u read {rg:f1}GiB output {og:f1}GiB JSON {tg:f1}GiB D+M(inflated) {ig:f1}GiB C+C {cm:f2}MiB {ru:N2}RU {s:N1}s", accI, accCats, accStreams, accE, accU, giB accRds, giB accOds, giB accBytes, giB iBytes, miB cBytes, accRus, sw.Elapsed.TotalSeconds) let sort: Parser.Stat seq -> Parser.Stat seq = a.Order |> function From a6b33121a2ac405f4e9adb61a3ac0fabe4aae87c Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 12 Jun 2024 19:18:34 +0100 Subject: [PATCH 31/56] Destroy skeleton --- tools/Equinox.Tool/Program.fs | 106 ++++++++++++++++++++++++++++------ 1 file changed, 88 insertions(+), 18 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 583b5ac89..2a2d2f1c0 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -31,6 +31,7 @@ type Parameters = | [] Stats of ParseResults | [] Query of ParseResults | [] Top of ParseResults + | [] Destroy of ParseResults interface IArgParserTemplate with member a.Usage = a |> function | Quiet -> "Omit timestamps from log output" @@ -46,6 +47,7 @@ type Parameters = | Stats _ -> "inspect store to determine numbers of streams/documents/events and/or config (supports `cosmos` and `dynamo` stores)." | Query _ -> "Load/Summarise streams based on Cosmos SQL Queries (supports `cosmos` only)." | Top _ -> "Scan to determine top categories and streams (supports `cosmos` only)." + | Destroy _ -> "DELETE documents for a nominated category and/or stream (includes a dry-run mode). (supports `cosmos` only)." and [] InitParameters = | [] Rus of int | [] Autoscale @@ -198,10 +200,10 @@ and [] TopParameters = and Order = Name | Items | Events | Unfolds | Size | EventSize | UnfoldSize | InflateSize | CorrCauseSize and TopArguments(p: ParseResults) = member val Criteria = - match p.TryGetResult StreamName, p.TryGetResult CategoryName, p.TryGetResult CategoryLike with + match p.TryGetResult TopParameters.StreamName, p.TryGetResult TopParameters.CategoryName, p.TryGetResult TopParameters.CategoryLike with | Some sn, None, None -> Criteria.SingleStream sn | Some _, Some _, _ - | Some _, _, Some _ -> p.Raise "StreamName and CategoryLike/CategoryName mutually exclusive" + | Some _, _, Some _ -> p.Raise "StreamName and CategoryLike/CategoryName are mutually exclusive" | None, Some cn, None -> Criteria.CatName cn | None, None, Some cl -> Criteria.CatLike cl | None, None, None -> Criteria.Unfiltered @@ -219,7 +221,39 @@ and TopArguments(p: ParseResults) = let qd = Microsoft.Azure.Cosmos.QueryDefinition sql let qo = Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = x.CosmosArgs.QueryMaxItems) container.GetItemQueryIterator(qd, requestOptions = qo) - +and [] DestroyParameters = + | [] StreamName of string + | [] CategoryName of string + | [] CategoryLike of string + | [] Force + | [] Cosmos of ParseResults + interface IArgParserTemplate with + member a.Usage = a |> function + | StreamName _ -> "Specify stream name to match against `p`, e.g. `$UserServices-f7c1ce63389a45bdbea1cccebb1b3c8a`." + | CategoryName _ -> "Specify category name to match against `p`, e.g. `$UserServices`." + | CategoryLike _ -> "Specify category name to match against `p` as a Cosmos LIKE expression (with `%` as wildcard, e.g. `$UserServices-%`." + | Force -> "Actually delete the documents (default is a dry run, reporting what would be deleted)" + | Cosmos _ -> "Parameters for CosmosDB." 
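// Editorial aside (not part of the patch): Top and Destroy validate their filter flags with the same
// idiom - gather each optional argument via TryGetResult, then tuple-match so that only the legal
// combinations yield a Criteria and anything else fails fast. A stripped-down illustration
// (pickCriteria is hypothetical, not the tool's real code):
let private pickCriteria (stream: string option) (catName: string option) (catLike: string option) =
    match stream, catName, catLike with
    | Some sn, None, None -> Criteria.SingleStream sn
    | None, Some cn, None -> Criteria.CatName cn
    | None, None, Some cl -> Criteria.CatLike cl
    | None, None, None -> Criteria.Unfiltered
    | _ -> failwith "stream and category criteria are mutually exclusive"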
+and DestroyArguments(p: ParseResults) = + member val Criteria = + match p.TryGetResult StreamName, p.TryGetResult CategoryName, p.TryGetResult CategoryLike with + | Some sn, None, None -> Criteria.SingleStream sn + | Some _, Some _, _ + | Some _, _, Some _ -> p.Raise "StreamName and CategoryLike/CategoryName are mutually exclusive" + | None, Some cn, None -> Criteria.CatName cn + | None, None, Some cl -> Criteria.CatLike cl + | None, None, None -> failwith "Category or stream name criteria must be supplied" + | None, Some _, Some _ -> p.Raise "CategoryLike and CategoryName are mutually exclusive" + member val CosmosArgs = p.GetResult DestroyParameters.Cosmos |> Store.Cosmos.Arguments + member val DryRun = p.Contains Force |> not + member x.Connect() = match Store.Cosmos.config Log.Logger (None, true) x.CosmosArgs with + | Store.Config.Cosmos (cc, _, _) -> cc.Container + | _ -> failwith "Destroy requires Cosmos" + member x.Execute(sql) = let container = x.Connect() + let qd = Microsoft.Azure.Cosmos.QueryDefinition sql + let qo = Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = x.CosmosArgs.QueryMaxItems) + container.GetItemQueryIterator(qd, requestOptions = qo) +and SnEventsUnfolds = { sn: string; events: int; unfolds: int } and [] DumpParameters = | [] Stream of FsCodec.StreamName | [] Correlation @@ -357,6 +391,7 @@ module CosmosStats = open Equinox.CosmosStore.Linq.Internal open FSharp.Control + let run (log : ILogger, _verboseConsole, _maybeSeq) (p : ParseResults) = match p.GetSubCommand() with | StatsParameters.Cosmos sp -> @@ -404,6 +439,7 @@ let prettySerdes = lazy FsCodec.SystemTextJson.Serdes(FsCodec.SystemTextJson.Opt type System.Text.Json.JsonElement with member x.Timestamp = x.GetProperty("_ts").GetDouble() |> DateTime.UnixEpoch.AddSeconds member x.TryProp(name: string) = let mutable p = Unchecked.defaultof<_> in if x.TryGetProperty(name, &p) then ValueSome p else ValueNone + module StreamName = let categoryName = FsCodec.StreamName.parse >> FsCodec.StreamName.split >> fun struct (cn, _sid) -> cn @@ -486,6 +522,17 @@ module CosmosTop = open FSharp.Control open System.Text.Json + let _t = Unchecked.defaultof + let inline tryEquinoxStreamName (x: JsonElement) = + match x.TryProp(nameof _t.p) with + | ValueSome (je: JsonElement) when je.ValueKind = JsonValueKind.String -> + je.GetString() |> FsCodec.StreamName.parse |> FsCodec.StreamName.toString |> ValueSome + | _ -> ValueNone + let inline parseEquinoxStreamName (x: JsonElement) = + match tryEquinoxStreamName x with + | ValueNone -> failwith $"Could not parse document:\n{prettySerdes.Value.Serialize x}" + | ValueSome sn -> sn + module private Parser = let scratch = new System.IO.MemoryStream() let utf8Size (x: JsonElement) = @@ -505,15 +552,10 @@ module CosmosTop = ||> Seq.fold (fun struct (c, i) x -> struct (c + (x.TryProp(nameof _e.correlationId) |> stringLen) + (x.TryProp(nameof _e.causationId) |> stringLen), i + (x.TryProp(nameof _e.d) |> infSize) + (x.TryProp(nameof _e.m) |> infSize))) - let _t = Unchecked.defaultof - let inline tryEquinoxStreamName (x: JsonElement) = - match x.TryProp(nameof _t.p) with - | ValueSome (je: JsonElement) when je.ValueKind = JsonValueKind.String -> - je.GetString() |> FsCodec.StreamName.parse |> FsCodec.StreamName.toString |> ValueSome - | _ -> ValueNone let private tryParseEventOrUnfold = function | ValueNone -> struct (0, 0L, struct (0, 0L)) | ValueSome (x: JsonElement) -> x.GetArrayLength(), utf8Size x, dmcSize x + let _t = Unchecked.defaultof [] type Stat = { key: string; count: 
int; events: int; unfolds: int; bytes: int64; eBytes: int64; uBytes: int64; cBytes: int64; iBytes: int64 } @@ -539,15 +581,13 @@ module CosmosTop = let mutable pageI, pageE, pageU, pageB, pageCc, pageDm, newestTs, sw = 0, 0, 0, 0L, 0L, 0L, DateTime.MinValue, System.Diagnostics.Stopwatch.StartNew() for x in items do newestTs <- max newestTs x.Timestamp - match Parser.tryEquinoxStreamName x with - | ValueNone -> failwith $"Could not parse document:\n{prettySerdes.Value.Serialize x}" - | ValueSome sn -> - if pageStreams.Add sn && not a.StreamLevel then accStreams.Add sn |> ignore - let x = Parser.Stat.Create(group sn, x) - let mutable v = Unchecked.defaultof<_> - s.Add(if s.TryGetValue(x, &v) then s.Remove x |> ignore; v.Merge x else x) |> ignore - pageI <- pageI + 1; pageE <- pageE + x.events; pageU <- pageU + x.unfolds - pageB <- pageB + x.bytes; pageCc <- pageCc + x.cBytes; pageDm <- pageDm + x.iBytes + let sn = parseEquinoxStreamName x + if pageStreams.Add sn && not a.StreamLevel then accStreams.Add sn |> ignore + let x = Parser.Stat.Create(group sn, x) + let mutable v = Unchecked.defaultof<_> + s.Add(if s.TryGetValue(x, &v) then s.Remove x |> ignore; v.Merge x else x) |> ignore + pageI <- pageI + 1; pageE <- pageE + x.events; pageU <- pageU + x.unfolds + pageB <- pageB + x.bytes; pageCc <- pageCc + x.cBytes; pageDm <- pageDm + x.iBytes Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}<{jds,4:f2}MiB{rc,7:f2}RU{s,5:N1}s D+M{im,4:f1} C+C{cm,5:f2} {ms,3}ms age {age:dddd\.hh\:mm\:ss}", rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, miB pageB, rc, rtt.TotalSeconds, miB pageDm, miB pageCc, sw.ElapsedMilliseconds, DateTime.UtcNow - newestTs) pageStreams.Clear() @@ -580,6 +620,35 @@ module CosmosTop = sort collapsed |> Seq.truncate a.Count |> Seq.iter render sort s |> Seq.truncate (if a.StreamLevel then a.StreamCount else a.Count) |> Seq.iter render } +module CosmosDestroy = + + open Equinox.CosmosStore.Linq.Internal + open FSharp.Control + + let run (a: DestroyArguments) = task { + let sw = System.Diagnostics.Stopwatch.StartNew() + let sql = $"SELECT c.p AS sn, ARRAYLENGTH(c.e) AS events, ARRAYLENGTH(c.u) AS unfolds FROM c WHERE {a.Criteria.Sql}" + if a.DryRun then Log.Warning("Dry-run of deleting all Items matching {sql}", sql) + else Log.Warning("DESTROYING all Items matching {sql}", sql) + + let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() + let mutable accI, accE, accU, accRus, accRds, accOds = 0L, 0L, 0L, 0., 0L, 0L + try for rtt, rc, items, rdc, rds, ods in a.Execute sql |> Query.enum__ do + let mutable pageI, pageE, pageU, sw = 0, 0, 0, System.Diagnostics.Stopwatch.StartNew() + for i in items do + if pageStreams.Add i.sn then accStreams.Add i.sn |> ignore + pageI <- pageI + 1; pageE <- pageE + i.events; pageU <- pageU + i.unfolds + Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}{rc,7:f2}RU{s,5:N1}s {s,5:N1}s", + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, sw.Elapsed.TotalSeconds) + pageStreams.Clear() + accI <- accI + int64 pageI; accE <- accE + int64 pageE; accU <- accU + int64 pageU + accRus <- accRus + rc; accRds <- accRds + int64 rds; accOds <- accOds + int64 ods + finally + + let accCats = accStreams |> Seq.map StreamName.categoryName |> System.Collections.Generic.HashSet |> _.Count + Log.Information("TOTALS {count:N0}i {cats:N0}c {streams:N0}s {es:N0}e {us:N0}u read {rmib:f1}MiB output 
{omib:f1}MiB {ru:N2}RU {s:N1}s", + accI, accCats, accStreams.Count, accE, accU, miB accRds, miB accOds, accRus, sw.Elapsed.TotalSeconds) } + module DynamoInit = open Equinox.DynamoStore @@ -695,6 +764,7 @@ type Arguments(p: ParseResults) = | Dump a -> do! Dump.run (Log.Logger, verboseConsole, maybeSeq) a | Query a -> do! CosmosQuery.run (QueryArguments a) |> Async.AwaitTaskCorrect | Top a -> do! CosmosTop.run (TopArguments a) |> Async.AwaitTaskCorrect + | Destroy a -> do! CosmosDestroy.run (DestroyArguments a) |> Async.AwaitTaskCorrect | Stats a -> do! CosmosStats.run (Log.Logger, verboseConsole, maybeSeq) a | LoadTest a -> let n = p.GetResult(LogFile, fun () -> p.ProgramName + ".log") let reportFilename = System.IO.FileInfo(n).FullName From 47c9b588db7d9b0540840b91ae7a98cbb731eb1e Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 12 Jun 2024 23:00:54 +0100 Subject: [PATCH 32/56] Implement deletion --- tools/Equinox.Tool/Program.fs | 48 ++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 2a2d2f1c0..6f5c8bad7 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -249,11 +249,7 @@ and DestroyArguments(p: ParseResults) = member x.Connect() = match Store.Cosmos.config Log.Logger (None, true) x.CosmosArgs with | Store.Config.Cosmos (cc, _, _) -> cc.Container | _ -> failwith "Destroy requires Cosmos" - member x.Execute(sql) = let container = x.Connect() - let qd = Microsoft.Azure.Cosmos.QueryDefinition sql - let qo = Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = x.CosmosArgs.QueryMaxItems) - container.GetItemQueryIterator(qd, requestOptions = qo) -and SnEventsUnfolds = { sn: string; events: int; unfolds: int } +and SnEventsUnfolds = { p: string; id: string; es: int; us: int } and [] DumpParameters = | [] Stream of FsCodec.StreamName | [] Correlation @@ -626,28 +622,44 @@ module CosmosDestroy = open FSharp.Control let run (a: DestroyArguments) = task { - let sw = System.Diagnostics.Stopwatch.StartNew() - let sql = $"SELECT c.p AS sn, ARRAYLENGTH(c.e) AS events, ARRAYLENGTH(c.u) AS unfolds FROM c WHERE {a.Criteria.Sql}" + let tsw = System.Diagnostics.Stopwatch.StartNew() + let sql = $"SELECT c.p, c.id, ARRAYLENGTH(c.e) AS es, ARRAYLENGTH(c.u) AS us FROM c WHERE {a.Criteria.Sql}" if a.DryRun then Log.Warning("Dry-run of deleting all Items matching {sql}", sql) else Log.Warning("DESTROYING all Items matching {sql}", sql) - + let container = a.Connect() + let query = + let qd = Microsoft.Azure.Cosmos.QueryDefinition sql + let qo = Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = a.CosmosArgs.QueryMaxItems) + container.GetItemQueryIterator(qd, requestOptions = qo) let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() - let mutable accI, accE, accU, accRus, accRds, accOds = 0L, 0L, 0L, 0., 0L, 0L - try for rtt, rc, items, rdc, rds, ods in a.Execute sql |> Query.enum__ do - let mutable pageI, pageE, pageU, sw = 0, 0, 0, System.Diagnostics.Stopwatch.StartNew() + let mutable accI, accE, accU, accRus, accDelRu, accRds, accOds = 0L, 0L, 0L, 0., 0., 0L, 0L + try for rtt, rc, items, rdc, rds, ods in query |> Query.enum__ do + let mutable pageI, pageE, pageU, pdRu, idRu = 0, 0, 0, 0., 0. 
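// Editorial aside (not part of the patch): the SELECT above deliberately projects only what deletion
// needs - the partition key `p`, the item `id`, and the array-length counts aliased as `es`/`us` so
// they can bind by name onto SnEventsUnfolds - which keeps the read side of the scan much cheaper
// than pulling the full documents.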
+ let psw, isw = System.Diagnostics.Stopwatch.StartNew(), System.Diagnostics.Stopwatch.StartNew() for i in items do - if pageStreams.Add i.sn then accStreams.Add i.sn |> ignore - pageI <- pageI + 1; pageE <- pageE + i.events; pageU <- pageU + i.unfolds - Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}{rc,7:f2}RU{s,5:N1}s {s,5:N1}s", - rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, sw.Elapsed.TotalSeconds) + if pageStreams.Add i.p then accStreams.Add i.p |> ignore + pageI <- pageI + 1; pageE <- pageE + i.es; pageU <- pageU + i.us + if not a.DryRun then + let! res = container.DeleteItemStreamAsync(i.id, Microsoft.Azure.Cosmos.PartitionKey i.p) + let ru = res.Headers.RequestCharge in idRu <- idRu + ru; pdRu <- pdRu + ru + if not res.IsSuccessStatusCode then + failwith $"Deletion of {i.p}/{i.id} failed with Code: {res.StatusCode} Message: {res.ErrorMessage}\n{res.Diagnostics}" + if isw.Elapsed.TotalSeconds > 30 then + Log.Information(".. Deleted {count,5}i {streams,7}s{es,7}e{us,7}u {rus,7:N2}WRU/s {s,6:N1}s", + pageI, pageStreams.Count, pageE, pageU, idRu / isw.Elapsed.TotalSeconds, psw.Elapsed.TotalSeconds) + isw.Restart() + idRu <- 0 + let ps = psw.Elapsed.TotalSeconds + Log.Information("Page{rdc,6}>{count,5}i {streams,7}s{es,7}e{us,7}u{rds,8:f2}>{ods,4:f2} {prc,8:f2}RRU {rs,5:N1}s {rus:N2}WRU/s {ps,5:N1}s", + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, pdRu / ps, ps) pageStreams.Clear() accI <- accI + int64 pageI; accE <- accE + int64 pageE; accU <- accU + int64 pageU - accRus <- accRus + rc; accRds <- accRds + int64 rds; accOds <- accOds + int64 ods + accRus <- accRus + rc; accDelRu <- accDelRu + pdRu; accRds <- accRds + int64 rds; accOds <- accOds + int64 ods finally let accCats = accStreams |> Seq.map StreamName.categoryName |> System.Collections.Generic.HashSet |> _.Count - Log.Information("TOTALS {count:N0}i {cats:N0}c {streams:N0}s {es:N0}e {us:N0}u read {rmib:f1}MiB output {omib:f1}MiB {ru:N2}RU {s:N1}s", - accI, accCats, accStreams.Count, accE, accU, miB accRds, miB accOds, accRus, sw.Elapsed.TotalSeconds) } + Log.Information("TOTALS {count:N0}i {cats:N0}c {streams:N0}s {es:N0}e {us:N0}u read {rmib:f1}MiB output {omib:f1}MiB {rru:N2}RRU Avg {ru:N2}WRU/s Delete {ru:N2}WRU Total {s:N1}s", + accI, accCats, accStreams.Count, accE, accU, miB accRds, miB accOds, accRus, accDelRu / tsw.Elapsed.TotalSeconds, accDelRu, tsw.Elapsed.TotalSeconds) } module DynamoInit = From f840f66d8d19dafe8289f5459ea864a0d98680c7 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 12 Jun 2024 23:36:04 +0100 Subject: [PATCH 33/56] Polish --- tools/Equinox.Tool/Program.fs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 6f5c8bad7..7334fa442 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -624,8 +624,8 @@ module CosmosDestroy = let run (a: DestroyArguments) = task { let tsw = System.Diagnostics.Stopwatch.StartNew() let sql = $"SELECT c.p, c.id, ARRAYLENGTH(c.e) AS es, ARRAYLENGTH(c.u) AS us FROM c WHERE {a.Criteria.Sql}" - if a.DryRun then Log.Warning("Dry-run of deleting all Items matching {sql}", sql) - else Log.Warning("DESTROYING all Items matching {sql}", sql) + if a.DryRun then Log.Warning("Dry-run of deleting items based on {sql}", sql) + else Log.Warning("DESTROYING all Items WHERE {sql}", a.Criteria.Sql) let container = a.Connect() let query = let qd = 
Microsoft.Azure.Cosmos.QueryDefinition sql From 500cd086ebc5951beab225b28227543643690402 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Thu, 13 Jun 2024 12:28:21 +0100 Subject: [PATCH 34/56] Add Custom SQL mode --- tools/Equinox.Tool/Program.fs | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 7334fa442..487194a1d 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -152,11 +152,12 @@ and [] QueryParameters = | Cosmos _ -> "Parameters for CosmosDB." and [] Mode = Default | SnapOnly | SnapWithStream | ReadOnly | ReadWithStream | Raw and [] Criteria = - | SingleStream of string | CatName of string | CatLike of string | Unfiltered + | SingleStream of string | CatName of string | CatLike of string | Custom of sql: string | Unfiltered member x.Sql = x |> function | Criteria.SingleStream sn -> $"c.p = \"{sn}\"" | Criteria.CatName n -> $"c.p LIKE \"{n}-%%\"" | Criteria.CatLike pat -> $"c.p LIKE \"{pat}\"" + | Criteria.Custom filter -> filter | Criteria.Unfiltered -> "1=1" and QueryArguments(p: ParseResults) = member val Mode = p.GetResult(QueryParameters.Mode, if p.Contains QueryParameters.File then Mode.Raw else Mode.Default) @@ -225,6 +226,7 @@ and [] DestroyParameters = | [] StreamName of string | [] CategoryName of string | [] CategoryLike of string + | [] CustomFilter of sql: string | [] Force | [] Cosmos of ParseResults interface IArgParserTemplate with @@ -232,18 +234,21 @@ and [] DestroyParameters = | StreamName _ -> "Specify stream name to match against `p`, e.g. `$UserServices-f7c1ce63389a45bdbea1cccebb1b3c8a`." | CategoryName _ -> "Specify category name to match against `p`, e.g. `$UserServices`." | CategoryLike _ -> "Specify category name to match against `p` as a Cosmos LIKE expression (with `%` as wildcard, e.g. `$UserServices-%`." + | CustomFilter _ -> "Specify a custom filter, referencing the document as `c.` (e.g. `'c.p LIKE \"test-%\" AND c._ts < 1717138092'`)" | Force -> "Actually delete the documents (default is a dry run, reporting what would be deleted)" | Cosmos _ -> "Parameters for CosmosDB." 
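// Editorial aside (not part of the patch): with the Custom case added above, an arbitrary filter is
// spliced into the WHERE clause verbatim, so the help-text example composes as sketched here
// (destroySqlSketch/exampleSql are illustrative names, and the projection is simplified):
let private destroySqlSketch (criteria: Criteria) =
    $"SELECT c.p, c.id FROM c WHERE {criteria.Sql}"
let private exampleSql = destroySqlSketch (Criteria.Custom "c.p LIKE \"test-%\" AND c._ts < 1717138092")
// exampleSql = SELECT c.p, c.id FROM c WHERE c.p LIKE "test-%" AND c._ts < 1717138092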
and DestroyArguments(p: ParseResults) = member val Criteria = - match p.TryGetResult StreamName, p.TryGetResult CategoryName, p.TryGetResult CategoryLike with - | Some sn, None, None -> Criteria.SingleStream sn - | Some _, Some _, _ - | Some _, _, Some _ -> p.Raise "StreamName and CategoryLike/CategoryName are mutually exclusive" - | None, Some cn, None -> Criteria.CatName cn - | None, None, Some cl -> Criteria.CatLike cl - | None, None, None -> failwith "Category or stream name criteria must be supplied" - | None, Some _, Some _ -> p.Raise "CategoryLike and CategoryName are mutually exclusive" + match p.TryGetResult StreamName, p.TryGetResult CategoryName, p.TryGetResult CategoryLike, p.TryGetResult CustomFilter with + | Some sn, None, None, None -> Criteria.SingleStream sn + | Some _, Some _, _, None + | Some _, _, Some _, None -> p.Raise "StreamName and CategoryLike/CategoryName are mutually exclusive" + | None, Some cn, None, None -> Criteria.CatName cn + | None, None, Some cl, None -> Criteria.CatLike cl + | None, None, None, Some filter -> Criteria.Custom filter + | _, _, _, Some _ -> p.Raise "Custom SQL and Category/Stream name settings are mutually exclusive" + | None, None, None, None -> failwith "Category, stream name, or custom SQL must be supplied" + | None, Some _, Some _, None -> p.Raise "CategoryLike and CategoryName are mutually exclusive" member val CosmosArgs = p.GetResult DestroyParameters.Cosmos |> Store.Cosmos.Arguments member val DryRun = p.Contains Force |> not member x.Connect() = match Store.Cosmos.config Log.Logger (None, true) x.CosmosArgs with @@ -658,7 +663,7 @@ module CosmosDestroy = finally let accCats = accStreams |> Seq.map StreamName.categoryName |> System.Collections.Generic.HashSet |> _.Count - Log.Information("TOTALS {count:N0}i {cats:N0}c {streams:N0}s {es:N0}e {us:N0}u read {rmib:f1}MiB output {omib:f1}MiB {rru:N2}RRU Avg {ru:N2}WRU/s Delete {ru:N2}WRU Total {s:N1}s", + Log.Information("TOTALS {count:N0}i {cats:N0}c {streams:N0}s {es:N0}e {us:N0}u read {rmib:f1}MiB output {omib:f1}MiB {rru:N2}RRU Avg {aru:N2}WRU/s Delete {dru:N2}WRU Total {s:N1}s", accI, accCats, accStreams.Count, accE, accU, miB accRds, miB accOds, accRus, accDelRu / tsw.Elapsed.TotalSeconds, accDelRu, tsw.Elapsed.TotalSeconds) } module DynamoInit = From cefb8b98420f8984ba473bab77c7a889477b44c8 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Fri, 14 Jun 2024 17:34:56 +0100 Subject: [PATCH 35/56] Ignore other people helping --- tools/Equinox.Tool/Program.fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 487194a1d..a2429e309 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -647,7 +647,7 @@ module CosmosDestroy = if not a.DryRun then let! res = container.DeleteItemStreamAsync(i.id, Microsoft.Azure.Cosmos.PartitionKey i.p) let ru = res.Headers.RequestCharge in idRu <- idRu + ru; pdRu <- pdRu + ru - if not res.IsSuccessStatusCode then + if not res.IsSuccessStatusCode && not (res.StatusCode = System.Net.HttpStatusCode.NotFound) then failwith $"Deletion of {i.p}/{i.id} failed with Code: {res.StatusCode} Message: {res.ErrorMessage}\n{res.Diagnostics}" if isw.Elapsed.TotalSeconds > 30 then Log.Information(".. 
Deleted {count,5}i {streams,7}s{es,7}e{us,7}u {rus,7:N2}WRU/s {s,6:N1}s", From 74dfca5ca964d19a14876a3dd9441a24cb22fc4b Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Fri, 28 Jun 2024 11:47:02 +0100 Subject: [PATCH 36/56] Parallelize deletion --- tools/Equinox.Tool/Program.fs | 76 ++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 14 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index a2429e309..69dfee33c 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -251,6 +251,8 @@ and DestroyArguments(p: ParseResults) = | None, Some _, Some _, None -> p.Raise "CategoryLike and CategoryName are mutually exclusive" member val CosmosArgs = p.GetResult DestroyParameters.Cosmos |> Store.Cosmos.Arguments member val DryRun = p.Contains Force |> not + member val Dop = 4 + member val StatsInterval = TimeSpan.FromSeconds 30 member x.Connect() = match Store.Cosmos.config Log.Logger (None, true) x.CosmosArgs with | Store.Config.Cosmos (cc, _, _) -> cc.Container | _ -> failwith "Destroy requires Cosmos" @@ -626,6 +628,22 @@ module CosmosDestroy = open Equinox.CosmosStore.Linq.Internal open FSharp.Control + type Sem(max) = + let inner = new SemaphoreSlim(max) + member _.IsEmpty = inner.CurrentCount = max + member _.TryWait(ms: int) = inner.WaitAsync ms + member _.Release() = inner.Release() |> ignore + + module Channel = + + open System.Threading.Channels + let unboundedSr<'t> = Channel.CreateUnbounded<'t>(UnboundedChannelOptions(SingleReader = true)) + let write (w: ChannelWriter<_>) = w.TryWrite >> ignore + let inline readAll (r: ChannelReader<_>) () = seq { + let mutable msg = Unchecked.defaultof<_> + while r.TryRead(&msg) do + yield msg } + let run (a: DestroyArguments) = task { let tsw = System.Diagnostics.Stopwatch.StartNew() let sql = $"SELECT c.p, c.id, ARRAYLENGTH(c.e) AS es, ARRAYLENGTH(c.u) AS us FROM c WHERE {a.Criteria.Sql}" @@ -638,28 +656,58 @@ module CosmosDestroy = container.GetItemQueryIterator(qd, requestOptions = qo) let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() let mutable accI, accE, accU, accRus, accDelRu, accRds, accOds = 0L, 0L, 0L, 0., 0., 0L, 0L + let deletionDop = Sem a.Dop + let writeResult, readResults = let c = Channel.unboundedSr in Channel.write c.Writer, Channel.readAll c.Reader try for rtt, rc, items, rdc, rds, ods in query |> Query.enum__ do - let mutable pageI, pageE, pageU, pdRu, idRu = 0, 0, 0, 0., 0. - let psw, isw = System.Diagnostics.Stopwatch.StartNew(), System.Diagnostics.Stopwatch.StartNew() + let mutable pageI, pageE, pageU, pRu, iRu = 0, 0, 0, 0., 0. + let pageSw, intervalSw = System.Diagnostics.Stopwatch.StartNew(), System.Diagnostics.Stopwatch.StartNew() + let drainResults () = + let mutable failMessage = null + for ru, exn in readResults () do + iRu <- iRu + ru; pRu <- pRu + ru + if exn <> null && failMessage <> null then failMessage <- exn + if intervalSw.Elapsed > a.StatsInterval then + Log.Information(".. 
Deleted {count,5}i {streams,7}s{es,7}e{us,7}u {rus,7:N2}WRU/s {s,6:N1}s", + pageI, pageStreams.Count, pageE, pageU, iRu / intervalSw.Elapsed.TotalSeconds, pageSw.Elapsed.TotalSeconds) + intervalSw.Restart() + iRu <- 0 + if failMessage <> null then failwith failMessage + (a.StatsInterval - intervalSw.Elapsed).TotalMilliseconds |> int + let awaitState check = task { + let mutable reserved = false + while not reserved do + match drainResults () with + | wait when wait <= 0 -> () + | timeoutAtNextLogInterval -> + match! check timeoutAtNextLogInterval with + | false -> () + | true -> reserved <- true } + let checkEmpty () = task { + if deletionDop.IsEmpty then return true else + do! System.Threading.Tasks.Task.Delay 1 + return deletionDop.IsEmpty } + let awaitCapacity () = awaitState deletionDop.TryWait + let releaseCapacity () = deletionDop.Release() + let awaitCompletion () = awaitState (fun _timeout -> checkEmpty ()) for i in items do if pageStreams.Add i.p then accStreams.Add i.p |> ignore pageI <- pageI + 1; pageE <- pageE + i.es; pageU <- pageU + i.us if not a.DryRun then - let! res = container.DeleteItemStreamAsync(i.id, Microsoft.Azure.Cosmos.PartitionKey i.p) - let ru = res.Headers.RequestCharge in idRu <- idRu + ru; pdRu <- pdRu + ru - if not res.IsSuccessStatusCode && not (res.StatusCode = System.Net.HttpStatusCode.NotFound) then - failwith $"Deletion of {i.p}/{i.id} failed with Code: {res.StatusCode} Message: {res.ErrorMessage}\n{res.Diagnostics}" - if isw.Elapsed.TotalSeconds > 30 then - Log.Information(".. Deleted {count,5}i {streams,7}s{es,7}e{us,7}u {rus,7:N2}WRU/s {s,6:N1}s", - pageI, pageStreams.Count, pageE, pageU, idRu / isw.Elapsed.TotalSeconds, psw.Elapsed.TotalSeconds) - isw.Restart() - idRu <- 0 - let ps = psw.Elapsed.TotalSeconds + do! awaitCapacity () + ignore <| task { // we could do a Task.Run dance, but kicking it off inline without waiting suits us fine as results processed above + let! res = container.DeleteItemStreamAsync(i.id, Microsoft.Azure.Cosmos.PartitionKey i.p) + releaseCapacity () + let exn = + if res.IsSuccessStatusCode || res.StatusCode = System.Net.HttpStatusCode.NotFound then null + else $"Deletion of {i.p}/{i.id} failed with Code: {res.StatusCode} Message: {res.ErrorMessage}\n{res.Diagnostics}" + writeResult (res.Headers.RequestCharge, exn) } + do! 
awaitCompletion () // we want stats output and/or failure exceptions to align with Pages + let ps = pageSw.Elapsed.TotalSeconds Log.Information("Page{rdc,6}>{count,5}i {streams,7}s{es,7}e{us,7}u{rds,8:f2}>{ods,4:f2} {prc,8:f2}RRU {rs,5:N1}s {rus:N2}WRU/s {ps,5:N1}s", - rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, pdRu / ps, ps) + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, pRu / ps, ps) pageStreams.Clear() accI <- accI + int64 pageI; accE <- accE + int64 pageE; accU <- accU + int64 pageU - accRus <- accRus + rc; accDelRu <- accDelRu + pdRu; accRds <- accRds + int64 rds; accOds <- accOds + int64 ods + accRus <- accRus + rc; accDelRu <- accDelRu + pRu; accRds <- accRds + int64 rds; accOds <- accOds + int64 ods finally let accCats = accStreams |> Seq.map StreamName.categoryName |> System.Collections.Generic.HashSet |> _.Count From d5796c32168b3e427df5c17c5f9d531d9361afb3 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Fri, 28 Jun 2024 14:50:26 +0100 Subject: [PATCH 37/56] CL/tidy --- CHANGELOG.md | 1 + tools/Equinox.Tool/Program.fs | 47 ++++++++++++++++++----------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f085928bd..b47364837 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ The `Unreleased` section name is replaced by the expected version of next releas - `eqx`: `-Q` flag omits timestamps from console output logging [#459](https://github.com/jet/equinox/pull/459) - `Equinox.CosmosStore.Linq`: Add LINQ querying support for Indexed `u`nfolds (`AccessStrategy.Custom`+`CosmosStoreCategory.shouldCompress`) [#450](https://github.com/jet/equinox/pull/450) - `eqx top`: Support for analyzing space usage for event and view containers by category and/or stream [#450](https://github.com/jet/equinox/pull/450) +- `eqx destroy`: Support for deleting the items(documents) underlying a category/stream/arbitrary `WHERE` clause [#450](https://github.com/jet/equinox/pull/450) ### Changed diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 69dfee33c..fa71fd530 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -183,6 +183,7 @@ and [] TopParameters = | [] StreamName of string | [] CategoryName of string | [] CategoryLike of string + | [] CustomFilter of sql: string | [] Streams | [] TsOrder | [] Limit of int @@ -192,7 +193,8 @@ and [] TopParameters = member a.Usage = a |> function | StreamName _ -> "Specify stream name to match against `p`, e.g. `$UserServices-f7c1ce63389a45bdbea1cccebb1b3c8a`." | CategoryName _ -> "Specify category name to match against `p`, e.g. `$UserServices`." - | CategoryLike _ -> "Specify category name to match against `p` as a Cosmos LIKE expression (with `%` as wildcard, e.g. `$UserServices-%`." + | CategoryLike _ -> "Specify category name to match against `p` as a Cosmos LIKE expression (with `%` as wildcard, e.g. `$UserServices-%`)." + | CustomFilter _ -> "Specify a custom filter, referencing the document as `c.` (e.g. `'c.p LIKE \"test-%\" AND c._ts < 1717138092'`)" | Streams -> "Stream level stats" | TsOrder -> "Retrieve data in `_ts` ORDER (generally has significant RU impact). 
Default: Use continuation tokens" | Sort _ -> "Sort order for results" @@ -201,14 +203,13 @@ and [] TopParameters = and Order = Name | Items | Events | Unfolds | Size | EventSize | UnfoldSize | InflateSize | CorrCauseSize and TopArguments(p: ParseResults) = member val Criteria = - match p.TryGetResult TopParameters.StreamName, p.TryGetResult TopParameters.CategoryName, p.TryGetResult TopParameters.CategoryLike with - | Some sn, None, None -> Criteria.SingleStream sn - | Some _, Some _, _ - | Some _, _, Some _ -> p.Raise "StreamName and CategoryLike/CategoryName are mutually exclusive" - | None, Some cn, None -> Criteria.CatName cn - | None, None, Some cl -> Criteria.CatLike cl - | None, None, None -> Criteria.Unfiltered - | None, Some _, Some _ -> p.Raise "CategoryLike and CategoryName are mutually exclusive" + match p.TryGetResult TopParameters.StreamName, p.TryGetResult TopParameters.CategoryName, p.TryGetResult TopParameters.CategoryLike, p.TryGetResult TopParameters.CustomFilter with + | None, None, None, None -> Criteria.Unfiltered + | Some sn, None, None, None -> Criteria.SingleStream sn + | None, Some cn, None, None -> Criteria.CatName cn + | None, None, Some cl, None -> Criteria.CatLike cl + | None, None, None, Some filter -> Criteria.Custom filter + | _ -> p.Raise "StreamName/CategoryLike/CategoryName/CustomFilter are mutually exclusive" member val CosmosArgs = p.GetResult TopParameters.Cosmos |> Store.Cosmos.Arguments member val StreamLevel = p.Contains Streams member val Count = p.GetResult(Limit, 100) @@ -217,7 +218,7 @@ and TopArguments(p: ParseResults) = member x.StreamCount = p.GetResult(Limit, x.Count * 10) member x.Connect() = match Store.Cosmos.config Log.Logger (None, true) x.CosmosArgs with | Store.Config.Cosmos (cc, _, _) -> cc.Container - | _ -> failwith "Top requires Cosmos" + | _ -> p.Raise "Top requires Cosmos" member x.Execute(sql) = let container = x.Connect() let qd = Microsoft.Azure.Cosmos.QueryDefinition sql let qo = Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = x.CosmosArgs.QueryMaxItems) @@ -228,34 +229,33 @@ and [] DestroyParameters = | [] CategoryLike of string | [] CustomFilter of sql: string | [] Force + | [] Parallelism of dop: int | [] Cosmos of ParseResults interface IArgParserTemplate with member a.Usage = a |> function | StreamName _ -> "Specify stream name to match against `p`, e.g. `$UserServices-f7c1ce63389a45bdbea1cccebb1b3c8a`." | CategoryName _ -> "Specify category name to match against `p`, e.g. `$UserServices`." - | CategoryLike _ -> "Specify category name to match against `p` as a Cosmos LIKE expression (with `%` as wildcard, e.g. `$UserServices-%`." + | CategoryLike _ -> "Specify category name to match against `p` as a Cosmos LIKE expression (with `%` as wildcard, e.g. `$UserServices-%`)." | CustomFilter _ -> "Specify a custom filter, referencing the document as `c.` (e.g. `'c.p LIKE \"test-%\" AND c._ts < 1717138092'`)" | Force -> "Actually delete the documents (default is a dry run, reporting what would be deleted)" + | Parallelism _ -> "Number of concurrent delete requests permitted to run in parallel. Default: 32" | Cosmos _ -> "Parameters for CosmosDB." 
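// Editorial aside (not part of the patch): Execute above returns a raw FeedIterator, and
// Query.enum__ (from Equinox.CosmosStore.Linq.Internal, not shown in this diff) is what walks it
// page by page. A minimal hand-rolled equivalent, included only to illustrate the SDK calls involved:
let private drainSketch (it: Microsoft.Azure.Cosmos.FeedIterator<'t>) = task {
    let results = ResizeArray<'t>()
    let mutable ru = 0.
    while it.HasMoreResults do
        let! page = it.ReadNextAsync()      // FeedResponse<'t>: a page of items plus diagnostics
        ru <- ru + page.RequestCharge       // accumulate the RU cost across pages
        for item in page do results.Add item
    return results, ru }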
and DestroyArguments(p: ParseResults) = member val Criteria = match p.TryGetResult StreamName, p.TryGetResult CategoryName, p.TryGetResult CategoryLike, p.TryGetResult CustomFilter with + | None, None, None, None -> p.Raise "Category, stream name, or custom SQL must be supplied" | Some sn, None, None, None -> Criteria.SingleStream sn - | Some _, Some _, _, None - | Some _, _, Some _, None -> p.Raise "StreamName and CategoryLike/CategoryName are mutually exclusive" | None, Some cn, None, None -> Criteria.CatName cn | None, None, Some cl, None -> Criteria.CatLike cl | None, None, None, Some filter -> Criteria.Custom filter - | _, _, _, Some _ -> p.Raise "Custom SQL and Category/Stream name settings are mutually exclusive" - | None, None, None, None -> failwith "Category, stream name, or custom SQL must be supplied" - | None, Some _, Some _, None -> p.Raise "CategoryLike and CategoryName are mutually exclusive" + | _ -> p.Raise "StreamName/CategoryLike/CategoryName/CustomFilter are mutually exclusive" member val CosmosArgs = p.GetResult DestroyParameters.Cosmos |> Store.Cosmos.Arguments member val DryRun = p.Contains Force |> not - member val Dop = 4 + member val Dop = p.GetResult(Parallelism, 32) member val StatsInterval = TimeSpan.FromSeconds 30 member x.Connect() = match Store.Cosmos.config Log.Logger (None, true) x.CosmosArgs with | Store.Config.Cosmos (cc, _, _) -> cc.Container - | _ -> failwith "Destroy requires Cosmos" + | _ -> p.Raise "Destroy requires Cosmos" and SnEventsUnfolds = { p: string; id: string; es: int; us: int } and [] DumpParameters = | [] Stream of FsCodec.StreamName @@ -415,7 +415,7 @@ module CosmosStats = log.Information("Computing {measures} ({mode})", Seq.map render ops, (if inParallel then "in parallel" else "serially")) ops |> Seq.map (fun (name, sql) -> async { let! res = Microsoft.Azure.Cosmos.QueryDefinition sql - |> container.GetItemQueryIterator + |> container.GetItemQueryIterator |> Query.enum_ log container "Stat" null LogEventLevel.Debug |> TaskSeq.head |> Async.AwaitTaskCorrect match name with | "Oldest" | "Newest" -> log.Information("{stat,-10}: {result,13} ({d:u})", name, res, DateTime.UnixEpoch.AddSeconds(float res)) @@ -577,7 +577,7 @@ module CosmosTop = let run (a: TopArguments) = task { let sw = System.Diagnostics.Stopwatch.StartNew() let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() - let mutable accI, accE, accU, accRus, accRds, accOds, accBytes = 0L, 0L, 0L, 0., 0L, 0L, 0L + let mutable accI, accE, accU, accRus, accRds, accOds, accBytes, accParse = 0L, 0L, 0L, 0., 0L, 0L, 0L, TimeSpan.Zero let s = System.Collections.Generic.HashSet() let group = if a.StreamLevel then id else StreamName.categoryName try for rtt, rc, items, rdc, rds, ods in a.Execute(sql a) |> Query.enum__ do @@ -596,14 +596,15 @@ module CosmosTop = pageStreams.Clear() accI <- accI + int64 pageI; accE <- accE + int64 pageE; accU <- accU + int64 pageU accRus <- accRus + rc; accRds <- accRds + int64 rds; accOds <- accOds + int64 ods; accBytes <- accBytes + pageB + accParse <- accParse + sw.Elapsed finally let accCats = (if a.StreamLevel then s |> Seq.map _.key else accStreams) |> Seq.map group |> System.Collections.Generic.HashSet |> _.Count let accStreams = if a.StreamLevel then s.Count else accStreams.Count let iBytes, cBytes = s |> Seq.sumBy _.iBytes, s |> Seq.sumBy _.cBytes let giB x = miB x / 1024. 
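// Editorial aside (not part of the patch): accParse accumulates the per-page Stopwatch elapsed time,
// so the Parse figure added to the TOTALS line below reports (approximately) the client-side
// JSON/Stat processing cost, separately from the RU charges and round-trip times reported by Cosmos.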
- Log.Information("TOTALS {count:N0}i {cats:N0}c {streams:N0}s {es:N0}e {us:N0}u read {rg:f1}GiB output {og:f1}GiB JSON {tg:f1}GiB D+M(inflated) {ig:f1}GiB C+C {cm:f2}MiB {ru:N2}RU {s:N1}s", - accI, accCats, accStreams, accE, accU, giB accRds, giB accOds, giB accBytes, giB iBytes, miB cBytes, accRus, sw.Elapsed.TotalSeconds) + Log.Information("TOTALS {count:N0}i {cats:N0}c {streams:N0}s {es:N0}e {us:N0}u read {rg:f1}GiB output {og:f1}GiB JSON {tg:f1}GiB D+M(inflated) {ig:f1}GiB C+C {cm:f2}MiB Parse {ps:N3}s Total {ru:N2}RU {s:N1}s", + accI, accCats, accStreams, accE, accU, giB accRds, giB accOds, giB accBytes, giB iBytes, miB cBytes, accParse.TotalSeconds, accRus, sw.Elapsed.TotalSeconds) let sort: Parser.Stat seq -> Parser.Stat seq = a.Order |> function | Order.Name -> Seq.sortBy _.key @@ -616,7 +617,7 @@ module CosmosTop = | Order.InflateSize -> Seq.sortByDescending _.iBytes | Order.CorrCauseSize -> Seq.sortByDescending _.cBytes let render (x: Parser.Stat) = - Log.Information("{count,7}i {tm,6:N2}MiB E{events,7} {em,7:N1} U{unfolds,7} {um,6:N1} D+M{dm,6:N1} C+C{cm,5:N1} {key}", + Log.Information("{count,8}i {tm,7:N2}MiB E{events,8} {em,7:N1} U{unfolds,8} {um,7:N1} D+M{dm,7:N1} C+C{cm,6:N1} {key}", x.count, miB x.bytes, x.events, miB x.eBytes, x.unfolds, miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) if a.StreamLevel then let collapsed = s |> Seq.groupBy (_.key >> StreamName.categoryName) |> Seq.map (fun (cat, xs) -> { (xs |> Seq.reduce _.Merge) with key = cat }) From 6014781905b4f81b34b141f493722b826c79e5b4 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Fri, 28 Jun 2024 15:57:46 +0100 Subject: [PATCH 38/56] Extend readme --- README.md | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/README.md b/README.md index 3bcf739f5..ae6572f18 100644 --- a/README.md +++ b/README.md @@ -430,6 +430,78 @@ While Equinox is implemented in F#, and F# is a great fit for writing event-sour # > Page 2903s, 601u, 3188e 107.53RU 3.1s 4.0MiB age 0004.05:24:51 {} # > Page 2638s, 316u, 3019e 93.09RU 2.5s 3.4MiB age 0000.05:08:38 {} # > TOTALS 11c, 206,356s, 7,886.75RU R/W 290.4/290.4MiB 225.3s {} + + # Prepare a breakdown of which categories are using the most capacity within the store + eqx -Q top cosmos -d db -c $EQUINOX_COSMOS_CONTAINER -b 9999 + # Page 3276>3276i 3276s 0e 3991u 4.00>4.00<4.22MiB 103.74RU 3.5s D+M 5.1 C+C 0.00 201ms age 0000.00:33:13 {} + # Page 3177>3177i 3177s 0e 4593u 4.00>4.01<4.20MiB 105.22RU 3.2s D+M 4.7 C+C 0.00 146ms age 0000.02:23:48 {} + # Page 2708>2708i 2708s 0e 5044u 4.00>4.00<4.19MiB 105.76RU 3.4s D+M 4.5 C+C 0.00 84ms age 0002.23:10:55 {} + ... 
+ # Page 4334>4334i 4334s 0e 5038u 4.00>4.00<4.19MiB 112.59RU 2.9s D+M 4.2 C+C 0.00 109ms age 0000.00:00:59 {} + # Page 1637>1637i 1637s 0e 2939u 2.40>2.41<2.52MiB 64.12RU 1.7s D+M 2.5 C+C 0.00 39ms age 0000.00:18:03 {} + # TOTALS 47,200i 9c 47,200s 0e 79,262u read 0.1GiB output 0.1GiB JSON 0.1GiB D+M(inflated) 0.1GiB C+C 0.00MiB Parse 1.516s Total 1,750.73RU 54.2s {} + # 24064i 40.75MiB E 0 0.0 U 48128 33.6 D+M 35.0 C+C 0.0 $Friend {} + # 6372i 13.18MiB E 0 0.0 U 12744 11.4 D+M 23.9 C+C 0.0 $Tenant {} + # 6374i 5.41MiB E 0 0.0 U 6374 3.6 D+M 5.4 C+C 0.0 $Role0 {} + # 5992i 5.09MiB E 0 0.0 U 5992 3.4 D+M 5.1 C+C 0.0 $Role {} + # 1574i 1.95MiB E 0 0.0 U 1574 1.5 D+M 2.0 C+C 0.0 $Permission {} + # 1575i 1.79MiB E 0 0.0 U 3150 1.3 D+M 1.2 C+C 0.0 $User {} + # 445i 0.51MiB E 0 0.0 U 483 0.4 D+M 0.8 C+C 0.0 $Invoice3 {} + # 410i 0.46MiB E 0 0.0 U 423 0.3 D+M 0.8 C+C 0.0 $Invoice2 {} + # 394i 0.44MiB E 0 0.0 U 394 0.3 D+M 0.7 C+C 0.0 $Invoice {} + + # Drill into the Friend data (different test data to preceding article) + eqx top -cn '$Friend' cosmos -d db -c $EQUINOX_COSMOS_CONTAINER -b 9999 + # Page 4787>4787i 4787s 0e 4787u 4.00>4.00<4.19MiB 218.54RU 3.6s D+M 4.5 C+C 0.00 259ms age 0013.22:52:15 {} + # Page 4955>4955i 4955s 0e 4955u 4.00>4.00<4.19MiB 200.20RU 3.2s D+M 4.1 C+C 0.00 202ms age 0013.22:52:18 {} + # Page 4715>4715i 4715s 0e 4715u 4.00>4.00<4.21MiB 201.26RU 3.2s D+M 4.4 C+C 0.00 145ms age 0013.22:52:22 {} + # Page 4884>4884i 4884s 0e 4884u 4.00>4.00<4.20MiB 198.97RU 3.2s D+M 4.1 C+C 0.00 95ms age 0013.22:52:31 {} + # Page 4620>4620i 4620s 0e 4620u 4.00>4.00<4.20MiB 194.76RU 3.0s D+M 4.7 C+C 0.00 140ms age 0013.22:52:28 {} + # Page 4840>4840i 4840s 0e 4840u 4.00>4.00<4.19MiB 198.43RU 3.2s D+M 4.2 C+C 0.00 136ms age 0013.22:52:34 {} + # Page 4791>4791i 4791s 0e 4791u 4.00>4.00<4.21MiB 200.20RU 3.0s D+M 4.2 C+C 0.00 137ms age 0014.02:23:24 {} + # Page 3906>3906i 3906s 0e 3906u 3.01>3.02<3.15MiB 158.28RU 2.6s D+M 2.9 C+C 0.00 142ms age 0013.23:13:51 {} + # TOTALS 37,498i 1c 37,498s 0e 37,498u read 0.0GiB output 0.0GiB JSON 0.0GiB D+M(inflated) 0.0GiB C+C 0.00MiB Parse 1.264s Total 1,570.64RU 30.0s {} + # 37498i 32.55MiB E 0 0.1 U 37498 21.7 D+M 33.2 C+C 0.0 $Friend {} + + # DRY RUN of deleting (note no `-f` supplied) + eqx destroy -cn '$Friend' cosmos -d db -c $EQUINOX_COSMOS_CONTAINER -b 9999 + # W Dry-run of deleting items based on SELECT c.p, c.id, ARRAYLENGTH(c.e) AS es, ARRAYLENGTH(c.u) AS us FROM c WHERE c.p LIKE "$Friend%" {} + # I Page 9999> 9999i 9999s 0e 9999u 8.21>0.76 415.07RRU 1.4s 0.00WRU/s 0.0s {} + # I Page 9999> 9999i 9999s 0e 9999u 8.48>0.76 404.70RRU 0.8s 0.00WRU/s 0.0s {} + # I Page 9999> 9999i 9999s 0e 9999u 8.32>0.76 395.36RRU 1.1s 0.00WRU/s 0.0s {} + # I Page 7501> 7501i 7501s 0e 7501u 6.01>0.57 299.60RRU 1.0s 0.00WRU/s 0.0s {} + # I TOTALS 37,498i 1c 37,498s 0e 37,498u read 31.0MiB output 2.9MiB 1,514.73RRU Avg 0.00WRU/s Delete 0.00WRU Total 7.8s {} + + # Whack them (note the `--force` supplied) + eqx destroy -cn '$Friend' --force cosmos -d db -c $EQUINOX_COSMOS_CONTAINER -b 9999 + # W DESTROYING all Items WHERE c.p LIKE "$ResourceRole%" {} + # I .. Deleted 6347i 6347s 0e 6347u 1,671.52WRU/s 30.0s {} + # I Page 9999> 9999i 9999s 0e 9999u 8.21>0.76 415.17RRU 1.2s 1,678.54WRU/s 47.2s {} + # I .. Deleted 6363i 6363s 0e 6363u 1,703.29WRU/s 30.0s {} + # I Page 9999> 9999i 9999s 0e 9999u 8.48>0.76 404.70RRU 1.1s 1,685.49WRU/s 47.8s {} + # I .. 
Deleted 6001i 6001s 0e 6001u 1,571.94WRU/s 30.0s {} + # I Page 9999> 9999i 9999s 0e 9999u 8.32>0.76 395.36RRU 1.0s 1,582.18WRU/s 50.1s {} + ^C + + # Get impatient; up the concurrency (-w 192) from the default 32 (note the `--force` supplied) + eqx destroy -cn '$Friend' --force -w 192 cosmos -d db -c $EQUINOX_COSMOS_CONTAINER -b 9999 + # W DESTROYING all Items WHERE c.p LIKE "$ResourceRole%" {} + # I Page 3946> 3946i 3946s 0e 3946u 3.05>0.30 176.23RRU 0.8s 5,107.71WRU/s 6.1s {} + # I TOTALS 3,946i 1c 3,946s 0e 3,946u read 3.0MiB output 0.3MiB 176.23RRU Avg 3,058.48WRU/s Delete 31,360.10WRU Total 10.3s {} + + # Analyze the largest streams in the '$Permission' category + eqx top -S -cl '$Perm%' cosmos -d db -c $EQUINOX_COSMOS_CONTAINER -b 9999 + # I Page 254> 254i 254s 0e 254u 4.33>4.33<4.65MiB 349.76RU 3.9s D+M 8.2 C+C 0.00 105ms age 0013.23:34:02 {} + # I Page 1671>1671i 1671s 0e 1671u 2.39>2.40<2.54MiB 91.57RU 2.1s D+M 2.9 C+C 0.00 99ms age 0013.23:34:07 {} + # I TOTALS 1,925i 1,925c 1,925s 0e 1,925u read 0.0GiB output 0.0GiB JSON 0.0GiB D+M(inflated) 0.0GiB C+C 0.00MiB Parse 0.207s Total 441.33RU 9.4s {} + # I 1925i 7.19MiB E 0 0.0 U 1925 6.6 D+M 11.1 C+C 0.0 $Permission {} + # I 1i 1.75MiB E 0 0.0 U 1 1.8 D+M 3.1 C+C 0.0 $Permission-5292b7cd524d509bb969bd82abf39461 {} + # I 1i 1.38MiB E 0 0.0 U 1 1.4 D+M 2.5 C+C 0.0 $Permission-244b72fb0238595494b5cb3f9bd1abf7 {} + # I 1i 0.79MiB E 0 0.0 U 1 0.8 D+M 1.5 C+C 0.0 $Permission-68a13e8398b352c5b8e22ec18ab2bbb6 {} + # I 1i 0.57MiB E 0 0.0 U 1 0.6 D+M 1.1 C+C 0.0 $Permission-ea4d1f46014a5bf6bbd97d3ec5723266 {} + # I 1i 0.13MiB E 0 0.0 U 1 0.1 D+M 0.2 C+C 0.0 $Permission-65b58d132ff857bb81b08a5bb69732d2 {} + # I 1i 0.02MiB E 0 0.0 U 1 0.0 D+M 0.0 C+C 0.0 $Permission-a7bcc3370ad15ae68041745ca55166cf {} + # I 1i 0.02MiB E 0 0.0 U 1 0.0 D+M 0.0 C+C 0.0 $Permission-03032ccf597857d9aa9c64b10288af8c {} ``` 6. Use `propulsion sync` tool to run a CosmosDB ChangeFeedProcessor From 8574d8b7c6439a18dd0db391bff8cc2e8c86b6f2 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Tue, 2 Jul 2024 13:45:30 +0100 Subject: [PATCH 39/56] fix category counting --- tools/Equinox.Tool/Program.fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index fa71fd530..5113cf449 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -599,7 +599,7 @@ module CosmosTop = accParse <- accParse + sw.Elapsed finally - let accCats = (if a.StreamLevel then s |> Seq.map _.key else accStreams) |> Seq.map group |> System.Collections.Generic.HashSet |> _.Count + let accCats = (if a.StreamLevel then s |> Seq.map _.key else accStreams) |> Seq.map StreamName.categoryName |> System.Collections.Generic.HashSet |> _.Count let accStreams = if a.StreamLevel then s.Count else accStreams.Count let iBytes, cBytes = s |> Seq.sumBy _.iBytes, s |> Seq.sumBy _.cBytes let giB x = miB x / 1024. 
From 355cf6b88a3d46f9a4d8dbe94e7f5aa3365c9f62 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 3 Jul 2024 21:52:28 +0100 Subject: [PATCH 40/56] Add Stream Count --- tools/Equinox.Tool/Program.fs | 42 ++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 5113cf449..e233d5301 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -599,30 +599,32 @@ module CosmosTop = accParse <- accParse + sw.Elapsed finally - let accCats = (if a.StreamLevel then s |> Seq.map _.key else accStreams) |> Seq.map StreamName.categoryName |> System.Collections.Generic.HashSet |> _.Count - let accStreams = if a.StreamLevel then s.Count else accStreams.Count + let accCats = (if a.StreamLevel then s |> Seq.map _.key else accStreams) |> Seq.map StreamName.categoryName |> Seq.distinct |> Seq.length let iBytes, cBytes = s |> Seq.sumBy _.iBytes, s |> Seq.sumBy _.cBytes let giB x = miB x / 1024. Log.Information("TOTALS {count:N0}i {cats:N0}c {streams:N0}s {es:N0}e {us:N0}u read {rg:f1}GiB output {og:f1}GiB JSON {tg:f1}GiB D+M(inflated) {ig:f1}GiB C+C {cm:f2}MiB Parse {ps:N3}s Total {ru:N2}RU {s:N1}s", - accI, accCats, accStreams, accE, accU, giB accRds, giB accOds, giB accBytes, giB iBytes, miB cBytes, accParse.TotalSeconds, accRus, sw.Elapsed.TotalSeconds) - - let sort: Parser.Stat seq -> Parser.Stat seq = a.Order |> function - | Order.Name -> Seq.sortBy _.key - | Order.Size -> Seq.sortByDescending _.bytes - | Order.Items -> Seq.sortByDescending _.count - | Order.Events -> Seq.sortByDescending _.events - | Order.Unfolds -> Seq.sortByDescending _.unfolds - | Order.EventSize -> Seq.sortByDescending _.eBytes - | Order.UnfoldSize -> Seq.sortByDescending _.uBytes - | Order.InflateSize -> Seq.sortByDescending _.iBytes - | Order.CorrCauseSize -> Seq.sortByDescending _.cBytes - let render (x: Parser.Stat) = - Log.Information("{count,8}i {tm,7:N2}MiB E{events,8} {em,7:N1} U{unfolds,8} {um,7:N1} D+M{dm,7:N1} C+C{cm,6:N1} {key}", - x.count, miB x.bytes, x.events, miB x.eBytes, x.unfolds, miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) + accI, accCats, accStreams.Count, accE, accU, giB accRds, giB accOds, giB accBytes, giB iBytes, miB cBytes, accParse.TotalSeconds, accRus, sw.Elapsed.TotalSeconds) + + let sortBy (f: 'x -> Parser.Stat) = a.Order |> function + | Order.Name -> Seq.sortBy (f >> _.key) + | Order.Size -> Seq.sortByDescending (f >> _.bytes) + | Order.Items -> Seq.sortByDescending (f >> _.count) + | Order.Events -> Seq.sortByDescending (f >> _.events) + | Order.Unfolds -> Seq.sortByDescending (f >> _.unfolds) + | Order.EventSize -> Seq.sortByDescending (f >> _.eBytes) + | Order.UnfoldSize -> Seq.sortByDescending (f >> _.uBytes) + | Order.InflateSize -> Seq.sortByDescending (f >> _.iBytes) + | Order.CorrCauseSize -> Seq.sortByDescending (f >> _.cBytes) + let streamOrCatTemplate s = "{count,8}i {tm,7:N2}MiB" + s + " E{events,8} {em,7:N1} U{unfolds,8} {um,7:N1} D+M{dm,7:N1} C+C{cm,6:N1} {key}" if a.StreamLevel then - let collapsed = s |> Seq.groupBy (_.key >> StreamName.categoryName) |> Seq.map (fun (cat, xs) -> { (xs |> Seq.reduce _.Merge) with key = cat }) - sort collapsed |> Seq.truncate a.Count |> Seq.iter render - sort s |> Seq.truncate (if a.StreamLevel then a.StreamCount else a.Count) |> Seq.iter render } + let renderWithStreamCount (s: int, x: Parser.Stat) = + Log.Information(streamOrCatTemplate " {streams,8}s", x.count, miB x.bytes, s, x.events, miB x.eBytes, x.unfolds, 
miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) + s |> Seq.groupBy (_.key >> StreamName.categoryName) + |> Seq.map (fun (cat, streams) -> Seq.length streams, { (streams |> Seq.reduce _.Merge) with key = cat }) + |> sortBy snd |> Seq.truncate a.Count |> Seq.iter renderWithStreamCount + let renderWithoutStreamCount (x: Parser.Stat) = + Log.Information(streamOrCatTemplate "", x.count, miB x.bytes, x.events, miB x.eBytes, x.unfolds, miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) + s |> sortBy id |> Seq.truncate (if a.StreamLevel then a.StreamCount else a.Count) |> Seq.iter renderWithoutStreamCount } module CosmosDestroy = From ed87ae00bae23fbbab856489012dc5f91de48355 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 3 Jul 2024 22:02:46 +0100 Subject: [PATCH 41/56] Default 9999 --- samples/Infrastructure/Store.fs | 4 ++-- tools/Equinox.Tool/Program.fs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/Infrastructure/Store.fs b/samples/Infrastructure/Store.fs index 0a76509fa..28e8af6a7 100644 --- a/samples/Infrastructure/Store.fs +++ b/samples/Infrastructure/Store.fs @@ -118,12 +118,12 @@ module Cosmos = match connect log a with | (connector, databaseId, containerId), None -> let client = connector.Connect(databaseId, [| containerId |]) |> Async.RunSynchronously - CosmosStoreContext(client, databaseId, containerId, a.TipMaxEvents, tipMaxJsonLength = a.TipMaxJsonLength, queryMaxItems = a.QueryMaxItems) + CosmosStoreContext(client, databaseId, containerId, a.TipMaxEvents, tipMaxJsonLength = a.TipMaxJsonLength, queryMaxItems = a.QueryMaxItems 10) | (connector, databaseId, containerId), Some (aConnector, aDatabaseId, aContainerId) -> let cosmosClient = connector.CreateAndInitialize(databaseId, [| containerId |]) |> Async.RunSynchronously let archiveCosmosClient = aConnector.CreateAndInitialize(aDatabaseId, [| aContainerId |]) |> Async.RunSynchronously let client = CosmosStoreClient(cosmosClient, archiveCosmosClient) - CosmosStoreContext(client, databaseId, containerId, a.TipMaxEvents, tipMaxJsonLength = a.TipMaxJsonLength, queryMaxItems = a.QueryMaxItems, + CosmosStoreContext(client, databaseId, containerId, a.TipMaxEvents, tipMaxJsonLength = a.TipMaxJsonLength, queryMaxItems = a.QueryMaxItems 10, archiveDatabaseId = aDatabaseId, archiveContainerId = aContainerId) log.Information("CosmosStore Tip thresholds: {maxTipJsonLength}b {maxTipEvents}e Query paging {queryMaxItems} items", a.TipMaxJsonLength, a.TipMaxEvents, a.QueryMaxItems) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index e233d5301..c44f81078 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -221,7 +221,7 @@ and TopArguments(p: ParseResults) = | _ -> p.Raise "Top requires Cosmos" member x.Execute(sql) = let container = x.Connect() let qd = Microsoft.Azure.Cosmos.QueryDefinition sql - let qo = Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = x.CosmosArgs.QueryMaxItems) + let qo = Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = x.CosmosArgs.QueryMaxItemsOr 9999) container.GetItemQueryIterator(qd, requestOptions = qo) and [] DestroyParameters = | [] StreamName of string @@ -655,7 +655,7 @@ module CosmosDestroy = let container = a.Connect() let query = let qd = Microsoft.Azure.Cosmos.QueryDefinition sql - let qo = Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = a.CosmosArgs.QueryMaxItems) + let qo = Microsoft.Azure.Cosmos.QueryRequestOptions(MaxItemCount = a.CosmosArgs.QueryMaxItemsOr 9999) 
container.GetItemQueryIterator(qd, requestOptions = qo) let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() let mutable accI, accE, accU, accRus, accDelRu, accRds, accOds = 0L, 0L, 0L, 0., 0., 0L, 0L From 412cb5ee7dcb590d0b43ccbfb231532436727957 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 3 Jul 2024 23:02:34 +0100 Subject: [PATCH 42/56] Tidy --- README.md | 12 ++++----- samples/Infrastructure/Store.fs | 4 +-- tools/Equinox.Tool/Program.fs | 45 ++++++++++++++++++--------------- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index ae6572f18..8339711ec 100644 --- a/README.md +++ b/README.md @@ -432,7 +432,7 @@ While Equinox is implemented in F#, and F# is a great fit for writing event-sour # > TOTALS 11c, 206,356s, 7,886.75RU R/W 290.4/290.4MiB 225.3s {} # Prepare a breakdown of which categories are using the most capacity within the store - eqx -Q top cosmos -d db -c $EQUINOX_COSMOS_CONTAINER -b 9999 + eqx -Q top cosmos -d db -c $EQUINOX_COSMOS_CONTAINER # Page 3276>3276i 3276s 0e 3991u 4.00>4.00<4.22MiB 103.74RU 3.5s D+M 5.1 C+C 0.00 201ms age 0000.00:33:13 {} # Page 3177>3177i 3177s 0e 4593u 4.00>4.01<4.20MiB 105.22RU 3.2s D+M 4.7 C+C 0.00 146ms age 0000.02:23:48 {} # Page 2708>2708i 2708s 0e 5044u 4.00>4.00<4.19MiB 105.76RU 3.4s D+M 4.5 C+C 0.00 84ms age 0002.23:10:55 {} @@ -451,7 +451,7 @@ While Equinox is implemented in F#, and F# is a great fit for writing event-sour # 394i 0.44MiB E 0 0.0 U 394 0.3 D+M 0.7 C+C 0.0 $Invoice {} # Drill into the Friend data (different test data to preceding article) - eqx top -cn '$Friend' cosmos -d db -c $EQUINOX_COSMOS_CONTAINER -b 9999 + eqx top -cn '$Friend' cosmos -d db -c $EQUINOX_COSMOS_CONTAINER # Page 4787>4787i 4787s 0e 4787u 4.00>4.00<4.19MiB 218.54RU 3.6s D+M 4.5 C+C 0.00 259ms age 0013.22:52:15 {} # Page 4955>4955i 4955s 0e 4955u 4.00>4.00<4.19MiB 200.20RU 3.2s D+M 4.1 C+C 0.00 202ms age 0013.22:52:18 {} # Page 4715>4715i 4715s 0e 4715u 4.00>4.00<4.21MiB 201.26RU 3.2s D+M 4.4 C+C 0.00 145ms age 0013.22:52:22 {} @@ -464,7 +464,7 @@ While Equinox is implemented in F#, and F# is a great fit for writing event-sour # 37498i 32.55MiB E 0 0.1 U 37498 21.7 D+M 33.2 C+C 0.0 $Friend {} # DRY RUN of deleting (note no `-f` supplied) - eqx destroy -cn '$Friend' cosmos -d db -c $EQUINOX_COSMOS_CONTAINER -b 9999 + eqx destroy -cn '$Friend' cosmos -d db -c $EQUINOX_COSMOS_CONTAINER # W Dry-run of deleting items based on SELECT c.p, c.id, ARRAYLENGTH(c.e) AS es, ARRAYLENGTH(c.u) AS us FROM c WHERE c.p LIKE "$Friend%" {} # I Page 9999> 9999i 9999s 0e 9999u 8.21>0.76 415.07RRU 1.4s 0.00WRU/s 0.0s {} # I Page 9999> 9999i 9999s 0e 9999u 8.48>0.76 404.70RRU 0.8s 0.00WRU/s 0.0s {} @@ -473,7 +473,7 @@ While Equinox is implemented in F#, and F# is a great fit for writing event-sour # I TOTALS 37,498i 1c 37,498s 0e 37,498u read 31.0MiB output 2.9MiB 1,514.73RRU Avg 0.00WRU/s Delete 0.00WRU Total 7.8s {} # Whack them (note the `--force` supplied) - eqx destroy -cn '$Friend' --force cosmos -d db -c $EQUINOX_COSMOS_CONTAINER -b 9999 + eqx destroy -cn '$Friend' --force cosmos -d db -c $EQUINOX_COSMOS_CONTAINER # W DESTROYING all Items WHERE c.p LIKE "$ResourceRole%" {} # I .. 
Deleted 6347i 6347s 0e 6347u 1,671.52WRU/s 30.0s {} # I Page 9999> 9999i 9999s 0e 9999u 8.21>0.76 415.17RRU 1.2s 1,678.54WRU/s 47.2s {} @@ -484,13 +484,13 @@ While Equinox is implemented in F#, and F# is a great fit for writing event-sour ^C # Get impatient; up the concurrency (-w 192) from the default 32 (note the `--force` supplied) - eqx destroy -cn '$Friend' --force -w 192 cosmos -d db -c $EQUINOX_COSMOS_CONTAINER -b 9999 + eqx destroy -cn '$Friend' --force -w 192 cosmos -d db -c $EQUINOX_COSMOS_CONTAINER # W DESTROYING all Items WHERE c.p LIKE "$ResourceRole%" {} # I Page 3946> 3946i 3946s 0e 3946u 3.05>0.30 176.23RRU 0.8s 5,107.71WRU/s 6.1s {} # I TOTALS 3,946i 1c 3,946s 0e 3,946u read 3.0MiB output 0.3MiB 176.23RRU Avg 3,058.48WRU/s Delete 31,360.10WRU Total 10.3s {} # Analyze the largest streams in the '$Permission' category - eqx top -S -cl '$Perm%' cosmos -d db -c $EQUINOX_COSMOS_CONTAINER -b 9999 + eqx top -S -cl '$Perm%' cosmos -d db -c $EQUINOX_COSMOS_CONTAINER # I Page 254> 254i 254s 0e 254u 4.33>4.33<4.65MiB 349.76RU 3.9s D+M 8.2 C+C 0.00 105ms age 0013.23:34:02 {} # I Page 1671>1671i 1671s 0e 1671u 2.39>2.40<2.54MiB 91.57RU 2.1s D+M 2.9 C+C 0.00 99ms age 0013.23:34:07 {} # I TOTALS 1,925i 1,925c 1,925s 0e 1,925u read 0.0GiB output 0.0GiB JSON 0.0GiB D+M(inflated) 0.0GiB C+C 0.00MiB Parse 0.207s Total 441.33RU 9.4s {} diff --git a/samples/Infrastructure/Store.fs b/samples/Infrastructure/Store.fs index 28e8af6a7..0a76509fa 100644 --- a/samples/Infrastructure/Store.fs +++ b/samples/Infrastructure/Store.fs @@ -118,12 +118,12 @@ module Cosmos = match connect log a with | (connector, databaseId, containerId), None -> let client = connector.Connect(databaseId, [| containerId |]) |> Async.RunSynchronously - CosmosStoreContext(client, databaseId, containerId, a.TipMaxEvents, tipMaxJsonLength = a.TipMaxJsonLength, queryMaxItems = a.QueryMaxItems 10) + CosmosStoreContext(client, databaseId, containerId, a.TipMaxEvents, tipMaxJsonLength = a.TipMaxJsonLength, queryMaxItems = a.QueryMaxItems) | (connector, databaseId, containerId), Some (aConnector, aDatabaseId, aContainerId) -> let cosmosClient = connector.CreateAndInitialize(databaseId, [| containerId |]) |> Async.RunSynchronously let archiveCosmosClient = aConnector.CreateAndInitialize(aDatabaseId, [| aContainerId |]) |> Async.RunSynchronously let client = CosmosStoreClient(cosmosClient, archiveCosmosClient) - CosmosStoreContext(client, databaseId, containerId, a.TipMaxEvents, tipMaxJsonLength = a.TipMaxJsonLength, queryMaxItems = a.QueryMaxItems 10, + CosmosStoreContext(client, databaseId, containerId, a.TipMaxEvents, tipMaxJsonLength = a.TipMaxJsonLength, queryMaxItems = a.QueryMaxItems, archiveDatabaseId = aDatabaseId, archiveContainerId = aContainerId) log.Information("CosmosStore Tip thresholds: {maxTipJsonLength}b {maxTipEvents}e Query paging {queryMaxItems} items", a.TipMaxJsonLength, a.TipMaxEvents, a.QueryMaxItems) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index c44f81078..8643c9669 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -599,32 +599,35 @@ module CosmosTop = accParse <- accParse + sw.Elapsed finally - let accCats = (if a.StreamLevel then s |> Seq.map _.key else accStreams) |> Seq.map StreamName.categoryName |> Seq.distinct |> Seq.length + let accC = (if a.StreamLevel then s |> Seq.map _.key else accStreams) |> Seq.map StreamName.categoryName |> Seq.distinct |> Seq.length + let accS = if a.StreamLevel then s.Count else accStreams.Count let iBytes, 
cBytes = s |> Seq.sumBy _.iBytes, s |> Seq.sumBy _.cBytes let giB x = miB x / 1024. - Log.Information("TOTALS {count:N0}i {cats:N0}c {streams:N0}s {es:N0}e {us:N0}u read {rg:f1}GiB output {og:f1}GiB JSON {tg:f1}GiB D+M(inflated) {ig:f1}GiB C+C {cm:f2}MiB Parse {ps:N3}s Total {ru:N2}RU {s:N1}s", - accI, accCats, accStreams.Count, accE, accU, giB accRds, giB accOds, giB accBytes, giB iBytes, miB cBytes, accParse.TotalSeconds, accRus, sw.Elapsed.TotalSeconds) - - let sortBy (f: 'x -> Parser.Stat) = a.Order |> function - | Order.Name -> Seq.sortBy (f >> _.key) - | Order.Size -> Seq.sortByDescending (f >> _.bytes) - | Order.Items -> Seq.sortByDescending (f >> _.count) - | Order.Events -> Seq.sortByDescending (f >> _.events) - | Order.Unfolds -> Seq.sortByDescending (f >> _.unfolds) - | Order.EventSize -> Seq.sortByDescending (f >> _.eBytes) - | Order.UnfoldSize -> Seq.sortByDescending (f >> _.uBytes) - | Order.InflateSize -> Seq.sortByDescending (f >> _.iBytes) - | Order.CorrCauseSize -> Seq.sortByDescending (f >> _.cBytes) - let streamOrCatTemplate s = "{count,8}i {tm,7:N2}MiB" + s + " E{events,8} {em,7:N1} U{unfolds,8} {um,7:N1} D+M{dm,7:N1} C+C{cm,6:N1} {key}" + Log.Information("TOTALS {cats:N0}c {streams:N0}s {count:N0}i {es:N0}e {us:N0}u Server read {rg:f1}GiB output {og:f1}GiB JSON {tg:f1}GiB D+M(inflated) {ig:f1}GiB C+C {cm:f2}MiB Parse {ps:N3}s Total {ru:N2}RU {s:N1}s", + accC, accS, accI, accE, accU, giB accRds, giB accOds, giB accBytes, giB iBytes, miB cBytes, accParse.TotalSeconds, accRus, sw.Elapsed.TotalSeconds) + + let sort: seq -> seq<_> = + match a.Order with + | Order.Name -> Seq.sortBy (snd >> _.key) + | Order.Size -> Seq.sortByDescending (snd >> _.bytes) + | Order.Items -> Seq.sortByDescending (snd >> _.count) + | Order.Events -> Seq.sortByDescending (snd >> _.events) + | Order.Unfolds -> Seq.sortByDescending (snd >> _.unfolds) + | Order.EventSize -> Seq.sortByDescending (snd >> _.eBytes) + | Order.UnfoldSize -> Seq.sortByDescending (snd >> _.uBytes) + | Order.InflateSize -> Seq.sortByDescending (snd >> _.iBytes) + | Order.CorrCauseSize -> Seq.sortByDescending (snd >> _.cBytes) + let inline streamOrCatTemplate s = "{count,8}i {tm,7:N2}MiB" + s + " E{events,8} {em,7:N1} U{unfolds,8} {um,7:N1} D+M{dm,7:N1} C+C{cm,6:N1} {key}" + let catTemplate, streamTemplate = streamOrCatTemplate " S{streams,8}", streamOrCatTemplate "" + let render (streams, x: Parser.Stat) = + if streams = 0 then Log.Information(streamTemplate, x.count, miB x.bytes, x.events, miB x.eBytes, x.unfolds, miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) + else Log.Information(catTemplate, x.count, miB x.bytes, streams, x.events, miB x.eBytes, x.unfolds, miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) if a.StreamLevel then - let renderWithStreamCount (s: int, x: Parser.Stat) = - Log.Information(streamOrCatTemplate " {streams,8}s", x.count, miB x.bytes, s, x.events, miB x.eBytes, x.unfolds, miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) s |> Seq.groupBy (_.key >> StreamName.categoryName) |> Seq.map (fun (cat, streams) -> Seq.length streams, { (streams |> Seq.reduce _.Merge) with key = cat }) - |> sortBy snd |> Seq.truncate a.Count |> Seq.iter renderWithStreamCount - let renderWithoutStreamCount (x: Parser.Stat) = - Log.Information(streamOrCatTemplate "", x.count, miB x.bytes, x.events, miB x.eBytes, x.unfolds, miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) - s |> sortBy id |> Seq.truncate (if a.StreamLevel then a.StreamCount else a.Count) |> Seq.iter renderWithoutStreamCount } + |> sort |> Seq.truncate a.Count 
+ |> Seq.iter render + s |> Seq.map (fun x -> 0, x) |> sort |> Seq.truncate (if a.StreamLevel then a.StreamCount else a.Count) |> Seq.iter render } module CosmosDestroy = From 25f8240fbfc03337198ff917a96f0b3a0c8b8c1e Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 3 Jul 2024 23:06:58 +0100 Subject: [PATCH 43/56] Move Streams first --- tools/Equinox.Tool/Program.fs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 8643c9669..5ab89238f 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -617,11 +617,11 @@ module CosmosTop = | Order.UnfoldSize -> Seq.sortByDescending (snd >> _.uBytes) | Order.InflateSize -> Seq.sortByDescending (snd >> _.iBytes) | Order.CorrCauseSize -> Seq.sortByDescending (snd >> _.cBytes) - let inline streamOrCatTemplate s = "{count,8}i {tm,7:N2}MiB" + s + " E{events,8} {em,7:N1} U{unfolds,8} {um,7:N1} D+M{dm,7:N1} C+C{cm,6:N1} {key}" - let catTemplate, streamTemplate = streamOrCatTemplate " S{streams,8}", streamOrCatTemplate "" + let streamTemplate = "{count,8}i {tm,7:N2}MiB E{events,8} {em,7:N1} U{unfolds,8} {um,7:N1} D+M{dm,7:N1} C+C{cm,6:N1} {key}" + let catTemplate = "S{streams,8} " + streamTemplate let render (streams, x: Parser.Stat) = if streams = 0 then Log.Information(streamTemplate, x.count, miB x.bytes, x.events, miB x.eBytes, x.unfolds, miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) - else Log.Information(catTemplate, x.count, miB x.bytes, streams, x.events, miB x.eBytes, x.unfolds, miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) + else Log.Information(catTemplate, streams, x.count, miB x.bytes, x.events, miB x.eBytes, x.unfolds, miB x.uBytes, miB x.iBytes, miB x.cBytes, x.key) if a.StreamLevel then s |> Seq.groupBy (_.key >> StreamName.categoryName) |> Seq.map (fun (cat, streams) -> Seq.length streams, { (streams |> Seq.reduce _.Merge) with key = cat }) From 58ba07114bf24c4f682c200e2ed614e5ff8daa8d Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 3 Jul 2024 23:24:55 +0100 Subject: [PATCH 44/56] Tidy --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b47364837..39cd0e05f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ The `Unreleased` section name is replaced by the expected version of next releas ### Changed -- `Equinox.*Store`,`Equinox.*Store.Prometheus`: Pin `Equinox` dependencies to `[4.0.0, 5.0.0)`] [#448](https://github.com/jet/equinox/pull/448) +- `Equinox.*Store`,`Equinox.*Store.Prometheus`: Pin `Equinox` dependencies to `[4.0.2, 5.0.0)`] [#448](https://github.com/jet/equinox/pull/448) - `Equinox.CosmosStore`: Update `System.Text.Json` dep to `6.0.10` per [CVE-2024-43485](https://github.com/advisories/GHSA-8g4q-xg66-9fp4) [#470](https://github.com/jet/equinox/pull/470) - `Equinox.CosmosStore`: Minimum `Microsoft.Azure.Cosmos` requirement updated to `3.43.0` to avail of integrated `System.Text.Json` support [#467](https://github.com/jet/equinox/pull/467) - `Equinox.CosmosStore.CosmosStoreConnector`: Removed mandatory `requestTimeout` argument [#467](https://github.com/jet/equinox/pull/467) From 97659ca2fabf988d49130d13ac7188df5aedbcae Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Thu, 18 Jul 2024 09:30:06 +0100 Subject: [PATCH 45/56] fix Limits logic --- tools/Equinox.Tool/Program.fs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs 
index 5ab89238f..c9b737f4a 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -186,7 +186,8 @@ and [] TopParameters = | [] CustomFilter of sql: string | [] Streams | [] TsOrder - | [] Limit of int + | [] CategoryLimit of int + | [] StreamsLimit of int | [] Sort of Order | [] Cosmos of ParseResults interface IArgParserTemplate with @@ -198,7 +199,8 @@ and [] TopParameters = | Streams -> "Stream level stats" | TsOrder -> "Retrieve data in `_ts` ORDER (generally has significant RU impact). Default: Use continuation tokens" | Sort _ -> "Sort order for results" - | Limit _ -> "Number of categories to limit output to (Streams limit is 10x the category limit). Default: 100" + | CategoryLimit _ -> "Number of categories to limit output to. Default: unlimited." + | StreamsLimit _ -> "Number of streams to limit output to. Default: 50" | Cosmos _ -> "Parameters for CosmosDB." and Order = Name | Items | Events | Unfolds | Size | EventSize | UnfoldSize | InflateSize | CorrCauseSize and TopArguments(p: ParseResults) = @@ -212,10 +214,10 @@ and TopArguments(p: ParseResults) = | _ -> p.Raise "StreamName/CategoryLike/CategoryName/CustomFilter are mutually exclusive" member val CosmosArgs = p.GetResult TopParameters.Cosmos |> Store.Cosmos.Arguments member val StreamLevel = p.Contains Streams - member val Count = p.GetResult(Limit, 100) + member val CategoriesLimit = p.GetResult(CategoryLimit, Int32.MaxValue) member val TsOrder = p.Contains TsOrder member val Order = p.GetResult(Sort, Order.Size) - member x.StreamCount = p.GetResult(Limit, x.Count * 10) + member val StreamsLimit = p.GetResult(StreamsLimit, 50) member x.Connect() = match Store.Cosmos.config Log.Logger (None, true) x.CosmosArgs with | Store.Config.Cosmos (cc, _, _) -> cc.Container | _ -> p.Raise "Top requires Cosmos" @@ -625,9 +627,9 @@ module CosmosTop = if a.StreamLevel then s |> Seq.groupBy (_.key >> StreamName.categoryName) |> Seq.map (fun (cat, streams) -> Seq.length streams, { (streams |> Seq.reduce _.Merge) with key = cat }) - |> sort |> Seq.truncate a.Count + |> sort |> Seq.truncate a.CategoriesLimit |> Seq.iter render - s |> Seq.map (fun x -> 0, x) |> sort |> Seq.truncate (if a.StreamLevel then a.StreamCount else a.Count) |> Seq.iter render } + s |> Seq.map (fun x -> 0, x) |> sort |> Seq.truncate (if a.StreamLevel then a.StreamsLimit else a.CategoriesLimit) |> Seq.iter render } module CosmosDestroy = From 241fcbfdb5da987bd7cbc96d204766f8a2ed6faf Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 3 Jul 2024 23:33:45 +0100 Subject: [PATCH 46/56] Release 4.1.0-alpha.14 --- CHANGELOG.md | 10 ++++++++-- README.md | 2 +- src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39cd0e05f..67e835979 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ The `Unreleased` section name is replaced by the expected version of next releas ## [Unreleased] +### Added +### Changed +### Removed +### Fixed + ## 4.1.0 - 2024/5 @@ -27,7 +32,7 @@ The `Unreleased` section name is replaced by the expected version of next releas ### Changed -- `Equinox.*Store`,`Equinox.*Store.Prometheus`: Pin `Equinox` dependencies to `[4.0.2, 5.0.0)`] [#448](https://github.com/jet/equinox/pull/448) +- `Equinox.*Store`,`Equinox.*Store.Prometheus`: Pin `Equinox` dependencies to `[4.0.3, 5.0.0)`] [#450](https://github.com/jet/equinox/pull/450) - `Equinox.CosmosStore`: Update `System.Text.Json` dep to `6.0.10` per 
[CVE-2024-43485](https://github.com/advisories/GHSA-8g4q-xg66-9fp4) [#470](https://github.com/jet/equinox/pull/470) - `Equinox.CosmosStore`: Minimum `Microsoft.Azure.Cosmos` requirement updated to `3.43.0` to avail of integrated `System.Text.Json` support [#467](https://github.com/jet/equinox/pull/467) - `Equinox.CosmosStore.CosmosStoreConnector`: Removed mandatory `requestTimeout` argument [#467](https://github.com/jet/equinox/pull/467) @@ -796,7 +801,8 @@ The `Unreleased` section name is replaced by the expected version of next releas (For information pertaining to earlier releases, see release notes in https://github.com/jet/equinox/releases and/or can someone please add it!) -[Unreleased]: https://github.com/jet/equinox/compare/4.0.4...HEAD +[Unreleased]: https://github.com/jet/equinox/compare/4.1.0...HEAD +[4.1.0]: https://github.com/jet/equinox/compare/4.0.4...4.1.0 [4.0.4]: https://github.com/jet/equinox/compare/4.0.3...4.0.4 [4.0.3]: https://github.com/jet/equinox/compare/4.0.2...4.0.3 [4.0.2]: https://github.com/jet/equinox/compare/4.0.0...4.0.2 diff --git a/README.md b/README.md index 8339711ec..c75e7ff13 100644 --- a/README.md +++ b/README.md @@ -170,7 +170,7 @@ The components within this repository are delivered as multi-targeted Nuget pack - `Equinox.Core` [![NuGet](https://img.shields.io/nuget/v/Equinox.Core.svg)](https://www.nuget.org/packages/Equinox.Core/): Hosts generic utility types frequently useful alongside Equinox: [`TaskCell`](https://github.com/jet/equinox/blob/master/src/Equinox.Core/TaskCell.fs#L36), [`Batcher`, `BatcherCache`, `BatcherDictionary`](https://github.com/jet/equinox/blob/master/src/Equinox.Core/Batching.fs#L44). ([depends](https://www.fuget.org/packages/Equinox.Core) on `System.Runtime.Caching`) - `Equinox.MemoryStore` [![MemoryStore NuGet](https://img.shields.io/nuget/v/Equinox.MemoryStore.svg)](https://www.nuget.org/packages/Equinox.MemoryStore/): In-memory store for integration testing/performance base-lining/providing out-of-the-box zero dependency storage for examples. ([depends](https://www.fuget.org/packages/Equinox.MemoryStore) on `Equinox`) -- `Equinox.CosmosStore` [![CosmosStore NuGet](https://img.shields.io/nuget/v/Equinox.CosmosStore.svg)](https://www.nuget.org/packages/Equinox.CosmosStore/): Azure CosmosDB Adapter with integrated 'unfolds' feature, facilitating optimal read performance in terms of latency and RU costs, instrumented to meet Jet's production monitoring requirements. ([depends](https://www.fuget.org/packages/Equinox.CosmosStore) on `Equinox` v `4.0.2`, `Equinox`, `Microsoft.Azure.Cosmos` >= `3.43.1`, `System.Text.Json`, `FSharp.Control.TaskSeq`) +- `Equinox.CosmosStore` [![CosmosStore NuGet](https://img.shields.io/nuget/v/Equinox.CosmosStore.svg)](https://www.nuget.org/packages/Equinox.CosmosStore/): Azure CosmosDB Adapter with integrated 'unfolds' feature, facilitating optimal read performance in terms of latency and RU costs, instrumented to meet Jet's production monitoring requirements. 
([depends](https://www.fuget.org/packages/Equinox.CosmosStore) on `Equinox` v `4.0.3`, `Equinox`, `Microsoft.Azure.Cosmos` >= `3.43.1`, `System.Text.Json`, `FSharp.Control.TaskSeq`) - `Equinox.CosmosStore.Prometheus` [![CosmosStore.Prometheus NuGet](https://img.shields.io/nuget/v/Equinox.CosmosStore.Prometheus.svg)](https://www.nuget.org/packages/Equinox.CosmosStore.Prometheus/): Integration package providing a `Serilog.Core.ILogEventSink` that extracts detailed metrics information attached to the `LogEvent`s and feeds them to the `prometheus-net`'s `Prometheus.Metrics` static instance. ([depends](https://www.fuget.org/packages/Equinox.CosmosStore.Prometheus) on `Equinox.CosmosStore`, `prometheus-net >= 3.6.0`) - `Equinox.DynamoStore` [![DynamoStore NuGet](https://img.shields.io/nuget/v/Equinox.DynamoStore.svg)](https://www.nuget.org/packages/Equinox.DynamoStore/): Amazon DynamoDB Adapter with integrated 'unfolds' feature, facilitating optimal read performance in terms of latency and RC costs, patterned after `Equinox.CosmosStore`. ([depends](https://www.fuget.org/packages/Equinox.DynamoStore) on `Equinox`, `FSharp.AWS.DynamoDB` >= `0.12.0-beta`, `FSharp.Control.TaskSeq`) - `Equinox.DynamoStore.Prometheus` [![DynamoStore.Prometheus NuGet](https://img.shields.io/nuget/v/Equinox.DynamoStore.Prometheus.svg)](https://www.nuget.org/packages/Equinox.DynamoStore.Prometheus/): Integration package providing a `Serilog.Core.ILogEventSink` that extracts detailed metrics information attached to the `LogEvent`s and feeds them to the `prometheus-net`'s `Prometheus.Metrics` static instance. ([depends](https://www.fuget.org/packages/Equinox.CosmosStore.Prometheus) on `Equinox.DynamoStore`, `prometheus-net >= 3.6.0`) diff --git a/src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj b/src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj index 213d78b61..fbb718170 100644 --- a/src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj +++ b/src/Equinox.CosmosStore/Equinox.CosmosStore.fsproj @@ -18,7 +18,7 @@ - + From 438f2ed4283a99d5f11be8333e18f18b23e2610f Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Tue, 23 Jul 2024 18:00:58 +0100 Subject: [PATCH 47/56] Count unfolds/unfolded --- CHANGELOG.md | 1 + tools/Equinox.Tool/Program.fs | 20 ++++++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 67e835979..22a18ff01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ The `Unreleased` section name is replaced by the expected version of next releas - `Equinox.CosmosStore`: Support Ingesting unfolds [#460](https://github.com/jet/equinox/pull/460) - `Equinox.CosmosStore.EventsContext.Sync`: Support syncing of unfolds [#460](https://github.com/jet/equinox/pull/460) - `eqx stats`: `-O`, `-N` flags extract oldest and newest `_ts` within a store [#459](https://github.com/jet/equinox/pull/459) +- `eqx stats`: `-U` flag to count streams with unfolds and total number thereof [#461](https://github.com/jet/equinox/pull/461) - `eqx`: `-Q` flag omits timestamps from console output logging [#459](https://github.com/jet/equinox/pull/459) - `Equinox.CosmosStore.Linq`: Add LINQ querying support for Indexed `u`nfolds (`AccessStrategy.Custom`+`CosmosStoreCategory.shouldCompress`) [#450](https://github.com/jet/equinox/pull/450) - `eqx top`: Support for analyzing space usage for event and view containers by category and/or stream [#450](https://github.com/jet/equinox/pull/450) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 
c9b737f4a..e5c8927a2 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -105,6 +105,7 @@ and [] InitSqlParameters = | Postgres _ -> "Configure Postgres Store." and [] StatsParameters = | [] Events + | [] Unfolds | [] Streams | [] Documents | [] Oldest @@ -115,6 +116,7 @@ and [] StatsParameters = interface IArgParserTemplate with member a.Usage = a |> function | Events -> "Count the number of Events in the store." + | Unfolds -> "Count the number of Unfolds in the store." | Streams -> "Count the number of Streams in the store." | Documents -> "Count the number of Documents in the store." | Oldest -> "Oldest document, based on the _ts field" @@ -154,11 +156,11 @@ and [] Mode = Default | SnapOnly | SnapWithStream | Read and [] Criteria = | SingleStream of string | CatName of string | CatLike of string | Custom of sql: string | Unfiltered member x.Sql = x |> function - | Criteria.SingleStream sn -> $"c.p = \"{sn}\"" - | Criteria.CatName n -> $"c.p LIKE \"{n}-%%\"" - | Criteria.CatLike pat -> $"c.p LIKE \"{pat}\"" - | Criteria.Custom filter -> filter - | Criteria.Unfiltered -> "1=1" + | Criteria.SingleStream sn -> $"c.p = \"{sn}\"" + | Criteria.CatName n -> $"c.p LIKE \"{n}-%%\"" + | Criteria.CatLike pat -> $"c.p LIKE \"{pat}\"" + | Criteria.Custom filter -> filter + | Criteria.Unfiltered -> "1=1" and QueryArguments(p: ParseResults) = member val Mode = p.GetResult(QueryParameters.Mode, if p.Contains QueryParameters.File then Mode.Raw else Mode.Default) member val Pretty = p.Contains QueryParameters.Pretty @@ -400,10 +402,10 @@ module CosmosStats = let run (log : ILogger, _verboseConsole, _maybeSeq) (p : ParseResults) = match p.GetSubCommand() with | StatsParameters.Cosmos sp -> - let doS, doD, doE, doO, doN = - let s, d, e, o, n = p.Contains StatsParameters.Streams, p.Contains Documents, p.Contains StatsParameters.Events, p.Contains Oldest, p.Contains Newest + let doS, doD, doE, doU, doO, doN = + let s, d, e, u, o, n = p.Contains StatsParameters.Streams, p.Contains Documents, p.Contains StatsParameters.Events, p.Contains StatsParameters.Unfolds, p.Contains Oldest, p.Contains Newest let all = not (s || d || e || o || n) - all || s, all || d, all || e, all || o, all || n + all || s, all || d, all || e, all || u, all || o, all || n let doS = doS || (not doD && not doE) // default to counting streams only unless otherwise specified let inParallel = p.Contains Parallel let connector, dName, cName = CosmosInit.connect log sp @@ -411,6 +413,8 @@ module CosmosStats = let ops = [| if doS then "Streams", """SELECT VALUE COUNT(1) FROM c WHERE c.id="-1" """ if doD then "Documents", """SELECT VALUE COUNT(1) FROM c""" if doE then "Events", """SELECT VALUE SUM(c.n) FROM c WHERE c.id="-1" """ + if doU then "Unfolded", """SELECT VALUE SUM(ARRAY_LENGTH(c.u) > 0 ? 
1 : 0) FROM c WHERE c.id="-1" """ + if doU then "Unfolds", """SELECT VALUE SUM(ARRAYLENGTH(c.u)) FROM c WHERE c.id="-1" """ if doO then "Oldest", """SELECT VALUE MIN(c._ts) FROM c""" if doN then "Newest", """SELECT VALUE MAX(c._ts) FROM c""" |] let render = if log.IsEnabled LogEventLevel.Debug then snd else fst From 8e887f1e2bc80a6878b66e23942f1b0e307249f5 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Tue, 23 Jul 2024 18:03:40 +0100 Subject: [PATCH 48/56] fix(Stats)!: Count Resync as read --- src/Equinox.CosmosStore/CosmosStore.fs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Equinox.CosmosStore/CosmosStore.fs b/src/Equinox.CosmosStore/CosmosStore.fs index 7ab557851..f715843c4 100644 --- a/src/Equinox.CosmosStore/CosmosStore.fs +++ b/src/Equinox.CosmosStore/CosmosStore.fs @@ -355,7 +355,7 @@ module Log = nameof res.Prune, res.Prune nameof res.Delete, res.Delete nameof res.Trim, res.Trim |] - let isRead = function nameof res.Tip | nameof res.Read | nameof res.Index | nameof res.Prune -> true | _ -> false + let isRead = function nameof res.Tip | nameof res.Read | nameof res.Index | nameof res.Prune | nameof res.Resync -> true | _ -> false let buckets = stats |> Seq.collect (fun (_n, stat) -> stat.Buckets) |> Seq.distinct |> Seq.sort |> Seq.toArray if Array.isEmpty buckets then () else From 6e08b094cd0be5f2916b75cbd87b587cc0e9c863 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Tue, 23 Jul 2024 18:13:08 +0100 Subject: [PATCH 49/56] fix(stats): Relabel Documents -> Items --- CHANGELOG.md | 3 ++- tools/Equinox.Tool/Program.fs | 16 ++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22a18ff01..924f431f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,8 @@ The `Unreleased` section name is replaced by the expected version of next releas - `Equinox.CosmosStore`: Support Ingesting unfolds [#460](https://github.com/jet/equinox/pull/460) - `Equinox.CosmosStore.EventsContext.Sync`: Support syncing of unfolds [#460](https://github.com/jet/equinox/pull/460) - `eqx stats`: `-O`, `-N` flags extract oldest and newest `_ts` within a store [#459](https://github.com/jet/equinox/pull/459) -- `eqx stats`: `-U` flag to count streams with unfolds and total number thereof [#461](https://github.com/jet/equinox/pull/461) +- `eqx stats`: `-U` flag to count streams with unfolds and total number thereof; `-I` alias relabel Documents as Items [#464](https://github.com/jet/equinox/pull/464) +- `eqx stats`: `-I` flag; relabel Documents as Items, retaining existing `-D` flag [#464](https://github.com/jet/equinox/pull/464) - `eqx`: `-Q` flag omits timestamps from console output logging [#459](https://github.com/jet/equinox/pull/459) - `Equinox.CosmosStore.Linq`: Add LINQ querying support for Indexed `u`nfolds (`AccessStrategy.Custom`+`CosmosStoreCategory.shouldCompress`) [#450](https://github.com/jet/equinox/pull/450) - `eqx top`: Support for analyzing space usage for event and view containers by category and/or stream [#450](https://github.com/jet/equinox/pull/450) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index e5c8927a2..effd9ae3b 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -107,7 +107,7 @@ and [] StatsParameters = | [] Events | [] Unfolds | [] Streams - | [] Documents + | [] Items | [] Oldest | [] Newest | [] Parallel @@ -118,7 +118,7 @@ and [] StatsParameters = | Events -> "Count the number of Events in the store." 
| Unfolds -> "Count the number of Unfolds in the store." | Streams -> "Count the number of Streams in the store." - | Documents -> "Count the number of Documents in the store." + | Items -> "Count the number of Items(Documents) in the store." | Oldest -> "Oldest document, based on the _ts field" | Newest -> "Newest document, based on the _ts field" | Parallel -> "Run in Parallel (CAREFUL! can overwhelm RU allocations)." @@ -402,16 +402,16 @@ module CosmosStats = let run (log : ILogger, _verboseConsole, _maybeSeq) (p : ParseResults) = match p.GetSubCommand() with | StatsParameters.Cosmos sp -> - let doS, doD, doE, doU, doO, doN = - let s, d, e, u, o, n = p.Contains StatsParameters.Streams, p.Contains Documents, p.Contains StatsParameters.Events, p.Contains StatsParameters.Unfolds, p.Contains Oldest, p.Contains Newest - let all = not (s || d || e || o || n) - all || s, all || d, all || e, all || u, all || o, all || n - let doS = doS || (not doD && not doE) // default to counting streams only unless otherwise specified + let doS, doI, doE, doU, doO, doN = + let s, i, e, u, o, n = p.Contains StatsParameters.Streams, p.Contains StatsParameters.Items, p.Contains StatsParameters.Events, p.Contains StatsParameters.Unfolds, p.Contains Oldest, p.Contains Newest + let all = not (s || i || e || u || o || n) + all || s, all || i, all || e, all || u, all || o, all || n + let doS = doS || (not doI && not doE) // default to counting streams only unless otherwise specified let inParallel = p.Contains Parallel let connector, dName, cName = CosmosInit.connect log sp let container = connector.CreateUninitialized().GetContainer(dName, cName) let ops = [| if doS then "Streams", """SELECT VALUE COUNT(1) FROM c WHERE c.id="-1" """ - if doD then "Documents", """SELECT VALUE COUNT(1) FROM c""" + if doI then "Items", """SELECT VALUE COUNT(1) FROM c""" if doE then "Events", """SELECT VALUE SUM(c.n) FROM c WHERE c.id="-1" """ if doU then "Unfolded", """SELECT VALUE SUM(ARRAY_LENGTH(c.u) > 0 ? 1 : 0) FROM c WHERE c.id="-1" """ if doU then "Unfolds", """SELECT VALUE SUM(ARRAYLENGTH(c.u)) FROM c WHERE c.id="-1" """ From 5f29db9fe2d9e2f7eacc179fdee38c22cb97fc49 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 31 Jul 2024 08:08:30 +0100 Subject: [PATCH 50/56] fix(eqx stats): Handle newest/oldest when empty --- tools/Equinox.Tool/Program.fs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index effd9ae3b..c57112b5f 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -422,10 +422,11 @@ module CosmosStats = ops |> Seq.map (fun (name, sql) -> async { let! 
res = Microsoft.Azure.Cosmos.QueryDefinition sql |> container.GetItemQueryIterator - |> Query.enum_ log container "Stat" null LogEventLevel.Debug |> TaskSeq.head |> Async.AwaitTaskCorrect - match name with - | "Oldest" | "Newest" -> log.Information("{stat,-10}: {result,13} ({d:u})", name, res, DateTime.UnixEpoch.AddSeconds(float res)) - | _ -> log.Information("{stat,-10}: {result,13:N0}", name, res) }) + |> Query.enum_ log container "Stat" null LogEventLevel.Debug |> TaskSeq.tryHead |> Async.AwaitTaskCorrect + match name, res with + | ("Oldest" | "Newest"), Some res -> log.Information("{stat,-10}: {result,13} ({d:u})", name, res, DateTime.UnixEpoch.AddSeconds(float res)) + | _, Some res -> log.Information("{stat,-10}: {result,13:N0}", name, res) + | _, None -> () }) // handle no Oldest/Newest not producing a result |> if inParallel then Async.Parallel else Async.Sequential |> Async.Ignore | StatsParameters.Dynamo sp -> async { From 48d48aa9a7700f047bf6f777e2505cb169bc7473 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Fri, 2 Aug 2024 19:18:13 +0100 Subject: [PATCH 51/56] Silence Page log in -Q mode --- tools/Equinox.Tool/Program.fs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index c57112b5f..06b2d303b 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -189,8 +189,8 @@ and [] TopParameters = | [] Streams | [] TsOrder | [] CategoryLimit of int - | [] StreamsLimit of int - | [] Sort of Order + | [] StreamsLimit of int + | [] Sort of Order | [] Cosmos of ParseResults interface IArgParserTemplate with member a.Usage = a |> function @@ -491,7 +491,7 @@ module CosmosQuery = let sql = composeSql a Log.Information("Querying {mode}: {q}", a.Mode, sql) Microsoft.Azure.Cosmos.QueryDefinition sql - let run (a: QueryArguments) = task { + let run quiet (a: QueryArguments) = task { let sw = System.Diagnostics.Stopwatch.StartNew() let serdes = if a.Pretty then prettySerdes.Value else FsCodec.SystemTextJson.Serdes.Default let maybeFileStream = a.Filepath |> Option.map (fun p -> @@ -509,8 +509,9 @@ module CosmosQuery = let inline arrayLen x = if isNull x then 0 else Array.length x pageStreams.Clear(); for x in items do if x.p <> null && pageStreams.Add x.p then accStreams.Add x.p |> ignore let pageI, pageE, pageU = items.Length, items |> Seq.sumBy (_.e >> arrayLen), items |> Seq.sumBy (_.u >> arrayLen) - Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}MiB{rc,7:f2}RU{s,5:N1}s age {age:dddd\.hh\:mm\:ss}", - rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, DateTime.UtcNow - newestTs) + let ll = if quiet then LogEventLevel.Debug else LogEventLevel.Information + Log.Write(ll, "Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}MiB{rc,7:f2}RU{s,5:N1}s age {age:dddd\.hh\:mm\:ss}", + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, DateTime.UtcNow - newestTs) maybeFileStream |> Option.iter (fun stream -> for x in items do serdes.SerializeToStream(x, stream) @@ -581,7 +582,7 @@ module CosmosTop = bytes = utf8Size x; eBytes = eb; uBytes = ub; cBytes = int64 (ec + uc); iBytes = ei + ui } let [] OrderByTs = " ORDER BY c._ts" let private sql (a: TopArguments) = $"SELECT * FROM c WHERE {a.Criteria.Sql}{if a.TsOrder then OrderByTs else null}" - let run (a: TopArguments) = task { + let run quiet (a: TopArguments) = task { let sw = 
System.Diagnostics.Stopwatch.StartNew() let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() let mutable accI, accE, accU, accRus, accRds, accOds, accBytes, accParse = 0L, 0L, 0L, 0., 0L, 0L, 0L, TimeSpan.Zero @@ -598,8 +599,9 @@ module CosmosTop = s.Add(if s.TryGetValue(x, &v) then s.Remove x |> ignore; v.Merge x else x) |> ignore pageI <- pageI + 1; pageE <- pageE + x.events; pageU <- pageU + x.unfolds pageB <- pageB + x.bytes; pageCc <- pageCc + x.cBytes; pageDm <- pageDm + x.iBytes - Log.Information("Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}<{jds,4:f2}MiB{rc,7:f2}RU{s,5:N1}s D+M{im,4:f1} C+C{cm,5:f2} {ms,3}ms age {age:dddd\.hh\:mm\:ss}", - rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, miB pageB, rc, rtt.TotalSeconds, miB pageDm, miB pageCc, sw.ElapsedMilliseconds, DateTime.UtcNow - newestTs) + let ll = if quiet then LogEventLevel.Debug else LogEventLevel.Information + Log.Write(ll, "Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}<{jds,4:f2}MiB{rc,7:f2}RU{s,5:N1}s D+M{im,4:f1} C+C{cm,5:f2} {ms,3}ms age {age:dddd\.hh\:mm\:ss}", + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, miB pageB, rc, rtt.TotalSeconds, miB pageDm, miB pageCc, sw.ElapsedMilliseconds, DateTime.UtcNow - newestTs) pageStreams.Clear() accI <- accI + int64 pageI; accE <- accE + int64 pageE; accU <- accU + int64 pageU accRus <- accRus + rc; accRds <- accRds + int64 rds; accOds <- accOds + int64 ods; accBytes <- accBytes + pageB @@ -840,8 +842,8 @@ type Arguments(p: ParseResults) = | InitAws a -> do! DynamoInit.table Log.Logger a | InitSql a -> do! SqlInit.databaseOrSchema Log.Logger a | Dump a -> do! Dump.run (Log.Logger, verboseConsole, maybeSeq) a - | Query a -> do! CosmosQuery.run (QueryArguments a) |> Async.AwaitTaskCorrect - | Top a -> do! CosmosTop.run (TopArguments a) |> Async.AwaitTaskCorrect + | Query a -> do! CosmosQuery.run quiet (QueryArguments a) |> Async.AwaitTaskCorrect + | Top a -> do! CosmosTop.run quiet (TopArguments a) |> Async.AwaitTaskCorrect | Destroy a -> do! CosmosDestroy.run (DestroyArguments a) |> Async.AwaitTaskCorrect | Stats a -> do! 
CosmosStats.run (Log.Logger, verboseConsole, maybeSeq) a | LoadTest a -> let n = p.GetResult(LogFile, fun () -> p.ProgramName + ".log") From c37dc4a3af86f87d9876053e08ad9b1a8667442b Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 14 Aug 2024 10:40:46 +0100 Subject: [PATCH 52/56] Add -sl; Polish -Q dump --- CHANGELOG.md | 2 + src/Equinox.CosmosStore/CosmosStoreLinq.fs | 9 +- tools/Equinox.Tool/Program.fs | 115 +++++++++++++-------- 3 files changed, 83 insertions(+), 43 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 924f431f3..3a1eb05c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,8 @@ The `Unreleased` section name is replaced by the expected version of next releas - `eqx stats`: `-I` flag; relabel Documents as Items, retaining existing `-D` flag [#464](https://github.com/jet/equinox/pull/464) - `eqx`: `-Q` flag omits timestamps from console output logging [#459](https://github.com/jet/equinox/pull/459) - `Equinox.CosmosStore.Linq`: Add LINQ querying support for Indexed `u`nfolds (`AccessStrategy.Custom`+`CosmosStoreCategory.shouldCompress`) [#450](https://github.com/jet/equinox/pull/450) +- `eqx dump`, `eqx query`: `-sl` Support for specifying streams to dump via a [CosmosDB `LIKE` expression](https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/keywords#like) [#450](https://github.com/jet/equinox/pull/450) +- `eqx dump`: `-Q` strips intervals, regularizes snapshots, logs stream names [#450](https://github.com/jet/equinox/pull/450) - `eqx top`: Support for analyzing space usage for event and view containers by category and/or stream [#450](https://github.com/jet/equinox/pull/450) - `eqx destroy`: Support for deleting the items(documents) underlying a category/stream/arbitrary `WHERE` clause [#450](https://github.com/jet/equinox/pull/450) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 4fff31d81..67dcf1200 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -100,11 +100,16 @@ module Internal = action, items, responses, totalRtt.TotalMilliseconds, totalRdc, miB totalRds, miB totalOds, totalRu, interval.ElapsedMilliseconds) } /// Runs a query that can be hydrated as 'T let enum log container cat = enum_ log container "Index" cat Events.LogEventLevel.Information + let exec__<'R> (log: ILogger) (container: Container) cat logLevel (queryDefinition: QueryDefinition): TaskSeq<'R> = + if log.IsEnabled logLevel then log.Write(logLevel, "CosmosStoreQuery.run {cat} {query}", cat, queryDefinition.QueryText) + container.GetItemQueryIterator<'R> queryDefinition |> enum_ log container "Query" cat logLevel /// Runs a query that renders 'T, Hydrating the results as 'P (can be the same types but e.g. you might want to map an object to a JsonElement etc) let enumAs<'T, 'P> (log: ILogger) (container: Container) cat logLevel (query: IQueryable<'T>): TaskSeq<'P> = let queryDefinition = query.ToQueryDefinition() - if log.IsEnabled logLevel then log.Write(logLevel, "CosmosStoreQuery.query {cat} {query}", cat, queryDefinition.QueryText) - container.GetItemQueryIterator<'P> queryDefinition |> enum log container cat + exec__<'P> log container cat logLevel queryDefinition + /// Execute a query, hydrating as 'R + let exec<'R> (log: ILogger) (container: Container) logLevel (queryDefinition: QueryDefinition): TaskSeq<'R> = + exec__<'R> log container "%" logLevel queryDefinition module AggregateOp = /// Runs one of the typical Cosmos SDK extensions, e.g. 
CountAsync, logging the costs let [] exec (log: ILogger) (container: Container) (op: string) (cat: string) (query: IQueryable<'T>) run render: System.Threading.Tasks.Task<'R> = task { diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index 06b2d303b..e37bd6ef5 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -126,6 +126,7 @@ and [] StatsParameters = | Dynamo _ -> "Dynamo Connection parameters." and [] QueryParameters = | [] StreamName of string + | [] StreamLike of string | [] CategoryName of string | [] CategoryLike of string | [] UnfoldName of string @@ -138,6 +139,7 @@ and [] QueryParameters = interface IArgParserTemplate with member a.Usage = a |> function | StreamName _ -> "Specify stream name to match against `p`, e.g. `$UserServices-f7c1ce63389a45bdbea1cccebb1b3c8a`." + | StreamLike _ -> "Specify stream name to match against `p`, e.g. `%-f7c1ce63389a45bdbea1cccebb1b3c8a`." | CategoryName _ -> "Specify category name to match against `p`, e.g. `$UserServices`." | CategoryLike _ -> "Specify category name to match against `p` as a Cosmos LIKE expression (with `%` as wildcard, e.g. `$UserServices-%`)." | UnfoldName _ -> "Specify unfold Name to match against `u.c`, e.g. `Snapshotted`" @@ -154,11 +156,12 @@ and [] QueryParameters = | Cosmos _ -> "Parameters for CosmosDB." and [] Mode = Default | SnapOnly | SnapWithStream | ReadOnly | ReadWithStream | Raw and [] Criteria = - | SingleStream of string | CatName of string | CatLike of string | Custom of sql: string | Unfiltered + | SingleStream of string | StreamLike of string | CatName of string | CatLike of string | Custom of sql: string | Unfiltered member x.Sql = x |> function | Criteria.SingleStream sn -> $"c.p = \"{sn}\"" + | Criteria.StreamLike pat -> $"c.p LIKE \"{pat}\"" | Criteria.CatName n -> $"c.p LIKE \"{n}-%%\"" - | Criteria.CatLike pat -> $"c.p LIKE \"{pat}\"" + | Criteria.CatLike pat -> $"c.p LIKE \"{pat}-%%\"" | Criteria.Custom filter -> filter | Criteria.Unfiltered -> "1=1" and QueryArguments(p: ParseResults) = @@ -166,14 +169,14 @@ and QueryArguments(p: ParseResults) = member val Pretty = p.Contains QueryParameters.Pretty member val TeeConsole = p.Contains QueryParameters.Console member val Criteria = - match p.TryGetResult QueryParameters.StreamName, p.TryGetResult QueryParameters.CategoryName, p.TryGetResult QueryParameters.CategoryLike with - | Some sn, None, None -> Criteria.SingleStream sn - | Some _, Some _, _ - | Some _, _, Some _ -> p.Raise "StreamName and CategoryLike/CategoryName are mutually exclusive" - | None, Some cn, None -> Criteria.CatName cn - | None, None, Some cl -> Criteria.CatLike cl - | None, None, None -> Criteria.Unfiltered - | None, Some _, Some _ -> p.Raise "CategoryLike and CategoryName are mutually exclusive" + match p.TryGetResult QueryParameters.StreamName, p.TryGetResult QueryParameters.StreamLike, + p.TryGetResult QueryParameters.CategoryName, p.TryGetResult QueryParameters.CategoryLike with + | Some sn, None, None, None -> Criteria.SingleStream sn + | None, Some sl, None, None -> Criteria.StreamLike sl + | None, None, Some cn, None -> Criteria.CatName cn + | None, None, None, Some cl -> Criteria.CatLike cl + | None, None, None, None -> Criteria.Unfiltered + | _ -> p.Raise "StreamName/StreamLike and CategoryLike/CategoryName are mutually exclusive" member val Filepath = p.TryGetResult QueryParameters.File member val UnfoldName = p.TryGetResult QueryParameters.UnfoldName member val UnfoldCriteria = p.TryGetResult 
QueryParameters.UnfoldCriteria
@@ -229,6 +232,7 @@ and TopArguments(p: ParseResults) =
         container.GetItemQueryIterator(qd, requestOptions = qo)
 and [] DestroyParameters =
     | [] StreamName of string
+    | [] StreamLike of string
     | [] CategoryName of string
     | [] CategoryLike of string
     | [] CustomFilter of sql: string
@@ -238,6 +242,7 @@ and [] DestroyParameters =
     interface IArgParserTemplate with
         member a.Usage = a |> function
             | StreamName _ -> "Specify stream name to match against `p`, e.g. `$UserServices-f7c1ce63389a45bdbea1cccebb1b3c8a`."
+            | StreamLike _ -> "Specify stream name to match against `p`, e.g. `%-f7c1ce63389a45bdbea1cccebb1b3c8a`."
             | CategoryName _ -> "Specify category name to match against `p`, e.g. `$UserServices`."
             | CategoryLike _ -> "Specify category name to match against `p` as a Cosmos LIKE expression (with `%` as wildcard, e.g. `$UserServices-%`)."
             | CustomFilter _ -> "Specify a custom filter, referencing the document as `c.` (e.g. `'c.p LIKE \"test-%\" AND c._ts < 1717138092'`)"
@@ -246,13 +251,14 @@ and [] DestroyParameters =
             | Cosmos _ -> "Parameters for CosmosDB."
 and DestroyArguments(p: ParseResults) =
     member val Criteria =
-        match p.TryGetResult StreamName, p.TryGetResult CategoryName, p.TryGetResult CategoryLike, p.TryGetResult CustomFilter with
-        | None, None, None, None -> p.Raise "Category, stream name, or custom SQL must be supplied"
-        | Some sn, None, None, None -> Criteria.SingleStream sn
-        | None, Some cn, None, None -> Criteria.CatName cn
-        | None, None, Some cl, None -> Criteria.CatLike cl
-        | None, None, None, Some filter -> Criteria.Custom filter
-        | _ -> p.Raise "StreamName/CategoryLike/CategoryName/CustomFilter are mutually exclusive"
+        match p.TryGetResult StreamName, p.TryGetResult DestroyParameters.StreamLike, p.TryGetResult CategoryName, p.TryGetResult CategoryLike, p.TryGetResult CustomFilter with
+        | Some sn, None, None, None, None -> Criteria.SingleStream sn
+        | None, Some sl, None, None, None -> Criteria.StreamLike sl
+        | None, None, Some cn, None, None -> Criteria.CatName cn
+        | None, None, None, Some cl, None -> Criteria.CatLike cl
+        | None, None, None, None, Some filter -> Criteria.Custom filter
+        | None, None, None, None, None -> p.Raise "Category or stream name/pattern, or custom SQL must be supplied"
+        | _ -> p.Raise "StreamName/StreamLike/CategoryLike/CategoryName/CustomFilter are mutually exclusive"
     member val CosmosArgs = p.GetResult DestroyParameters.Cosmos |> Store.Cosmos.Arguments
     member val DryRun = p.Contains Force |> not
     member val Dop = p.GetResult(Parallelism, 32)
@@ -263,12 +269,15 @@ and DestroyArguments(p: ParseResults) =
 and SnEventsUnfolds = { p: string; id: string; es: int; us: int }
 and [] DumpParameters =
     | [] Stream of FsCodec.StreamName
+    | [] StreamLike of string
     | [] Correlation
     | [] Blobs
     | [] JsonSkip
     | [] Pretty
     | [] FlattenUnfolds
     | [] TimeRegular
+    | [] Intervals
+    | [] Names
     | [] UnfoldsOnly
     | [] EventsOnly
     | [] Cosmos of ParseResults
@@ -281,12 +290,15 @@ and [] DumpParameters =
     interface IArgParserTemplate with
         member a.Usage = a |> function
             | Stream _ -> "Specify stream(s) to dump."
+            | StreamLike _ -> "(CosmosDB only) Specify stream name pattern to dump: LIKE expression with `%` and `_` tokens etc."
| Correlation -> "Include Correlation/Causation identifiers" | Blobs -> "Don't assume Data/Metadata is UTF-8 text" | JsonSkip -> "Don't assume Data/Metadata is JSON" | Pretty -> "Pretty print the JSON over multiple lines" - | FlattenUnfolds -> "Don't pretty print the JSON over multiple lines for Unfolds" + | FlattenUnfolds -> "Don't pretty print the JSON over multiple lines for Unfolds. Quiet mode: Pretty print" | TimeRegular -> "Don't humanize time intervals between events" + | Intervals -> "Omit intervals between events. Quiet mode: Include intervals" + | Names -> "Emit StreamName prior to events/unfolds instead of adding log context. Quiet mode: exclude stream names" | UnfoldsOnly -> "Exclude Events. Default: show both Events and Unfolds" | EventsOnly -> "Exclude Unfolds/Snapshots. Default: show both Events and Unfolds." | Es _ -> "Parameters for EventStore." @@ -322,6 +334,19 @@ and DumpArguments(p: ParseResults) = let storeLog = createStoreLog false storeLog, Store.MessageDb.config log None p | x -> p.Raise $"unexpected subcommand %A{x}" + member val CosmosArgs = p.GetResult DumpParameters.Cosmos |> Store.Cosmos.Arguments + member x.Connect() = + match Store.Cosmos.config Log.Logger (None, true) x.CosmosArgs with + | Store.Config.Cosmos (cc, _, _) -> cc.Container + | _ -> p.Raise "Dump StreamLike option requires Cosmos" + member x.Streams(infoLogLevel) = + let streams = p.GetResults DumpParameters.Stream + match p.TryGetResult DumpParameters.StreamLike with + | None -> streams + | Some pattern -> + let container = x.Connect() + let q = Microsoft.Azure.Cosmos.QueryDefinition($"SELECT DISTINCT VALUE c.p from c where c.p LIKE \"{pattern}\"") + Equinox.CosmosStore.Linq.Internal.Query.exec Log.Logger container infoLogLevel q |> FSharp.Control.TaskSeq.toList let writeToStatsSinks (c : LoggerConfiguration) = c.WriteTo.Sink(Equinox.CosmosStore.Core.Log.InternalMetrics.Stats.LogSink()) .WriteTo.Sink(Equinox.DynamoStore.Core.Log.InternalMetrics.Stats.LogSink()) @@ -491,7 +516,7 @@ module CosmosQuery = let sql = composeSql a Log.Information("Querying {mode}: {q}", a.Mode, sql) Microsoft.Azure.Cosmos.QueryDefinition sql - let run quiet (a: QueryArguments) = task { + let run ill (a: QueryArguments) = task { let sw = System.Diagnostics.Stopwatch.StartNew() let serdes = if a.Pretty then prettySerdes.Value else FsCodec.SystemTextJson.Serdes.Default let maybeFileStream = a.Filepath |> Option.map (fun p -> @@ -509,9 +534,8 @@ module CosmosQuery = let inline arrayLen x = if isNull x then 0 else Array.length x pageStreams.Clear(); for x in items do if x.p <> null && pageStreams.Add x.p then accStreams.Add x.p |> ignore let pageI, pageE, pageU = items.Length, items |> Seq.sumBy (_.e >> arrayLen), items |> Seq.sumBy (_.u >> arrayLen) - let ll = if quiet then LogEventLevel.Debug else LogEventLevel.Information - Log.Write(ll, "Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}MiB{rc,7:f2}RU{s,5:N1}s age {age:dddd\.hh\:mm\:ss}", - rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, DateTime.UtcNow - newestTs) + Log.Write(ill, "Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}MiB{rc,7:f2}RU{s,5:N1}s age {age:dddd\.hh\:mm\:ss}", + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, rc, rtt.TotalSeconds, DateTime.UtcNow - newestTs) maybeFileStream |> Option.iter (fun stream -> for x in items do serdes.SerializeToStream(x, stream) @@ -582,7 +606,7 @@ module CosmosTop = bytes = utf8Size x; eBytes = eb; uBytes = ub; cBytes = int64 
(ec + uc); iBytes = ei + ui } let [] OrderByTs = " ORDER BY c._ts" let private sql (a: TopArguments) = $"SELECT * FROM c WHERE {a.Criteria.Sql}{if a.TsOrder then OrderByTs else null}" - let run quiet (a: TopArguments) = task { + let run ill (a: TopArguments) = task { let sw = System.Diagnostics.Stopwatch.StartNew() let pageStreams, accStreams = System.Collections.Generic.HashSet(), System.Collections.Generic.HashSet() let mutable accI, accE, accU, accRus, accRds, accOds, accBytes, accParse = 0L, 0L, 0L, 0., 0L, 0L, 0L, TimeSpan.Zero @@ -599,9 +623,8 @@ module CosmosTop = s.Add(if s.TryGetValue(x, &v) then s.Remove x |> ignore; v.Merge x else x) |> ignore pageI <- pageI + 1; pageE <- pageE + x.events; pageU <- pageU + x.unfolds pageB <- pageB + x.bytes; pageCc <- pageCc + x.cBytes; pageDm <- pageDm + x.iBytes - let ll = if quiet then LogEventLevel.Debug else LogEventLevel.Information - Log.Write(ll, "Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}<{jds,4:f2}MiB{rc,7:f2}RU{s,5:N1}s D+M{im,4:f1} C+C{cm,5:f2} {ms,3}ms age {age:dddd\.hh\:mm\:ss}", - rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, miB pageB, rc, rtt.TotalSeconds, miB pageDm, miB pageCc, sw.ElapsedMilliseconds, DateTime.UtcNow - newestTs) + Log.Write(ill, "Page{rdc,5}>{count,4}i{streams,5}s{es,5}e{us,5}u{rds,5:f2}>{ods,4:f2}<{jds,4:f2}MiB{rc,7:f2}RU{s,5:N1}s D+M{im,4:f1} C+C{cm,5:f2} {ms,3}ms age {age:dddd\.hh\:mm\:ss}", + rdc, pageI, pageStreams.Count, pageE, pageU, miB rds, miB ods, miB pageB, rc, rtt.TotalSeconds, miB pageDm, miB pageCc, sw.ElapsedMilliseconds, DateTime.UtcNow - newestTs) pageStreams.Clear() accI <- accI + int64 pageI; accE <- accE + int64 pageE; accU <- accU + int64 pageU accRus <- accRus + rc; accRds <- accRds + int64 rds; accOds <- accOds + int64 ods; accBytes <- accBytes + pageB @@ -771,12 +794,14 @@ module Dump = let private prettifyJson (json: string) = use parsed = System.Text.Json.JsonDocument.Parse json prettySerdes.Value.Serialize parsed - let run (log : ILogger, verboseConsole, maybeSeq) (p : ParseResults) = async { + let run ill (log : ILogger, verboseConsole, maybeSeq) (p : ParseResults) = async { let a = DumpArguments p let createStoreLog storeVerbose = createStoreLog storeVerbose verboseConsole maybeSeq let storeLog, storeConfig = a.ConfigureStore(log, createStoreLog) let doU, doE = not (p.Contains EventsOnly), not (p.Contains UnfoldsOnly) - let doC, doJ, doS, doT = p.Contains Correlation, not (p.Contains JsonSkip), not (p.Contains Blobs), not (p.Contains TimeRegular) + let quietMode = ill <> LogEventLevel.Debug + let doN = p.Contains Names = quietMode + let doI, doC, doJ, doS, doT = p.Contains Intervals <> quietMode, p.Contains Correlation, not (p.Contains JsonSkip), not (p.Contains Blobs), not (p.Contains TimeRegular) let store = Services.Store(storeConfig) let initial = List.empty @@ -785,7 +810,7 @@ module Dump = let idCodec = FsCodec.Codec.Create((fun _ -> failwith "No encoding required"), tryDecode, (fun _ _ -> failwith "No mapCausation")) let isOriginAndSnapshot = (fun (event : FsCodec.ITimelineEvent<_>) -> not doE && event.IsUnfold), fun _state -> failwith "no snapshot required" let formatUnfolds, formatEvents = - if p.Contains FlattenUnfolds then id else prettifyJson + if p.Contains FlattenUnfolds = quietMode then id else prettifyJson , if p.Contains Pretty then prettifyJson else id let mutable payloadBytes = 0 let render format (data: ReadOnlyMemory) = @@ -803,6 +828,8 @@ module Dump = | x when x.TotalMinutes >= 1. 
-> x.ToString "m\mss\.ff\s" | x -> x.ToString("s\.fff\s") let dumpEvents (streamName: FsCodec.StreamName) = async { + let log = if doN then Log.Information("Dumping {sn}", streamName); log + else log.ForContext("sn", streamName) let struct (categoryName, sid) = FsCodec.StreamName.split streamName let cat = store.Category(categoryName, idCodec, fold, initial, isOriginAndSnapshot) let decider = Equinox.Decider.forStream storeLog cat sid @@ -815,20 +842,25 @@ module Dump = | Some p when not x.IsUnfold -> let ts = x.Timestamp - p in if doT then humanize ts else ts.ToString() | _ -> if doT then "n/a" else "0" prevTs <- Some x.Timestamp - if not doC then log.Information("{i,4}@{t:u}+{d,9} {u:l} {e:l} {data:l} {meta:l}", - x.Index, x.Timestamp, interval, ty, x.EventType, render x.Data, render x.Meta) - else log.Information("{i,4}@{t:u}+{d,9} Corr {corr} Cause {cause} {u:l} {e:l} {data:l} {meta:l}", - x.Index, x.Timestamp, interval, x.CorrelationId, x.CausationId, ty, x.EventType, render x.Data, render x.Meta) - match streamBytes with ValueNone -> () | ValueSome x -> log.Information("ISyncContext.StreamEventBytes {kib:n1}KiB", float x / 1024.) } + if doC then + log.Information("{i,4}@{t:u}+{d,9} Corr {corr} Cause {cause} {u:l} {e:l} {data:l} {meta:l}", + x.Index, x.Timestamp, interval, x.CorrelationId, x.CausationId, ty, x.EventType, render x.Data, render x.Meta) + elif doI then + log.Information("{i,4}@{t:u}+{d,9:u} {u:l} {e:l} {data:l} {meta:l}", + x.Index, x.Timestamp, interval, ty, x.EventType, render x.Data, render x.Meta) + else + log.Information("{i,4}@{t:u} {u:l} {e:l} {data:l} {meta:l}", + x.Index, x.Timestamp, ty, x.EventType, render x.Data, render x.Meta) + match streamBytes with ValueNone -> () | ValueSome x -> log.Write(ill, "ISyncContext.StreamEventBytes {kib:n1}KiB", float x / 1024.) } resetStats () - let streams = p.GetResults DumpParameters.Stream - log.ForContext("streams",streams).Information("Reading...") + let streams = a.Streams(ill) + log.ForContext("streams",streams).Write(ill, "Reading...") do! streams |> Seq.map dumpEvents - |> Async.Parallel + |> Async.Sequential |> Async.Ignore - log.Information("Total Event Bodies Payload {kib:n1}KiB", float payloadBytes / 1024.) + log.Write(ill, "Total Event Bodies Payload {kib:n1}KiB", float payloadBytes / 1024.) if verboseConsole then dumpStats log storeConfig } @@ -837,13 +869,14 @@ type Arguments(p: ParseResults) = let quiet, verbose, verboseConsole = p.Contains Quiet, p.Contains Verbose, p.Contains VerboseConsole member _.CreateDomainLog() = createDomainLog quiet verbose verboseConsole maybeSeq member _.ExecuteSubCommand() = async { + let ill = if quiet then LogEventLevel.Debug else LogEventLevel.Information match p.GetSubCommand() with | Init a -> do! CosmosInit.containerAndOrDb Log.Logger a CancellationToken.None |> Async.AwaitTaskCorrect | InitAws a -> do! DynamoInit.table Log.Logger a | InitSql a -> do! SqlInit.databaseOrSchema Log.Logger a - | Dump a -> do! Dump.run (Log.Logger, verboseConsole, maybeSeq) a - | Query a -> do! CosmosQuery.run quiet (QueryArguments a) |> Async.AwaitTaskCorrect - | Top a -> do! CosmosTop.run quiet (TopArguments a) |> Async.AwaitTaskCorrect + | Dump a -> do! Dump.run ill (Log.Logger, verboseConsole, maybeSeq) a + | Query a -> do! CosmosQuery.run ill (QueryArguments a) |> Async.AwaitTaskCorrect + | Top a -> do! CosmosTop.run ill (TopArguments a) |> Async.AwaitTaskCorrect | Destroy a -> do! CosmosDestroy.run (DestroyArguments a) |> Async.AwaitTaskCorrect | Stats a -> do! 
CosmosStats.run (Log.Logger, verboseConsole, maybeSeq) a | LoadTest a -> let n = p.GetResult(LogFile, fun () -> p.ProgramName + ".log") From 2213169e5b99d8f777c4a0ab87147e6b412671bd Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Wed, 1 Jan 2025 22:06:22 +0000 Subject: [PATCH 53/56] Cover TryLoad/TryHydrateTip logic --- src/Equinox.CosmosStore/CosmosStoreLinq.fs | 72 ++++++++++++---------- tools/Equinox.Tool/Program.fs | 11 ++-- 2 files changed, 46 insertions(+), 37 deletions(-) diff --git a/src/Equinox.CosmosStore/CosmosStoreLinq.fs b/src/Equinox.CosmosStore/CosmosStoreLinq.fs index 67dcf1200..d713df497 100644 --- a/src/Equinox.CosmosStore/CosmosStoreLinq.fs +++ b/src/Equinox.CosmosStore/CosmosStoreLinq.fs @@ -142,7 +142,7 @@ module Internal = static member Create<'P>(q, cat, c, log, hydrate: 'P -> 'M, logLevel) = Projection<'T, 'M>(q, cat, c, Query.enumAs<'T, 'P> log c cat logLevel >> TaskSeq.map hydrate, AggregateOp.countAsync log c cat logLevel) member _.Enum: TaskSeq<'M> = query |> enum - member x.EnumPage(skip, take): TaskSeq<'M> = query |> Query.offsetLimit (skip, take) |> enum + member _.EnumPage(skip, take): TaskSeq<'M> = query |> Query.offsetLimit (skip, take) |> enum member _.CountAsync: CancellationToken -> Task = query |> count [] member val Query: IQueryable<'T> = query [] member val Category: string = category @@ -156,22 +156,24 @@ module Internal = // This hack is based on https://stackoverflow.com/a/73506241/11635 type SnAndSnap() = member val sn: FsCodec.StreamName = Unchecked.defaultof<_> with get, set + member val D: Nullable = Unchecked.defaultof<_> with get, set member val d: System.Text.Json.JsonElement = Unchecked.defaultof<_> with get, set - member val D: int = Unchecked.defaultof<_> with get, set static member CreateItemQueryLambda<'T, 'U>( snExpression: Expression -> MemberExpression, - uExpression: Expression>) = + uExpression: Expression>, + formatExpression: Expression>>, + dataExpression: Expression>) = let param = Expression.Parameter(typeof<'T>, "x") let targetType = typeof let snMember = targetType.GetMember(nameof Unchecked.defaultof.sn)[0] - let dMember = targetType.GetMember(nameof Unchecked.defaultof.d)[0] let formatMember = targetType.GetMember(nameof Unchecked.defaultof.D)[0] + let dataMember = targetType.GetMember(nameof Unchecked.defaultof.d)[0] Expression.Lambda>( Expression.MemberInit( Expression.New(targetType.GetConstructor [||]), [| Expression.Bind(snMember, snExpression param) :> MemberBinding - Expression.Bind(dMember, uExpression.Body.Replace(uExpression.Parameters[0], param)) - Expression.Bind(formatMember, uExpression.Body.Replace(uExpression.Parameters[0], param)) |]), + Expression.Bind(formatMember, QueryExtensions.Compose(uExpression, formatExpression).Body.Replace(uExpression.Parameters[0], param)) + Expression.Bind(dataMember, uExpression.Compose(dataExpression).Body.Replace(uExpression.Parameters[0], param)) |]), [| param |]) /// Represents a query projecting information values from an Index and/or Snapshots with a view to rendering the items and/or a count @@ -182,44 +184,46 @@ type Query<'T, 'M>(inner: Internal.Projection<'T, 'M>) = member _.Count(): Async = inner.CountAsync |> Async.call [] member val Inner = inner -/// Helpers for Querying and Projecting results based on relevant aspects of Equinox.CosmosStore's storage schema +/// Helpers for Querying Indices and Projecting Snapshot data based on well-known aspects of Equinox.CosmosStore's storage schema module Index = [] - type Item = + type Item<'I> = { p: string _etag: 
string - u: Unfold ResizeArray } - and [] Unfold = + u: Unfold<'I> ResizeArray } // Arrays do not bind correctly in Cosmos LINQ + and [] Unfold<'I> = { c: string - d: System.Text.Json.JsonElement - D: int } + d: 'I // For an index, this is the uncompressed JSON data; we're generating a LINQ query using this field's type, 'I + [] + data: System.Text.Json.JsonElement // The raw data representing the encoded snapshot + [] + format: Nullable } // The (optional) encoding associated with that snapshot let inline prefix categoryName = $"%s{categoryName}-" /// The cheapest search basis; the categoryName is a prefix of the `p` partition field /// Depending on how much more selective the caseName is, `byCaseName` may be a better choice /// (but e.g. if the ration is 1:1 then no point having additional criteria) - let byCategoryNameOnly<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName: IQueryable = + let byCategoryNameOnly<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName: IQueryable> = let prefix = prefix categoryName - container.GetItemLinqQueryable().Where(fun d -> d.p.StartsWith(prefix)) + container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix)) // Searches based on the prefix of the `p` field, but also checking the `c` of the relevant unfold is correct // A good idea if that'll be significantly cheaper due to better selectivity - let byCaseName<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName caseName: IQueryable = + let byCaseName<'I> (container: Microsoft.Azure.Cosmos.Container) categoryName caseName: IQueryable> = let prefix = prefix categoryName - container.GetItemLinqQueryable().Where(fun d -> d.p.StartsWith(prefix) && d.u[0].c = caseName) + container.GetItemLinqQueryable>().Where(fun d -> d.p.StartsWith(prefix) && d.u[0].c = caseName) /// Returns the StreamName (from the `p` field) for a 0/1 item query; only the TOP 1 item is returned - let tryGetStreamNameAsync log cat logLevel container (query: IQueryable) ct = + let tryGetStreamNameAsync log cat logLevel container (query: IQueryable>) ct = Internal.Scalar.tryHeadAsync log cat logLevel container (query.Select(fun x -> x.p)) ct - // /// Query the items, returning the Stream name and the Snapshot as a JsonElement (Decompressed if applicable) - // let projectStreamNameAndSnapshot<'I> uExpression: Expression> = - // // a very ugly workaround for not being able to write query.Select(fun x -> { p = x.p; d = x.u[0].d; D = x.u[0].D }) - // let pExpression item = Expression.PropertyOrField(item, nameof Unchecked.defaultof.p) - // let uItem name item = Expression.PropertyOrField(uExpression, name) - // SnAndSnap.CreateItemQueryLambda(pExpression, uExpression, uItem (nameof Unchecked.defaultof.d), uItem (nameof Unchecked.defaultof.D)) + /// Query the items, returning the Stream name and the Snapshot as a JsonElement (Decompressed if applicable) + let projectStreamNameAndSnapshot<'I> snapshotUnfoldExpression: Expression, SnAndSnap>> = + // a very ugly workaround for not being able to write query.Select,Internal.SnAndSnap>(fun x -> { p = x.p; D = x.u[0].D; d = x.u[0].d }) + let pExpression item = Expression.PropertyOrField(item, nameof Unchecked.defaultof>.p) + SnAndSnap.CreateItemQueryLambda, Unfold<'I>>(pExpression, snapshotUnfoldExpression, (fun x -> x.format), (fun x -> x.data)) - let createSnAndSnapshotQuery<'M> log container cat logLevel (hydrate: SnAndSnap -> 'M) (query: IQueryable) = + let createSnAndSnapshotQuery<'I, 'M> log container cat logLevel (hydrate: SnAndSnap -> 'M) (query: 
IQueryable) =
         Internal.Projection.Create(query, cat, container, log, hydrate, logLevel) |> Query
 
 /// Enables querying based on uncompressed Indexed values stored as secondary unfolds alongside the snapshot
@@ -238,26 +242,28 @@ type IndexContext<'I>(container, categoryName, caseName, log, []?quer
 
     /// Fetches a base Queryable that's filtered based on the `categoryName` and `caseName`
     /// NOTE this is relatively expensive to compute a Count on, compared to `CategoryQueryable`
-    member _.ByCaseName(): IQueryable =
+    member _.ByCaseName(): IQueryable> =
         Index.byCaseName<'I> container categoryName caseName
 
     /// Fetches a base Queryable that's filtered only on the `categoryName`
-    member _.ByCategory(): IQueryable =
-        Index.byCategoryNameOnly container categoryName
+    member _.ByCategory(): IQueryable> =
+        Index.byCategoryNameOnly<'I> container categoryName
 
     /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria
-    member x.TryGetStreamNameWhereAsync(criteria: Expressions.Expression>, ct, [] ?logLevel) =
+    member x.TryGetStreamNameWhereAsync(criteria: Expressions.Expression, bool>>, ct, [] ?logLevel) =
         let logLevel = defaultArg logLevel queryLogLevel
         Index.tryGetStreamNameAsync x.Log container categoryName logLevel (x.ByCategory().Where criteria) ct
 
     /// Runs the query; yields the StreamName from the TOP 1 Item matching the criteria
-    member x.TryGetStreamNameWhere(criteria: Expressions.Expression>): Async =
+    member x.TryGetStreamNameWhere(criteria: Expressions.Expression, bool>>): Async =
         (fun ct -> x.TryGetStreamNameWhereAsync(criteria, ct)) |> Async.call
 
     /// Query the items, grabbing the Stream name and the Snapshot; The StreamName and the (Decompressed if applicable) Snapshot are passed to `hydrate`
-    member x.QueryStreamNameAndSnapshot(query: IQueryable>, selectBody: Expression, 'I>>,
-                                        hydrate: SnAndSnap -> 'M,
-                                        [] ?logLevel): Query, 'M> =
+    member x.QueryStreamNameAndSnapshot(
+            query: IQueryable>,
+            selectSnapshotUnfold: Expression, Index.Unfold<'I>>>,
+            hydrate: SnAndSnap -> 'M,
+            [] ?logLevel): Query =
         let logLevel = defaultArg logLevel queryLogLevel
-        query.Select(Index.projectStreamNameAndSnapshot<'I> selectBody)
+        query.Select(Index.projectStreamNameAndSnapshot<'I> selectSnapshotUnfold)
         |> Index.createSnAndSnapshotQuery x.Log container categoryName logLevel hydrate
diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs
index e37bd6ef5..c37cc7b16 100644
--- a/tools/Equinox.Tool/Program.fs
+++ b/tools/Equinox.Tool/Program.fs
@@ -576,9 +576,12 @@ module CosmosTop =
         scratch.Position
     let inflatedUtf8Size x =
         scratch.Position <- 0L
-        if Equinox.CosmosStore.Core.JsonElement.tryInflateTo scratch x then scratch.Position
-        else utf8Size x
-    let infSize = function ValueSome x -> inflatedUtf8Size x | ValueNone -> 0
+        FsCodec.SystemTextJson.Encoding.ExpandTo(scratch, x)
+        scratch.Position
+    let infSize dataField formatField (x: JsonElement) =
+        match x.TryProp dataField, x.TryProp formatField with
+        | ValueNone, _ -> 0L
+        | ValueSome d, df -> inflatedUtf8Size (df |> ValueOption.map _.GetInt32() |> ValueOption.defaultValue 0, d)
     // using the length as a decent proxy for UTF-8 length of corr/causation; if you have messy data in there, you'll have bigger problems to worry about
     let inline stringLen x = match x with ValueSome (x: JsonElement) when x.ValueKind <> JsonValueKind.Null -> x.GetString().Length | _ -> 0
     let _e = Unchecked.defaultof // Or Unfold - both share field names
    let dmcSize (x: JsonElement) =
        (struct (0, 0L), x.EnumerateArray())
||> Seq.fold (fun struct (c, i) x -> struct (c + (x.TryProp(nameof _e.correlationId) |> stringLen) + (x.TryProp(nameof _e.causationId) |> stringLen), - i + (x.TryProp(nameof _e.d) |> infSize) + (x.TryProp(nameof _e.m) |> infSize))) + i + infSize "d" "D" x + infSize "m" "M" x)) let private tryParseEventOrUnfold = function | ValueNone -> struct (0, 0L, struct (0, 0L)) | ValueSome (x: JsonElement) -> x.GetArrayLength(), utf8Size x, dmcSize x From 3ac10f1fb98397317d406fae1e2f9487801885d5 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Fri, 3 Jan 2025 12:03:47 +0000 Subject: [PATCH 54/56] Release 4.1.0-alpha.18 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a1eb05c7..2950e2550 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ The `Unreleased` section name is replaced by the expected version of next releas ### Fixed -## 4.1.0 - 2024/5 +## 4.1.0 - 2025 ### Added From 2b7dc75a6e7c1778814f37495f67ea042394e327 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Sat, 4 Jan 2025 16:16:46 +0000 Subject: [PATCH 55/56] Target FsCodec a14 --- samples/Infrastructure/Services.fs | 4 ++-- samples/Store/Domain/Domain.fsproj | 2 +- samples/Store/Domain/Infrastructure.fs | 2 +- samples/Tutorial/Infrastructure.fs | 2 +- samples/Tutorial/Tutorial.fsproj | 2 +- src/Equinox.CosmosStore/CosmosStore.fs | 15 ++++++++------- .../AccessStrategies.fs | 4 ++-- .../CosmosCoreIntegration.fs | 2 +- .../DocumentStoreIntegration.fs | 4 ++-- .../FsCodecCompressionTests.fs | 10 +++++----- tools/Equinox.Tool/Program.fs | 10 ++++------ 11 files changed, 28 insertions(+), 29 deletions(-) diff --git a/samples/Infrastructure/Services.fs b/samples/Infrastructure/Services.fs index 774c55e4b..9b358abba 100644 --- a/samples/Infrastructure/Services.fs +++ b/samples/Infrastructure/Services.fs @@ -17,10 +17,10 @@ type Store(store) = MemoryStore.MemoryStoreCategory(store, name, codec, fold, initial) | Store.Config.Cosmos (store, caching, unfolds) -> let accessStrategy = if unfolds then CosmosStore.AccessStrategy.Snapshot snapshot else CosmosStore.AccessStrategy.Unoptimized - CosmosStore.CosmosStoreCategory<'event,'state,_>(store, name, FsCodec.SystemTextJson.Encoding.EncodeTryCompressUtf8 codec, fold, initial, accessStrategy, caching) + CosmosStore.CosmosStoreCategory<'event,'state,_>(store, name, FsCodec.SystemTextJson.Encoder.CompressedUtf8 codec, fold, initial, accessStrategy, caching) | Store.Config.Dynamo (store, caching, unfolds) -> let accessStrategy = if unfolds then DynamoStore.AccessStrategy.Snapshot snapshot else DynamoStore.AccessStrategy.Unoptimized - DynamoStore.DynamoStoreCategory<'event,'state,_>(store, name, FsCodec.Compression.EncodeTryCompress codec, fold, initial, accessStrategy, caching) + DynamoStore.DynamoStoreCategory<'event,'state,_>(store, name, FsCodec.Encoder.Compressed codec, fold, initial, accessStrategy, caching) | Store.Config.Es (context, caching, unfolds) -> let accessStrategy = if unfolds then EventStoreDb.AccessStrategy.RollingSnapshots snapshot else EventStoreDb.AccessStrategy.Unoptimized EventStoreDb.EventStoreCategory<'event,'state,_>(context, name, codec, fold, initial, accessStrategy, caching) diff --git a/samples/Store/Domain/Domain.fsproj b/samples/Store/Domain/Domain.fsproj index 53f6cbc39..c2a0ad5e1 100644 --- a/samples/Store/Domain/Domain.fsproj +++ b/samples/Store/Domain/Domain.fsproj @@ -19,7 +19,7 @@ - + diff --git a/samples/Store/Domain/Infrastructure.fs b/samples/Store/Domain/Infrastructure.fs index 
02eb0296e..940b30ec6 100644 --- a/samples/Store/Domain/Infrastructure.fs +++ b/samples/Store/Domain/Infrastructure.fs @@ -92,7 +92,7 @@ module EventCodec = /// For CosmosStore - we encode to JsonElement as that's what the store talks let genJsonElement<'t when 't :> TypeShape.UnionContract.IUnionContract> = - FsCodec.SystemTextJson.CodecJsonElement.Create<'t>() |> FsCodec.SystemTextJson.Encoding.EncodeUncompressed + FsCodec.SystemTextJson.CodecJsonElement.Create<'t>() |> FsCodec.SystemTextJson.Encoder.Uncompressed /// For stores other than CosmosStore, we encode to UTF-8 and have the store do the right thing let gen<'t when 't :> TypeShape.UnionContract.IUnionContract> = diff --git a/samples/Tutorial/Infrastructure.fs b/samples/Tutorial/Infrastructure.fs index ee99c8a9d..09fe36afa 100644 --- a/samples/Tutorial/Infrastructure.fs +++ b/samples/Tutorial/Infrastructure.fs @@ -23,7 +23,7 @@ module EventCodec = /// For CosmosStore - we encode to JsonElement as that's what the store talks let genJsonElement<'t when 't :> TypeShape.UnionContract.IUnionContract> = - FsCodec.SystemTextJson.CodecJsonElement.Create<'t>() |> FsCodec.SystemTextJson.Encoding.EncodeUncompressed + FsCodec.SystemTextJson.CodecJsonElement.Create<'t>() |> FsCodec.SystemTextJson.Encoder.Uncompressed /// For stores other than CosmosStore, we encode to UTF-8 and have the store do the right thing let gen<'t when 't :> TypeShape.UnionContract.IUnionContract> = diff --git a/samples/Tutorial/Tutorial.fsproj b/samples/Tutorial/Tutorial.fsproj index a6d7334a3..71158912c 100644 --- a/samples/Tutorial/Tutorial.fsproj +++ b/samples/Tutorial/Tutorial.fsproj @@ -28,7 +28,7 @@ - + diff --git a/src/Equinox.CosmosStore/CosmosStore.fs b/src/Equinox.CosmosStore/CosmosStore.fs index f715843c4..05e51ec5a 100644 --- a/src/Equinox.CosmosStore/CosmosStore.fs +++ b/src/Equinox.CosmosStore/CosmosStore.fs @@ -14,13 +14,14 @@ type EncodedBody = (struct (int * JsonElement)) /// Interpretation of EncodedBody data is an external concern from the perspective of the Store /// The idiomatic implementation of the encoding logic is FsCodec.SystemTextJson.Compression, in versions 3.1.0 or later /// That implementation provides complete interop with encodings produced by Equinox.Cosmos/CosmosStore from V1 onwards, including integrated Deflate compression -module internal EncodedBody = +module EncodedBody = let internal jsonRawText: EncodedBody -> string = ValueTuple.snd >> _.GetRawText() let internal jsonUtf8Bytes = jsonRawText >> System.Text.Encoding.UTF8.GetByteCount - let [] deflateEncoding = 1 - // prior to the addition of the `D` field in 4.1.0, the integrated compression support - // was predicated entirely on a JSON String `d` value in the Unfold as implying it was UTF8->Deflate->Base64 encoded - let parseUnfold = function struct (0, e: JsonElement) when e.ValueKind = JsonValueKind.String -> struct (deflateEncoding, e) | x -> x + let [] private deflateEncoding = 1 + /// prior to the addition of the `D` field in 4.1.0, the integrated compression support + /// was predicated entirely on a JSON String `d` value in the Unfold as implying it was UTF8 -> Deflate -> Base64 encoded + let ofUnfoldBody struct (enc, data: JsonElement): EncodedBody = + if enc = 0 && data.ValueKind = JsonValueKind.String then (deflateEncoding, data) else (enc, data) /// A single Domain Event from the array held in a Batch [] @@ -108,7 +109,7 @@ type Unfold = [] M: int } member x.ToTimelineEvent(): ITimelineEvent = - FsCodec.Core.TimelineEvent.Create(x.i, x.c, EncodedBody.parseUnfold 
(x.D, x.d), (x.M, x.m), Guid.Empty, null, null, x.t, isUnfold = true) + FsCodec.Core.TimelineEvent.Create(x.i, x.c, EncodedBody.ofUnfoldBody (x.D, x.d), (x.M, x.m), Guid.Empty, null, null, x.t, isUnfold = true) // Arrays are not indexed by default. 1. enable filtering by `c`ase 2. index uncompressed fields within unfolds for filtering static member internal IndexedPaths = [| "/u/[]/c/?"; "/u/[]/d/*" |] @@ -255,7 +256,7 @@ module Log = f log retryPolicy.Execute withLoggingContextWrapping - let internal eventLen (x: #IEventData<_>) = EncodedBody.jsonUtf8Bytes x.Data + EncodedBody.jsonUtf8Bytes x.Meta + 80 + let internal eventLen (x: #IEventData) = EncodedBody.jsonUtf8Bytes x.Data + EncodedBody.jsonUtf8Bytes x.Meta + 80 let internal batchLen = Seq.sumBy eventLen [] type Operation = Tip | Tip404 | Tip304 | Query | Index | Write | Resync | Conflict | Prune | Delete | Trim diff --git a/tests/Equinox.CosmosStore.Integration/AccessStrategies.fs b/tests/Equinox.CosmosStore.Integration/AccessStrategies.fs index 1fcfea895..d73a04786 100644 --- a/tests/Equinox.CosmosStore.Integration/AccessStrategies.fs +++ b/tests/Equinox.CosmosStore.Integration/AccessStrategies.fs @@ -42,9 +42,9 @@ module SequenceCheck = | Add of {| value : int |} interface TypeShape.UnionContract.IUnionContract #if STORE_DYNAMO - let codec = FsCodec.SystemTextJson.Codec.Create() |> FsCodec.Compression.EncodeTryCompress + let codec = FsCodec.SystemTextJson.Codec.Create() |> FsCodec.Encoder.Compressed #else - let codec = FsCodec.SystemTextJson.CodecJsonElement.Create() |> FsCodec.SystemTextJson.Encoding.EncodeTryCompress + let codec = FsCodec.SystemTextJson.CodecJsonElement.Create() |> FsCodec.SystemTextJson.Encoder.Compressed #endif module Fold = diff --git a/tests/Equinox.CosmosStore.Integration/CosmosCoreIntegration.fs b/tests/Equinox.CosmosStore.Integration/CosmosCoreIntegration.fs index deecfddd0..32da9daaa 100644 --- a/tests/Equinox.CosmosStore.Integration/CosmosCoreIntegration.fs +++ b/tests/Equinox.CosmosStore.Integration/CosmosCoreIntegration.fs @@ -10,7 +10,7 @@ open System type TestEvents() = static member private Create(i, ?eventType, ?json) = - let enc = System.Text.Json.JsonSerializer.SerializeToElement >> FsCodec.SystemTextJson.Encoding.FromJsonElement + let enc = System.Text.Json.JsonSerializer.SerializeToElement >> FsCodec.SystemTextJson.Encoding.OfJsonElement FsCodec.Core.EventData.Create ( sprintf "%s:%d" (defaultArg eventType "test_event") i, enc (defaultArg json "{\"d\":\"d\"}"), diff --git a/tests/Equinox.CosmosStore.Integration/DocumentStoreIntegration.fs b/tests/Equinox.CosmosStore.Integration/DocumentStoreIntegration.fs index 88682f3ea..739f84304 100644 --- a/tests/Equinox.CosmosStore.Integration/DocumentStoreIntegration.fs +++ b/tests/Equinox.CosmosStore.Integration/DocumentStoreIntegration.fs @@ -15,7 +15,7 @@ open Equinox.CosmosStore.Integration.CosmosFixtures module Cart = let fold, initial = Cart.Fold.fold, Cart.Fold.initial #if STORE_DYNAMO - let codec = Cart.Events.codec |> FsCodec.Compression.EncodeTryCompress + let codec = Cart.Events.codec |> FsCodec.Encoder.Compressed #else let codec = Cart.Events.codecJe #endif @@ -49,7 +49,7 @@ module ContactPreferences = let fold, initial = ContactPreferences.Fold.fold, ContactPreferences.Fold.initial module ClientId = let gen (): ContactPreferences.ClientId = Guid.gen () |> Guid.toStringN |> ContactPreferences.ClientId #if STORE_DYNAMO - let codec = ContactPreferences.Events.codec |> FsCodec.Compression.EncodeTryCompress + let codec = 
ContactPreferences.Events.codec |> FsCodec.Encoder.Compressed #else let codec = ContactPreferences.Events.codecJe #endif diff --git a/tests/Equinox.CosmosStore.Integration/FsCodecCompressionTests.fs b/tests/Equinox.CosmosStore.Integration/FsCodecCompressionTests.fs index 6c50476a0..4857c50f8 100644 --- a/tests/Equinox.CosmosStore.Integration/FsCodecCompressionTests.fs +++ b/tests/Equinox.CosmosStore.Integration/FsCodecCompressionTests.fs @@ -1,7 +1,7 @@ // Prior to version v 4.1.0, CosmosStore owned: // - compression of snapshots (and APIs controlling conditionally of that) // - inflation of snapshots -// This is now an external concern, fully implemented by APIs presented in FsCodec.SystemTextJson.Compression v 3.1.0 and later +// This is now an external concern, fully implemented by APIs presented in FsCodec.SystemTextJson.Encod* v 3.1.0 and later // These tests are a sanity check pinning the basic mechanisms that are now externalized; any more thorough tests should be maintained in FsCodec // NOTE there is no strong dependency on FsCodec; CosmosStore is happy to roundtrip arbitrary pairs of D/d and M/m values // NOTE prior to v 4.1.0, CosmosStore provided a System.Text.Json integration for Microsoft.Azure.Cosmos @@ -38,14 +38,14 @@ type CoreBehaviors() = [] let ``serializes, achieving expected compression`` () = - let encoded = eventCodec |> FsCodec.SystemTextJson.Encoding.EncodeTryCompress |> _.Encode((), A { embed = String('x',5000) }) + let encoded = eventCodec |> FsCodec.SystemTextJson.Encoder.Compressed |> _.Encode((), A { embed = String('x',5000) }) let res = ser encoded test <@ res.Contains "\"d\":\"" && res.Length < 138 && res.Contains "\"D\":2" @> let codec compress = - let forceCompression: FsCodec.SystemTextJson.CompressionOptions = { minSize = 0; minGain = -1000 } - if compress then FsCodec.SystemTextJson.Encoding.EncodeTryCompress(eventCodec, options = forceCompression) - else FsCodec.SystemTextJson.Encoding.EncodeUncompressed eventCodec + let forceCompression: FsCodec.CompressionOptions = { minSize = 0; minGain = -1000 } + if compress then FsCodec.SystemTextJson.Encoder.Compressed(eventCodec, options = forceCompression) + else FsCodec.SystemTextJson.Encoder.Uncompressed eventCodec [] let roundtrips compress value = diff --git a/tools/Equinox.Tool/Program.fs b/tools/Equinox.Tool/Program.fs index c37cc7b16..1a223b0eb 100644 --- a/tools/Equinox.Tool/Program.fs +++ b/tools/Equinox.Tool/Program.fs @@ -483,8 +483,6 @@ module CosmosQuery = open Equinox.CosmosStore.Linq.Internal open FSharp.Control let inline miB x = Equinox.CosmosStore.Linq.Internal.miB x - type System.Text.Json.JsonElement with - member x.Utf8ByteCount = if x.ValueKind = System.Text.Json.JsonValueKind.Null then 0 else x.GetRawText() |> System.Text.Encoding.UTF8.GetByteCount type System.Text.Json.JsonDocument with member x.Cast<'T>() = System.Text.Json.JsonSerializer.Deserialize<'T>(x.RootElement) member x.Timestamp = @@ -498,9 +496,9 @@ module CosmosQuery = | _ -> () let selectedFields = match a.Mode with - | Mode.Default -> "c._etag, c.p, c.u[0].d" - | Mode.SnapOnly -> "c.u[0].d" - | Mode.SnapWithStream -> "c.p, c.u[0].d" + | Mode.Default -> "c._etag, c.p, c.u[0].D, c.u[0].d" + | Mode.SnapOnly -> "c.u[0].D, c.u[0].d" + | Mode.SnapWithStream -> "c.p, c.u[0].D, c.u[0].d" | Mode.ReadOnly -> "c.u" // TOCONSIDER remove; adjust TryLoad/TryHydrateTip | Mode.ReadWithStream -> "c.p, c.u" // TOCONSIDER remove; adjust TryLoad/TryHydrateTip | Mode.Raw -> "*" @@ -576,7 +574,7 @@ module CosmosTop = scratch.Position let 
inflatedUtf8Size x = scratch.Position <- 0L - FsCodec.SystemTextJson.Encoding.ExpandTo(scratch, x) + FsCodec.SystemTextJson.Encoding.ToStream(scratch, x) scratch.Position let infSize dataField formatField (x: JsonElement) = match x.TryProp dataField, x.TryProp formatField with From fb963b6ed916a12a6148444c2b83d3b014f77246 Mon Sep 17 00:00:00 2001 From: Ruben Bartelink Date: Sat, 4 Jan 2025 20:21:23 +0000 Subject: [PATCH 56/56] Release 4.1.0-alpha.20 --- samples/Store/Domain/Domain.fsproj | 2 +- samples/Tutorial/Favorites.fsx | 2 +- samples/Tutorial/Tutorial.fsproj | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/Store/Domain/Domain.fsproj b/samples/Store/Domain/Domain.fsproj index c2a0ad5e1..cd2687392 100644 --- a/samples/Store/Domain/Domain.fsproj +++ b/samples/Store/Domain/Domain.fsproj @@ -19,7 +19,7 @@ - + diff --git a/samples/Tutorial/Favorites.fsx b/samples/Tutorial/Favorites.fsx index 3702b6bf9..83315e1e0 100644 --- a/samples/Tutorial/Favorites.fsx +++ b/samples/Tutorial/Favorites.fsx @@ -74,7 +74,7 @@ let favesCa = fold favesCba removeBEffect let _removeBAgainEffect = Decisions.remove "b" favesCa //val _removeBAgainEffect : Event list = [] -// related streams are termed a Category; Each client will have it's own Stream. +// related streams are termed a Category; Each client will have its own Stream. let [] private CategoryName = "Favorites" let clientAFavoritesStreamId = FsCodec.StreamId.gen id "ClientA" diff --git a/samples/Tutorial/Tutorial.fsproj b/samples/Tutorial/Tutorial.fsproj index 71158912c..2f5b35d35 100644 --- a/samples/Tutorial/Tutorial.fsproj +++ b/samples/Tutorial/Tutorial.fsproj @@ -28,7 +28,7 @@ - +
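
Usage sketch (not part of the patch series above; appended purely for illustration). It shows roughly how the indexed-query surface introduced in PATCH 52/56 and reshaped in PATCH 53/56 is intended to be exercised. The `UserIndex` record, the `$User` category name, the `EmailIndex` unfold case, and the `container`/`log`/`hydrate` values are assumptions invented for the example; the API members referenced (`IndexContext`, `ByCaseName`, `TryGetStreamNameWhere`, `QueryStreamNameAndSnapshot`, `Inner.EnumPage`) are the ones defined in the diffs above.

    open Equinox.CosmosStore.Linq

    // Assumed shape of the uncompressed `u[0].d` payload of a hypothetical "EmailIndex" unfold
    type UserIndex = { email: string }

    // Yields the stream name (if any) whose indexed unfold carries the given email;
    // generates SQL roughly along the lines of:
    //   SELECT TOP 1 VALUE c.p FROM c WHERE STARTSWITH(c.p, "$User-") AND c.u[0].d.email = <email>
    let findUserStreamByEmail (container: Microsoft.Azure.Cosmos.Container) (log: Serilog.ILogger) email =
        let index = IndexContext<UserIndex>(container, "$User", "EmailIndex", log)
        index.TryGetStreamNameWhere(fun i -> i.u[0].d.email = email)

    // Pages through matching streams, pairing each stream name with its (possibly compressed) snapshot unfold;
    // `hydrate` maps the projected (sn, D, d) values to whatever view type the caller requires
    let firstPageOfSnapshots (index: IndexContext<UserIndex>) (hydrate: Internal.SnAndSnap -> 'View) =
        let results = index.QueryStreamNameAndSnapshot(index.ByCaseName(), (fun i -> i.u[0]), hydrate)
        results.Inner.EnumPage(0, 10) // TaskSeq<'View> covering the first 10 hydrated items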