Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support remote write 2.0 on receive #8033

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 127 additions & 3 deletions pkg/receive/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ import (
"github.com/thanos-io/thanos/pkg/store/labelpb"
"github.com/thanos-io/thanos/pkg/store/storepb"
"github.com/thanos-io/thanos/pkg/store/storepb/prompb"
"github.com/thanos-io/thanos/pkg/store/storepb/writev2pb"
"github.com/thanos-io/thanos/pkg/tenancy"
"github.com/thanos-io/thanos/pkg/tracing"
)
Expand Down Expand Up @@ -91,6 +92,7 @@ var (
// WriteableStoreAsyncClient is a storepb.WriteableStoreClient that additionally
// offers asynchronous remote write calls for both the V1 and V2 request types.
//
// The asynchronous methods return nothing; implementations are expected to
// report the outcome for seriesIDs on responseWriter and to invoke cb with the
// resulting error (nil on success).
type WriteableStoreAsyncClient interface {
	storepb.WriteableStoreClient

	// RemoteWriteAsync forwards a V1 write request to the given endpoint replica.
	RemoteWriteAsync(
		ctx context.Context,
		req *storepb.WriteRequest,
		er endpointReplica,
		seriesIDs []int,
		responseWriter chan writeResponse,
		cb func(error),
	)

	// RemoteWriteAsyncV2 forwards a V2 write request to the given endpoint replica.
	RemoteWriteAsyncV2(
		ctx context.Context,
		req *storepb.WriteRequestV2,
		er endpointReplica,
		seriesIDs []int,
		responseWriter chan writeResponse,
		cb func(error),
	)
}

// Options for the web Handler.
Expand Down Expand Up @@ -474,6 +476,31 @@ func newWriteResponse(seriesIDs []int, err error, er endpointReplica) writeRespo
}
}

// parseProtoMsg extracts the remote write protobuf message name from a
// Content-Type header value.
//
// The media type (the part before the first ";") must equal
// appProtoContentType. The remaining ";"-separated parameters are scanned for
// a "proto" parameter; its value is validated and returned. When no "proto"
// parameter is present the V1 message is assumed.
//
// An error is returned when the media type does not match, when a non-empty
// parameter is not a single key=value pair, or when the "proto" value fails
// validation.
func parseProtoMsg(contentType string) (WriteProtoFullName, error) {
	contentType = strings.TrimSpace(contentType)

	parts := strings.Split(contentType, ";")
	// RFC 9110 permits optional whitespace on both sides of ";", so the media
	// type and every parameter are trimmed before being inspected.
	if strings.TrimSpace(parts[0]) != appProtoContentType {
		return "", fmt.Errorf("expected %v as the first (media) part, got %v content-type", appProtoContentType, contentType)
	}
	// Parse potential https://www.rfc-editor.org/rfc/rfc9110#parameter
	for _, p := range parts[1:] {
		param := strings.TrimSpace(p)
		if param == "" {
			// The grammar allows empty parameters (e.g. a trailing ";").
			continue
		}
		pair := strings.Split(param, "=")
		if len(pair) != 2 {
			return "", fmt.Errorf("as per https://www.rfc-editor.org/rfc/rfc9110#parameter expected parameters to be key-values, got %v in %v content-type", p, contentType)
		}
		if pair[0] == "proto" {
			ret := WriteProtoFullName(pair[1])
			if err := ret.Validate(); err != nil {
				return "", fmt.Errorf("got %v content type; %w", contentType, err)
			}
			return ret, nil
		}
	}
	// No "proto=" parameter, assuming v1.
	return WriteProtoFullNameV1, nil
}

func (h *Handler) receiveHTTP(w http.ResponseWriter, r *http.Request) {
var err error
span, ctx := tracing.StartSpan(r.Context(), "receive_http")
Expand Down Expand Up @@ -512,6 +539,42 @@ func (h *Handler) receiveHTTP(w http.ResponseWriter, r *http.Request) {
return
}

contentType := r.Header.Get("Content-Type")
if contentType == "" {
// Don't break yolo 1.0 clients if not needed.
// We could give http.StatusUnsupportedMediaType, but let's assume 1.0 message by default.
contentType = appProtoContentType
}

msgType, err := parseProtoMsg(contentType)
if err != nil {
level.Error(tLogger).Log("msg", "Error decoding remote write request", "err", err)
http.Error(w, err.Error(), http.StatusUnsupportedMediaType)
return
}

enc := r.Header.Get("Content-Encoding")
if enc == "" {
// Don't break yolo 1.0 clients if not needed. This is similar to what we did
// before 2.0: https://github.com/prometheus/prometheus/blob/d78253319daa62c8f28ed47e40bafcad2dd8b586/storage/remote/write_handler.go#L62
// We could give http.StatusUnsupportedMediaType, but let's assume snappy by default.
} else if enc != string(SnappyBlockCompression) {
err := fmt.Errorf("%v encoding (compression) is not accepted by this server; only %v is acceptable", enc, SnappyBlockCompression)
level.Error(tLogger).Log("msg", "Error decoding remote write request", "err", err)
http.Error(w, err.Error(), http.StatusUnsupportedMediaType)
}

switch msgType {
case WriteProtoFullNameV1:
h.storeV1(ctx, tLogger, w, r, tenantHTTP)
case WriteProtoFullNameV2:
h.storeV2(ctx, tLogger, w, r, tenantHTTP)
default:
}
}

func (h *Handler) storeV1(ctx context.Context, tLogger log.Logger, w http.ResponseWriter, r *http.Request, tenantHTTP string) {
var err error
requestLimiter := h.Limiter.RequestLimiter()
// io.ReadAll dynamically adjust the byte slice for read data, starting from 512B.
// Since this is receive hot path, grow upfront saving allocations and CPU time.
Expand Down Expand Up @@ -621,8 +684,10 @@ func (h *Handler) receiveHTTP(w http.ResponseWriter, r *http.Request) {
}

// requestStats holds per-request counters. Each field is declared exactly
// once; repeating a field name inside a struct is a compile error.
type requestStats struct {
	timeseries      int // number of time series counted for the request
	totalSamples    int // number of samples counted for the request
	totalExemplars  int // number of exemplars counted for the request
	totalHistograms int // number of histograms counted for the request
}

// tenantRequestStats maps a string key to its requestStats.
// NOTE(review): the key is presumably the tenant name — confirm against callers.
type tenantRequestStats map[string]requestStats
Expand Down Expand Up @@ -865,7 +930,7 @@ func (h *Handler) distributeTimeseriesToReplicas(
}

for _, rn := range replicas {
endpoint, err := h.hashring.GetN(tenant, &ts, rn)
endpoint, err := h.hashring.GetN(tenant, ts.Labels, rn)
if err != nil {
return nil, nil, err
}
Expand Down Expand Up @@ -1059,6 +1124,36 @@ func (h *Handler) RemoteWrite(ctx context.Context, r *storepb.WriteRequest) (*st
}
}

// RemoteWriteV2 is the gRPC entry point for remote write 2.0 requests on
// storepb.WriteableStore.
//
// It repackages the incoming message as a writev2pb.Request, hands it to
// handleRequestV2 and translates the root cause of any failure into a gRPC
// status code:
//
//	errNotReady, errUnavailable -> codes.Unavailable
//	errConflict                 -> codes.AlreadyExists
//	errBadReplica               -> codes.InvalidArgument
//	anything else               -> codes.Internal
func (h *Handler) RemoteWriteV2(ctx context.Context, r *storepb.WriteRequestV2) (*storepb.WriteResponse, error) {
	span, ctx := tracing.StartSpan(ctx, "receive_grpc")
	defer span.Finish()

	req := &writev2pb.Request{
		Timeseries: r.Timeseries,
		Symbols:    r.Symbols,
	}

	_, err := h.handleRequestV2(ctx, h.logger, uint64(r.Replica), req, r.Tenant)
	if err != nil {
		level.Debug(h.logger).Log("msg", "failed to handle request", "err", err)
	}

	// Pick the status code from the root cause; a nil cause means success.
	var code codes.Code
	switch errors.Cause(err) {
	case nil:
		return &storepb.WriteResponse{}, nil
	case errNotReady, errUnavailable:
		code = codes.Unavailable
	case errConflict:
		code = codes.AlreadyExists
	case errBadReplica:
		code = codes.InvalidArgument
	default:
		code = codes.Internal
	}
	return nil, status.Error(code, err.Error())
}

// relabel relabels the time series labels in the remote write request.
func (h *Handler) relabel(wreq *prompb.WriteRequest) {
if len(h.options.RelabelConfigs) == 0 {
Expand Down Expand Up @@ -1330,6 +1425,10 @@ func (pw *peerWorker) RemoteWrite(ctx context.Context, in *storepb.WriteRequest,
return pw.client.RemoteWrite(ctx, in)
}

// RemoteWriteV2 sends the V2 write request through the wrapped client and
// returns its response unchanged. Caller-supplied gRPC call options are
// forwarded to the client rather than being silently discarded.
func (pw *peerWorker) RemoteWriteV2(ctx context.Context, in *storepb.WriteRequestV2, opts ...grpc.CallOption) (*storepb.WriteResponse, error) {
	return pw.client.RemoteWriteV2(ctx, in, opts...)
}

type peerClient interface {
storepb.WriteableStoreClient
io.Closer
Expand Down Expand Up @@ -1414,6 +1513,31 @@ func (p *peerWorker) RemoteWriteAsync(ctx context.Context, req *storepb.WriteReq
})
}

// RemoteWriteAsyncV2 schedules a V2 remote write to the peer on the worker
// pool and returns immediately.
//
// Once the job runs it records how long it waited in the pool, performs the
// write inside a "receive_forward" span, publishes the outcome for seriesIDs
// on responseWriter (with the error wrapped to mention the endpoint), marks
// the span as failed when the write returned an error, and finally calls cb
// with the unwrapped error.
func (p *peerWorker) RemoteWriteAsyncV2(ctx context.Context, req *storepb.WriteRequestV2, er endpointReplica, seriesIDs []int, responseWriter chan writeResponse, cb func(error)) {
	enqueuedAt := time.Now()

	// forward does the actual write; it runs inside the tracing span.
	forward := func(ctx context.Context) {
		_, writeErr := p.client.RemoteWriteV2(ctx, req)

		// Wrapf yields nil for a nil error, so success is reported as a nil error.
		wrapped := errors.Wrapf(writeErr, "forwarding request to endpoint %v", er.endpoint)
		responseWriter <- newWriteResponse(seriesIDs, wrapped, er)

		if writeErr != nil {
			trace.SpanFromContext(ctx).SetAttributes(
				attribute.Bool("error", true),
				attribute.String("error.msg", writeErr.Error()),
			)
		}
		cb(writeErr)
	}

	p.wp.Go(func() {
		p.forwardDelay.Observe(time.Since(enqueuedAt).Seconds())

		tracing.DoInSpan(ctx, "receive_forward", forward, opentracing.Tags{
			"endpoint": er.endpoint,
			"replica":  er.replica,
		})
	})
}

type peerGroup struct {
logger log.Logger
dialOpts []grpc.DialOption
Expand Down
Loading
Loading