Skip to content

Commit

Permalink
pick-6.5: Support adaptive update interval for low resolution ts (#1484
Browse files Browse the repository at this point in the history
…) (#1531)

 

Signed-off-by: MyonKeminta <[email protected]>
Signed-off-by: ekexium <[email protected]>

Co-authored-by: MyonKeminta <[email protected]>
  • Loading branch information
ekexium and MyonKeminta authored Jan 9, 2025
1 parent 5f59e4e commit ccec7ef
Show file tree
Hide file tree
Showing 7 changed files with 792 additions and 89 deletions.
136 changes: 78 additions & 58 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,64 +41,66 @@ import (

// Client metrics.
var (
TiKVTxnCmdHistogram *prometheus.HistogramVec
TiKVBackoffHistogram *prometheus.HistogramVec
TiKVSendReqHistogram *prometheus.HistogramVec
TiKVSendReqCounter *prometheus.CounterVec
TiKVSendReqTimeCounter *prometheus.CounterVec
TiKVRPCNetLatencyHistogram *prometheus.HistogramVec
TiKVCoprocessorHistogram *prometheus.HistogramVec
TiKVLockResolverCounter *prometheus.CounterVec
TiKVRegionErrorCounter *prometheus.CounterVec
TiKVTxnWriteKVCountHistogram prometheus.Histogram
TiKVTxnWriteSizeHistogram prometheus.Histogram
TiKVRawkvCmdHistogram *prometheus.HistogramVec
TiKVRawkvSizeHistogram *prometheus.HistogramVec
TiKVTxnRegionsNumHistogram *prometheus.HistogramVec
TiKVLoadSafepointCounter *prometheus.CounterVec
TiKVSecondaryLockCleanupFailureCounter *prometheus.CounterVec
TiKVRegionCacheCounter *prometheus.CounterVec
TiKVLoadRegionCacheHistogram *prometheus.HistogramVec
TiKVLocalLatchWaitTimeHistogram prometheus.Histogram
TiKVStatusDuration *prometheus.HistogramVec
TiKVStatusCounter *prometheus.CounterVec
TiKVBatchWaitDuration prometheus.Histogram
TiKVBatchSendLatency prometheus.Histogram
TiKVBatchWaitOverLoad prometheus.Counter
TiKVBatchPendingRequests *prometheus.HistogramVec
TiKVBatchRequests *prometheus.HistogramVec
TiKVBatchClientUnavailable prometheus.Histogram
TiKVBatchClientWaitEstablish prometheus.Histogram
TiKVBatchClientRecycle prometheus.Histogram
TiKVBatchRecvLatency *prometheus.HistogramVec
TiKVRangeTaskStats *prometheus.GaugeVec
TiKVRangeTaskPushDuration *prometheus.HistogramVec
TiKVTokenWaitDuration prometheus.Histogram
TiKVTxnHeartBeatHistogram *prometheus.HistogramVec
TiKVPessimisticLockKeysDuration prometheus.Histogram
TiKVTTLLifeTimeReachCounter prometheus.Counter
TiKVNoAvailableConnectionCounter prometheus.Counter
TiKVTwoPCTxnCounter *prometheus.CounterVec
TiKVAsyncCommitTxnCounter *prometheus.CounterVec
TiKVOnePCTxnCounter *prometheus.CounterVec
TiKVStoreLimitErrorCounter *prometheus.CounterVec
TiKVGRPCConnTransientFailureCounter *prometheus.CounterVec
TiKVPanicCounter *prometheus.CounterVec
TiKVForwardRequestCounter *prometheus.CounterVec
TiKVTSFutureWaitDuration prometheus.Histogram
TiKVSafeTSUpdateCounter *prometheus.CounterVec
TiKVMinSafeTSGapSeconds *prometheus.GaugeVec
TiKVReplicaSelectorFailureCounter *prometheus.CounterVec
TiKVRequestRetryTimesHistogram prometheus.Histogram
TiKVTxnCommitBackoffSeconds prometheus.Histogram
TiKVTxnCommitBackoffCount prometheus.Histogram
TiKVSmallReadDuration prometheus.Histogram
TiKVReadThroughput prometheus.Histogram
TiKVUnsafeDestroyRangeFailuresCounterVec *prometheus.CounterVec
TiKVPrewriteAssertionUsageCounter *prometheus.CounterVec
TiKVStaleReadCounter *prometheus.CounterVec
TiKVStaleReadReqCounter *prometheus.CounterVec
TiKVStaleReadBytes *prometheus.CounterVec
TiKVTxnCmdHistogram *prometheus.HistogramVec
TiKVBackoffHistogram *prometheus.HistogramVec
TiKVSendReqHistogram *prometheus.HistogramVec
TiKVSendReqCounter *prometheus.CounterVec
TiKVSendReqTimeCounter *prometheus.CounterVec
TiKVRPCNetLatencyHistogram *prometheus.HistogramVec
TiKVCoprocessorHistogram *prometheus.HistogramVec
TiKVLockResolverCounter *prometheus.CounterVec
TiKVRegionErrorCounter *prometheus.CounterVec
TiKVTxnWriteKVCountHistogram prometheus.Histogram
TiKVTxnWriteSizeHistogram prometheus.Histogram
TiKVRawkvCmdHistogram *prometheus.HistogramVec
TiKVRawkvSizeHistogram *prometheus.HistogramVec
TiKVTxnRegionsNumHistogram *prometheus.HistogramVec
TiKVLoadSafepointCounter *prometheus.CounterVec
TiKVSecondaryLockCleanupFailureCounter *prometheus.CounterVec
TiKVRegionCacheCounter *prometheus.CounterVec
TiKVLoadRegionCacheHistogram *prometheus.HistogramVec
TiKVLocalLatchWaitTimeHistogram prometheus.Histogram
TiKVStatusDuration *prometheus.HistogramVec
TiKVStatusCounter *prometheus.CounterVec
TiKVBatchWaitDuration prometheus.Histogram
TiKVBatchSendLatency prometheus.Histogram
TiKVBatchWaitOverLoad prometheus.Counter
TiKVBatchPendingRequests *prometheus.HistogramVec
TiKVBatchRequests *prometheus.HistogramVec
TiKVBatchClientUnavailable prometheus.Histogram
TiKVBatchClientWaitEstablish prometheus.Histogram
TiKVBatchClientRecycle prometheus.Histogram
TiKVBatchRecvLatency *prometheus.HistogramVec
TiKVRangeTaskStats *prometheus.GaugeVec
TiKVRangeTaskPushDuration *prometheus.HistogramVec
TiKVTokenWaitDuration prometheus.Histogram
TiKVTxnHeartBeatHistogram *prometheus.HistogramVec
TiKVPessimisticLockKeysDuration prometheus.Histogram
TiKVTTLLifeTimeReachCounter prometheus.Counter
TiKVNoAvailableConnectionCounter prometheus.Counter
TiKVTwoPCTxnCounter *prometheus.CounterVec
TiKVAsyncCommitTxnCounter *prometheus.CounterVec
TiKVOnePCTxnCounter *prometheus.CounterVec
TiKVStoreLimitErrorCounter *prometheus.CounterVec
TiKVGRPCConnTransientFailureCounter *prometheus.CounterVec
TiKVPanicCounter *prometheus.CounterVec
TiKVForwardRequestCounter *prometheus.CounterVec
TiKVTSFutureWaitDuration prometheus.Histogram
TiKVSafeTSUpdateCounter *prometheus.CounterVec
TiKVMinSafeTSGapSeconds *prometheus.GaugeVec
TiKVReplicaSelectorFailureCounter *prometheus.CounterVec
TiKVRequestRetryTimesHistogram prometheus.Histogram
TiKVTxnCommitBackoffSeconds prometheus.Histogram
TiKVTxnCommitBackoffCount prometheus.Histogram
TiKVSmallReadDuration prometheus.Histogram
TiKVReadThroughput prometheus.Histogram
TiKVUnsafeDestroyRangeFailuresCounterVec *prometheus.CounterVec
TiKVPrewriteAssertionUsageCounter *prometheus.CounterVec
TiKVStaleReadCounter *prometheus.CounterVec
TiKVStaleReadReqCounter *prometheus.CounterVec
TiKVStaleReadBytes *prometheus.CounterVec
TiKVValidateReadTSFromPDCount prometheus.Counter
TiKVLowResolutionTSOUpdateIntervalSecondsGauge prometheus.Gauge
)

// Label constants.
Expand Down Expand Up @@ -617,6 +619,22 @@ func initMetrics(namespace, subsystem string) {
Help: "Counter of stale read requests bytes",
}, []string{LblResult, LblDirection})

TiKVValidateReadTSFromPDCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "validate_read_ts_from_pd_count",
Help: "Counter of validating read ts by getting a timestamp from PD",
})

TiKVLowResolutionTSOUpdateIntervalSecondsGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "low_resolution_tso_update_interval_seconds",
Help: "The actual working update interval for the low resolution TSO. As there are adaptive mechanism internally, this value may differ from the config.",
})

initShortcuts()
}

Expand Down Expand Up @@ -690,6 +708,8 @@ func RegisterMetrics() {
prometheus.MustRegister(TiKVStaleReadCounter)
prometheus.MustRegister(TiKVStaleReadReqCounter)
prometheus.MustRegister(TiKVStaleReadBytes)
prometheus.MustRegister(TiKVValidateReadTSFromPDCount)
prometheus.MustRegister(TiKVLowResolutionTSOUpdateIntervalSecondsGauge)
}

// readCounter reads the value of a prometheus.Counter.
Expand Down
13 changes: 13 additions & 0 deletions oracle/oracle.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,26 @@ type Oracle interface {
GetTimestampAsync(ctx context.Context, opt *Option) Future
GetLowResolutionTimestamp(ctx context.Context, opt *Option) (uint64, error)
GetLowResolutionTimestampAsync(ctx context.Context, opt *Option) Future
// GetStaleTimestamp generates a timestamp based on the recently fetched timestamp and the elapsed time since
// when that timestamp was fetched. The result is expected to be about `prevSecond` seconds before the current
// time.
// WARNING: This method does not guarantee whether the generated timestamp is legal for accessing the data.
// Neither is it safe to use it for verifying the legality of another calculated timestamp.
// Be sure to validate the timestamp before using it to access the data.
GetStaleTimestamp(ctx context.Context, txnScope string, prevSecond uint64) (uint64, error)
IsExpired(lockTimestamp, TTL uint64, opt *Option) bool
UntilExpired(lockTimeStamp, TTL uint64, opt *Option) int64
Close()

GetExternalTimestamp(ctx context.Context) (uint64, error)
SetExternalTimestamp(ctx context.Context, ts uint64) error

// ValidateSnapshotReadTS verifies whether it can be guaranteed that the given readTS doesn't exceed the maximum ts
// that has been allocated by the oracle, so that it's safe to use this ts to perform snapshot read, stale read,
// etc.
// Note that this method only checks the ts from the oracle's perspective. It doesn't check whether the snapshot
// has been GCed.
ValidateSnapshotReadTS(ctx context.Context, readTS uint64, opt *Option) error
}

// Future is a future which promises to return a timestamp.
Expand Down
12 changes: 12 additions & 0 deletions oracle/oracles/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (
"sync"
"time"

"github.com/pingcap/errors"
"github.com/tikv/client-go/v2/oracle"
)

Expand Down Expand Up @@ -134,3 +135,14 @@ func (l *localOracle) SetExternalTimestamp(ctx context.Context, newTimestamp uin
// GetExternalTimestamp implements oracle.Oracle interface.
// It forwards the call to l.getExternalTimestamp and returns that result unchanged.
func (l *localOracle) GetExternalTimestamp(ctx context.Context) (uint64, error) {
	ts, err := l.getExternalTimestamp(ctx)
	return ts, err
}

// ValidateSnapshotReadTS implements oracle.Oracle interface.
//
// It obtains a timestamp through l.GetTimestamp using the supplied opt and
// compares readTS against it. An error is returned when the timestamp cannot
// be fetched or when readTS is greater than the fetched timestamp; otherwise
// the result is nil.
func (l *localOracle) ValidateSnapshotReadTS(ctx context.Context, readTS uint64, opt *oracle.Option) error {
	latestTS, fetchErr := l.GetTimestamp(ctx, opt)
	switch {
	case fetchErr != nil:
		// Without a reference timestamp the check cannot be performed.
		return errors.Errorf("fail to validate read timestamp: %v", fetchErr)
	case readTS > latestTS:
		// readTS is ahead of the timestamp just obtained from this oracle.
		return errors.Errorf("cannot set read timestamp to a future time")
	default:
		return nil
	}
}
12 changes: 12 additions & 0 deletions oracle/oracles/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,18 @@ func (o *MockOracle) GetLowResolutionTimestampAsync(ctx context.Context, opt *or
return o.GetTimestampAsync(ctx, opt)
}

// ValidateSnapshotReadTS implements oracle.Oracle interface.
//
// The given readTS is accepted (nil is returned) only if it does not exceed
// the timestamp returned by o.GetTimestamp for the same opt. A failure to get
// that timestamp is reported as a validation error.
func (o *MockOracle) ValidateSnapshotReadTS(ctx context.Context, readTS uint64, opt *oracle.Option) error {
	upperBound, getErr := o.GetTimestamp(ctx, opt)
	if getErr != nil {
		return errors.Errorf("fail to validate read timestamp: %v", getErr)
	}
	// Any readTS at or below the freshly obtained timestamp passes the check.
	if readTS <= upperBound {
		return nil
	}
	return errors.Errorf("cannot set read timestamp to a future time")
}

// IsExpired implements oracle.Oracle interface.
func (o *MockOracle) IsExpired(lockTimestamp, TTL uint64, _ *oracle.Option) bool {
o.RLock()
Expand Down
Loading

0 comments on commit ccec7ef

Please sign in to comment.