Skip to content

Commit

Permalink
add grpc health methods to runtime.
Browse files Browse the repository at this point in the history
This adds the standard grpc_health_v1 service to the iamruntime package.
This provides support for checking the runtime's health, ensuring the runtime is healthy before use.

BEHAVIOR CHANGE:

With the introduction of the health check service, NewClient has been updated to block, waiting for the service to report healthy.
NewClient will block for up to 10 seconds before reporting that the runtime is not healthy.

This behavior change ensures clients are making requests against a healthy runtime.
To disable this new behavior, replace calls to `NewClient` with `NewClientWithoutWait`.
Alternatively setting the environment variable `IAMRUNTIME_NEW_CLIENT_WAIT_TIMEOUT` to 0 will disable this new behavior.

Signed-off-by: Mike Mason <[email protected]>
  • Loading branch information
mikemrm committed Jan 7, 2025
1 parent 1d1ecec commit 5528b51
Show file tree
Hide file tree
Showing 5 changed files with 531 additions and 4 deletions.
3 changes: 3 additions & 0 deletions iamruntime/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,7 @@ var (

// ErrAccessTokenInvalid is the error returned when an access token returned is not valid.
ErrAccessTokenInvalid = fmt.Errorf("%w: invalid access token", IdentityError)

// ErrNotReady is returned when an individual health check is not ready.
ErrNotReady = fmt.Errorf("%w: runtime not ready", Error)
)
91 changes: 91 additions & 0 deletions iamruntime/health.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package iamruntime

import (
"context"
"fmt"
"time"

"google.golang.org/grpc"
"google.golang.org/grpc/codes"
health "google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/status"
)

// HealthyRuntime extends [Runtime] with the gRPC health service client calls
// and with helpers that wait for the runtime to report a SERVING status.
type HealthyRuntime interface {
Runtime

// HealthCheck calls the health service Check call.
// It forwards the request and call options and returns the service's response.
HealthCheck(ctx context.Context, in *health.HealthCheckRequest, opts ...grpc.CallOption) (*health.HealthCheckResponse, error)

// HealthWatch calls the health service Watch call.
// It returns the server stream of health responses opened by the Watch call.
HealthWatch(ctx context.Context, in *health.HealthCheckRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[health.HealthCheckResponse], error)

// WaitHealthy calls the health service check waiting for a SERVING status.
// If the backend returns an unimplemented status code, no error is returned.
// Waiting ends when ctx is done; the returned error then includes the context error.
WaitHealthy(ctx context.Context, in *health.HealthCheckRequest, opts ...grpc.CallOption) error

// WaitHealthyWithTimeout calls WaitHealthy with a timeout.
// An error is returned if a healthy response is not received within the provided timeout.
// NOTE(review): this comment previously named ErrHealthCheckTimedout, which is not
// declared in the visible source. The implementation in this file returns an error
// wrapping ErrNotReady together with the context error — confirm the intended sentinel.
WaitHealthyWithTimeout(ctx context.Context, timeout time.Duration, in *health.HealthCheckRequest, opts ...grpc.CallOption) error
}

// HealthCheck forwards a single Check request to the runtime's gRPC health
// client and returns its response and error unchanged.
func (r *runtime) HealthCheck(ctx context.Context, in *health.HealthCheckRequest, opts ...grpc.CallOption) (*health.HealthCheckResponse, error) {
	client := r.HealthClient

	return client.Check(ctx, in, opts...)
}

// HealthWatch forwards a Watch request to the runtime's gRPC health client
// and returns the resulting response stream and error unchanged.
func (r *runtime) HealthWatch(ctx context.Context, in *health.HealthCheckRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[health.HealthCheckResponse], error) {
	client := r.HealthClient

	return client.Watch(ctx, in, opts...)
}

// healthy performs a single health check against the runtime.
//
// It returns nil when the runtime reports a SERVING status, or when the
// backend answers with an Unimplemented status code (no health service is
// registered, so the check is skipped rather than treated as a failure).
// Any other outcome is reported as an error wrapping [ErrNotReady]: a failed
// call additionally wraps the underlying error, and a non-SERVING response
// includes the reported status.
func (r *runtime) healthy(ctx context.Context, in *health.HealthCheckRequest, opts ...grpc.CallOption) error {
	resp, err := r.HealthCheck(ctx, in, opts...)
	if err != nil {
		if status.Code(err) == codes.Unimplemented {
			return nil
		}

		return fmt.Errorf("%w: health check error: %w", ErrNotReady, err)
	}

	// GetStatus is the generated nil-safe accessor: a nil response is reported
	// as not ready (UNKNOWN) instead of causing a nil pointer dereference.
	if st := resp.GetStatus(); st != health.HealthCheckResponse_SERVING {
		return fmt.Errorf("%w: %s", ErrNotReady, st)
	}

	return nil
}

// WaitHealthy calls the health service check waiting for a SERVING status.
// If the backend returns an unimplemented status code, no error is returned.
//
// The check runs once immediately and is then retried every r.healthyInterval
// until it succeeds or ctx is done. When ctx is done first, the returned error
// wraps both the most recent health check error and ctx.Err(). With a context
// that never ends, this call blocks until the runtime becomes healthy.
func (r *runtime) WaitHealthy(ctx context.Context, in *health.HealthCheckRequest, opts ...grpc.CallOption) error {
	// Fast path: skip allocating a ticker when the runtime is already healthy.
	err := r.healthy(ctx, in, opts...)
	if err == nil {
		return nil
	}

	// time.NewTicker panics on a non-positive duration. This fallback is only
	// a safety net for a runtime whose interval was never configured.
	interval := r.healthyInterval
	if interval <= 0 {
		interval = time.Second
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			// Keep the last health check error so callers can see why the
			// runtime was not ready, alongside the reason waiting stopped.
			return fmt.Errorf("%w: %w", err, ctx.Err())
		case <-ticker.C:
			if err = r.healthy(ctx, in, opts...); err == nil {
				return nil
			}
		}
	}
}

// WaitHealthyWithTimeout runs WaitHealthy under a context derived from ctx
// that expires after timeout. The derived context is released on return.
func (r *runtime) WaitHealthyWithTimeout(ctx context.Context, timeout time.Duration, in *health.HealthCheckRequest, opts ...grpc.CallOption) error {
	boundedCtx, release := context.WithTimeout(ctx, timeout)
	defer release()

	waitErr := r.WaitHealthy(boundedCtx, in, opts...)

	return waitErr
}
Loading

0 comments on commit 5528b51

Please sign in to comment.