Skip to content

Commit

Permalink
include all the original headers + add comments
Browse files Browse the repository at this point in the history
  • Loading branch information
paulineribeyre committed Dec 12, 2024
1 parent 7fd649c commit 64a4048
Showing 1 changed file with 24 additions and 22 deletions.
46 changes: 24 additions & 22 deletions gen3workflow/routes/s3.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from datetime import datetime, timezone
import hashlib
import urllib.parse

Expand Down Expand Up @@ -82,6 +81,7 @@ async def s3_endpoint(path: str, request: Request):
user_id = token_claims.get("sub")
user_bucket = aws_utils.get_safe_name_from_user_id(user_id)

# ensure the user is making a call to their own bucket
request_bucket = path.split("?")[0].split("/")[0]
if request_bucket != user_bucket:
err_msg = f"'{path}' not allowed. You can make calls to your personal bucket, '{user_bucket}'"
Expand All @@ -101,34 +101,39 @@ async def s3_endpoint(path: str, request: Request):
request_path = path.split(user_bucket)[1]
api_endpoint = "/".join(request_path.split("/")[1:])

# generate the request headers
# headers = dict(request.headers)
# headers.pop("authorization")
headers = {}
# TODO try again to include all the headers
# `x-amz-content-sha256` is sometimes set to "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" in
# the original request, but i was not able to get the signing working when copying it.
# if "Content-Type" in request.headers:
# headers["content-type"] = request.headers["Content-Type"]
headers["host"] = f"{user_bucket}.s3.amazonaws.com"
body = await request.body()
body_hash = hashlib.sha256(body).hexdigest()
timestamp = request.headers["x-amz-date"]
date = timestamp[:8] # the date portion (YYYYMMDD) of the timestamp
region = config["USER_BUCKETS_REGION"]
service = "s3"

# generate the request headers:
# - first, copy all the headers from the original request.
headers = dict(request.headers)
# - remove the `authorization` header: it contains a Gen3 token instead of an AWS IAM key.
# The new `authorization` header will be added _after_ generating the signature.
headers.pop("authorization")
# - overwrite the `x-amz-content-sha256` header value with the body hash. When this header is
# set to "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" in the original request (payload sent over
# multiple chunks), we replace it with the body hash (because I couldn't get the signing to
# work for "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" - I believe it requires using the signature
# from the previous chunk).
# NOTE: This may cause issues when large files are _actually_ uploaded over multiple chunks.
headers["x-amz-content-sha256"] = body_hash
# headers['x-amz-content-sha256'] = request.headers['x-amz-content-sha256']
# if 'content-length' in request.headers:
# headers['content-length'] = request.headers['content-length']
# if 'x-amz-decoded-content-length' in request.headers:
# headers['x-amz-decoded-content-length'] = request.headers['x-amz-decoded-content-length']
timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
headers["x-amz-date"] = timestamp
# - remove the `content-md5` header: when the `x-amz-content-sha256` header is overwritten (see
# above), the original `content-md5` value becomes incorrect. It's not required in V4 signing.
headers.pop("content-md5", None)
# - replace the `host` header, since we are re-signing and sending to a different host.
headers["host"] = f"{user_bucket}.s3.amazonaws.com"

# get AWS credentials from the configuration or the current assumed role session
if config["S3_ENDPOINTS_AWS_ACCESS_KEY_ID"]:
credentials = Credentials(
access_key=config["S3_ENDPOINTS_AWS_ACCESS_KEY_ID"],
secret_key=config["S3_ENDPOINTS_AWS_SECRET_ACCESS_KEY"],
)
else: # running in k8s: get credentials from the assumed role
else: # assume the service is running in k8s: get credentials from the assumed role
session = boto3.Session()
credentials = session.get_credentials()
assert credentials, "No AWS credentials found"
Expand Down Expand Up @@ -157,9 +162,6 @@ async def s3_endpoint(path: str, request: Request):
)

# construct the string to sign based on the canonical request
date = timestamp[:8] # the date portion (YYYYMMDD) of the timestamp
region = config["USER_BUCKETS_REGION"]
service = "s3"
string_to_sign = (
f"AWS4-HMAC-SHA256\n"
f"{timestamp}\n"
Expand Down

0 comments on commit 64a4048

Please sign in to comment.