diff --git a/dandiapi/api/models/upload.py b/dandiapi/api/models/upload.py index 5479e593e..a8f687710 100644 --- a/dandiapi/api/models/upload.py +++ b/dandiapi/api/models/upload.py @@ -51,7 +51,11 @@ def initialize_multipart_upload(cls, etag, size, dandiset: Dandiset): upload_id = uuid4() object_key = cls.object_key(upload_id, dandiset=dandiset) multipart_initialization = cls.blob.field.storage.multipart_manager.initialize_upload( - object_key, size + object_key, + size, + # The upload HTTP API does not pass the file name or content type, and it would be a + # breaking change to start requiring this. + 'application/octet-stream', ) upload = cls( diff --git a/dandiapi/api/storage.py b/dandiapi/api/storage.py index 906d6feb7..def6eb7d0 100644 --- a/dandiapi/api/storage.py +++ b/dandiapi/api/storage.py @@ -12,11 +12,11 @@ from dandischema.digests.dandietag import PartGenerator from django.conf import settings from django.core.files.storage import Storage, get_storage_class -from minio.error import NoSuchKey +from minio import S3Error from minio_storage.policy import Policy from minio_storage.storage import MinioStorage, create_minio_client_from_settings -from s3_file_field._multipart_boto3 import Boto3MultipartManager from s3_file_field._multipart_minio import MinioMultipartManager +from s3_file_field._multipart_s3 import S3MultipartManager from storages.backends.s3 import S3Storage @@ -44,36 +44,30 @@ def _iter_part_sizes(file_size: int) -> Iterator[tuple[int, int]]: _url_expiration = timedelta(days=7) -class DandiBoto3MultipartManager(DandiMultipartMixin, Boto3MultipartManager): +class DandiS3MultipartManager(DandiMultipartMixin, S3MultipartManager): """A custom multipart manager for passing ACL information.""" - def _create_upload_id(self, object_key: str, content_type: str | None = None) -> str: - kwargs = { - 'Bucket': self._bucket_name, - 'Key': object_key, - 'ACL': 'bucket-owner-full-control', - } - - if content_type is not None: - kwargs['Content-Type'] = content_type - - resp = self._client.create_multipart_upload(**kwargs) + def _create_upload_id(self, object_key: str, content_type: str) -> str: + resp = self._client.create_multipart_upload( + Bucket=self._bucket_name, + Key=object_key, + ContentType=content_type, + ACL='bucket-owner-full-control', + ) return resp['UploadId'] class DandiMinioMultipartManager(DandiMultipartMixin, MinioMultipartManager): """A custom multipart manager for passing ACL information.""" - def _create_upload_id(self, object_key: str, content_type: str | None = None) -> str: - metadata = {'x-amz-acl': 'bucket-owner-full-control'} - - if content_type is not None: - metadata['Content-Type'] = content_type - - return self._client._new_multipart_upload( + def _create_upload_id(self, object_key: str, content_type: str) -> str: + return self._client._create_multipart_upload( bucket_name=self._bucket_name, object_name=object_key, - metadata=metadata, + headers={ + 'Content-Type': content_type, + 'x-amz-acl': 'bucket-owner-full-control', + }, ) @@ -124,7 +118,7 @@ def __init__(self, **settings): class VerbatimNameS3Storage(VerbatimNameStorageMixin, TimeoutS3Storage): @property def multipart_manager(self): - return DandiBoto3MultipartManager(self) + return DandiS3MultipartManager(self) def etag_from_blob_name(self, blob_name) -> str | None: client = self.connection.meta.client @@ -197,8 +191,11 @@ def multipart_manager(self): def etag_from_blob_name(self, blob_name) -> str | None: try: response = self.client.stat_object(self.bucket_name, blob_name) - except NoSuchKey: - return None + except S3Error as e: + if e.code == 'NoSuchKey': + return None + else: + raise else: return response.etag @@ -215,7 +212,7 @@ def generate_presigned_put_object_url(self, blob_name: str, _: str) -> str: ) def generate_presigned_head_object_url(self, key: str) -> str: - return self.base_url_client.presigned_url('HEAD', self.bucket_name, key) + return self.base_url_client.get_presigned_url('HEAD', self.bucket_name, key) def generate_presigned_download_url(self, key: str, path: str) -> str: return self.base_url_client.presigned_get_object( @@ -301,11 +298,12 @@ def get_boto_client(storage: Storage | None = None): """Return an s3 client from the current storage.""" storage = storage if storage else get_storage() if isinstance(storage, MinioStorage): + storage_params = get_storage_params(storage) return boto3.client( 's3', - endpoint_url=storage.client._endpoint_url, - aws_access_key_id=storage.client._access_key, - aws_secret_access_key=storage.client._secret_key, + endpoint_url=storage_params['endpoint_url'], + aws_access_key_id=storage_params['access_key'], + aws_secret_access_key=storage_params['secret_key'], region_name='us-east-1', ) @@ -315,9 +313,9 @@ def get_boto_client(storage: Storage | None = None): def get_storage_params(storage: Storage): if isinstance(storage, MinioStorage): return { - 'endpoint_url': storage.client._endpoint_url, - 'access_key': storage.client._access_key, - 'secret_key': storage.client._secret_key, + 'endpoint_url': storage.client._base_url._url.geturl(), + 'access_key': storage.client._provider.retrieve().access_key, + 'secret_key': storage.client._provider.retrieve().secret_key, } return { diff --git a/dandiapi/conftest.py b/dandiapi/conftest.py index b6dfe2978..24bfe096f 100644 --- a/dandiapi/conftest.py +++ b/dandiapi/conftest.py @@ -84,16 +84,16 @@ def authenticated_api_client(user) -> APIClient: # storage fixtures are copied from django-s3-file-field test fixtures -def base_s3boto3_storage_factory(bucket_name: str) -> 'S3Storage': +def base_s3_storage_factory(bucket_name: str) -> 'S3Storage': return create_s3_storage(bucket_name) -def s3boto3_storage_factory(): - return base_s3boto3_storage_factory(settings.DANDI_DANDISETS_BUCKET_NAME) +def s3_storage_factory(): + return base_s3_storage_factory(settings.DANDI_DANDISETS_BUCKET_NAME) -def embargoed_s3boto3_storage_factory(): - return base_s3boto3_storage_factory(settings.DANDI_DANDISETS_EMBARGO_BUCKET_NAME) +def embargoed_s3_storage_factory(): + return base_s3_storage_factory(settings.DANDI_DANDISETS_EMBARGO_BUCKET_NAME) def base_minio_storage_factory(bucket_name: str) -> MinioStorage: @@ -109,13 +109,13 @@ def embargoed_minio_storage_factory() -> MinioStorage: @pytest.fixture -def s3boto3_storage() -> 'S3Storage': - return s3boto3_storage_factory() +def s3_storage() -> 'S3Storage': + return s3_storage_factory() @pytest.fixture -def embargoed_s3boto3_storage() -> 'S3Storage': - return s3boto3_storage_factory() +def embargoed_s3_storage() -> 'S3Storage': + return s3_storage_factory() @pytest.fixture @@ -128,10 +128,10 @@ def embargoed_minio_storage() -> MinioStorage: return minio_storage_factory() -@pytest.fixture(params=[s3boto3_storage_factory, minio_storage_factory], ids=['s3boto3', 'minio']) +@pytest.fixture(params=[s3_storage_factory, minio_storage_factory], ids=['s3', 'minio']) def storage(request, settings) -> Storage: storage_factory = request.param - if storage_factory == s3boto3_storage_factory: + if storage_factory == s3_storage_factory: settings.DEFAULT_FILE_STORAGE = 'storages.backends.s3.S3Storage' settings.AWS_S3_ACCESS_KEY_ID = settings.MINIO_STORAGE_ACCESS_KEY settings.AWS_S3_SECRET_ACCESS_KEY = settings.MINIO_STORAGE_SECRET_KEY @@ -153,8 +153,8 @@ def storage(request, settings) -> Storage: @pytest.fixture( - params=[embargoed_s3boto3_storage_factory, embargoed_minio_storage_factory], - ids=['s3boto3', 'minio'], + params=[embargoed_s3_storage_factory, embargoed_minio_storage_factory], + ids=['s3', 'minio'], ) def embargoed_storage(request) -> Storage: storage_factory = request.param @@ -163,10 +163,10 @@ def embargoed_storage(request) -> Storage: @pytest.fixture( params=[ - (s3boto3_storage_factory, embargoed_s3boto3_storage_factory), + (s3_storage_factory, embargoed_s3_storage_factory), (minio_storage_factory, embargoed_minio_storage_factory), ], - ids=['s3boto3', 'minio'], + ids=['s3', 'minio'], ) def storage_tuple(request) -> tuple[Storage, Storage]: storage_factory, embargoed_storage_factory = request.param diff --git a/setup.py b/setup.py index a0f136df0..947e513e4 100644 --- a/setup.py +++ b/setup.py @@ -62,17 +62,12 @@ 'zarr-checksum>=0.2.8', # Production-only 'django-composed-configuration[prod]>=0.23.0', - # pin directly to a version since we're extending the private multipart interface - 'django-s3-file-field[boto3]==0.3.2', + 'django-s3-file-field[s3]>=1.0.0', 'django-storages[s3]>=1.14.2', 'gunicorn', # Development-only, but required - # TODO: starting with v0.5.0, django-minio-storage requires v7 - # of the minio-py library. minio-py 7 introduces several - # breaking changes to the API, and django-s3-file-field is also - # incompatible with it since it has minio<7 as a dependency. - # Until these issues are resolved, we pin it to an older version. - 'django-minio-storage<0.5.0', + 'django-minio-storage', + 'minio>7', 'tqdm', ], extras_require={