Skip to content

Commit

Permalink
Use QuerySet.iterator to reduce memory usage
Browse files (browse the repository at this point in the history)
  • Loading branch information
jjnesbitt committed Oct 15, 2024
1 parent ed4c2d0 commit 9a6b4f4
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions dandiapi/api/management/commands/extract_metadata.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
from pathlib import Path
from typing import TYPE_CHECKING

from dandi.dandiapi import RemoteReadableAsset
from dandi.metadata.nwb import nwb2asset
Expand All @@ -15,6 +16,9 @@
from dandiapi.api.models import Asset, Dandiset, Version
from dandiapi.api.services.asset import change_asset

if TYPE_CHECKING:
from django.db.models import QuerySet

logger = logging.getLogger(__name__)


Expand All @@ -37,8 +41,12 @@ def get_asset_digest(asset: Asset) -> Digest:


def extract_asset_metadata(asset: Asset, draft_version: Version):
# Test
s3_url = asset.s3_url.replace(
'http://localhost:9000/dandi-dandisets', 'https://dandiarchive.s3.amazonaws.com'
)
readable_asset = RemoteReadableAsset(
asset.s3_url, size=asset.size, mtime=asset.modified, name=Path(asset.path).name
s3_url, size=asset.size, mtime=asset.modified, name=Path(asset.path).name
)

if not asset.path.lower().endswith('.nwb'):
Expand Down Expand Up @@ -77,7 +85,7 @@ def extract_asset_metadata(asset: Asset, draft_version: Version):

def extract_dandiset_assets(dandiset: Dandiset):
# Only update NWB assets which are out of date and do not belong to a published version
assets = dandiset.draft_version.assets.filter(
assets: QuerySet[Asset] = dandiset.draft_version.assets.filter(
published=False,
path__iendswith='.nwb',
metadata__schemaVersion__lt=get_schema_version(),
Expand All @@ -86,7 +94,7 @@ def extract_dandiset_assets(dandiset: Dandiset):
logger.info('No old draft NWB assets found in dandiset %s. Skipping...', dandiset)
return

for asset in tqdm(assets):
for asset in tqdm(assets.iterator(), total=assets.count()):
extract_asset_metadata(asset=asset, draft_version=dandiset.draft_version)


Expand All @@ -101,7 +109,7 @@ def asset(asset_id: str):
)

# Re-extract for every draft version
for version in draft_versions:
for version in draft_versions.iterator():
extract_asset_metadata(asset=asset, draft_version=version)


Expand All @@ -116,6 +124,6 @@ def dandiset(dandiset_id: str):

@group.command(name='all', help='Re-extracts the metadata of all assets in all draft versions')
def all_dandisets():
for dandiset in Dandiset.objects.all():
for dandiset in Dandiset.objects.iterator():
logger.info('DANDISET: %s', dandiset.identifier)
extract_dandiset_assets(dandiset)

0 comments on commit 9a6b4f4

Please sign in to comment.