mlcommons · aristizabal95 · Oct 29, 2024 · Oct 29, 2024 · Oct 29, 2024
@@ -10,6 +10,8 @@
 from medperf.commands.benchmark.associate import AssociateBenchmark
 from medperf.commands.result.create import BenchmarkExecution
 
+from medperf.utils import parse_institutions_file
+
 app = typer.Typer()
 
 
@@ -56,6 +58,9 @@ def submit(
     evaluator_mlcube: int = typer.Option(
         ..., "--evaluator-mlcube", "-e", help="Evaluator MLCube UID"
     ),
+    institutions_file: str = typer.Option(
+        None, "--institutions", "-i", help="CSV file containing the institution and email per expected participant."
+    ),
     skip_data_preparation_step: bool = typer.Option(
         False,
         "--skip-demo-data-preparation",
@@ -68,6 +73,7 @@ def submit(
     ),
 ):
     """Submits a new benchmark to the platform"""
+    institutions = parse_institutions_file(institutions_file)
     benchmark_info = {
         "name": name,
         "description": description,
@@ -77,6 +83,7 @@ def submit(
         "data_preparation_mlcube": data_preparation_mlcube,
         "reference_model_mlcube": reference_model_mlcube,
         "data_evaluator_mlcube": evaluator_mlcube,
+        "institutions": institutions,
         "state": "OPERATION" if operational else "DEVELOPMENT",
     }
     SubmitBenchmark.run(

@@ -28,6 +28,7 @@ class Benchmark(Entity, ApprovableSchema, DeployableSchema):
     data_evaluator_mlcube: int
     metadata: dict = {}
     user_metadata: dict = {}
+    institutions: dict = {}
     is_active: bool = True
 
     @staticmethod

@@ -2,6 +2,7 @@
 
 import re
 import os
+import csv
 import signal
 import yaml
 import random
@@ -21,7 +22,7 @@
 from pexpect.exceptions import TIMEOUT
 from git import Repo, GitCommandError
 import medperf.config as config
-from medperf.exceptions import ExecutionError, MedperfException
+from medperf.exceptions import ExecutionError, MedperfException, InvalidArgumentError
 
 
 def get_file_hash(path: str) -> str:
@@ -469,6 +470,27 @@ def check_for_updates() -> None:
         logging.debug(e)
 
 
+def parse_institutions_file(institutions_file):
+    institutions = {}
+    if institutions_file is None:
+        return institutions
+
+    with open(institutions_file, 'r') as f:
+        reader = csv.DictReader(f)
+        fieldnames = set(reader.fieldnames)
+        exp_fieldnames = {'institution', 'email'}
+        if len(exp_fieldnames - fieldnames):
+            raise InvalidArgumentError(
+                'Institutions file must contain an "institution" and "email" columns'
+            )
+        for row in reader:
+            email = row['email']
+            institution = row['institution']
+            institutions[email] = institution
+
+    return institutions
+
+
 class spawn_and_kill:
     def __init__(self, cmd, timeout=None, *args, **kwargs):
         self.cmd = cmd

@@ -0,0 +1,18 @@
+# Generated by Django 3.2.20 on 2024-10-29 19:26
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('benchmark', '0002_alter_benchmark_demo_dataset_tarball_url'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='benchmark',
+            name='institutions',
+            field=models.JSONField(blank=True, default=dict, null=True),
+        ),
+    ]
@@ -39,6 +39,7 @@ class Benchmark(models.Model):
         related_name="data_evaluator_mlcube",
     )
     metadata = models.JSONField(default=dict, blank=True, null=True)
+    institutions = models.JSONField(default=dict, blank=True, null=True)
     state = models.CharField(
         choices=BENCHMARK_STATE, max_length=100, default="DEVELOPMENT"
     )

@@ -9,6 +9,16 @@ class Meta:
         fields = "__all__"
         read_only_fields = ["owner", "approved_at", "approval_status"]
 
+    def to_representation(self, instance):
+        representation = super().to_representation(instance)
+        request = self.context.get('request')
+
+        # Remove institutions field if user is not the owner
+        if request and request.user != instance.owner:
+            representation.pop("institutions", None)
+
+        return representation
+
     def validate(self, data):
         owner = self.context["request"].user
         pending_benchmarks = Benchmark.objects.filter(

@@ -132,7 +132,7 @@ def get(self, request, pk, format=None):
         Retrieve a benchmark instance.
         """
         benchmark = self.get_object(pk)
-        serializer = BenchmarkSerializer(benchmark)
+        serializer = BenchmarkSerializer(benchmark, context={'request': request})
         return Response(serializer.data)
 
     def put(self, request, pk, format=None):

@@ -1,5 +1,6 @@
 from rest_framework import serializers
 from django.utils import timezone
+from django.contrib.auth import get_user_model
 from benchmark.models import Benchmark
 from dataset.models import Dataset
 
@@ -75,10 +76,15 @@ def create(self, validated_data):
         if approval_status != "PENDING":
             validated_data["approved_at"] = timezone.now()
         else:
-            if (
-                validated_data["dataset"].owner.id
-                == validated_data["benchmark"].owner.id
-            ):
+            dset_owner = validated_data["dataset"].owner.id
+            bmk_owner = validated_data["benchmark"].owner.id
+            expected_emails = validated_data["benchmark"].institutions.keys()
+            User = get_user_model()
+            dset_user = User.objects.get(id=dset_owner)
+
+            is_same_owner = dset_owner == bmk_owner
+            is_expected_participant = dset_user.email in expected_emails
+            if is_same_owner or is_expected_participant:
                 validated_data["approval_status"] = "APPROVED"
                 validated_data["approved_at"] = timezone.now()
         return BenchmarkDataset.objects.create(**validated_data)