Add support for dynamic num citation/fulltext reviewers (#110)

* feat: Make num reviewers dynamic in db
* feat: Add db event to handle dynamic num reviewers
* feat: Add db migration for dynamic num reviewers
* feat: Add v2 review schemas for api
* feat: Add api logic for changing num reviewers
* tests: Add tests for dynamic reviewers
* fix: Make num reviewers v1 hack more seamless
* feat: Support num reviewers in bulk screening post
* feat: Make reviews look like v1 for api endpoints
* feat: Make reviews look like v1 for api endpoints
* feat: Add to-do filter studies by num revs
* docs: Add to-do re: num reviewers filtering
datakind · May 11, 2024 · 3db3604 · 3db3604
1 parent eab7872
commit 3db3604
Show file tree

Hide file tree

Showing 10 changed files with 310 additions and 57 deletions.
diff --git a/TODO.md b/TODO.md
@@ -7,4 +7,5 @@
 - [ ] Improve and extend NLP functionality, just across the board
 - [ ] Enable https everywhere (via [let's encrypt](https://letsencrypt.org/)?)
 - [ ] Add a "deduplicate" button to front-end interface and only run dedupe jobs upon request
-- [ ] Allow for requiring multiple screeners on a configurable percentage of studies (for "rapid review" style projects)
+- [x] Allow for requiring multiple screeners on a configurable percentage of studies (for "rapid review" style projects)
+- [ ] Add filtering of studies by number of citation/fulltext reviewers
diff --git a/colandr/apis/resources/citation_screenings.py b/colandr/apis/resources/citation_screenings.py
@@ -1,3 +1,5 @@
+import random
+
 import flask_jwt_extended as jwtext
 import sqlalchemy as sa
 from flask import current_app
@@ -453,28 +455,25 @@ def post(self, args, review_id, user_id):
             "inserted %s citation screenings", len(screenings_to_insert)
         )
         # bulk update citation statuses
-        num_screeners = review.num_citation_screening_reviewers
-        study_ids = sorted(s["study_id"] for s in screenings_to_insert)
-        # results = db.session.query(models.Screening)\
-        #     .filter(models.Screening.study_id.in_(study_ids))
-        # studies_to_update = [
-        #     {'id': cid, 'citation_status': assign_status(list(scrns), num_screeners)}
-        #     for cid, scrns in itertools.groupby(results, attrgetter('citation_id'))
-        #     ]
-        with db.engine.connect() as connection:
-            query = """
-                SELECT study_id, ARRAY_AGG(status)
-                FROM screenings
-                WHERE study_id IN ({study_ids})
-                GROUP BY study_id
-                ORDER BY study_id
-                """.format(study_ids=",".join(str(cid) for cid in study_ids))
-            results = connection.execute(sa.text(query))
+        study_ids: list[int] = sorted(s["study_id"] for s in screenings_to_insert)
+        study_num_citation_reviewers: list[int] = random.choices(
+            [num_pct["num"] for num_pct in review.citation_reviewer_num_pcts],
+            weights=[num_pct["pct"] for num_pct in review.citation_reviewer_num_pcts],
+            k=len(study_ids),
+        )
+        results = db.session.execute(
+            sa.select(
+                models.Screening.study_id, sa.func.array_agg(models.Screening.status)
+            )
+            .where(models.Screening.stage == "citation")
+            .where(models.Screening.study_id == sa.any_(study_ids))
+            .group_by(models.Screening.study_id)
+            .order_by(models.Screening.study_id)
+        )
         studies_to_update = [
-            {"id": row[0], "citation_status": assign_status(row[1], num_screeners)}
-            for row in results
+            {"id": row[0], "citation_status": assign_status(row[1], num_reviewers)}
+            for row, num_reviewers in zip(results, study_num_citation_reviewers)
         ]
-
         db.session.execute(sa.update(models.Study), studies_to_update)
         db.session.commit()
         current_app.logger.info(
@@ -484,7 +483,6 @@ def post(self, args, review_id, user_id):
         status_counts_stmt = (
             sa.select(models.Study.citation_status, db.func.count(1))
             .filter_by(review_id=review_id, dedupe_status="not_duplicate")
-            # .filter(models.Study.citation_status.in_(["included", "excluded"]))
             .filter(models.Study.citation_status == sa.any_(["included", "excluded"]))
             .group_by(models.Study.citation_status)
         )

diff --git a/colandr/apis/resources/fulltext_screenings.py b/colandr/apis/resources/fulltext_screenings.py
@@ -1,3 +1,5 @@
+import random
+
 import flask_jwt_extended as jwtext
 import sqlalchemy as sa
 from flask import current_app
@@ -452,28 +454,25 @@ def post(self, args, review_id, user_id):
             "inserted %s fulltext screenings", len(screenings_to_insert)
         )
         # bulk update fulltext statuses
-        num_screeners = review.num_fulltext_screening_reviewers
-        study_ids = sorted(s["study_id"] for s in screenings_to_insert)
-        # results = db.session.query(models.Screening)\
-        #     .filter(models.Screening.study_id.in_(study_ids))
-        # studies_to_update = [
-        #     {'id': cid, 'fulltext_status': assign_status(list(scrns), num_screeners)}
-        #     for cid, scrns in itertools.groupby(results, attrgetter('fulltext_id'))
-        #     ]
-        with db.engine.connect() as connection:
-            query = """
-                SELECT study_id, ARRAY_AGG(status)
-                FROM screenings
-                WHERE study_id IN ({study_ids})
-                GROUP BY study_id
-                ORDER BY study_id
-                """.format(study_ids=",".join(str(cid) for cid in study_ids))
-            results = connection.execute(sa.text(query))
+        study_ids: list[int] = sorted(s["study_id"] for s in screenings_to_insert)
+        study_num_fulltext_reviewers: list[int] = random.choices(
+            [num_pct["num"] for num_pct in review.fulltext_reviewer_num_pcts],
+            weights=[num_pct["pct"] for num_pct in review.fulltext_reviewer_num_pcts],
+            k=len(study_ids),
+        )
+        results = db.session.execute(
+            sa.select(
+                models.Screening.study_id, sa.func.array_agg(models.Screening.status)
+            )
+            .where(models.Screening.stage == "fulltext")
+            .where(models.Screening.study_id == sa.any_(study_ids))
+            .group_by(models.Screening.study_id)
+            .order_by(models.Screening.study_id)
+        )
         studies_to_update = [
-            {"id": row[0], "fulltext_status": assign_status(row[1], num_screeners)}
-            for row in results
+            {"id": row[0], "fulltext_status": assign_status(row[1], num_reviewers)}
+            for row, num_reviewers in zip(results, study_num_fulltext_reviewers)
         ]
-
         db.session.execute(sa.update(models.Study), studies_to_update)
         db.session.commit()
         current_app.logger.info(
@@ -500,7 +499,6 @@ def post(self, args, review_id, user_id):
         status_counts_stmt = (
             sa.select(models.Study.fulltext_status, db.func.count(1))
             .filter_by(review_id=review_id, dedupe_status="not_duplicate")
-            # .filter(models.Study.fulltext_status.in_(["included", "excluded"]))
             .filter(models.Study.fulltext_status == sa.any_(["included", "excluded"]))
             .group_by(models.Study.fulltext_status)
         )

diff --git a/colandr/apis/resources/reviews.py b/colandr/apis/resources/reviews.py
@@ -15,7 +15,7 @@
 from ...extensions import db
 from ...lib import constants
 from ..errors import forbidden_error, not_found_error
-from ..schemas import ReviewSchema
+from ..schemas import ReviewSchema, ReviewV2Schema
 from ..swagger import review_model
 
 
@@ -69,7 +69,8 @@ def get(self, id, fields):
         if fields and "id" not in fields:
             fields.append("id")
         current_app.logger.debug("got %s", review)
-        return ReviewSchema(only=fields).dump(review)
+        # return ReviewSchema(only=fields).dump(review)
+        return _convert_review_v2_into_v1(review, fields)
 
     @ns.doc(
         responses={
@@ -138,11 +139,17 @@ def put(self, args, id):
         for key, value in args.items():
             if key is missing:
                 continue
+            # HACK: allow setting old attributes, but convert them into new equivalents
+            elif key == "num_citation_screening_reviewers":
+                review.citation_reviewer_num_pcts = [{"num": value, "pct": 100}]
+            elif key == "num_fulltext_screening_reviewers":
+                review.fulltext_reviewer_num_pcts = [{"num": value, "pct": 100}]
             else:
                 setattr(review, key, value)
         db.session.commit()
         current_app.logger.info("modified %s", review)
-        return ReviewSchema().dump(review)
+        # return ReviewSchema().dump(review)
+        return _convert_review_v2_into_v1(review)
 
 
 @ns.route("")
@@ -200,7 +207,8 @@ def get(self, fields, _review_ids):
             reviews = current_user.reviews
         if fields and "id" not in fields:
             fields.append("id")
-        return ReviewSchema(only=fields, many=True).dump(reviews)
+        # return ReviewSchema(only=fields, many=True).dump(reviews)
+        return [_convert_review_v2_into_v1(review) for review in reviews]
 
     @ns.doc(
         expect=(review_model, "review data to be created"),
@@ -230,7 +238,8 @@ def post(self, args):
                 os.makedirs(dirname, exist_ok=True)
             except OSError:
                 pass  # TODO: fix this / the entire system for saving files to disk
-        return ReviewSchema().dump(review)
+        # return ReviewSchema().dump(review)
+        return _convert_review_v2_into_v1(review)
 
 
 def _is_allowed(
@@ -251,3 +260,17 @@ def _is_allowed(
         )
 
     return is_allowed
+
+
+def _convert_review_v2_into_v1(review, fields=None) -> dict:
+    """
+    Dump ``review`` with the v2 schema, then reshape the record to look like v1.
+
+    HACK: v1 clients only know a single reviewer count per screening stage, so each
+    non-empty ``*_reviewer_num_pcts`` list is collapsed into the ``num`` of its
+    first entry and exposed under the corresponding v1 key.
+
+    Args:
+        review: Review object to serialize via ``ReviewV2Schema``.
+        fields: Optional names of the fields to include in the record. Both the v1
+            names (``num_citation_screening_reviewers``,
+            ``num_fulltext_screening_reviewers``) and the v2 names are accepted.
+
+    Returns:
+        The dumped review record, using v1 field names for the reviewer counts.
+    """
+    v1_to_v2_keys = {
+        "num_citation_screening_reviewers": "citation_reviewer_num_pcts",
+        "num_fulltext_screening_reviewers": "fulltext_reviewer_num_pcts",
+    }
+    # callers request v1 field names, but the v2 schema only declares the v2 ones
+    # and rejects unknown names given via ``only`` -- so translate them up front
+    only = fields
+    if fields:
+        only = [v1_to_v2_keys.get(name, name) for name in fields]
+    record = ReviewV2Schema(only=only).dump(review)
+    assert isinstance(record, dict)  # narrows the type; dump() is called w/o many=True
+    for v1_key, v2_key in v1_to_v2_keys.items():
+        # empty / absent lists are left as-is, same as before
+        if record.get(v2_key):
+            record[v1_key] = record.pop(v2_key)[0]["num"]
+    return record
diff --git a/colandr/apis/resources/studies.py b/colandr/apis/resources/studies.py
@@ -158,6 +158,10 @@ def put(self, args, id):
         return StudySchema().dump(study)
 
 
+# TODO: add optional filter for num citation/fulltext reviewers
+# and maybe, finally, port these queries over to sqlalchemy orm
+
+
 @ns.route("")
 @ns.doc(
     summary="get collections of matching studies",

diff --git a/colandr/apis/schemas.py b/colandr/apis/schemas.py
@@ -28,15 +28,31 @@ class DataSourceSchema(Schema):
     source_type_and_name = fields.Str(dump_only=True)
 
 
+class ReviewerNumPct(Schema):
+    """One ``num`` / ``pct`` entry of a review's per-stage reviewer settings."""
+
+    # number of reviewers; required, validated to lie between 1 and 3
+    num = fields.Int(required=True, validate=Range(min=1, max=3))
+    # percentage paired with ``num``; required, validated to lie between 0 and 100
+    # NOTE(review): presumably the share of studies needing ``num`` reviewers -- the
+    # bulk screening endpoints use it as a sampling weight; confirm intended meaning
+    pct = fields.Int(required=True, validate=Range(min=0, max=100))
+
+
 class ReviewSchema(Schema):
     id = fields.Int(dump_only=True)
     created_at = fields.DateTime(dump_only=True, format="iso")
     updated_at = fields.DateTime(dump_only=True, format="iso")
     name = fields.Str(required=True, validate=Length(max=500))
     description = fields.Str(load_default=None)
     status = fields.Str(validate=OneOf(constants.REVIEW_STATUSES))
-    num_citation_screening_reviewers = fields.Int(validate=Range(min=1, max=2))
-    num_fulltext_screening_reviewers = fields.Int(validate=Range(min=1, max=2))
+    num_citation_screening_reviewers = fields.Int(validate=Range(min=1, max=3))
+    num_fulltext_screening_reviewers = fields.Int(validate=Range(min=1, max=3))
+
+
+class ReviewV2Schema(Schema):
+    """
+    Review schema whose reviewer settings are lists of ``ReviewerNumPct`` entries,
+    in place of the single per-stage reviewer counts declared on ``ReviewSchema``.
+    """
+
+    # identifier and timestamps are dump-only, i.e. never loaded from input
+    id = fields.Int(dump_only=True)
+    created_at = fields.DateTime(dump_only=True, format="iso")
+    updated_at = fields.DateTime(dump_only=True, format="iso")
+    name = fields.Str(required=True, validate=Length(max=500))
+    description = fields.Str(load_default=None)
+    status = fields.Str(validate=OneOf(constants.REVIEW_STATUSES))
+    # one list of num/pct entries per screening stage
+    citation_reviewer_num_pcts = fields.List(fields.Nested(ReviewerNumPct))
+    fulltext_reviewer_num_pcts = fields.List(fields.Nested(ReviewerNumPct))
 
 
 class ReviewPlanPICO(Schema):

diff --git a/colandr/apis/swagger.py b/colandr/apis/swagger.py
@@ -34,11 +34,34 @@
     },
 )
 
+# swagger model for a single num/pct reviewer entry;
+# the bounds match those declared on the ReviewerNumPct schema
+reviewer_num_pct_model = ns.model(
+    "ReviewerNumPct",
+    {"num": fields.Integer(min=1, max=3), "pct": fields.Integer(min=0, max=100)},
+)
+
 review_model = ns.model(
     "Review",
     {
         "name": fields.String(required=True, max_length=500),
         "description": fields.String,
+        "status": fields.String,
+        "num_citation_screening_reviewers": fields.Integer(min=1, max=3),
+        "num_fulltext_screening_reviewers": fields.Integer(min=1, max=3),
+    },
+)
+
+review_v2_model = ns.model(
+    "ReviewV2",
+    {
+        "name": fields.String(required=True, max_length=500),
+        "description": fields.String,
+        "status": fields.String,
+        "citation_reviewer_num_pcts": fields.List(
+            fields.Nested(reviewer_num_pct_model)
+        ),
+        "fulltext_reviewer_num_pcts": fields.List(
+            fields.Nested(reviewer_num_pct_model)
+        ),
     },
 )
 
@@ -113,7 +136,7 @@
         "data_extraction_form": fields.List(
             fields.Nested(data_extraction_form_item_model)
         ),
-    }
+    },
     # 'suggested_keyterms': fields.Nested(review_plan_suggested_keyterms)}  # not user-set
 )