Skip to content

Commit

Permalink
Add support for dynamic num citation/fulltext reviewers (#110)
Browse files Browse the repository at this point in the history
* feat: Make num reviewers dynamic in db

* feat: Add db event to handle dynamic num reviewers

* feat: Add db migration for dynamic num reviewers

* feat: Add v2 review schemas for api

* feat: Add api logic for changing num reviewers

* tests: Add tests for dynamic reviewers

* fix: Make num reviewers v1 hack more seamless

* feat: Support num reviewers in bulk screening post

* feat: Make reviews look like v1 for api endpoints

* feat: Make reviews look like v1 for api endpoints

* feat: Add to-do filter studies by num revs

* docs: Add to-do re: num reviewers filtering
  • Loading branch information
bdewilde committed May 11, 2024
1 parent eab7872 commit 3db3604
Show file tree
Hide file tree
Showing 10 changed files with 310 additions and 57 deletions.
3 changes: 2 additions & 1 deletion TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
- [ ] Improve and extend NLP functionality, just across the board
- [ ] Enable https everywhere (via [let's encrypt](https://letsencrypt.org/)?)
- [ ] Add a "deduplicate" button to front-end interface and only run dedupe jobs upon request
- [ ] Allow for requiring multiple screeners on a configurable percentage of studies (for "rapid review" style projects)
- [x] Allow for requiring multiple screeners on a configurable percentage of studies (for "rapid review" style projects)
- [ ] Add filtering studies by number of citation/fulltext reviewers
40 changes: 19 additions & 21 deletions colandr/apis/resources/citation_screenings.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import random

import flask_jwt_extended as jwtext
import sqlalchemy as sa
from flask import current_app
Expand Down Expand Up @@ -453,28 +455,25 @@ def post(self, args, review_id, user_id):
"inserted %s citation screenings", len(screenings_to_insert)
)
# bulk update citation statuses
num_screeners = review.num_citation_screening_reviewers
study_ids = sorted(s["study_id"] for s in screenings_to_insert)
# results = db.session.query(models.Screening)\
# .filter(models.Screening.study_id.in_(study_ids))
# studies_to_update = [
# {'id': cid, 'citation_status': assign_status(list(scrns), num_screeners)}
# for cid, scrns in itertools.groupby(results, attrgetter('citation_id'))
# ]
with db.engine.connect() as connection:
query = """
SELECT study_id, ARRAY_AGG(status)
FROM screenings
WHERE study_id IN ({study_ids})
GROUP BY study_id
ORDER BY study_id
""".format(study_ids=",".join(str(cid) for cid in study_ids))
results = connection.execute(sa.text(query))
study_ids: list[int] = sorted(s["study_id"] for s in screenings_to_insert)
study_num_citation_reviewers: list[int] = random.choices(
[num_pct["num"] for num_pct in review.citation_reviewer_num_pcts],
weights=[num_pct["pct"] for num_pct in review.citation_reviewer_num_pcts],
k=len(study_ids),
)
results = db.session.execute(
sa.select(
models.Screening.study_id, sa.func.array_agg(models.Screening.status)
)
.where(models.Screening.stage == "citation")
.where(models.Screening.study_id == sa.any_(study_ids))
.group_by(models.Screening.study_id)
.order_by(models.Screening.study_id)
)
studies_to_update = [
{"id": row[0], "citation_status": assign_status(row[1], num_screeners)}
for row in results
{"id": row[0], "citation_status": assign_status(row[1], num_reviewers)}
for row, num_reviewers in zip(results, study_num_citation_reviewers)
]

db.session.execute(sa.update(models.Study), studies_to_update)
db.session.commit()
current_app.logger.info(
Expand All @@ -484,7 +483,6 @@ def post(self, args, review_id, user_id):
status_counts_stmt = (
sa.select(models.Study.citation_status, db.func.count(1))
.filter_by(review_id=review_id, dedupe_status="not_duplicate")
# .filter(models.Study.citation_status.in_(["included", "excluded"]))
.filter(models.Study.citation_status == sa.any_(["included", "excluded"]))
.group_by(models.Study.citation_status)
)
Expand Down
40 changes: 19 additions & 21 deletions colandr/apis/resources/fulltext_screenings.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import random

import flask_jwt_extended as jwtext
import sqlalchemy as sa
from flask import current_app
Expand Down Expand Up @@ -452,28 +454,25 @@ def post(self, args, review_id, user_id):
"inserted %s fulltext screenings", len(screenings_to_insert)
)
# bulk update fulltext statuses
num_screeners = review.num_fulltext_screening_reviewers
study_ids = sorted(s["study_id"] for s in screenings_to_insert)
# results = db.session.query(models.Screening)\
# .filter(models.Screening.study_id.in_(study_ids))
# studies_to_update = [
# {'id': cid, 'fulltext_status': assign_status(list(scrns), num_screeners)}
# for cid, scrns in itertools.groupby(results, attrgetter('fulltext_id'))
# ]
with db.engine.connect() as connection:
query = """
SELECT study_id, ARRAY_AGG(status)
FROM screenings
WHERE study_id IN ({study_ids})
GROUP BY study_id
ORDER BY study_id
""".format(study_ids=",".join(str(cid) for cid in study_ids))
results = connection.execute(sa.text(query))
study_ids: list[int] = sorted(s["study_id"] for s in screenings_to_insert)
study_num_fulltext_reviewers: list[int] = random.choices(
[num_pct["num"] for num_pct in review.fulltext_reviewer_num_pcts],
weights=[num_pct["pct"] for num_pct in review.fulltext_reviewer_num_pcts],
k=len(study_ids),
)
results = db.session.execute(
sa.select(
models.Screening.study_id, sa.func.array_agg(models.Screening.status)
)
.where(models.Screening.stage == "fulltext")
.where(models.Screening.study_id == sa.any_(study_ids))
.group_by(models.Screening.study_id)
.order_by(models.Screening.study_id)
)
studies_to_update = [
{"id": row[0], "fulltext_status": assign_status(row[1], num_screeners)}
for row in results
{"id": row[0], "fulltext_status": assign_status(row[1], num_reviewers)}
for row, num_reviewers in zip(results, study_num_fulltext_reviewers)
]

db.session.execute(sa.update(models.Study), studies_to_update)
db.session.commit()
current_app.logger.info(
Expand All @@ -500,7 +499,6 @@ def post(self, args, review_id, user_id):
status_counts_stmt = (
sa.select(models.Study.fulltext_status, db.func.count(1))
.filter_by(review_id=review_id, dedupe_status="not_duplicate")
# .filter(models.Study.fulltext_status.in_(["included", "excluded"]))
.filter(models.Study.fulltext_status == sa.any_(["included", "excluded"]))
.group_by(models.Study.fulltext_status)
)
Expand Down
33 changes: 28 additions & 5 deletions colandr/apis/resources/reviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ...extensions import db
from ...lib import constants
from ..errors import forbidden_error, not_found_error
from ..schemas import ReviewSchema
from ..schemas import ReviewSchema, ReviewV2Schema
from ..swagger import review_model


Expand Down Expand Up @@ -69,7 +69,8 @@ def get(self, id, fields):
if fields and "id" not in fields:
fields.append("id")
current_app.logger.debug("got %s", review)
return ReviewSchema(only=fields).dump(review)
# return ReviewSchema(only=fields).dump(review)
return _convert_review_v2_into_v1(review, fields)

@ns.doc(
responses={
Expand Down Expand Up @@ -138,11 +139,17 @@ def put(self, args, id):
for key, value in args.items():
if key is missing:
continue
# HACK: allow setting old attributes, but convert them into new equivalents
elif key == "num_citation_screening_reviewers":
review.citation_reviewer_num_pcts = [{"num": value, "pct": 100}]
elif key == "num_fulltext_screening_reviewers":
review.fulltext_reviewer_num_pcts = [{"num": value, "pct": 100}]
else:
setattr(review, key, value)
db.session.commit()
current_app.logger.info("modified %s", review)
return ReviewSchema().dump(review)
# return ReviewSchema().dump(review)
return _convert_review_v2_into_v1(review)


@ns.route("")
Expand Down Expand Up @@ -200,7 +207,8 @@ def get(self, fields, _review_ids):
reviews = current_user.reviews
if fields and "id" not in fields:
fields.append("id")
return ReviewSchema(only=fields, many=True).dump(reviews)
# return ReviewSchema(only=fields, many=True).dump(reviews)
return [_convert_review_v2_into_v1(review) for review in reviews]

@ns.doc(
expect=(review_model, "review data to be created"),
Expand Down Expand Up @@ -230,7 +238,8 @@ def post(self, args):
os.makedirs(dirname, exist_ok=True)
except OSError:
pass # TODO: fix this / the entire system for saving files to disk
return ReviewSchema().dump(review)
# return ReviewSchema().dump(review)
return _convert_review_v2_into_v1(review)


def _is_allowed(
Expand All @@ -251,3 +260,17 @@ def _is_allowed(
)

return is_allowed


def _convert_review_v2_into_v1(review, fields=None) -> dict:
    """Serialize ``review`` with the v2 schema, then reshape it like a v1 record.

    The v2 schema exposes ``citation_reviewer_num_pcts`` and
    ``fulltext_reviewer_num_pcts`` (lists of ``{"num": ..., "pct": ...}`` items),
    while v1 clients expect the scalar ``num_citation_screening_reviewers`` and
    ``num_fulltext_screening_reviewers``. Each v1 value is taken from the ``num``
    of the first item in the corresponding v2 list.

    Args:
        review: Review object to dump with ``ReviewV2Schema``.
        fields: Optional collection of field names to restrict the output to.
            v1 reviewer-count names are accepted and translated to their v2
            equivalents; the passed collection itself is not modified.

    Returns:
        A dict holding the requested review fields under v1 key names.
    """
    v1_to_v2 = {
        "num_citation_screening_reviewers": "citation_reviewer_num_pcts",
        "num_fulltext_screening_reviewers": "fulltext_reviewer_num_pcts",
    }
    # Callers forward client-supplied field names, which may still use the v1
    # names; the v2 schema does not declare those, so translate them before they
    # reach ``only``. Falsy values (None / empty) are passed through untouched.
    only = [v1_to_v2.get(name, name) for name in fields] if fields else fields
    record = ReviewV2Schema(only=only).dump(review)
    # dump() of a single object is expected to give a dict; this narrows the type.
    assert isinstance(record, dict)
    for v1_key, v2_key in v1_to_v2.items():
        if v2_key not in record:
            continue
        # Always drop the v2-only key so it never leaks into a v1 payload, even
        # when the stored list is empty or null and no v1 value can be derived.
        num_pcts = record.pop(v2_key)
        if num_pcts:
            record[v1_key] = num_pcts[0]["num"]
    return record
4 changes: 4 additions & 0 deletions colandr/apis/resources/studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ def put(self, args, id):
return StudySchema().dump(study)


# TODO: add optional filter for num citation/fulltext reviewers
# and maybe, finally, port these queries over to sqlalchemy orm


@ns.route("")
@ns.doc(
summary="get collections of matching studies",
Expand Down
20 changes: 18 additions & 2 deletions colandr/apis/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,31 @@ class DataSourceSchema(Schema):
source_type_and_name = fields.Str(dump_only=True)


class ReviewerNumPct(Schema):
    """Schema for one ``{"num": ..., "pct": ...}`` reviewer-configuration entry.

    Both fields are required integers. Presumably ``num`` is a number of
    reviewers and ``pct`` the percentage of studies it applies to -- confirm
    against the code that consumes these entries.
    """

    # Allowed values: 1 through 3, inclusive.
    num = fields.Int(required=True, validate=Range(min=1, max=3))
    # Allowed values: 0 through 100, inclusive.
    pct = fields.Int(required=True, validate=Range(min=0, max=100))


class ReviewSchema(Schema):
    """Schema for a review that carries one scalar reviewer count per stage."""

    # id and both timestamps are dump-only: serialized, never loaded from input.
    id = fields.Int(dump_only=True)
    created_at = fields.DateTime(dump_only=True, format="iso")
    updated_at = fields.DateTime(dump_only=True, format="iso")
    name = fields.Str(required=True, validate=Length(max=500))
    description = fields.Str(load_default=None)
    status = fields.Str(validate=OneOf(constants.REVIEW_STATUSES))
    # NOTE(review): each reviewer-count field is assigned twice below. In a class
    # body the later assignment wins, so the effective range is 1-3. The max=2
    # pair looks like the pre-change side of this diff -- confirm before applying.
    num_citation_screening_reviewers = fields.Int(validate=Range(min=1, max=2))
    num_fulltext_screening_reviewers = fields.Int(validate=Range(min=1, max=2))
    num_citation_screening_reviewers = fields.Int(validate=Range(min=1, max=3))
    num_fulltext_screening_reviewers = fields.Int(validate=Range(min=1, max=3))


class ReviewV2Schema(Schema):
    """Schema for a review whose reviewer settings are lists of num/pct entries.

    Declares the same id, timestamp, name, description and status fields as
    ``ReviewSchema``, but replaces the two scalar reviewer-count fields with
    lists of nested ``ReviewerNumPct`` items.
    """

    # id and both timestamps are dump-only: serialized, never loaded from input.
    id = fields.Int(dump_only=True)
    created_at = fields.DateTime(dump_only=True, format="iso")
    updated_at = fields.DateTime(dump_only=True, format="iso")
    name = fields.Str(required=True, validate=Length(max=500))
    description = fields.Str(load_default=None)
    status = fields.Str(validate=OneOf(constants.REVIEW_STATUSES))
    # One list per screening stage; each item is validated by ReviewerNumPct.
    # NOTE(review): no length validator is declared, so an empty list is accepted.
    citation_reviewer_num_pcts = fields.List(fields.Nested(ReviewerNumPct))
    fulltext_reviewer_num_pcts = fields.List(fields.Nested(ReviewerNumPct))


class ReviewPlanPICO(Schema):
Expand Down
25 changes: 24 additions & 1 deletion colandr/apis/swagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,34 @@
},
)

# Swagger model for a single num/pct entry: "num" is bounded to 1-3 and "pct"
# to 0-100.
reviewer_num_pct_model = ns.model(
    "ReviewerNumPct",
    {"num": fields.Integer(min=1, max=3), "pct": fields.Integer(min=0, max=100)},
)

# Swagger model for a review payload with one scalar reviewer count (1-3) per
# screening stage.
review_model = ns.model(
    "Review",
    {
        "name": fields.String(required=True, max_length=500),
        "description": fields.String,
        "status": fields.String,
        "num_citation_screening_reviewers": fields.Integer(min=1, max=3),
        "num_fulltext_screening_reviewers": fields.Integer(min=1, max=3),
    },
)

# Swagger model for a review payload whose reviewer settings are lists of
# reviewer_num_pct_model entries instead of scalar counts.
review_v2_model = ns.model(
    "ReviewV2",
    {
        "name": fields.String(required=True, max_length=500),
        "description": fields.String,
        "status": fields.String,
        "citation_reviewer_num_pcts": fields.List(
            fields.Nested(reviewer_num_pct_model)
        ),
        "fulltext_reviewer_num_pcts": fields.List(
            fields.Nested(reviewer_num_pct_model)
        ),
    },
)

Expand Down Expand Up @@ -113,7 +136,7 @@
"data_extraction_form": fields.List(
fields.Nested(data_extraction_form_item_model)
),
}
},
# 'suggested_keyterms': fields.Nested(review_plan_suggested_keyterms)} # not user-set
)

Expand Down
Loading

0 comments on commit 3db3604

Please sign in to comment.