-
Notifications
You must be signed in to change notification settings - Fork 0
/
filter-1tok.awk
38 lines (38 loc) · 1.14 KB
/
filter-1tok.awk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# strip(thing)
#   Return a copy of `thing` with any run of space characters removed from
#   its start and from its end. Only the space character is trimmed; tabs,
#   newlines and interior spaces are left exactly as they were.
function strip(thing) {
    sub(/^ +/, "", thing)
    sub(/ +$/, "", thing)
    return thing
}
# finish(id, doc, question, answer, candidates)
#   Decide whether one parsed record passes the single-token filter, print it
#   if so, and log its id either way.
#
#   A record is accepted when, after trimming spaces, the answer is non-empty
#   and contains no space, and the doc is non-empty. Accepted records have
#   their id appended to "accepted.ids.txt" and are printed to standard output
#   as five fields (id, doc, question, answer, candidates), each terminated by
#   the current ORS. Every other record only has its id appended to
#   "rejected.ids.txt".
#
#   `candidates` is split on the current FS. Each candidate is trimmed; a
#   candidate is dropped when it is empty, contains a space, or equals the
#   (trimmed) candidate immediately before it in the input list.
#
#   Returns 0 when the record is accepted; the rejected path returns no
#   explicit value. A, n and i are local scratch variables.
function finish( id,doc,question,answer,candidates,    A,n,i ) {
    answer = strip(answer);
    doc = strip(doc);
    # An empty answer has zero tokens, so it must not pass the one-token test
    # merely because it contains no space.
    if (length(answer) > 0 && index(answer, " ") == 0 && length(doc) > 0) {
        printf "%s\n", id > "accepted.ids.txt"
        print id; print doc; print question; print answer;
        n = split(candidates, A);
        candidates = "";
        for (i = 1; i <= n; i++) {
            A[i] = strip(A[i]);
            # Skip empty candidates: joined with FS they would put an empty
            # line inside the candidates field, which the blank-line record
            # separator set in BEGIN would read as a record boundary.
            if (length(A[i]) == 0) continue;
            # Skip multi-token candidates.
            if (index(A[i], " ") != 0) continue;
            # Collapse a candidate that repeats its immediate predecessor.
            if (i == 1 || A[i] != A[i-1]) { candidates = candidates FS A[i]; }
        }
        # Drop the leading FS that the join above always prepends.
        print substr(candidates, length(FS) + 1);
        return 0;
    }
    printf "%s\n", id > "rejected.ids.txt"
}
# Driver: reads blank-line-separated records and cycles through five states,
# one input record per state: id -> doc -> question -> answer -> candidates.
# A completed group of five is handed to finish() when the next id record
# arrives, or at end of input.
BEGIN { RS = ORS = "\n\n"; FS = OFS = "\n"; state = "id"; }
{
    if (state == "id") {
        # Flush the previous group. Compare against "" instead of testing
        # truthiness: an id read from a field that looks like numeric zero
        # (e.g. "0") is false in a boolean context and would be dropped.
        if (id != "") {
            finish(id, doc, question, answer, candidates);
        }
        # The id is the first line of the record that does not start with
        # '#'; if every line starts with '#', the last line is used.
        for (i = 1; i <= NF; i++) { id = $i; if (id !~ /^[#]/) break; }
        state = "doc";
    }
    else if (state == "doc")        { doc = $0;        state = "question"; }
    else if (state == "question")   { question = $0;   state = "answer"; }
    else if (state == "answer")     { answer = $0;     state = "candidates"; }
    else if (state == "candidates") { candidates = $0; state = "id"; }
}
END {
    # Nothing to flush when no id was read (empty input, or a trailing record
    # whose id line is empty).
    if (id != "") {
        # state != "id" means input ended part-way through a group, so some
        # of question/answer/candidates (or doc) still hold the previous
        # group's values. Blank the doc so finish() rejects the group instead
        # of printing it with stale fields.
        if (state != "id") { doc = ""; }
        finish(id, doc, question, answer, candidates);
    }
}