GreaseLM/preprocess_utils/convert_obqa.py at main · DurgaMurali/GreaseLM

45 lines (36 loc) · 1.74 KB

import json
from tqdm import tqdm
# Public API of this module: only the file-level converter is exported;
# the per-record helpers defined below are not listed here.
__all__ = ['convert_to_obqa_statement']
# String used to indicate a blank
# NOTE(review): not referenced by any function visible in this file --
# presumably kept for parity with sibling converters; confirm before removing.
BLANK_STR = "___"
def convert_to_obqa_statement(qa_file: str, output_file1: str, output_file2: str):
    """Convert a JSON-lines QA file into statement form and write it twice.

    Every line of ``qa_file`` is parsed as a JSON object, passed through
    ``convert_qajson_to_entailment`` (which attaches a ``"statements"`` list
    built from the question stem and each answer choice), and written back
    out as one JSON document per line. The same content is written to both
    ``output_file1`` and ``output_file2``.

    Args:
        qa_file: Path to the input file, one JSON object per line.
        output_file1: First destination path (overwritten if it exists).
        output_file2: Second destination path (overwritten if it exists);
            receives exactly the same lines as ``output_file1``.

    Raises:
        OSError: If any of the files cannot be opened.
        json.JSONDecodeError: If a line of ``qa_file`` is not valid JSON.
        KeyError: If a record lacks the fields the converter reads.
    """
    print(f'converting {qa_file} to entailment dataset...')
    # Count the lines first so tqdm can display a total. The handle is opened
    # in a context manager so it is closed deterministically instead of being
    # left for the garbage collector.
    with open(qa_file, 'r', encoding='utf-8') as count_handle:
        nrow = sum(1 for _ in count_handle)
    # JSON is UTF-8 by specification, so the encoding is pinned rather than
    # left to the platform default.
    with open(output_file1, 'w', encoding='utf-8') as output_handle1, \
            open(output_file2, 'w', encoding='utf-8') as output_handle2, \
            open(qa_file, 'r', encoding='utf-8') as qa_handle:
        for line in tqdm(qa_handle, total=nrow):
            json_line = json.loads(line)
            output_dict = convert_qajson_to_entailment(json_line)
            # Serialise once and reuse the text for both destinations.
            serialized = json.dumps(output_dict)
            output_handle1.write(serialized)
            output_handle1.write("\n")
            output_handle2.write(serialized)
            output_handle2.write("\n")
    print(f'converted statements saved to {output_file1}, {output_file2}')
    print()
# Attach one candidate statement per answer choice to a parsed QA record
def convert_qajson_to_entailment(qa_json: dict):
    """Add a ``"statements"`` entry for every answer choice of ``qa_json``.

    For each item in ``qa_json["question"]["choices"]`` a statement is formed
    as ``"<stem> <choice text>"`` and recorded, via ``create_output_dict``,
    together with a boolean telling whether the choice's ``"label"`` equals
    the record's ``"answerKey"``. A record without ``"answerKey"`` is treated
    as if its answer were ``"A"``.

    The record is modified in place and the same object is returned.
    """
    stem = qa_json["question"]["stem"]
    options = qa_json["question"]["choices"]
    # The answer key does not change while iterating, so look it up once.
    correct_label = qa_json.get("answerKey", "A")
    for option in options:
        sentence = stem + ' ' + option["text"]
        is_correct = option["label"] == correct_label
        create_output_dict(qa_json, sentence, is_correct)
    return qa_json
# Record one labelled statement on the given QA record
def create_output_dict(input_json: dict, statement: str, label: bool) -> dict:
    """Append ``{"label": label, "statement": statement}`` to the record.

    The entry is added to ``input_json["statements"]``; the list is created
    on first use. ``input_json`` is mutated in place and also returned.
    """
    entry = {"label": label, "statement": statement}
    # setdefault creates the list only when the key is not present yet.
    input_json.setdefault("statements", []).append(entry)
    return input_json

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

convert_obqa.py

Latest commit

History

convert_obqa.py

File metadata and controls