55from preprocess_utils .conceptnet import extract_english , construct_graph
66from preprocess_utils .grounding import create_matcher_patterns , ground
77from preprocess_utils .graph import generate_adj_data_from_grounded_concepts__use_LM
8-
8+ from preprocess_utils . tagging import tag
99input_paths = {
1010 'csqa' : {
1111 'train' : './data/csqa/train_rand_split.jsonl' ,
4646 'adj-dev' : './data/csqa/graph/dev.graph.adj.pk' ,
4747 'adj-test' : './data/csqa/graph/test.graph.adj.pk' ,
4848 },
49+ 'tagged' :{
50+ 'train' : './data/csqa/tagged/train.tagged.jsonl' ,
51+ 'dev' : './data/csqa/tagged/dev.tagged.jsonl' ,
52+ 'test' : './data/csqa/tagged/test.tagged.jsonl' ,
53+ },
4954 },
5055 'obqa' : {
5156 'statement' : {
6166 'dev' : './data/obqa/grounded/dev.grounded.jsonl' ,
6267 'test' : './data/obqa/grounded/test.grounded.jsonl' ,
6368 },
69+ 'tagged' :{
70+ 'train' : './data/obqa/tagged/train.tagged.jsonl' ,
71+ 'dev' : './data/obqa/tagged/dev.tagged.jsonl' ,
72+ 'test' : './data/obqa/tagged/test.tagged.jsonl' ,
73+ },
6474 'graph' : {
6575 'adj-train' : './data/obqa/graph/train.graph.adj.pk' ,
6676 'adj-dev' : './data/obqa/graph/dev.graph.adj.pk' ,
@@ -81,42 +91,54 @@ def main():
8191 raise NotImplementedError ()
8292
8393 routines = {
84- 'common' : [
85- {'func' : extract_english , 'args' : (input_paths ['cpnet' ]['csv' ], output_paths ['cpnet' ]['csv' ], output_paths ['cpnet' ]['vocab' ])},
86- {'func' : construct_graph , 'args' : (output_paths ['cpnet' ]['csv' ], output_paths ['cpnet' ]['vocab' ],
87- output_paths ['cpnet' ]['unpruned-graph' ], False )},
88- {'func' : construct_graph , 'args' : (output_paths ['cpnet' ]['csv' ], output_paths ['cpnet' ]['vocab' ],
89- output_paths ['cpnet' ]['pruned-graph' ], True )},
90- {'func' : create_matcher_patterns , 'args' : (output_paths ['cpnet' ]['vocab' ], output_paths ['cpnet' ]['patterns' ])},
91- ],
94+ # 'common': [
95+ # {'func': extract_english, 'args': (input_paths['cpnet']['csv'], output_paths['cpnet']['csv'], output_paths['cpnet']['vocab'])},
96+ # {'func': construct_graph, 'args': (output_paths['cpnet']['csv'], output_paths['cpnet']['vocab'],
97+ # output_paths['cpnet']['unpruned-graph'], False)},
98+ # {'func': construct_graph, 'args': (output_paths['cpnet']['csv'], output_paths['cpnet']['vocab'],
99+ # output_paths['cpnet']['pruned-graph'], True)},
100+ # {'func': create_matcher_patterns, 'args': (output_paths['cpnet']['vocab'], output_paths['cpnet']['patterns'])},
101+ # ],
92102 'csqa' : [
93- {'func' : convert_to_entailment , 'args' : (input_paths ['csqa' ]['train' ], output_paths ['csqa' ]['statement' ]['train' ])},
94- {'func' : convert_to_entailment , 'args' : (input_paths ['csqa' ]['dev' ], output_paths ['csqa' ]['statement' ]['dev' ])},
95- {'func' : convert_to_entailment , 'args' : (input_paths ['csqa' ]['test' ], output_paths ['csqa' ]['statement' ]['test' ])},
96- {'func' : ground , 'args' : (output_paths ['csqa' ]['statement' ]['train' ], output_paths ['cpnet' ]['vocab' ],
97- output_paths ['cpnet' ]['patterns' ], output_paths ['csqa' ]['grounded' ]['train' ], args .nprocs )},
98- {'func' : ground , 'args' : (output_paths ['csqa' ]['statement' ]['dev' ], output_paths ['cpnet' ]['vocab' ],
99- output_paths ['cpnet' ]['patterns' ], output_paths ['csqa' ]['grounded' ]['dev' ], args .nprocs )},
100- {'func' : ground , 'args' : (output_paths ['csqa' ]['statement' ]['test' ], output_paths ['cpnet' ]['vocab' ],
101- output_paths ['cpnet' ]['patterns' ], output_paths ['csqa' ]['grounded' ]['test' ], args .nprocs )},
102- {'func' : generate_adj_data_from_grounded_concepts__use_LM , 'args' : (output_paths ['csqa' ]['grounded' ]['train' ], output_paths ['cpnet' ]['pruned-graph' ], output_paths ['cpnet' ]['vocab' ], output_paths ['csqa' ]['graph' ]['adj-train' ], args .nprocs )},
103- {'func' : generate_adj_data_from_grounded_concepts__use_LM , 'args' : (output_paths ['csqa' ]['grounded' ]['dev' ], output_paths ['cpnet' ]['pruned-graph' ], output_paths ['cpnet' ]['vocab' ], output_paths ['csqa' ]['graph' ]['adj-dev' ], args .nprocs )},
104- {'func' : generate_adj_data_from_grounded_concepts__use_LM , 'args' : (output_paths ['csqa' ]['grounded' ]['test' ], output_paths ['cpnet' ]['pruned-graph' ], output_paths ['cpnet' ]['vocab' ], output_paths ['csqa' ]['graph' ]['adj-test' ], args .nprocs )},
103+ # {'func': convert_to_entailment, 'args': (input_paths['csqa']['train'], output_paths['csqa']['statement']['train'])},
104+ # {'func': convert_to_entailment, 'args': (input_paths['csqa']['dev'], output_paths['csqa']['statement']['dev'])},
105+ # {'func': convert_to_entailment, 'args': (input_paths['csqa']['test'], output_paths['csqa']['statement']['test'])},
106+ # {'func': ground, 'args': (output_paths['csqa']['statement']['train'], output_paths['cpnet']['vocab'],
107+ # output_paths['cpnet']['patterns'], output_paths['csqa']['grounded']['train'], args.nprocs)},
108+ # {'func': ground, 'args': (output_paths['csqa']['statement']['dev'], output_paths['cpnet']['vocab'],
109+ # output_paths['cpnet']['patterns'], output_paths['csqa']['grounded']['dev'], args.nprocs)},
110+ # {'func': ground, 'args': (output_paths['csqa']['statement']['test'], output_paths['cpnet']['vocab'],
111+ # output_paths['cpnet']['patterns'], output_paths['csqa']['grounded']['test'], args.nprocs)},
112+ {'func' : tag , 'args' : (output_paths ['csqa' ]['statement' ]['train' ], output_paths ['cpnet' ]['vocab' ],
113+ output_paths ['cpnet' ]['patterns' ], output_paths ['csqa' ]['tagged' ]['train' ], args .nprocs )},
114+ {'func' : tag , 'args' : (output_paths ['csqa' ]['statement' ]['dev' ], output_paths ['cpnet' ]['vocab' ],
115+ output_paths ['cpnet' ]['patterns' ], output_paths ['csqa' ]['tagged' ]['dev' ], args .nprocs )},
116+ {'func' : tag , 'args' : (output_paths ['csqa' ]['statement' ]['test' ], output_paths ['cpnet' ]['vocab' ],
117+ output_paths ['cpnet' ]['patterns' ], output_paths ['csqa' ]['tagged' ]['test' ], args .nprocs )},
118+ # {'func': generate_adj_data_from_grounded_concepts__use_LM, 'args': (output_paths['csqa']['grounded']['train'], output_paths['cpnet']['pruned-graph'], output_paths['cpnet']['vocab'], output_paths['csqa']['graph']['adj-train'], args.nprocs)},
119+ # {'func': generate_adj_data_from_grounded_concepts__use_LM, 'args': (output_paths['csqa']['grounded']['dev'], output_paths['cpnet']['pruned-graph'], output_paths['cpnet']['vocab'], output_paths['csqa']['graph']['adj-dev'], args.nprocs)},
120+ # {'func': generate_adj_data_from_grounded_concepts__use_LM, 'args': (output_paths['csqa']['grounded']['test'], output_paths['cpnet']['pruned-graph'], output_paths['cpnet']['vocab'], output_paths['csqa']['graph']['adj-test'], args.nprocs)},
105121 ],
106122
107123 'obqa' : [
108- {'func' : convert_to_obqa_statement , 'args' : (input_paths ['obqa' ]['train' ], output_paths ['obqa' ]['statement' ]['train' ], output_paths ['obqa' ]['statement' ]['train-fairseq' ])},
109- {'func' : convert_to_obqa_statement , 'args' : (input_paths ['obqa' ]['dev' ], output_paths ['obqa' ]['statement' ]['dev' ], output_paths ['obqa' ]['statement' ]['dev-fairseq' ])},
110- {'func' : convert_to_obqa_statement , 'args' : (input_paths ['obqa' ]['test' ], output_paths ['obqa' ]['statement' ]['test' ], output_paths ['obqa' ]['statement' ]['test-fairseq' ])},
111- {'func' : ground , 'args' : (output_paths ['obqa' ]['statement' ]['train' ], output_paths ['cpnet' ]['vocab' ],
112- output_paths ['cpnet' ]['patterns' ], output_paths ['obqa' ]['grounded' ]['train' ], args .nprocs )},
113- {'func' : ground , 'args' : (output_paths ['obqa' ]['statement' ]['dev' ], output_paths ['cpnet' ]['vocab' ],
114- output_paths ['cpnet' ]['patterns' ], output_paths ['obqa' ]['grounded' ]['dev' ], args .nprocs )},
115- {'func' : ground , 'args' : (output_paths ['obqa' ]['statement' ]['test' ], output_paths ['cpnet' ]['vocab' ],
116- output_paths ['cpnet' ]['patterns' ], output_paths ['obqa' ]['grounded' ]['test' ], args .nprocs )},
117- {'func' : generate_adj_data_from_grounded_concepts__use_LM , 'args' : (output_paths ['obqa' ]['grounded' ]['train' ], output_paths ['cpnet' ]['pruned-graph' ], output_paths ['cpnet' ]['vocab' ], output_paths ['obqa' ]['graph' ]['adj-train' ], args .nprocs )},
118- {'func' : generate_adj_data_from_grounded_concepts__use_LM , 'args' : (output_paths ['obqa' ]['grounded' ]['dev' ], output_paths ['cpnet' ]['pruned-graph' ], output_paths ['cpnet' ]['vocab' ], output_paths ['obqa' ]['graph' ]['adj-dev' ], args .nprocs )},
119- {'func' : generate_adj_data_from_grounded_concepts__use_LM , 'args' : (output_paths ['obqa' ]['grounded' ]['test' ], output_paths ['cpnet' ]['pruned-graph' ], output_paths ['cpnet' ]['vocab' ], output_paths ['obqa' ]['graph' ]['adj-test' ], args .nprocs )},
124+ # {'func': convert_to_obqa_statement, 'args': (input_paths['obqa']['train'], output_paths['obqa']['statement']['train'], output_paths['obqa']['statement']['train-fairseq'])},
125+ # {'func': convert_to_obqa_statement, 'args': (input_paths['obqa']['dev'], output_paths['obqa']['statement']['dev'], output_paths['obqa']['statement']['dev-fairseq'])},
126+ # {'func': convert_to_obqa_statement, 'args': (input_paths['obqa']['test'], output_paths['obqa']['statement']['test'], output_paths['obqa']['statement']['test-fairseq'])},
127+ # {'func': ground, 'args': (output_paths['obqa']['statement']['train'], output_paths['cpnet']['vocab'],
128+ # output_paths['cpnet']['patterns'], output_paths['obqa']['grounded']['train'], args.nprocs)},
129+ # {'func': ground, 'args': (output_paths['obqa']['statement']['dev'], output_paths['cpnet']['vocab'],
130+ # output_paths['cpnet']['patterns'], output_paths['obqa']['grounded']['dev'], args.nprocs)},
131+ # {'func': ground, 'args': (output_paths['obqa']['statement']['test'], output_paths['cpnet']['vocab'],
132+ # output_paths['cpnet']['patterns'], output_paths['obqa']['grounded']['test'], args.nprocs)},
133+ {'func' : tag , 'args' : (output_paths ['obqa' ]['statement' ]['train' ], output_paths ['cpnet' ]['vocab' ],
134+ output_paths ['cpnet' ]['patterns' ], output_paths ['obqa' ]['tagged' ]['train' ], args .nprocs )},
135+ {'func' : tag , 'args' : (output_paths ['obqa' ]['statement' ]['dev' ], output_paths ['cpnet' ]['vocab' ],
136+ output_paths ['cpnet' ]['patterns' ], output_paths ['obqa' ]['tagged' ]['dev' ], args .nprocs )},
137+ {'func' : tag , 'args' : (output_paths ['obqa' ]['statement' ]['test' ], output_paths ['cpnet' ]['vocab' ],
138+ output_paths ['cpnet' ]['patterns' ], output_paths ['obqa' ]['tagged' ]['test' ], args .nprocs )},
139+ # {'func': generate_adj_data_from_grounded_concepts__use_LM, 'args': (output_paths['obqa']['grounded']['train'], output_paths['cpnet']['pruned-graph'], output_paths['cpnet']['vocab'], output_paths['obqa']['graph']['adj-train'], args.nprocs)},
140+ # {'func': generate_adj_data_from_grounded_concepts__use_LM, 'args': (output_paths['obqa']['grounded']['dev'], output_paths['cpnet']['pruned-graph'], output_paths['cpnet']['vocab'], output_paths['obqa']['graph']['adj-dev'], args.nprocs)},
141+ # {'func': generate_adj_data_from_grounded_concepts__use_LM, 'args': (output_paths['obqa']['grounded']['test'], output_paths['cpnet']['pruned-graph'], output_paths['cpnet']['vocab'], output_paths['obqa']['graph']['adj-test'], args.nprocs)},
120142 ],
121143 }
122144
0 commit comments