# Run this in TWSI2_complete/lexsub_task.
#
# Rewrites twsi2.xml into twsi2_clean.xml and twsi2.gold into twsi2_clean.gold.

# --- twsi2.xml -> twsi2_clean.xml --------------------------------------------
# NOTE(review): the two sed expressions below are reproduced exactly as found,
# but they look truncated -- markup inside them (possibly a CDATA opener) seems
# to have been stripped at some point. As written each has too many
# '/'-separated fields for an s command, so sed will reject it. Restore the
# intended patterns from a trusted copy before relying on twsi2_clean.xml.
cat twsi2.xml \
  | sed 's///]]>/g' \
  | sed 's/<\/head>/<\/head>/]]><\/context>/g' >twsi2_clean.xml

# --- twsi2.gold -> twsi2_clean.gold ------------------------------------------
# Put a backslash in front of every ';' that is part of a substitution word
# rather than a separator. In a sed replacement, '\\;' emits a literal
# backslash followed by ';'.
#
# The word list appears to have been generated with this Scala snippet:
#   for(s <- gold.gold.items.flatMap(_.substitutionWords).distinct if s.contains(";");
#       t <- """\w+;""".r.findAllIn(s))
#     println("""| sed 's/%s/%s/g' \""".format(t, t.replaceAll(";", """\\\\;""")))
#
# Each distinct substitution is listed once, in order of first appearance.
# Repeating one is a no-op: after the first pass the text reads 'word\;' and no
# longer matches 'word;'. A single sed applies the expressions in sequence to
# every line, which is equivalent to chaining one sed process per expression.
sed \
  -e 's/1;2;3;4/1\\;2\\;3\\;4/g' \
  -e 's/5;280/5\\;280/g' \
  -e 's/brochure;/brochure\\;/g' \
  -e 's/past;/past\\;/g' \
  -e 's/single;/single\\;/g' \
  -e 's/double;/double\\;/g' \
  -e 's/triple;/triple\\;/g' \
  -e 's/speech;/speech\\;/g' \
  -e 's/flat;/flat\\;/g' \
  -e 's/thin;/thin\\;/g' \
  -e 's/bureau;/bureau\\;/g' \
  -e 's/noun;/noun\\;/g' \
  -e 's/feeling;/feeling\\;/g' \
  -e 's/spirit;/spirit\\;/g' \
  -e 's/shape;/shape\\;/g' \
  -e 's/sophomore;/sophomore\\;/g' \
  -e 's/usage;/usage\\;/g' \
  -e 's/adj;/adj\\;/g' \
  -e 's/rollercoasters;/rollercoasters\\;/g' \
  -e 's/whirls;/whirls\\;/g' \
  -e 's/sort;/sort\\;/g' \
  -e 's/oops;/oops\\;/g' \
  -e 's/instantly;/instantly\\;/g' \
  -e 's/immediately;/immediately\\;/g' \
  -e 's/delay;/delay\\;/g' \
  -e 's/site;/site\\;/g' \
  -e 's/here;/here\\;/g' \
  -e 's/rich;/rich\\;/g' \
  -e 's/optical;/optical\\;/g' \
  -e 's/appearance;/appearance\\;/g' \
  -e 's/position;/position\\;/g' \
  twsi2.gold >twsi2_clean.gold