|
"""Example of converting TextSum model data.

Usage:
python data_convert_example.py --command binary_to_text --in_file data/data --out_file data/text_data
python data_convert_example.py --command text_to_binary --in_file data/text_data --out_file data/binary_data
python data_convert_example.py --command binary_to_text --in_file data/binary_data --out_file data/text_data2
diff data/text_data2 data/text_data
"""
| 8 | + |
| 9 | +import struct |
| 10 | +import sys |
| 11 | + |
| 12 | +import tensorflow as tf |
| 13 | +from tensorflow.core.example import example_pb2 |
| 14 | + |
FLAGS = tf.app.flags.FLAGS
# Conversion direction; main() compares this against 'binary_to_text' and
# 'text_to_binary'. The trailing space after the first sentence keeps the two
# adjacent literals from running together in the rendered help text.
tf.app.flags.DEFINE_string('command', 'binary_to_text',
                           'Either binary_to_text or text_to_binary. '
                           'Specify FLAGS.in_file accordingly.')
# Input and output paths; both default to '' and main() rejects empty values.
tf.app.flags.DEFINE_string('in_file', '', 'path to file')
tf.app.flags.DEFINE_string('out_file', '', 'path to file')
| 21 | + |
def _binary_to_text():
  """Convert the binary file FLAGS.in_file into the text file FLAGS.out_file.

  The input is read as a sequence of records, each an 8-byte native-order
  signed length (struct format 'q') followed by that many bytes holding a
  serialized tf.Example. Every record becomes one output line of
  tab-separated `key=value` pairs, where the value is the first entry of the
  feature's bytes_list.

  Raises:
    struct.error: if the file ends in the middle of a record.
    IndexError: if a feature has an empty bytes_list.
  """
  # The loop's only exit is the `return` at end of input, so explicit close()
  # calls placed after the loop would never run; `with` closes both files on
  # that return and on any exception.
  with open(FLAGS.in_file, 'rb') as reader, open(FLAGS.out_file, 'w') as writer:
    while True:
      len_bytes = reader.read(8)
      if not len_bytes:
        sys.stderr.write('Done reading\n')
        return
      str_len = struct.unpack('q', len_bytes)[0]
      # unpack also validates that exactly str_len bytes were read.
      tf_example_str = struct.unpack('%ds' % str_len, reader.read(str_len))[0]
      tf_example = example_pb2.Example.FromString(tf_example_str)
      examples = []
      for key in tf_example.features.feature:
        value = tf_example.features.feature[key].bytes_list.value[0]
        # On Python 3 the value is `bytes`; formatting it with %s would write
        # its repr (b'...') instead of the text. On Python 2 it is already
        # `str` and is left untouched. Assumes the payload is UTF-8 text.
        if not isinstance(value, str):
          value = value.decode('utf-8')
        examples.append('%s=%s' % (key, value))
      writer.write('%s\n' % '\t'.join(examples))
| 39 | + |
| 40 | + |
def _text_to_binary():
  """Convert the text file FLAGS.in_file into the binary file FLAGS.out_file.

  Each non-blank input line is split on tabs into `key=value` features, which
  are stored as single-entry bytes_list features of one tf.Example. The
  example is written as an 8-byte native-order signed length (struct format
  'q') followed by its serialized bytes.

  Raises:
    ValueError: if a feature on a line contains no '='.
  """
  # Read the whole input before opening the output (as the original did), so
  # the input handle is closed promptly and the read finishes before the
  # output file is created or truncated.
  with open(FLAGS.in_file, 'r') as reader:
    inputs = reader.readlines()

  with open(FLAGS.out_file, 'wb') as writer:
    for inp in inputs:
      line = inp.strip()
      if not line:
        # A blank line carries no features; skip it rather than fail on the
        # key/value unpacking below.
        continue
      tf_example = example_pb2.Example()
      for feature in line.split('\t'):
        # Split on the first '=' only, so values may themselves contain '='.
        k, v = feature.split('=', 1)
        # bytes_list only accepts bytes: encode Python 3 text; Python 2 `str`
        # is already bytes and passes through unchanged.
        if not isinstance(v, bytes):
          v = v.encode('utf-8')
        tf_example.features.feature[k].bytes_list.value.extend([v])
      tf_example_str = tf_example.SerializeToString()
      str_len = len(tf_example_str)
      writer.write(struct.pack('q', str_len))
      writer.write(struct.pack('%ds' % str_len, tf_example_str))
| 54 | + |
| 55 | + |
def main(unused_argv):
  """Run the conversion selected by FLAGS.command.

  Args:
    unused_argv: argument list supplied by the caller; not used.

  Raises:
    ValueError: if --command, --in_file or --out_file is empty, or if
      --command is neither 'binary_to_text' nor 'text_to_binary'.
  """
  # Explicit check instead of `assert`, which is stripped under `python -O`.
  if not (FLAGS.command and FLAGS.in_file and FLAGS.out_file):
    raise ValueError('--command, --in_file and --out_file must all be set.')
  if FLAGS.command == 'binary_to_text':
    _binary_to_text()
  elif FLAGS.command == 'text_to_binary':
    _text_to_binary()
  else:
    # Previously an unrecognised command fell through and did nothing.
    raise ValueError(
        'Unknown --command %r; expected binary_to_text or text_to_binary.'
        % FLAGS.command)
| 62 | + |
| 63 | + |
# Script entry point: hand control to tf.app.run, naming main() explicitly
# (the same function tf.app.run would otherwise look up on this module).
if __name__ == '__main__':
  tf.app.run(main=main)
0 commit comments