Skip to content

Commit 8583611

Browse files
author
Xin Pan
committed
Add example code of binary/text data conversion.
1 parent a6d7a7b commit 8583611

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

textsum/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ for example vocabulary format. In <b>How To Run</b> below, users can use toy
2727
data and vocab provided in the data/ directory to run the training by replacing
2828
the data directory flag.
2929

30+
data_convert_example.py contains an example of converting data between the binary and text formats.
31+
3032

3133
<b>Experiment Result</b>
3234

textsum/data_convert_example.py

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""Example of Converting TextSum model data.
2+
Usage:
3+
python data_convert_example.py --command binary_to_text --in_file data/data --out_file data/text_data
4+
python data_convert_example.py --command text_to_binary --in_file data/text_data --out_file data/binary_data
5+
python data_convert_example.py --command binary_to_text --in_file data/binary_data --out_file data/text_data2
6+
diff data/text_data2 data/text_data
7+
"""
8+
9+
import struct
10+
import sys
11+
12+
import tensorflow as tf
13+
from tensorflow.core.example import example_pb2
14+
15+
# Command-line flags read by the conversion functions below.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('command', 'binary_to_text',
                           # Trailing space keeps the two adjacent literals
                           # from running together in the help output.
                           'Either binary_to_text or text_to_binary. '
                           'Specify FLAGS.in_file accordingly.')
tf.app.flags.DEFINE_string('in_file', '', 'path to file')
tf.app.flags.DEFINE_string('out_file', '', 'path to file')
21+
22+
def _binary_to_text():
  """Converts length-prefixed tf.Example records into tab-separated text.

  FLAGS.in_file is read as a sequence of records, each made of an 8-byte
  length (struct format 'q') followed by that many bytes holding a serialized
  tf.Example. For every record, one line is written to FLAGS.out_file
  containing tab-separated `key=value` pairs, where the value is the first
  entry of that feature's bytes_list.

  Raises:
    struct.error: if the file ends in the middle of a length prefix or of a
      record payload.
    IndexError: if a feature has an empty bytes_list.
  """
  # Context managers close (and therefore flush) both files on every exit
  # path, including the early return at end-of-input and any exception.
  with open(FLAGS.in_file, 'rb') as reader:
    with open(FLAGS.out_file, 'w') as writer:
      while True:
        len_bytes = reader.read(8)
        if not len_bytes:
          sys.stderr.write('Done reading\n')
          return
        str_len = struct.unpack('q', len_bytes)[0]
        # The exact-size format makes a truncated payload fail loudly rather
        # than being parsed as a shorter message.
        tf_example_str = struct.unpack(
            '%ds' % str_len, reader.read(str_len))[0]
        tf_example = example_pb2.Example.FromString(tf_example_str)
        features = tf_example.features.feature
        # Sorting the keys makes the output deterministic, so converting the
        # same data twice yields files that can be compared with `diff`.
        examples = ['%s=%s' % (key, features[key].bytes_list.value[0])
                    for key in sorted(features)]
        writer.write('%s\n' % '\t'.join(examples))
39+
40+
41+
def _text_to_binary():
  """Converts tab-separated text lines into length-prefixed tf.Example records.

  Each non-blank line of FLAGS.in_file must consist of tab-separated
  `key=value` fields. Every field becomes one bytes_list entry under `key` in
  a tf.Example; the serialized example is written to FLAGS.out_file preceded
  by its length packed with struct format 'q'. Blank lines are skipped.

  Raises:
    ValueError: if a field contains no '=' separator.
  """
  # The whole input is read before the output is opened, as before, but the
  # handle is now closed deterministically instead of being leaked.
  with open(FLAGS.in_file, 'r') as reader:
    inputs = reader.readlines()

  with open(FLAGS.out_file, 'wb') as writer:
    for inp in inputs:
      line = inp.strip()
      if not line:
        # A blank (e.g. trailing) line carries no features; skip it instead
        # of failing on the key/value unpack below.
        continue
      tf_example = example_pb2.Example()
      for feature in line.split('\t'):
        # Split on the first '=' only, so values that themselves contain '='
        # are preserved intact.
        k, v = feature.split('=', 1)
        tf_example.features.feature[k].bytes_list.value.extend([v])
      tf_example_str = tf_example.SerializeToString()
      writer.write(struct.pack('q', len(tf_example_str)))
      # The payload is written as-is; packing it with an exact-size 's'
      # format would produce the same bytes.
      writer.write(tf_example_str)
54+
55+
56+
def main(unused_argv):
  """Runs the conversion selected by FLAGS.command.

  Args:
    unused_argv: ignored.

  Raises:
    ValueError: if any of the command, in_file or out_file flags is empty, or
      if FLAGS.command is neither 'binary_to_text' nor 'text_to_binary'.
  """
  # Explicit checks instead of `assert`, which is removed when Python runs
  # with -O and would let empty flags through.
  if not (FLAGS.command and FLAGS.in_file and FLAGS.out_file):
    raise ValueError('--command, --in_file and --out_file must all be set.')

  if FLAGS.command == 'binary_to_text':
    _binary_to_text()
  elif FLAGS.command == 'text_to_binary':
    _text_to_binary()
  else:
    # Previously an unrecognized command exited silently without doing
    # anything.
    raise ValueError(
        'Unknown --command %r; expected binary_to_text or text_to_binary.'
        % FLAGS.command)
62+
63+
64+
if __name__ == '__main__':
  # Script entry point: hand main() to tf.app.run explicitly.
  tf.app.run(main=main)

0 commit comments

Comments
 (0)