Source code for source.vecs2nps

"""
Script to create `vecs.npy` and `vecs.vocab` from files with the following format:
<row_num> <dim>
<word_1> <vector_1>
...
<word_n> <vector_n>
"""

import numpy as np
import argh


def main(input_file, output_file):
    """Convert a text word-vector file into a ``.npy`` matrix and a ``.vocab`` list.

    The input starts with a header line ``<row_num> <dim>`` followed by
    ``row_num`` lines of the form ``<word> <v_1> ... <v_dim>``, all
    whitespace-separated.

    Args:
        input_file: Path of the text file to read.
        output_file: Output path prefix; the vectors are saved to
            ``output_file + ".npy"`` and the words, joined by single spaces,
            are written to ``output_file + ".vocab"``.

    Raises:
        ValueError: If a row does not hold exactly ``dim`` vector components,
            or a header/vector token cannot be parsed as a number.
        IndexError: If the header has fewer than two fields or a row is empty
            (for example when the file has fewer rows than the header says).
    """
    # TODO: try better encoding; undecodable bytes are currently replaced.
    # The context manager closes the input even when parsing fails.
    with open(input_file, 'r', errors='replace') as fh:
        size = list(map(int, fh.readline().strip().split()))
        rows, dim = size[0], size[1]
        wvecs = np.zeros((rows, dim), float)
        vocab = []
        for i in range(rows):
            tokens = fh.readline().strip().split()
            vocab.append(tokens[0])
            values = list(map(float, tokens[1:]))
            # Without this check a row holding a single value would be
            # silently broadcast across all `dim` columns by NumPy.
            if len(values) != dim:
                raise ValueError(
                    "row %d (%r): expected %d vector components, got %d"
                    % (i + 1, tokens[0], dim, len(values))
                )
            wvecs[i] = values

    np.save(output_file + ".npy", wvecs)
    with open(output_file + ".vocab", "w") as outf:
        outf.write(" ".join(vocab))
# Script entry point: let argh build the command-line interface from
# main's signature and dispatch to it.
if __name__ == '__main__':
    argh.dispatch_command(main)