Dependencies:
pip3 install annoy
Download the pre-trained GloVe word vectors (the glove.6B archive) from:
https://nlp.stanford.edu/projects/glove/
Code:
from annoy import AnnoyIndex
# Build an Annoy nearest-neighbour index from the 50-dimensional GloVe vectors
# and save it to 'test.ann'. After this runs, terms[i] is the word whose
# vector is stored in the index under item id i.
file = "/Users/gary/Downloads/glove.6B/glove.6B.50d.txt"

# Each line is a term followed by its vector components, separated by single
# spaces. Split on " " only: a bare split() would also break on any other
# whitespace character that happens to occur inside a term.
# The encoding is explicit so parsing does not depend on the platform default
# (assumes the GloVe file is UTF-8 -- confirm if using a different download).
with open(file, encoding="utf-8") as f:
    content = [line.split(" ") for line in f]

terms = [row[0] for row in content]

# Name the metric explicitly instead of relying on the implicit default;
# "angular" is the metric Annoy used when none was given.
t = AnnoyIndex(50, "angular")
for idx, row in enumerate(content):
    # float() ignores the trailing newline left on the last component.
    t.add_item(idx, [float(a) for a in row[1:]])

t.build(10)  # 10 trees
t.save('test.ann')
Lookup:
# Load the saved index and print the words closest to item 0.
# Relies on `terms` (item id -> word) from the index-building code earlier
# in this file.
# The dimension and metric must match the ones the index was built with;
# "angular" is what Annoy used when no metric was given.
u = AnnoyIndex(50, "angular")
u.load('test.ann')

# Ask for 10 neighbours of item 0; the query item itself is normally among
# the results, so this is the item plus its closest other terms.
near = u.get_nns_by_item(0, 10)
nearWords = [terms[i] for i in near]
print(nearWords)