增加腾讯词向量封装

2025-12-06 01:16:47 +00:00 · 2020-08-25 17:25:03 +08:00
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+gensim
+flask
+flasgger
--- a/src/tenxun.py
+++ b/src/tenxun.py
@@ -0,0 +1,57 @@
+"""Flask service exposing nearest-neighbour lookup over Tencent AI Lab word vectors."""
+from gensim.models import KeyedVectors
+from flask import Flask, request, jsonify, send_file
+from flasgger import Swagger
+import os
+import json
+import time
+
+# Wall-clock start, used only to report how long the embedding load takes.
+time1 = time.time()
+app = Flask(__name__)
+# Let jsonify() emit non-ASCII (Chinese) text verbatim where Flask honours this key.
+app.config['JSON_AS_ASCII'] = False
+app.config['JSONIFY_MIMETYPE'] = "application/json;charset=utf-8"
+Swagger(app)
+# Embedding file in word2vec text format, resolved against the working directory.
+file = 'Tencent_AILab_ChineseEmbedding.txt'
+wv_from_text = KeyedVectors.load_word2vec_format(file, binary=False)  # loading takes a long time
+# replace=True asks gensim to keep only the normalised vectors (see gensim docs).
+wv_from_text.init_sims(replace=True)
+print("加载时间为:" + str(time.time() - time1))
+
+
+@app.route('/api/tengxun', methods=['post'])
+def to_kg():
+    """Return the 20 vocabulary entries most similar to the posted word.
+
+    Expects a JSON request body of the form ``{"word": "<text>"}``.
+
+    Returns:
+        A JSON response ``{"result": [[entry, similarity], ...]}`` when the
+        word is in the vocabulary, ``{"result": "没找到"}`` when it is not,
+        or a 400 response with an ``error`` field when the body is not a
+        JSON object carrying a string ``word``.
+    """
+    # force=True parses the body regardless of Content-Type; silent=True
+    # yields None for malformed JSON instead of raising.
+    payload = request.get_json(force=True, silent=True)
+    word = payload.get('word') if isinstance(payload, dict) else None
+    if not isinstance(word, str):
+        message = "request body must be JSON with a string 'word' field"
+        return jsonify({"error": message}), 400
+
+    # KeyedVectors supports ``in`` directly; the ``.wv.vocab`` chain is
+    # unavailable on gensim 4.
+    if word not in wv_from_text:
+        return jsonify({"result": "没找到"})
+
+    vec = wv_from_text[word]
+    # NOTE(review): querying by vector rather than by key probably leaves the
+    # word itself among the hits -- confirm this is intended.
+    neighbours = wv_from_text.most_similar(positive=[vec], topn=20)
+    return jsonify({"result": neighbours})
+
+
+if __name__ == '__main__':
+    app.run('0.0.0.0', port=8020)