增加腾讯词向量封装

2025-12-07 09:56:51 +00:00 · 2020-08-25 17:25:03 +08:00
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,3 @@
 gensim
 flask
 flasgger
--- a/src/tenxun.py
+++ b/src/tenxun.py
@@ -0,0 +1,32 @@
 from gensim.models import KeyedVectors
 from flask import Flask, request, jsonify, send_file
 from flasgger import Swagger
 import os
 import json
 import time
 # Start the clock so the total start-up time can be reported once loading is done.
 time1 = time.time()
 app = Flask(__name__)
 # Flask JSON settings: do not ASCII-escape output and declare UTF-8 in the JSON mimetype.
 app.config.update(
     JSON_AS_ASCII=False,
     JSONIFY_MIMETYPE="application/json;charset=utf-8",
 )
 Swagger(app)
 # Relative path: resolved against the process's current working directory.
 file = 'Tencent_AILab_ChineseEmbedding.txt'
 # Loading takes a long time.
 wv_from_text = KeyedVectors.load_word2vec_format(file, binary=False)
 # NOTE(review): presumably normalises the vectors in place; confirm against the installed gensim version.
 wv_from_text.init_sims(replace=True)
 print(f"加载时间为:{time.time() - time1}")
@app.route('/api/tengxun', methods=['post'])
def to_kg():
    """Return the 20 nearest neighbours of a word in the loaded embedding.

    Expects a JSON request body of the form ``{"word": "<token>"}``. The body
    is parsed as JSON regardless of the request's Content-Type header.

    Returns:
        A JSON response ``{"result": [[word, similarity], ...]}`` when the
        word is in the vocabulary, ``{"result": "没找到"}`` when it is not,
        or a 400 response ``{"error": ...}`` when the body is not a JSON
        object carrying a string ``"word"`` field.
    """
    # force=True parses the body whatever the Content-Type is; silent=True
    # yields None on malformed JSON instead of raising, so we can answer 400.
    payload = request.get_json(force=True, silent=True)
    word = payload.get('word') if isinstance(payload, dict) else None
    if not isinstance(word, str):
        return jsonify({"error": 'request body must be a JSON object with a string "word" field'}), 400

    # Membership is tested on the KeyedVectors object itself; the previous
    # `.wv.vocab` attribute chain is not available in gensim 4.
    if word not in wv_from_text:
        return jsonify({"result": "没找到"})

    vec = wv_from_text[word]
    # NOTE(review): querying by vector rather than by key presumably leaves
    # the query word itself in the results - confirm this is intended.
    neighbours = wv_from_text.most_similar(positive=[vec], topn=20)
    # Cast scores to built-in float so they serialise whatever numeric type
    # the library hands back.
    result = [(neighbour, float(score)) for neighbour, score in neighbours]
    # jsonify (unlike json.dumps) honours the app's JSON_AS_ASCII and
    # JSONIFY_MIMETYPE settings and sets a JSON Content-Type.
    return jsonify({"result": result})
 if __name__ == '__main__':
     # Script entry point: serve on every network interface, port 8020.
     app.run(host='0.0.0.0', port=8020)
 ~