Mirrored from https://gitee.com/medical-alliance/Medical-nlp.git
Synced 2025-12-06 01:16:47 +00:00

Add Tencent word-vector wrapper (增加腾讯词向量封装)

requirements.txt (normal file, 3 lines added)
@@ -0,0 +1,3 @@
gensim
flask
flasgger
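
Note: besides the pip packages above, src/tenxun.py expects the Tencent AI Lab Chinese embedding file Tencent_AILab_ChineseEmbedding.txt to be present in its working directory; that file has to be downloaded separately and is not part of this commit.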

src/tenxun.py (normal file, 32 lines added)
@@ -0,0 +1,32 @@
from gensim.models import KeyedVectors
from flask import Flask, request, jsonify, send_file
from flasgger import Swagger
import os
import json
import time

time1 = time.time()
app = Flask(__name__)
app.config['JSON_AS_ASCII'] = False
app.config['JSONIFY_MIMETYPE'] = "application/json;charset=utf-8"
Swagger(app)
file = 'Tencent_AILab_ChineseEmbedding.txt'
wv_from_text = KeyedVectors.load_word2vec_format(file, binary=False)  # loading the full embedding file takes a long time
wv_from_text.init_sims(replace=True)  # normalize the vectors in place to speed up similarity queries
print("加载时间为:" + str(time.time() - time1))  # log the load time in seconds


@app.route('/api/tengxun', methods=['POST'])
def to_kg():
    data = request.data.decode('utf-8')
    word = json.loads(data)['word']
    if word in wv_from_text.vocab:  # gensim 3.x: the loaded KeyedVectors object exposes .vocab directly (no .wv attribute)
        vec = wv_from_text[word]
        return json.dumps({"result": wv_from_text.most_similar(positive=[vec], topn=20)})
    else:
        return json.dumps({"result": "没找到"})  # "not found": the word is not in the vocabulary


if __name__ == '__main__':
    app.run('0.0.0.0', port=8020)
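
For reference, below is a minimal client sketch (not part of the commit) showing how the new endpoint can be called. It assumes the service above is running locally on port 8020 and uses the requests package, which is not listed in requirements.txt; the query word 感冒 ("common cold") is only an example.

# Hypothetical client example, not included in this commit.
import json
import requests  # assumed to be installed separately; not in requirements.txt

# POST a JSON body with a "word" field to the endpoint defined in src/tenxun.py.
resp = requests.post(
    "http://127.0.0.1:8020/api/tengxun",
    data=json.dumps({"word": "感冒"}).encode("utf-8"),
)
# The service answers with {"result": [...]} holding the 20 most similar words,
# or {"result": "没找到"} ("not found") if the word is not in the vocabulary.
print(resp.json()["result"])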