diff --git a/.gitignore b/.gitignore
index 0d7d1ca..85e22db 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,5 +13,6 @@ lora-**
 wandb
 todo.txt
 .vscode/
+*tmp*
 .DS_Store
-.idea
\ No newline at end of file
+.idea
diff --git a/README.md b/README.md
index b2f2c30..49bcd03 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,8 @@ lora-llama-med/
 bash ./scripts/infer.sh
 ```
 
+也可参考`./scripts/test.sh`
+
 ### 数据集构建
 我们采用了公开和自建的中文医学知识库,主要参考了[cMeKG](https://github.com/king-yyf/CMeKG_tools)。
 医学知识库围绕疾病、药物、检查指标等构建,字段包括并发症,高危因素,组织学检查,临床症状,药物治疗,辅助治疗等。知识库示例如下:
diff --git a/infer.py b/infer.py
index 89bd5fe..0f2ff21 100644
--- a/infer.py
+++ b/infer.py
@@ -31,7 +31,7 @@ def main(
     instruct_dir: str = "",
     use_lora: bool = True,
     lora_weights: str = "tloen/alpaca-lora-7b",
-    # The prompt template to use, will default to alpaca.
+    # The prompt template to use, will default to med_template.
     prompt_template: str = "med_template",
 ):
     prompter = Prompter(prompt_template)
@@ -111,9 +111,10 @@ def main(
         infer_from_json(instruct_dir)
     else:
         for instruction in [
-            "一位50岁女性出现不适、厌油腻、肝囊肿等症状,检查后发现为胆囊癌,并且病情十分严重,应该如何进行治疗?",
+            "我感冒了,怎么治疗",
             "一个患有肝衰竭综合征的病人,除了常见的临床表现外,还有哪些特殊的体征?",
             "急性阑尾炎和缺血性心脏病的多发群体有何不同?",
+            "小李最近出现了心动过速的症状,伴有轻度胸痛。体检发现P-R间期延长,伴有T波低平和ST段异常",
         ]:
             print("Instruction:", instruction)
             print("Response:", evaluate(instruction))
diff --git a/scripts/test.sh b/scripts/test.sh
new file mode 100644
index 0000000..c7747c9
--- /dev/null
+++ b/scripts/test.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+# If inferring with the llama model, set 'use_lora' to 'False' and 'prompt_template' to 'ori_template'.
+# If inferring with the default alpaca model, set 'use_lora' to 'True', 'lora_weights' to 'tloen/alpaca-lora-7b', and 'prompt_template' to 'alpaca'.
+# If inferring with the llama-med model, download the LORA weights and set 'lora_weights' to './lora-llama-med' (or the exact directory of LORA weights) and 'prompt_template' to 'med_template'.
+
+BASE_MODEL="decapoda-research/llama-7b-hf"
+
+# Ensure the output directory exists before redirecting into it
+# (it is untracked and matched by the new '*tmp*' .gitignore rule).
+mkdir -p infer_result
+
+# Plain llama (no LoRA weights)
+o_cmd="python infer.py \
+    --base_model ${BASE_MODEL} \
+    --use_lora False \
+    --prompt_template 'ori_template'"
+
+# Alpaca
+a_cmd="python infer.py \
+    --base_model ${BASE_MODEL} \
+    --use_lora True \
+    --lora_weights 'tloen/alpaca-lora-7b' \
+    --prompt_template 'alpaca'"
+
+# llama-med
+m_cmd="python infer.py \
+    --base_model ${BASE_MODEL} \
+    --use_lora True \
+    --lora_weights 'lora-llama-med' \
+    --prompt_template 'med_template'"
+
+echo "ori"
+eval $o_cmd > infer_result/o_tmp.txt
+echo "alpaca"
+eval $a_cmd > infer_result/a_tmp.txt
+echo "med"
+eval $m_cmd > infer_result/m_tmp.txt