upload train code

2025-12-06 06:36:49 +00:00 · 2020-02-17 16:17:23 +08:00
--- a/CTAI_model/cv/init.py
+++ b/CTAI_model/cv/init.py
--- a/CTAI_model/cv/get_ROI-all.py
+++ b/CTAI_model/cv/get_ROI-all.py
@@ -0,0 +1,114 @@
+import SimpleITK as sitk
+import cv2
+import numpy as np
+
+from data_set.make import get_train_files
+
+# When running on train, do not add the second 'train'
+train_data_path = '../data/train/train/'
+
+
+# train_data_path = '../data/CT/'
+
+
+def get_roi(path):
+    global w
+    file_name = path.split('/')[-3] + '-' + path.split('/')[-1].replace('.dcm', '')
+    image = sitk.ReadImage(path)
+    image = sitk.GetArrayFromImage(image)[0, :, :]
+    image[image < -300] = 0
+    image[image > 300] = 0
+    img_o = image.copy()
+    ROI = np.zeros(image.shape, np.uint8)
+    slices = [image]
+    img = slices[int(len(slices) / 2)].copy()
+    img = np.uint8(img)
+    # kernel = np.ones((3, 3), np.uint8)
+    # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
+    # img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
+    # img = cv2.dilate(img, kernel, iterations=1)
+
+    kernel = np.ones((4, 4), np.uint8)
+    img = cv2.dilate(img, kernel, iterations=1)
+
+    # 对图像进行阈值分割
+    ret, img = cv2.threshold(img, 150, 255, cv2.THRESH_BINARY_INV)
+    # 提取分割结果中的轮廓，并填充孔洞
+    im2, contours, x = cv2.findContours(img.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+    area = []
+    for c in contours:
+        area.append(cv2.contourArea(c))
+    cparea = area.copy()
+
+    area.sort(reverse=True)
+    ROI_tmp = np.zeros(img.shape, np.uint8)
+    # for i in range(1, 10):
+    #     # 选择最有可能的一个区域
+    #     ROI_tmp = np.zeros(img.shape, np.uint8)
+    #     max_idx = cparea.index(area[i])
+    #
+    #     # 强度匹配 因为直接映射到dcm上不用转uint8
+    #     cv2.drawContours(ROI_tmp, contours, max_idx, (255, 255, 255), -1)
+    #
+    #     index = np.nonzero(ROI_tmp)
+    #     mean = np.mean(img_o[index])
+    #     std = np.std(img_o[index])
+    #     # if mean > 90 or mean < 10 or std > 70 or std < 15:
+    #     #     continue
+    #
+    #     s = pd.Series(img_o[index])
+    #     piandu = s.skew()
+    #     fengdu = s.kurt()
+    #     # if piandu > 1 or fengdu < -5 or fengdu > 20:
+    #     #     continue
+    #
+    #     # 生成矩
+    #     M = cv2.moments(contours[max_idx])
+    #
+    #     # 面积周长
+    #     perimeter = cv2.arcLength(contours[max_idx], True)
+    #     # if area[i] > 2000 or area[i] < 500 or perimeter > 250 or perimeter < 80:
+    #     #     continue
+    #     if area[i] > 2000  or perimeter > 400 :
+    #         continue
+    #
+    #     # if area[i] > 4000 or area[i] < 500 or perimeter > 400 :
+    #     #     continue
+    #     #
+    #     # 椭圆拟合
+    #     # try:
+    #     #     (x, y), (MA, ma), angle = cv2.fitEllipse(contours[max_idx])
+    #     #     if ma - MA > 25:
+    #     #         continue
+    #     #     # ellipse.append(ma-MA)
+    #     # except:
+    #     #     continue
+    #
+    #
+    #     # img_o[]
+    #
+    #     # 加矩形框
+    #     x, y, w, h = cv2.boundingRect(contours[max_idx])
+    #     ROI = cv2.rectangle(ROI, (x, y), (x + w + 10, y + h + 10), (255, 255, 255), -1)
+    #
+    #     # 填充
+    #     # cv2.drawContours(ROI, contours, max_idx, (255, 255, 255), -1)
+
+    ROI_tmp[270:430, 200:300] = image[270:430, 200:300]
+
+    cv2.imshow("Image", image)
+    cv2.imshow("Image", ROI_tmp)
+    cv2.waitKey(0)
+    print(f"{train_data_path}ROI-{file_name}.png")
+    # cv2.imwrite(f"{train_data_path}ROI-{file_name}.png", ROI, [int(cv2.IMWRITE_PNG_COMPRESSION), 0])
+
+
+def main():
+    global w
+    dcm_files, _ = get_train_files(train_data_path, file_type='dcm', all=False)
+    for i in dcm_files:
+        get_roi(i)
+
+
+if __name__ == '__main__':
+    main()
--- a/CTAI_model/cv/get_ROI-one.py
+++ b/CTAI_model/cv/get_ROI-one.py
@@ -0,0 +1,68 @@
+import SimpleITK as sitk
+import cv2
+import numpy as np
+
+image = sitk.ReadImage('../data/train/train/10087.dcm')
+image = sitk.GetArrayFromImage(image)[0, :, :]
+
+image[image < -300] = 0
+image[image > 300] = 0
+
+ROI = np.zeros(image.shape, np.uint8)
+# 获取图像中的像素数据
+slices = [image]
+
+# 复制Dicom图像中的像素数据
+img = slices[int(len(slices) / 2)].copy()
+img = np.uint8(img)
+
+kernel = np.ones((4, 4), np.uint8)
+img = cv2.dilate(img, kernel, iterations=1)
+
+# 对图像进行阈值分割
+ret, img = cv2.threshold(img, 150, 255, cv2.THRESH_BINARY_INV)
+
+xxx = img
+# 提取分割结果中的轮廓，并填充孔洞
+im2, contours, x = cv2.findContours(img.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+# 需要反色处理一下  现在找的是白色的  应该是黑色的
+
+# mask = np.zeros(img.shape, np.uint8)
+# for contour in contours:
+#     cv2.fillPoly(mask, [contour], 255)
+# img[(mask > 0)] = 255
+
+
+area = []
+for c in contours:
+    area.append(cv2.contourArea(c))
+cparea = area.copy()
+
+area.sort(reverse=True)
+
+for i in range(3, 8):
+    max_idx = cparea.index(area[i])
+    perimeter = cv2.arcLength(contours[max_idx], True)
+    if area[i] > 5000 or perimeter > 500:
+        continue
+    print('周长', perimeter)
+
+    cv2.drawContours(ROI, contours, max_idx, (220, 20, 60), -1)
+
+# max_idx = cparea.index(area[3])
+# cv2.drawContours(ROI, contours, max_idx, (220, 20, 60), -1)
+# cv2.drawContours(ROI, contours, max_idx, (220, 20, 60), -1)
+
+
+# 对分割结果进行形态学的开操作
+# kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2))
+# img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
+
+# plt.figure(figsize=(10, 7))
+# plt.imshow(img, 'gray')
+# plt.title('Mask')
+# plt.show()
+
+
+cv2.imshow("Image", ROI)
+cv2.waitKey(0)
--- a/CTAI_model/cv/get_all_feature.py
+++ b/CTAI_model/cv/get_all_feature.py
@@ -0,0 +1,232 @@
+import sys
+
+sys.path.append("..")
+from data_set.make import get_person_files
+import cv2
+import numpy as np
+import SimpleITK as sitk
+import pandas as pd
+from numba import jit
+import inspect
+import csv
+
+np.set_printoptions(suppress=True)  # suppress scientific notation when printing; show plain decimals
+
+mask_data_path = '../data/all/d2/'
+
+# Using ID as the first field name makes xlsx report an error
+# Header used for the dict rows
+column_all = ['ID', '性别', '年龄', '阴性/阳性', 'area', 'perimeter', 'focus_x', 'focus_y', 'ellipse', 'mean', 'std', 'piandu',
+              'fengdu',
+              'small_grads_dominance',
+              'big_grads_dominance', 'gray_asymmetry', 'grads_asymmetry', 'energy', 'gray_mean', 'grads_mean',
+              'gray_variance', 'grads_variance', 'corelation', 'gray_entropy', 'grads_entropy', 'entropy', 'inertia',
+              'differ_moment']
+# Header actually written out
+column_all_c = ['ID', '性别', '年龄', '阴性/阳性', '面积', '周长', '重心x', '重心y', '近圆度', '灰度均值', '灰度方差', '灰度偏度',
+                '灰度峰态', '小梯度优势', '大梯度优势', '灰度分布不均匀性', '梯度分布不均匀性', '能量', '灰度平均', '梯度平均',
+                '灰度均方差', '梯度均方差', '相关', '灰度熵', '梯度熵', '混合熵', '惯性', '逆差矩']
+
+features_list = ['area', 'perimeter', 'focus_x', 'focus_y', 'ellipse', 'mean', 'std', 'piandu', 'fengdu',
+                 'small_grads_dominance',
+                 'big_grads_dominance', 'gray_asymmetry', 'grads_asymmetry', 'energy', 'gray_mean', 'grads_mean',
+                 'gray_variance', 'grads_variance', 'corelation', 'gray_entropy', 'grads_entropy', 'entropy', 'inertia',
+                 'differ_moment']
+
+
+# The last two: skewness, kurtosis
+
+
+# 获取变量的名
+def get_variable_name(variable):
+    callers_local_vars = inspect.currentframe().f_back.f_locals.items()
+    return [var_name for var_name, var_val in callers_local_vars if var_val is variable]
+
+
+def glcm(img_gray, ngrad=16, ngray=16):
+    '''Gray Level-Gradient Co-occurrence Matrix,取归一化后的灰度值、梯度值分别为16、16'''
+    # 利用sobel算子分别计算x-y方向上的梯度值
+    gsx = cv2.Sobel(img_gray, cv2.CV_64F, 1, 0, ksize=3)
+    gsy = cv2.Sobel(img_gray, cv2.CV_64F, 0, 1, ksize=3)
+    height, width = img_gray.shape
+    grad = (gsx ** 2 + gsy ** 2) ** 0.5  # 计算梯度值
+    grad = np.asarray(1.0 * grad * (ngrad - 1) / grad.max(), dtype=np.int16)
+    gray = np.asarray(1.0 * img_gray * (ngray - 1) / img_gray.max(), dtype=np.int16)  # 0-255变换为0-15
+    gray_grad = np.zeros([ngray, ngrad])  # 灰度梯度共生矩阵
+    for i in range(height):
+        for j in range(width):
+            gray_value = gray[i][j]
+            grad_value = grad[i][j]
+            gray_grad[gray_value][grad_value] += 1
+    gray_grad = 1.0 * gray_grad / (height * width)  # 归一化灰度梯度矩阵，减少计算量
+    get_glcm_features(gray_grad)
+
+
+@jit
+def get_gray_feature():
+    # 灰度特征提取算法
+    hist = cv2.calcHist([image_ROI_uint8[index]], [0], None, [256], [0, 256])
+
+    c_features['mean'].append(np.mean(image_ROI[index]))
+    c_features['std'].append(np.std(image_ROI[index]))
+
+    s = pd.Series(image_ROI[index])
+    c_features['piandu'].append(s.skew())
+    c_features['fengdu'].append(s.kurt())
+
+
+def get_glcm_features(mat):
+    '''根据灰度梯度共生矩阵计算纹理特征量，包括小梯度优势，大梯度优势，灰度分布不均匀性，梯度分布不均匀性，能量，灰度平均，梯度平均，
+    灰度方差，梯度方差，相关，灰度熵，梯度熵，混合熵，惯性，逆差矩'''
+    sum_mat = mat.sum()
+    small_grads_dominance = big_grads_dominance = gray_asymmetry = grads_asymmetry = energy = gray_mean = grads_mean = 0
+    gray_variance = grads_variance = corelation = gray_entropy = grads_entropy = entropy = inertia = differ_moment = 0
+    for i in range(mat.shape[0]):
+        gray_variance_temp = 0
+        for j in range(mat.shape[1]):
+            small_grads_dominance += mat[i][j] / ((j + 1) ** 2)
+            big_grads_dominance += mat[i][j] * j ** 2
+            energy += mat[i][j] ** 2
+            if mat[i].sum() != 0:
+                gray_entropy -= mat[i][j] * np.log(mat[i].sum())
+            if mat[:, j].sum() != 0:
+                grads_entropy -= mat[i][j] * np.log(mat[:, j].sum())
+            if mat[i][j] != 0:
+                entropy -= mat[i][j] * np.log(mat[i][j])
+                inertia += (i - j) ** 2 * np.log(mat[i][j])
+            differ_moment += mat[i][j] / (1 + (i - j) ** 2)
+            gray_variance_temp += mat[i][j] ** 0.5
+
+        gray_asymmetry += mat[i].sum() ** 2
+        gray_mean += i * mat[i].sum() ** 2
+        gray_variance += (i - gray_mean) ** 2 * gray_variance_temp
+    for j in range(mat.shape[1]):
+        grads_variance_temp = 0
+        for i in range(mat.shape[0]):
+            grads_variance_temp += mat[i][j] ** 0.5
+        grads_asymmetry += mat[:, j].sum() ** 2
+        grads_mean += j * mat[:, j].sum() ** 2
+        grads_variance += (j - grads_mean) ** 2 * grads_variance_temp
+    small_grads_dominance /= sum_mat
+    big_grads_dominance /= sum_mat
+    gray_asymmetry /= sum_mat
+    grads_asymmetry /= sum_mat
+    gray_variance = gray_variance ** 0.5
+    grads_variance = grads_variance ** 0.5
+    for i in range(mat.shape[0]):
+        for j in range(mat.shape[1]):
+            corelation += (i - gray_mean) * (j - grads_mean) * mat[i][j]
+    glgcm_features = [small_grads_dominance, big_grads_dominance, gray_asymmetry, grads_asymmetry, energy, gray_mean,
+                      grads_mean,
+                      gray_variance, grads_variance, corelation, gray_entropy, grads_entropy, entropy, inertia,
+                      differ_moment]
+    for i in range(len(glgcm_features)):
+        t = get_variable_name(glgcm_features[i])[0]
+        c_features[t].append(np.round(glgcm_features[i], 4))
+
+
+def get_geometry_feature():
+    # 形态特征  分割mask获得一些特征
+    im2, contours, x = cv2.findContours(mask_array.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+    tarea = []
+    tperimeter = []
+    for c in contours:
+        # 生成矩
+        try:
+            M = cv2.moments(c)
+            cx = int(M['m10'] / M['m00'])
+            cy = int(M['m01'] / M['m00'])
+            c_features['focus_x'].append(cx)
+            c_features['focus_y'].append(cy)
+        except:
+            print('error')
+
+        # 椭圆拟合
+        try:
+            (x, y), (MA, ma), angle = cv2.fitEllipse(c)
+            c_features['ellipse'].append((ma - MA))
+        except:
+            continue
+        # 面积周长
+        tarea.append(cv2.contourArea(c))
+        tperimeter.append(cv2.arcLength(c, True))
+
+    # 将mask里的最大值追加 有黑洞
+    try:
+        c_features['area'].append(max(tarea))
+        c_features['perimeter'].append(round(max(tperimeter), 4))
+    except:
+        print('area error')
+
+
+# 提取肿瘤特征
+def get_feature(image, mask):
+    global w
+    global image_ROI_uint8, index, image_ROI_mini, image_ROI, mask_array
+
+    mask_array = cv2.imread(mask, 0)
+    image = sitk.ReadImage(image)
+    image_arrary = sitk.GetArrayFromImage(image)[0, :, :]
+    # 映射到CT获得特征
+    image_ROI = np.zeros(shape=image_arrary.shape)
+    index = np.nonzero(mask_array)
+    if not index[0].any():
+        # c_features['no'] = True
+        return None
+    image_ROI[index] = image_arrary[index]
+    image_ROI_uint8 = np.uint8(image_ROI)
+    # 获得只有肿瘤的图片
+    x, y, w, h = cv2.boundingRect(mask_array)
+    image_ROI_mini = np.uint8(image_arrary[y:y + h, x:x + w])
+    w = image_ROI_mini
+
+    get_geometry_feature()
+
+    get_gray_feature()
+
+    glcm(image_ROI_mini, 15, 15)
+
+    return c_features
+
+
+def main():
+    global w
+    # 注意下面文件路径 格式
+    csv_file = open('../data/all/res.csv', 'w', encoding='gbk', newline='')
+    writer1 = csv.writer(csv_file)
+    writer1.writerow(column_all_c)
+    writer2 = csv.DictWriter(csv_file, column_all)
+
+    df = pd.read_csv('../data/all/临床数据.csv', encoding='gbk')
+
+    image = get_person_files('../data/all/d2/')
+    mask = get_person_files('../data/out/')
+    # image1 mask2
+
+    for i in range(len(image)):
+        person_id = image[i][0]
+        global c_features
+        c_features = {}
+        for k in features_list:
+            c_features[k] = []
+        if len(image[i][1]) != len(mask[i][2]):
+            print('文件有错')
+        for j in range(len(image[i][1])):
+            get_feature(image[i][1][j], mask[i][2][j])
+
+        for j in c_features:
+            if j == 'id':
+                continue
+            c_features[j] = np.round(np.mean(c_features[j]), 4)
+
+        person_info = df[df['ID'].isin([person_id])].to_dict('index').values()
+        person_info = list(person_info)[0]
+        person_info.update(c_features)
+        writer2.writerow(person_info)
+        print(person_info)
+
+    csv_file.close()
+
+
+if __name__ == '__main__':
+    main()