KimJyun

Final code upload

cxr_dataset.py:

import pandas as pd
import torch
import numpy as np
from torch.utils.data import Dataset
import os
from PIL import Image


class CXRDataset(Dataset):
    """Chest X-ray dataset.

    Loads the NIH split by default, or the Brixia classification/regression
    split when fine_tune (and optionally regression) is set.
    """

    def __init__(
            self,
            path_to_images,
            fold,
            transform=None,
            transform_bb=None,
            finding="any",
            fine_tune=False,
            regression=False,
            label_path="/content/gdrive/MyDrive/ColabNotebooks/brixia/labels"):

        self.transform = transform
        self.transform_bb = transform_bb
        self.path_to_images = path_to_images
        if not fine_tune:
            self.df = pd.read_csv(label_path + "/nih_original_split.csv")
        elif fine_tune and not regression:
            self.df = pd.read_csv(label_path + "/brixia_split_classification.csv")
        else:
            self.df = pd.read_csv(label_path + "/brixia_split_regression.csv")
        self.fold = fold
        self.fine_tune = fine_tune
        self.regression = regression

        if not fold == 'BBox':
            self.df = self.df[self.df['fold'] == fold]
        else:
            # keep only the images that have bounding-box annotations
            bbox_images_df = pd.read_csv(label_path + "/BBox_List_2017.csv")
            self.df = pd.merge(left=self.df, right=bbox_images_df, how="inner", on="Image Index")

        if not self.fine_tune:
            self.PRED_LABEL = [
                'Atelectasis',
                'Cardiomegaly',
                'Effusion',
                'Infiltration',
                'Mass',
                'Nodule',
                'Pneumonia',
                'Pneumothorax',
                'Consolidation',
                'Edema',
                'Emphysema',
                'Fibrosis',
                'Pleural_Thickening',
                'Hernia']
        else:
            self.PRED_LABEL = [
                'Detector01',
                'Detector2',
                'Detector3']

        # can filter for positive findings of the given kind; useful for evaluation
        if not finding == "any" and not fine_tune:
            self.df = self.df[self.df['Finding Label'] == finding]
        elif not finding == "any" and fine_tune and not regression:
            self.df = self.df[self.df[finding] == 1]

        self.df = self.df.set_index("Image Index")

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):

        image = Image.open(
            os.path.join(
                self.path_to_images,
                self.df.index[idx]))
        image = image.convert('RGB')

        if not self.fine_tune:
            label = np.zeros(len(self.PRED_LABEL), dtype=int)
            for i in range(0, len(self.PRED_LABEL)):
                # leave the entry at zero unless the finding is positive
                if self.df[self.PRED_LABEL[i].strip()].iloc[idx].astype('int') > 0:
                    label[i] = self.df[self.PRED_LABEL[i].strip()].iloc[idx].astype('int')
        elif self.fine_tune and not self.regression:
            covid_label = np.zeros(len(self.PRED_LABEL), dtype=int)
            covid_label[0] = self.df['Detector01'].iloc[idx]
            covid_label[1] = self.df['Detector2'].iloc[idx]
            covid_label[2] = self.df['Detector3'].iloc[idx]
        else:
            ground_truth = np.array(self.df['BrixiaScoreGlobal'].iloc[idx].astype('float32'))

        if self.transform:
            image = self.transform(image)

        if self.fold == "BBox":
            # extract the bounding box coordinates from the dataframe; they are stored
            # in the columns selected below
            bounding_box = self.df.iloc[idx, -7:-3].to_numpy()

            if self.transform_bb:
                transformed_bounding_box = self.transform_bb(bounding_box)

            return image, label, self.df.index[idx], transformed_bounding_box
        elif self.fine_tune and not self.regression:
            return image, covid_label, self.df.index[idx]
        elif self.fine_tune and self.regression:
            return image, ground_truth, self.df.index[idx]
        else:
            return image, label, self.df.index[idx]

    def pos_neg_balance_weights(self):
        # per-class ratio of negative to positive examples, used to counter class imbalance
        pos_neg_weights = []

        for i in range(0, len(self.PRED_LABEL)):
            num_negatives = self.df[self.df[self.PRED_LABEL[i].strip()] == 0].shape[0]
            num_positives = self.df[self.df[self.PRED_LABEL[i].strip()] == 1].shape[0]

            pos_neg_weights.append(num_negatives / num_positives)

        pos_neg_weights = torch.Tensor(pos_neg_weights)
        pos_neg_weights = pos_neg_weights.cuda()
        pos_neg_weights = pos_neg_weights.type(torch.cuda.FloatTensor)
        return pos_neg_weights

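A minimal usage sketch for CXRDataset (not part of the commit): the transform pipeline, paths, and fold name below are assumptions, and pos_neg_balance_weights requires a CUDA device because it moves the weights to the GPU.

from torch.utils.data import DataLoader
from torchvision import transforms

# assumed ImageNet-style preprocessing; the transforms actually used for training are not shown here
example_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

train_dataset = CXRDataset(
    path_to_images="/path/to/nih/images",   # placeholder path
    fold="train",                            # assumes the split CSV labels the training fold "train"
    transform=example_transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=2)

# the per-class negative/positive ratios can be passed to BCEWithLogitsLoss as pos_weight
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=train_dataset.pos_neg_balance_weights())
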
class RescaleBB(object):
    """Rescale the bounding box in a sample to a given image size.

    Args:
        output_image_size (int): Side length of the resized image the box should match.
        original_image_size (int): Side length of the image the box coordinates refer to.
    """

    def __init__(self, output_image_size, original_image_size):
        assert isinstance(output_image_size, int)
        self.output_image_size = output_image_size
        self.original_image_size = original_image_size

    def __call__(self, sample):
        assert sample.shape == (4,)
        x, y, w, h = sample[0], sample[1], sample[2], sample[3]

        # scale x, y, w, h by the ratio of the output size to the original size
        scale_factor = self.output_image_size / self.original_image_size
        new_x, new_y, new_w, new_h = x * scale_factor, y * scale_factor, w * scale_factor, h * scale_factor
        transformed_sample = np.array([new_x, new_y, new_w, new_h])

        return transformed_sample

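A short sketch of wiring RescaleBB in as transform_bb; the 1024 x 1024 original size and 224 x 224 network input are assumptions, as is the image transform reused from the sketch above.

# bounding boxes in BBox_List_2017.csv are given in original-image pixel coordinates,
# so rescale them to match the resized network input
bbox_rescale = RescaleBB(output_image_size=224, original_image_size=1024)

bbox_dataset = CXRDataset(
    path_to_images="/path/to/nih/images",   # placeholder path
    fold="BBox",
    transform=example_transform,
    transform_bb=bbox_rescale)

image, label, name, box = bbox_dataset[0]    # box is [x, y, w, h] scaled by 224 / 1024
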
class BrixiaScoreLocal:
    """Look up the six regional Brixia scores for a given image."""

    def __init__(self, label_path):
        self.data_brixia = pd.read_csv(label_path + "/metadata_global_v2.csv", sep=";")
        self.data_brixia.set_index("Filename", inplace=True)

    def getScore(self, filename, print_score=False):
        score = self.data_brixia.loc[filename.replace(".jpg", ".dcm"), "BrixiaScore"].astype(str)
        # left-pad with zeros so the score always has six digits (one per lung region)
        score = '0' * (6 - len(score)) + score
        if print_score:
            print('Brixia 6 regions Score: ')
            print(score[0], ' | ', score[3])
            print(score[1], ' | ', score[4])
            print(score[2], ' | ', score[5])
        return list(map(int, score))

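BrixiaScoreLocal reads the Brixia metadata_global_v2.csv; a small lookup sketch (the filename below is a placeholder):

brixia_scores = BrixiaScoreLocal("/content/gdrive/MyDrive/ColabNotebooks/brixia/labels")
regional = brixia_scores.getScore("example_image.jpg", print_score=True)
# regional holds six integers, printed as two columns of three lung zones
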
Evaluation script (the commit's second file):

import torch
import pandas as pd
import cxr_dataset as CXR
from torch.utils.data import Dataset, DataLoader
import sklearn.metrics as sklm
import numpy as np


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def make_pred_multilabel(dataloader, model, save_as_csv=False, fine_tune=False):
    """
    Gives predictions for the test fold and calculates AUCs using a previously trained model.

    Args:
        dataloader: dataloader over the test fold of a CXRDataset
        model: densenet-121 from torchvision, previously fine-tuned to the training data
        save_as_csv: if True, write predictions and AUCs to the results/ directory
        fine_tune: if True, evaluate the Brixia detector labels instead of the NIH findings
    Returns:
        pred_df: dataframe containing individual predictions and ground truth for each test image
        auc_df: dataframe containing AUC and average precision per label
    """

    batch_size = dataloader.batch_size
    # set model to eval mode; required for proper predictions given use of batchnorm
    model.train(False)

    # create empty dfs
    pred_df = pd.DataFrame(columns=["Image Index"])
    true_df = pd.DataFrame(columns=["Image Index"])

    # iterate over dataloader
    for i, data in enumerate(dataloader):

        inputs, labels, _ = data
        inputs, labels = inputs.to(device), labels.to(device)

        true_labels = labels.cpu().data.numpy()

        outputs = model(inputs)
        outputs = torch.sigmoid(outputs)
        probs = outputs.cpu().data.numpy()

        # get predictions and true values for each item in batch
        for j in range(0, true_labels.shape[0]):
            thisrow = {}
            truerow = {}
            thisrow["Image Index"] = dataloader.dataset.df.index[batch_size * i + j]
            truerow["Image Index"] = dataloader.dataset.df.index[batch_size * i + j]

            # iterate over each entry in the prediction vector; each corresponds to an
            # individual label
            for k in range(len(dataloader.dataset.PRED_LABEL)):
                thisrow["prob_" + dataloader.dataset.PRED_LABEL[k]] = probs[j, k]
                truerow[dataloader.dataset.PRED_LABEL[k]] = true_labels[j, k]

            # DataFrame.append was removed in pandas 2.0; use concat instead
            pred_df = pd.concat([pred_df, pd.DataFrame([thisrow])], ignore_index=True)
            true_df = pd.concat([true_df, pd.DataFrame([truerow])], ignore_index=True)

    auc_df = pd.DataFrame(columns=["label", "auc"])

    # calc AUCs
    for column in true_df:

        if not fine_tune:
            if column not in [
                'Atelectasis',
                'Cardiomegaly',
                'Effusion',
                'Infiltration',
                'Mass',
                'Nodule',
                'Pneumonia',
                'Pneumothorax',
                'Consolidation',
                'Edema',
                'Emphysema',
                'Fibrosis',
                'Pleural_Thickening',
                    'Hernia']:
                continue
        else:
            if column not in [
                'Detector01',
                'Detector2',
                    'Detector3']:
                continue
        actual = true_df[column]
        pred = pred_df["prob_" + column]
        thisrow = {}
        thisrow['label'] = column
        thisrow['auc'] = np.nan
        thisrow['AP'] = np.nan
        try:
            thisrow['auc'] = sklm.roc_auc_score(actual.to_numpy().astype(int), pred.to_numpy())
            thisrow['AP'] = sklm.average_precision_score(actual.to_numpy().astype(int), pred.to_numpy())
        except BaseException:
            print("can't calculate auc for " + str(column))
        auc_df = pd.concat([auc_df, pd.DataFrame([thisrow])], ignore_index=True)

    if save_as_csv:
        pred_df.to_csv("results/preds.csv", index=False)
        auc_df.to_csv("results/aucs.csv", index=False)

    return pred_df, auc_df

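A minimal sketch of evaluating a trained classifier with make_pred_multilabel (not part of the commit): val_transform stands for the validation-time torchvision transforms, and the paths, fold name, and checkpoint file are placeholders.

import torchvision

val_dataset = CXR.CXRDataset(
    path_to_images="/path/to/nih/images",   # placeholder path
    fold="test",                             # assumes the split CSV labels the held-out fold "test"
    transform=val_transform)                 # val_transform: assumed validation-time transforms
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=2)

model = torchvision.models.densenet121()
model.classifier = torch.nn.Linear(model.classifier.in_features, len(val_dataset.PRED_LABEL))
model.load_state_dict(torch.load("results/checkpoint.pt", map_location=device))  # placeholder checkpoint
model = model.to(device)

pred_df, auc_df = make_pred_multilabel(val_loader, model, save_as_csv=False, fine_tune=False)
print(auc_df)
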
def evaluate_mae(dataloader, model):
    """
    Calculates the MAE of the global Brixia score using a previously trained model.

    Args:
        dataloader: dataloader over the test fold of the regression CXRDataset
        model: densenet-121 from torchvision, previously fine-tuned to the training data
    Returns:
        mae: mean absolute error, along with dataframes of true and predicted scores
    """

    # predictions are computed in batches of dataloader.batch_size; reduce it if your GPU has less RAM
    batch_size = dataloader.batch_size
    # set model to eval mode; required for proper predictions given use of batchnorm
    model.train(False)

    # create empty dfs
    pred_df = pd.DataFrame(columns=["Image Index"])
    true_df = pd.DataFrame(columns=["Image Index"])

    # iterate over dataloader
    for i, data in enumerate(dataloader):

        inputs, ground_truths, _ = data
        inputs, ground_truths = inputs.to(device), ground_truths.to(device)

        true_scores = ground_truths.cpu().data.numpy()

        outputs = model(inputs)
        preds = outputs.cpu().data.numpy()

        # record the predicted and true global score for each item in the batch
        for j in range(0, true_scores.shape[0]):
            thisrow = {}
            truerow = {}
            thisrow["Image Index"] = dataloader.dataset.df.index[batch_size * i + j]
            truerow["Image Index"] = dataloader.dataset.df.index[batch_size * i + j]

            thisrow["pred_score"] = preds[j]
            truerow["true_score"] = true_scores[j]

            # DataFrame.append was removed in pandas 2.0; use concat instead
            pred_df = pd.concat([pred_df, pd.DataFrame([thisrow])], ignore_index=True)
            true_df = pd.concat([true_df, pd.DataFrame([truerow])], ignore_index=True)

    actual = true_df["true_score"]
    pred = pred_df["pred_score"]
    try:
        mae = sklm.mean_absolute_error(actual.to_numpy().astype(int), pred.to_numpy())
        return mae, true_df, pred_df
    except BaseException:
        print("can't calculate mae")

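And a corresponding sketch for the regression setting, assuming reg_model is a network whose last layer outputs a single global Brixia score (again, paths and names are placeholders):

reg_dataset = CXR.CXRDataset(
    path_to_images="/path/to/brixia/images",  # placeholder path
    fold="test",
    transform=val_transform,                   # val_transform: assumed validation-time transforms
    fine_tune=True,
    regression=True)
reg_loader = DataLoader(reg_dataset, batch_size=16, shuffle=False, num_workers=2)

mae, true_df, pred_df = evaluate_mae(reg_loader, reg_model)  # reg_model: assumed single-output network
print("Test MAE:", mae)
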