Showing
15 changed files
with
332 additions
and
0 deletions
final_code/Brixia_Regression.ipynb
0 → 100644
This diff could not be displayed because it is too large.
final_code/DB/1000186638823204855.jpg
0 → 100644
3.93 KB
final_code/DB/10005836788378209022.jpg
0 → 100644
5.76 KB
final_code/DB/10011454155587105152.jpg
0 → 100644
3.83 KB
final_code/DB/10015354220486554048.jpg
0 → 100644
4.17 KB
final_code/DB/10026271850367430724.jpg
0 → 100644
4.17 KB
final_code/DB/10027044307414466695.jpg
0 → 100644
3.91 KB
final_code/DB/10027500604909952472.jpg
0 → 100644
5.19 KB
final_code/DB/10028581328861447555.jpg
0 → 100644
4.1 KB
final_code/DB/10030929591921881379.jpg
0 → 100644
4.16 KB
final_code/DB/10062027240959229488.jpg
0 → 100644
3.9 KB
final_code/cxr_dataset.py
0 → 100644
1 | +import pandas as pd | ||
2 | +import torch | ||
3 | +import numpy as np | ||
4 | +from torch.utils.data import Dataset | ||
5 | +import os | ||
6 | +from PIL import Image | ||
7 | + | ||
8 | + | ||
class CXRDataset(Dataset):
    """Dataset of chest X-ray images described by a label CSV.

    Three label files are supported, selected by ``fine_tune`` and
    ``regression``:

    * ``fine_tune=False``: ``nih_original_split.csv`` with the 14 finding
      columns listed in ``PRED_LABEL``.
    * ``fine_tune=True, regression=False``:
      ``brixia_split_classification.csv`` with the three ``Detector*``
      columns.
    * ``fine_tune=True, regression=True``: ``brixia_split_regression.csv``
      with a ``BrixiaScoreGlobal`` column.

    Args:
        path_to_images: Directory containing the image files named in the
            ``Image Index`` column.
        fold: Value of the ``fold`` column to keep, or ``'BBox'`` to
            inner-join the labels with ``BBox_List_2017.csv`` instead.
        transform: Optional callable applied to the RGB PIL image.
        transform_bb: Optional callable applied to the bounding box
            (only used when ``fold == 'BBox'``).
        finding: ``"any"`` to keep every row; otherwise keep only the rows
            matching this finding (see ``__init__`` for the exact filter).
        fine_tune: Use the Brixia label files instead of the NIH one.
        regression: With ``fine_tune``, use the regression label file.
        label_path: Directory containing the label CSV files.
    """

    def __init__(
            self,
            path_to_images,
            fold,
            transform=None,
            transform_bb=None,
            finding="any",
            fine_tune=False,
            regression=False,
            label_path="/content/gdrive/MyDrive/ColabNotebooks/brixia/labels"):

        self.transform = transform
        self.transform_bb = transform_bb
        self.path_to_images = path_to_images
        self.fold = fold
        self.fine_tune = fine_tune
        self.regression = regression

        # Pick the label file matching the training mode.
        if not fine_tune:
            split_file = "/nih_original_split.csv"
        elif not regression:
            split_file = "/brixia_split_classification.csv"
        else:
            split_file = "/brixia_split_regression.csv"
        self.df = pd.read_csv(label_path + split_file)

        if fold != 'BBox':
            self.df = self.df[self.df['fold'] == fold]
        else:
            # Keep only images that have a bounding-box annotation; the box
            # columns are appended to the right of the label columns.
            bbox_images_df = pd.read_csv(label_path + "/BBox_List_2017.csv")
            self.df = pd.merge(left=self.df, right=bbox_images_df, how="inner", on="Image Index")

        if not self.fine_tune:
            self.PRED_LABEL = [
                'Atelectasis',
                'Cardiomegaly',
                'Effusion',
                'Infiltration',
                'Mass',
                'Nodule',
                'Pneumonia',
                'Pneumothorax',
                'Consolidation',
                'Edema',
                'Emphysema',
                'Fibrosis',
                'Pleural_Thickening',
                'Hernia']
        else:
            self.PRED_LABEL = [
                'Detector01',
                'Detector2',
                'Detector3']

        # Optional filter for positive findings; useful for evaluation.
        # Regression fine-tuning is never filtered.
        if finding != "any":
            if not fine_tune:
                self.df = self.df[self.df['Finding Label'] == finding]
            elif not regression:
                self.df = self.df[self.df[finding] == 1]

        self.df = self.df.set_index("Image Index")

    def __len__(self):
        """Return the number of rows (images) left after filtering."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load one sample by positional index.

        Returns:
            ``(image, target, image_name)``, or
            ``(image, target, image_name, bounding_box)`` when
            ``fold == 'BBox'``. ``target`` is an int vector with one entry
            per ``PRED_LABEL`` in the classification modes and a 0-d
            float32 array holding ``BrixiaScoreGlobal`` in regression mode.
        """
        image_name = self.df.index[idx]

        # The context manager releases the file handle once the pixel data
        # has been copied by convert().
        with Image.open(os.path.join(self.path_to_images, image_name)) as raw_image:
            image = raw_image.convert('RGB')

        if not self.fine_tune:
            target = np.zeros(len(self.PRED_LABEL), dtype=int)
            for i, label_name in enumerate(self.PRED_LABEL):
                value = self.df[label_name.strip()].iloc[idx].astype('int')
                # can leave zero if zero, else copy the positive value
                if value > 0:
                    target[i] = value
        elif not self.regression:
            target = np.zeros(len(self.PRED_LABEL), dtype=int)
            for i, label_name in enumerate(self.PRED_LABEL):
                target[i] = self.df[label_name].iloc[idx]
        else:
            target = np.array(self.df['BrixiaScoreGlobal'].iloc[idx].astype('float32'))

        if self.transform:
            image = self.transform(image)

        if self.fold == "BBox":
            # The four box coordinates sit at fixed positions counted from
            # the right edge of the merged frame.
            bounding_box = self.df.iloc[idx, -7:-3].to_numpy()
            # Without a box transform, return the raw coordinates instead of
            # failing on an unassigned variable.
            if self.transform_bb:
                bounding_box = self.transform_bb(bounding_box)
            return image, target, image_name, bounding_box

        return image, target, image_name

    def pos_neg_balance_weights(self):
        """Return the negatives/positives ratio for every ``PRED_LABEL`` column.

        Returns:
            A float32 tensor with one ratio per label, placed on the CUDA
            device when one is available and on the CPU otherwise.

        Raises:
            ZeroDivisionError: If a label column contains no row equal to 1.
        """
        pos_neg_weights = []

        for label_name in self.PRED_LABEL:
            column = self.df[label_name.strip()]
            # int() keeps plain-Python division so that a label without
            # positives raises instead of silently producing inf.
            num_negatives = int((column == 0).sum())
            num_positives = int((column == 1).sum())
            pos_neg_weights.append(num_negatives / num_positives)

        target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        return torch.tensor(pos_neg_weights, dtype=torch.float32, device=target_device)
127 | + | ||
128 | + | ||
class RescaleBB(object):
    """Scale a 4-element bounding box from one image size to another.

    Every one of the four box values is multiplied by
    ``output_image_size / original_image_size``.

    Args:
        output_image_size (int): Size of the image after resizing.
        original_image_size: Size of the image the box was defined on.
    """

    def __init__(self, output_image_size, original_image_size):
        assert isinstance(output_image_size, int)
        self.original_image_size = original_image_size
        self.output_image_size = output_image_size

    def __call__(self, sample):
        """Return a new array holding the four values of ``sample`` rescaled.

        ``sample`` must be an array of shape ``(4,)``.
        """
        assert sample.shape == (4,)

        ratio = self.output_image_size / self.original_image_size
        # Build the result element by element so the output dtype is
        # inferred from the scaled scalars, not inherited from ``sample``.
        return np.array([coordinate * ratio for coordinate in sample])
150 | + | ||
class BrixiaScoreLocal:
    """Look up the per-image ``BrixiaScore`` stored in ``metadata_global_v2.csv``.

    The metadata file is read with ``;`` as separator and indexed by its
    ``Filename`` column.
    """

    def __init__(self, label_path):
        metadata = pd.read_csv(label_path + "/metadata_global_v2.csv", sep=";")
        self.data_brixia = metadata.set_index("Filename")

    def getScore(self, filename,print_score=False):
        """Return the score of ``filename`` as a list of single-digit ints.

        Args:
            filename: Image file name; a ``.jpg`` suffix is mapped to the
                ``.dcm`` name used in the metadata index.
            print_score: When true, also print the first six digits as a
                3x2 grid (digits 0-2 in the left column, 3-5 in the right).

        Returns:
            The digits of the score, left-padded with zeros to six digits.
        """
        dicom_name = filename.replace(".jpg", ".dcm")
        digits = self.data_brixia.loc[dicom_name, "BrixiaScore"].astype(str)
        # Leading zeros are lost when the score is stored as a number, so
        # restore them up to a width of six characters.
        missing = 6 - len(digits)
        digits = '0' * missing + digits
        if print_score:
            print('Brixia 6 regions Score: ')
            for row in range(3):
                print(digits[row], ' | ', digits[row + 3])
        return [int(digit) for digit in digits]
165 | + | ||
166 | + |
final_code/eval_model.py
0 → 100644
1 | +import torch | ||
2 | +import pandas as pd | ||
3 | +import cxr_dataset as CXR | ||
4 | +from torch.utils.data import Dataset, DataLoader | ||
5 | +import sklearn.metrics as sklm | ||
6 | +import numpy as np | ||
7 | + | ||
8 | + | ||
9 | +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") | ||
10 | + | ||
11 | + | ||
def make_pred_multilabel(dataloader, model, save_as_csv=False, fine_tune=False):
    """
    Gives predictions for every image in the dataloader and calculates
    per-label AUC and average precision using a previously trained model

    Args:
        dataloader: DataLoader whose dataset exposes ``PRED_LABEL`` and whose
            batches are ``(inputs, labels, image_names)`` triples
        model: trained network returning one logit per entry of ``PRED_LABEL``
        save_as_csv: when true, write ``results/preds.csv`` and ``results/aucs.csv``
        fine_tune: selects which label set is scored (the 14 NIH findings when
            false, the three ``Detector*`` labels when true)
    Returns:
        pred_df: dataframe containing the predicted probability of each label for each image
        auc_df: dataframe containing one row per scored label with its AUC and AP
    """
    import os  # local import: only needed to create the results directory

    label_names = list(dataloader.dataset.PRED_LABEL)

    # set model to eval mode; required for proper predictions given use of batchnorm
    model.train(False)

    # Rows are collected in plain lists and turned into dataframes once:
    # DataFrame.append no longer exists in pandas 2.x and was quadratic.
    pred_rows = []
    true_rows = []

    # no_grad avoids building the autograd graph during inference
    with torch.no_grad():
        for inputs, labels, image_names in dataloader:
            outputs = torch.sigmoid(model(inputs.to(device)))
            probs = outputs.cpu().numpy()
            true_labels = labels.cpu().numpy()

            # Use the names delivered with the batch instead of rebuilding
            # them from the batch position, which breaks under shuffling.
            for j, image_name in enumerate(image_names):
                thisrow = {"Image Index": image_name}
                truerow = {"Image Index": image_name}
                for k, label_name in enumerate(label_names):
                    thisrow["prob_" + label_name] = probs[j, k]
                    truerow[label_name] = true_labels[j, k]
                pred_rows.append(thisrow)
                true_rows.append(truerow)

    pred_df = pd.DataFrame(
        pred_rows, columns=["Image Index"] + ["prob_" + name for name in label_names])
    true_df = pd.DataFrame(true_rows, columns=["Image Index"] + label_names)

    if not fine_tune:
        scored_labels = [
            'Atelectasis',
            'Cardiomegaly',
            'Effusion',
            'Infiltration',
            'Mass',
            'Nodule',
            'Pneumonia',
            'Pneumothorax',
            'Consolidation',
            'Edema',
            'Emphysema',
            'Fibrosis',
            'Pleural_Thickening',
            'Hernia']
    else:
        scored_labels = [
            'Detector01',
            'Detector2',
            'Detector3']

    # calc AUCs
    auc_rows = []
    for column in true_df:
        if column not in scored_labels:
            continue
        actual = true_df[column].to_numpy().astype(int)
        pred = pred_df["prob_" + column].to_numpy()
        thisrow = {'label': column, 'auc': np.nan, 'AP': np.nan}
        try:
            thisrow['auc'] = sklm.roc_auc_score(actual, pred)
            thisrow['AP'] = sklm.average_precision_score(actual, pred)
        except ValueError:
            # e.g. only one class present for this label; keep NaN and go on
            print("can't calculate auc for " + str(column))
        auc_rows.append(thisrow)

    auc_df = pd.DataFrame(auc_rows, columns=["label", "auc", "AP"])

    if save_as_csv:
        os.makedirs("results", exist_ok=True)
        pred_df.to_csv("results/preds.csv", index=False)
        auc_df.to_csv("results/aucs.csv", index=False)

    return pred_df, auc_df
111 | + | ||
112 | + | ||
def evaluate_mae(dataloader, model):
    """
    Calculates MAE using previously trained model

    Args:
        dataloader: DataLoader yielding ``(inputs, ground_truths, image_names)``
            batches with one score per image
        model: trained network returning one predicted score per image
    Returns:
        ``(mae, true_df, pred_df)`` where the dataframes hold one row per
        image with its name and true / predicted score, or ``None`` when the
        MAE cannot be computed (for example for an empty dataloader)
    Raises:
        ValueError: if the model does not return exactly one value per image
    """

    # set model to eval mode; required for proper predictions given use of batchnorm
    model.train(False)

    # Rows are collected in plain lists and turned into dataframes once:
    # DataFrame.append no longer exists in pandas 2.x and was quadratic.
    pred_rows = []
    true_rows = []

    # no_grad avoids building the autograd graph during inference
    with torch.no_grad():
        for inputs, ground_truths, image_names in dataloader:
            true_scores = ground_truths.cpu().numpy().reshape(-1)
            # Flatten (batch, 1) outputs so every row stores a scalar
            # instead of a one-element array.
            preds = model(inputs.to(device)).cpu().numpy().reshape(-1)
            if preds.shape != true_scores.shape:
                raise ValueError(
                    "model must return exactly one score per image, got "
                    + str(preds.shape[0]) + " values for "
                    + str(true_scores.shape[0]) + " images")

            # Use the names delivered with the batch instead of rebuilding
            # them from the batch position, which breaks under shuffling.
            for image_name, pred, truth in zip(image_names, preds, true_scores):
                pred_rows.append({"Image Index": image_name, "pred_score": pred})
                true_rows.append({"Image Index": image_name, "true_score": truth})

    pred_df = pd.DataFrame(pred_rows, columns=["Image Index", "pred_score"])
    true_df = pd.DataFrame(true_rows, columns=["Image Index", "true_score"])

    # NOTE(review): the true scores are truncated to int before scoring, as
    # before; this assumes integer-valued ground truth -- confirm.
    actual = true_df["true_score"].to_numpy().astype(int)
    pred = pred_df["pred_score"].to_numpy()
    try:
        mae = sklm.mean_absolute_error(actual, pred)
    except ValueError:
        print("can't calculate mae")
        return None
    return mae, true_df, pred_df
166 | + |
final_code/model.py
0 → 100644
This diff is collapsed. Click to expand it.
final_code/model_l1.py
0 → 100644
This diff is collapsed. Click to expand it.
-
Please register or login to post a comment