From 15f68f7fd5bf31ed1aa07330c792a4c99a4425f6 Mon Sep 17 00:00:00 2001 From: divya-nk Date: Thu, 15 Mar 2018 08:26:55 -0400 Subject: [PATCH 1/7] modified acc to own dataset --- voc_conversion_scripts/convert_to_hdf5.py | 174 ++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 voc_conversion_scripts/convert_to_hdf5.py diff --git a/voc_conversion_scripts/convert_to_hdf5.py b/voc_conversion_scripts/convert_to_hdf5.py new file mode 100644 index 0000000..0b15e64 --- /dev/null +++ b/voc_conversion_scripts/convert_to_hdf5.py @@ -0,0 +1,174 @@ +''' +made few changes according to my dataset + +Also included code to prepare individual text files (for train, val and test) that includes the list of files names +and store it in 'ImageSets/....txt' + +No need of separate dataset for 'val' as train-val split is implemented in the yolo_retrain.py +''' + + + + +import argparse +import os +import xml.etree.ElementTree as ElementTree + +import h5py +import numpy as np + +train_set = 'train' +#val_set = 'val' +test_set = 'test' + +classes = ["lable-1", "lable-2"] + +parser = argparse.ArgumentParser( + description='Convert object detection phase-I dataset to HDF5.') +parser.add_argument( + '-p', + '--path_to_data', + help='path to Images', + default='data') + + +def get_boxes_for_id(data_path, dataset, image_id): + """Get object bounding boxes annotations for given image. + + Parameters + ---------- + data_path : str + Path to data directory. + dataset : str + Folder name for train, test or val + image_id : str + File name for given image. + + Returns + ------- + boxes : array of int + bounding box annotations of class label, xmin, ymin, xmax, ymax as a + 5xN array. + """ + fname = os.path.join(data_path, 'Annotations/{}/{}.txt'.format(dataset,image_id)) + with open(fname) as in_file: + xml_tree = ElementTree.parse(in_file) + root = xml_tree.getroot() + boxes = [] + for obj in root.iter('object'): + label = obj.find('name').text + xml_box = obj.find('bndbox') + bbox = (classes.index(label), int(xml_box.find('xmin').text), + int(xml_box.find('ymin').text), int(xml_box.find('xmax').text), + int(xml_box.find('ymax').text)) + boxes.extend(bbox) + + return np.array(boxes) + +def get_image_for_id(data_path, dataset, image_id): + """Get image data as uint8 array for given image. + + Parameters + ---------- + data_path : str + Path to data directory. + dataset : str + Folder name for - train, test or val + image_id : str + File name for given image. + + Returns + ------- + image_data : array of uint8 + Compressed PNG byte string represented as array of uint8. + """ + fname = os.path.join(data_path, 'PNGImages/{}/{}.png'.format(dataset,image_id)) + with open(fname, 'rb') as in_file: + data = in_file.read() + # Use of encoding based on: https://github.com/h5py/h5py/issues/745 + return np.fromstring(data, dtype='uint8') + +def get_ids(data_path, dataset): + """Get image identifiers for corresponding list of dataset identifies. + + Parameters + ---------- + data_path : str + Path to data directory. + dataset : train, test or val + + Returns + ------- + ids : list of str + List of all image identifiers for given datasets. + """ + + #writes file names in txt files + for subdir,dirs,files in os.walk(data_path+os.sep+'PNGImages'+os.sep+dataset): + for file in files: + with open(data_path+os.sep+'ImageSets'+os.sep+dataset+'.txt', 'a') as f: + f.write(os.path.splitext(file)[0]+'\n') + #print('done') + + ids = [] + id_file = os.path.join(data_path, 'ImageSets/{}.txt'.format(dataset)) + print(id_file) + with open(id_file, 'r') as image_ids: + ids.extend(map(str.strip, image_ids.readlines())) + return ids + +def add_to_dataset(data_path, dataset, ids, images, boxes, start=0): + """Process all given ids and adds them to given datasets.""" + for i, img_id in enumerate(ids): + image_data = get_image_for_id(data_path, dataset, img_id) + image_boxes = get_boxes_for_id(data_path, dataset, img_id) + images[start + i] = image_data + boxes[start + i] = image_boxes + return i + +def _main(args): + data_path = os.path.expanduser(args.path_to_data) + train_ids = get_ids(data_path, train_set) + #val_ids = get_ids(data_path, val_set) + test_ids = get_ids(data_path, test_set) + + #Create HDF5 dataset structure + print('Creating HDF5 dataset structure...') + fname = os.path.join(data_path, 'phaseI-dataset.hdf5') + phaseI_h5file = h5py.File(fname, 'w') + uint8_dt = h5py.special_dtype(vlen=np.dtype('uint8')) #variable length uint8 + uint16_dt = h5py.special_dtype(vlen=np.dtype('uint16')) # included uint16 as coordinates of bounding boxes are > 255 + vlen_int_dt = h5py.special_dtype(vlen=np.dtype(int)) #variable lenght int + train_group = phaseI_h5file.create_group('train') + #val_group = phaseI_h5file.create_group('val') + test_group = phaseI_h5file.create_group('test') + + #store class list for reference class ids as csv fixed-length numpy string + phaseI_h5file.attrs['classes'] = np.string_(str.join(',', classes)) + + #store images as variable length uint8 array + train_images = train_group.create_dataset('images', shape=(len(train_ids), ), dtype=uint8_dt) + #val_images = val_group.create_dataset('images', shape=(len(val_ids), ), dtype=uint8_dt) + test_images = test_group.create_dataset('images', shape=(len(test_ids), ), dtype=uint8_dt) + + #store boxes as class_id, xmin, ymin, xmax, ymax + train_boxes = train_group.create_dataset('boxes', shape=(len(train_ids), ), dtype=uint16_dt) + #val_boxes = val_group.create_dataset('boxes', shape=(len(val_ids), ), dtype=uint16_dt) + test_boxes = test_group.create_dataset('boxes', shape=(len(test_ids), ), dtype=uint16_dt) + + #process all ids and add to datasets + print('Processing Phase I datasets for training set.') + add_to_dataset(data_path, train_set, train_ids, train_images, train_boxes) + #print('Processing Phase I datasets for val set.') + #add_to_dataset(data_path, val_set, val_ids, val_images, val_boxes) + print('Processing Phase I datasets for test set.') + add_to_dataset(data_path, test_set, test_ids, test_images, test_boxes) + + print('Closing HDF5 file.') + phaseI_h5file.close() + print('Done!') + +if __name__=='__main__': + _main(parser.parse_args()) + + From 059acc981d1a00ffee94ed1026ca4a4b87bf592a Mon Sep 17 00:00:00 2001 From: divya-nk Date: Thu, 15 Mar 2018 08:27:58 -0400 Subject: [PATCH 2/7] updated docstring --- voc_conversion_scripts/convert_to_hdf5.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/voc_conversion_scripts/convert_to_hdf5.py b/voc_conversion_scripts/convert_to_hdf5.py index 0b15e64..affe519 100644 --- a/voc_conversion_scripts/convert_to_hdf5.py +++ b/voc_conversion_scripts/convert_to_hdf5.py @@ -7,9 +7,6 @@ No need of separate dataset for 'val' as train-val split is implemented in the yolo_retrain.py ''' - - - import argparse import os import xml.etree.ElementTree as ElementTree From e5376a9755621af0ec47a77a7d7012dc4f0af09b Mon Sep 17 00:00:00 2001 From: divya-nk Date: Thu, 15 Mar 2018 08:41:32 -0400 Subject: [PATCH 3/7] modified retrain_yolo to work with .hdf5 input --- myds_retrain.py | 320 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 320 insertions(+) create mode 100644 myds_retrain.py diff --git a/myds_retrain.py b/myds_retrain.py new file mode 100644 index 0000000..c5f1625 --- /dev/null +++ b/myds_retrain.py @@ -0,0 +1,320 @@ +#! /usr/bin/env python + +import argparse +import io +import os +import matplotlib +matplotlib.use('agg') +import h5py +import matplotlib.pyplot as plt +import numpy as np +import PIL +import tensorflow as tf +from keras import backend as K +from keras.layers import Input, Lambda, Conv2D +from keras.models import Model, load_model +from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping + +from yad2k.models.keras_yolo import (preprocess_true_boxes, yolo_body, + yolo_eval, yolo_head, yolo_loss) +from yad2k.utils.draw_boxes import draw_boxes + +YOLO_ANCHORS = np.array( + ((0.57273, 0.677385), (1.87446, 2.06253), (3.33843, 5.47434), + (7.88282, 3.52778), (9.77052, 9.16828))) + +argparser = argparse.ArgumentParser( + description='Train YOLO_v2 model to labelled dataset.') + +argparser.add_argument( + '-d', + '--data_path', + help='path to HDF5 file containing pascal voc dataset', + default='data/phaseI-dataset.hdf5') + +argparser.add_argument( + '-a', + '--anchors_path', + help='path to anchors file, defaults to yolo_anchors.txt', + default='model_data/yolo_anchors.txt') + +argparser.add_argument( + '-c', + '--classes_path', + help='path to classes file, defaults to labels.txt', + default='model_data/labels.txt') + +def _main(args): + data_path = os.path.expanduser(args.data_path) + classes_path = os.path.expanduser(args.classes_path) + anchors_path = os.path.expanduser(args.anchors_path) + + with open(classes_path) as f: + class_names = f.readlines() + class_names = [c.strip() for c in class_names] + + if os.path.isfile(anchors_path): + with open(anchors_path) as f: + anchors = f.readline() + anchors = [float(x) for x in anchors.split(',')] + anchors = np.array(anchors).reshape(-1, 2) + else: + anchors = YOLO_ANCHORS + + data = h5py.File(data_path, 'r') + + #Pre-processing data + boxes_list, image_data_list = get_preprocessed_data(data) + detectors_mask, matching_true_boxes = get_detector_mask(boxes_list, anchors) + + + #Create model + model_body, model = create_model(anchors, class_names, load_pretrained=True, freeze_body=False) + + #train model + train(model, class_names, anchors, image_data_list, boxes_list, detectors_mask, matching_true_boxes) + + draw(model_body, class_names, anchors, image_data_list, image_set='val', # assumes training/validation split is 0.9 + weights_name='trained_stage_3_best.h5', + save_all=False) + +def get_preprocessed_data(data): + image_list = [] + boxes_list = [] + image_data_list = [] + processed_box_data = [] + + # boxes processing + box_dataset = data['train/boxes'] + processed_box_data = boxprocessing(box_dataset) + processed_box_data = processed_box_data.reshape(len(box_dataset),4,5) + + for i in range(len(box_dataset)): + image = PIL.Image.open(io.BytesIO(data['train/images'][i])) + orig_size = np.array([image.width, image.height]) + orig_size = np.expand_dims(orig_size, axis=0) + + #Image preprocessing + image = image.resize((416,416), PIL.Image.BICUBIC) + image_data = np.array(image, dtype=np.float) + image_data /= 255.0 + image_data.resize((image_data.shape[0], image_data.shape[1], 1)) + image_data = np.repeat(image_data, 3, 2) + image_list.append(image) + image_data_list.append(image_data) + + #Box preprocessing + boxes = processed_box_data[i] + + #Get box parameters as x_center, y_center, box_width, box_height, class + boxes_xy = 0.5 * (boxes[:, 3:5] + boxes[:, 1:3]) + boxes_wh = boxes[:, 3:5] - boxes[:, 1:3] + boxes_xy = boxes_xy / orig_size + boxes_wh = boxes_wh / orig_size + boxes = np.concatenate((boxes_xy, boxes_wh, boxes[:, 0:1]), axis=1) + boxes_list.append(boxes) + + boxes_list = np.array(boxes_list, float) + image_data_list = np.array(image_data_list, dtype=np.float) + + return np.array(boxes_list, float), np.array(image_data_list, dtype=np.float) + + + +def get_detector_mask(boxes_list, anchors): + ''' + Precompute detectors_mask and matching_true_boxes for training. + Detectors mask is 1 for each spatial position in the final conv layer and + anchor that should be active for the given boxes and 0 otherwise. + Matching true boxes gives the regression targets for the ground truth box + that caused a detector to be active or 0 otherwise. + ''' + detectors_mask = [0 for i in range(len(boxes_list))] + matching_true_boxes = [0 for i in range(len(boxes_list))] + for i, box in enumerate(boxes_list): + detectors_mask[i], matching_true_boxes[i] = preprocess_true_boxes(box, anchors, [416, 416]) + + return np.array(detectors_mask), np.array(matching_true_boxes) + +def create_model(anchors, class_names, load_pretrained=True, freeze_body=True): + + detectors_mask_shape = (13, 13, 5, 1) + matching_boxes_shape = (13, 13, 5, 5) + + #Create model input layers + image_input = Input(shape=(416,416, 3)) + boxes_input = Input(shape=(None, 5)) + detectors_mask_input = Input(shape=detectors_mask_shape) + matching_boxes_input = Input(shape=matching_boxes_shape) + + #Create model body + yolo_model = yolo_body(image_input,len(anchors),len(class_names)) + topless_yolo = Model(yolo_model.input, yolo_model.layers[-2].output) + + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + session = tf.Session(config=config) + + if load_pretrained: + # Save topless yolo: + topless_yolo_path = os.path.join('model_data', 'yolo_topless.h5') + if not os.path.exists(topless_yolo_path): + print("CREATING TOPLESS WEIGHTS FILE") + yolo_path = os.path.join('model_data', 'yolo.h5') + model_body = load_model(yolo_path) + model_body = Model(model_body.inputs, model_body.layers[-2].output) + model_body.save_weights(topless_yolo_path) + topless_yolo.load_weights(topless_yolo_path) + + if freeze_body: + for layer in topless_yolo.layers: + layer.trainable = False + final_layer = Conv2D(len(anchors)*(5+len(class_names)), (1, 1), activation='linear')(topless_yolo.output) + + model_body = Model(image_input, final_layer) + + #model_body = Model(image_input, model_body.output) + + with tf.device('/cpu:0'): + model_loss = Lambda( + yolo_loss, + output_shape=(1,), + name='yolo_loss', + arguments={'anchors': anchors,'num_classes': len(class_names)})([ + model_body.output, boxes_input, + detectors_mask_input, matching_boxes_input]) + + model = Model( + [model_body.input, boxes_input, detectors_mask_input, + matching_boxes_input], model_loss) + + model.summary() + #stop + + return model_body, model + +def train(model, class_names, anchors, image_data, boxes, detectors_mask, matching_true_boxes, validation_split=0.1): + ''' + retrain/fine-tune the model + + logs training with tensorboard + + saves training weights in current directory + + best weights according to val_loss is saved as trained_stage_3_best.h5 + ''' + print('content of boxes') + #print(boxes[1]) + print(boxes.shape) + + + model.compile( + optimizer='adam', loss={ + 'yolo_loss': lambda y_true, y_pred: y_pred + }) # This is a hack to use the custom loss function in the last layer. + + + logging = TensorBoard() + checkpoint = ModelCheckpoint("trained_stage_3_best.h5", monitor='val_loss', + save_weights_only=True, save_best_only=True) + #early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=15, verbose=1, mode='auto') + + ''' + model.fit([image_data, boxes, detectors_mask, matching_true_boxes], + np.zeros(len(image_data)), + validation_split=validation_split, + batch_size=32, + epochs=5, + callbacks=[logging]) + model.save_weights('trained_stage_1.h5') + + + model_body, model = create_model(anchors, class_names, load_pretrained=True, freeze_body=True) + + #model.load_weights('trained_stage_1.h5') + + model.compile( + optimizer='adam', loss={ + 'yolo_loss': lambda y_true, y_pred: y_pred + }) # This is a hack to use the custom loss function in the last layer. + + + model.fit([image_data, boxes, detectors_mask, matching_true_boxes], + np.zeros(len(image_data)), + validation_split=0.1, + batch_size=8, + epochs=30, + callbacks=[logging]) + + model.save_weights('trained_stage_2.h5') + ''' + + model.fit([image_data, boxes, detectors_mask, matching_true_boxes], + np.zeros(len(image_data)), + validation_split=0.1, + batch_size=32, + epochs=500, + callbacks=[logging, checkpoint]) + + model.save_weights('trained_stage_3.h5') + +def draw(model_body, class_names, anchors, image_data, image_set='val', + weights_name='trained_stage_3_best.h5', out_path="output_images", save_all=True): + ''' + Draw bounding boxes on image data + ''' + if image_set == 'train': + image_data = np.array([np.expand_dims(image, axis=0) + for image in image_data[:int(len(image_data)*.9)]]) + elif image_set == 'val': + image_data = np.array([np.expand_dims(image, axis=0) + for image in image_data[int(len(image_data)*.9):]]) + elif image_set == 'all': + image_data = np.array([np.expand_dims(image, axis=0) + for image in image_data]) + else: + ValueError("draw argument image_set must be 'train', 'val', or 'all'") + + # model.load_weights(weights_name) + print(image_data.shape) + model_body.load_weights(weights_name) + + # Create output variables for prediction. + yolo_outputs = yolo_head(model_body.output, anchors, len(class_names)) + input_image_shape = K.placeholder(shape=(2, )) + boxes, scores, classes = yolo_eval( + yolo_outputs, input_image_shape, score_threshold=0.40, iou_threshold=0.0) + + # Run prediction + sess = K.get_session() # TODO: Remove dependence on Tensorflow session. + + if not os.path.exists(out_path): + os.makedirs(out_path) + for i in range(len(image_data)): + out_boxes, out_scores, out_classes = sess.run( + [boxes, scores, classes], + feed_dict={ + model_body.input: image_data[i], + input_image_shape: [image_data.shape[2], image_data.shape[3]], + K.learning_phase(): 0 + }) + print('Found {} boxes for image {}.'.format(len(out_boxes), str(i))) + print(out_boxes) + + # Plot image with predicted boxes. + image_with_boxes = draw_boxes(image_data[i][0], out_boxes, out_classes, + class_names, out_scores) + # Save the image: + if save_all or (len(out_boxes) > 0): + image = PIL.Image.fromarray(image_with_boxes) + image.save(os.path.join(out_path,str(i)+'.png')) + + # To display (pauses the program): + # plt.imshow(image_with_boxes, interpolation='nearest') + # plt.show() + +if __name__ == '__main__': + args = argparser.parse_args() + _main(args) + + From 27b950a25ac1b6538250c05fc34fbfd90cd3be26 Mon Sep 17 00:00:00 2001 From: divya-nk Date: Thu, 15 Mar 2018 08:42:40 -0400 Subject: [PATCH 4/7] incl fn to preprocesses multiple bbox --- myds_retrain.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/myds_retrain.py b/myds_retrain.py index c5f1625..71c67b7 100644 --- a/myds_retrain.py +++ b/myds_retrain.py @@ -119,7 +119,16 @@ def get_preprocessed_data(data): return np.array(boxes_list, float), np.array(image_data_list, dtype=np.float) - +def boxprocessing(box_data): + processed_box_data = [] + processed_box_data = np.array(processed_box_data) + + for i in range(len(box_data)): + z = np.zeros([1,20]) + y = np.append(box_data[i], z) + y = y[0:20] + processed_box_data = np.append(processed_box_data, y) + return processed_box_data def get_detector_mask(boxes_list, anchors): ''' From d89cd51d3ff474748ef9c52e8c2d9b940a75ab4f Mon Sep 17 00:00:00 2001 From: divya-nk Date: Thu, 15 Mar 2018 08:51:30 -0400 Subject: [PATCH 5/7] incl docstring --- myds_retrain.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/myds_retrain.py b/myds_retrain.py index 71c67b7..feef0ec 100644 --- a/myds_retrain.py +++ b/myds_retrain.py @@ -29,7 +29,7 @@ argparser.add_argument( '-d', '--data_path', - help='path to HDF5 file containing pascal voc dataset', + help='path to HDF5 file containing own dataset', default='data/phaseI-dataset.hdf5') argparser.add_argument( @@ -79,6 +79,10 @@ def _main(args): save_all=False) def get_preprocessed_data(data): + ''' + function to preprocess hdf5 data + borrowed code from train_overfit and retrain_yolo and modified to suit my input dataset type (hdf5) + ''' image_list = [] boxes_list = [] image_data_list = [] @@ -120,13 +124,14 @@ def get_preprocessed_data(data): return np.array(boxes_list, float), np.array(image_data_list, dtype=np.float) def boxprocessing(box_data): + #function assumes that there are a maximum of 4 bbox in an image processed_box_data = [] processed_box_data = np.array(processed_box_data) for i in range(len(box_data)): - z = np.zeros([1,20]) + z = np.zeros([1,20]) #change here, multiple of 5 - for more bbox y = np.append(box_data[i], z) - y = y[0:20] + y = y[0:20] # also here processed_box_data = np.append(processed_box_data, y) return processed_box_data @@ -198,7 +203,6 @@ def create_model(anchors, class_names, load_pretrained=True, freeze_body=True): matching_boxes_input], model_loss) model.summary() - #stop return model_body, model @@ -212,10 +216,6 @@ def train(model, class_names, anchors, image_data, boxes, detectors_mask, matchi best weights according to val_loss is saved as trained_stage_3_best.h5 ''' - print('content of boxes') - #print(boxes[1]) - print(boxes.shape) - model.compile( optimizer='adam', loss={ @@ -226,9 +226,10 @@ def train(model, class_names, anchors, image_data, boxes, detectors_mask, matchi logging = TensorBoard() checkpoint = ModelCheckpoint("trained_stage_3_best.h5", monitor='val_loss', save_weights_only=True, save_best_only=True) + + #uncomment following line to implement early stopping #early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=15, verbose=1, mode='auto') - ''' model.fit([image_data, boxes, detectors_mask, matching_true_boxes], np.zeros(len(image_data)), validation_split=validation_split, @@ -256,7 +257,6 @@ def train(model, class_names, anchors, image_data, boxes, detectors_mask, matchi callbacks=[logging]) model.save_weights('trained_stage_2.h5') - ''' model.fit([image_data, boxes, detectors_mask, matching_true_boxes], np.zeros(len(image_data)), @@ -285,7 +285,6 @@ def draw(model_body, class_names, anchors, image_data, image_set='val', ValueError("draw argument image_set must be 'train', 'val', or 'all'") # model.load_weights(weights_name) - print(image_data.shape) model_body.load_weights(weights_name) # Create output variables for prediction. From 4ad07e7316cb6a7aa23f854a53f5a9feb9646e68 Mon Sep 17 00:00:00 2001 From: divya-nk Date: Thu, 15 Mar 2018 08:55:30 -0400 Subject: [PATCH 6/7] added labels.txt --- model_data/labels.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 model_data/labels.txt diff --git a/model_data/labels.txt b/model_data/labels.txt new file mode 100644 index 0000000..96ae080 --- /dev/null +++ b/model_data/labels.txt @@ -0,0 +1,2 @@ +lable-1 +lable-2 From 22353a0d065db26d34c37de38b9e93ea48a90f4f Mon Sep 17 00:00:00 2001 From: divya-nk Date: Thu, 15 Mar 2018 08:59:12 -0400 Subject: [PATCH 7/7] updated docstring with folder structure --- voc_conversion_scripts/convert_to_hdf5.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/voc_conversion_scripts/convert_to_hdf5.py b/voc_conversion_scripts/convert_to_hdf5.py index affe519..ad42d0a 100644 --- a/voc_conversion_scripts/convert_to_hdf5.py +++ b/voc_conversion_scripts/convert_to_hdf5.py @@ -1,5 +1,9 @@ ''' made few changes according to my dataset +Folder structure +data/Annotations - contains all annotations txt files +data/PNGImages - contains all PNG files +data/ImageSets - contains txt files with all files names listed (eg. train.txt, test.txt) Also included code to prepare individual text files (for train, val and test) that includes the list of files names and store it in 'ImageSets/....txt'