'''
 similar_images_AE.py (author: Anson Wong / git: ankonzoid)

 Image similarity recommender system using an autoencoder-clustering model.

 Autoencoder method:
  1) Train an autoencoder (simpleAE/convAE) on the training images in 'db'
  2) Save the trained autoencoder, encoder, and decoder to 'models'

 Clustering method:
  3) Using the trained encoder in 'models', encode the inventory images in 'db'
  4) Train a kNN model on the encoded inventory images
  5) Encode the query images in 'test', and predict their nearest neighbours
     using the trained kNN model
  6) Compute a score for each inventory encoding relative to the query
     encoding (centroid / all points)
  7) Make k recommendations by cloning the top-k inventory images into 'output'

 NOTE: as written, main() performs an experimental latent-space PCA analysis
 after encoding the inventory images and returns early, skipping steps 4-7.
'''
import sys, os, shutil
import numpy as np
sys.path.append("src")
from autoencoders.AE import AE
from clustering.KNN import KNearestNeighbours
from utilities.image_utilities import ImageUtils
from utilities.sorting import find_topk_unique
from utilities.plot_utilities import PlotUtils

import matplotlib.pyplot as plt

def main():
    # ========================================
    # Set run settings
    # ========================================

    # Choose autoencoder model
    #model_name = "simpleAE"
    model_name = "convAE"
    process_and_save_images = False  # image preproc: resize images and save?
    train_autoencoder = False  # train from scratch?

    # ========================================
    # Automated pre-processing
    # ========================================
    ###   Set flatten properties   ###
    if model_name == "simpleAE":
        flatten_before_encode = True
        flatten_after_encode = False
    elif model_name == "convAE":
        flatten_before_encode = False
        flatten_after_encode = True
    else:
        raise Exception("Invalid model name which is not simpleAE/convAE")

    img_shape = (100, 100)  # force resize -> (ypixels, xpixels)
    ratio_train_test = 0.8
    seed = 100

    loss = "binary_crossentropy"
    optimizer = "adam"
    n_epochs = 100
    batch_size = 128

    save_reconstruction_on_load_model = False


    ###   KNN training parameters   ###
    n_neighbors = 5  # number of nearest neighbours
    metric = "cosine"  # kNN metric (cosine only compatible with brute force)
    algorithm = "brute"  # search algorithm
    recommendation_method = 2  # 1 = centroid kNN, 2 = all points kNN
    output_mode = 1  # 1 = output plot, 2 = output inventory db image clones


    # ========================================
    # Generate expected file/folder paths and settings
    # ========================================
    # Assume project root directory to be directory of file
    project_root = os.path.dirname(__file__)
    print("Project root: {0}".format(project_root))

    # Query and answer folders
    query_dir = os.path.join(project_root, 'test')
    answer_dir = os.path.join(project_root, 'output')

    # Database folders
    db_dir = os.path.join(project_root, 'db')
    img_train_raw_dir = os.path.join(project_root, 'db-raw')
    img_inventory_raw_dir = os.path.join(project_root, 'db-raw')
    img_train_dir = db_dir
    img_inventory_dir = db_dir

    # Run output
    models_dir = os.path.join(project_root, 'models')

    # Set info file
    info = {
        # Run settings
        "img_shape": img_shape,
        "flatten_before_encode": flatten_before_encode,
        "flatten_after_encode": flatten_after_encode,

        # Directories
        "query_dir": query_dir,
        "answer_dir": answer_dir,

        "img_train_raw_dir": img_train_raw_dir,
        "img_inventory_raw_dir": img_inventory_raw_dir,
        "img_train_dir": img_train_dir,
        "img_inventory_dir": img_inventory_dir,

        # Run output
        "models_dir": models_dir
    }

    # Initialize image utilities and register the run settings
    IU = ImageUtils()
    IU.configure(info)

    # Initialize plot utilities
    PU = PlotUtils()

    # ========================================
    #
    # Pre-process save/load training and inventory images
    #
    # ========================================

    # Process and save
    if process_and_save_images:

        # Training images
        IU.raw2resized_load_save(raw_dir=img_train_raw_dir,
                                 processed_dir=img_train_dir,
                                 img_shape=img_shape)
        # Inventory images
        IU.raw2resized_load_save(raw_dir=img_inventory_raw_dir,
                                 processed_dir=img_inventory_dir,
                                 img_shape=img_shape)
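
        # raw2resized_load_save (src/utilities/image_utilities.py) is assumed
        # to read each raw image, resize it to img_shape, and save the result
        # under processed_dir; a rough PIL equivalent for one image
        # (raw_path/processed_path are hypothetical names for illustration):
        #
        #   from PIL import Image
        #   im = Image.open(raw_path)
        #   im = im.resize((img_shape[1], img_shape[0]))  # PIL takes (width, height)
        #   im.save(processed_path)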


    # ========================================
    #
    # Train autoencoder
    #
    # ========================================

    # Set up autoencoder base class
    MODEL = AE()

    MODEL.configure(model_name=model_name)

    if train_autoencoder:

        print("Training the autoencoder...")

        # Generate naming conventions
        dictfn = MODEL.generate_naming_conventions(model_name, models_dir)
        MODEL.start_report(dictfn)  # start report

        # Load training images to memory (resizes when necessary)
        x_data_all, all_filenames = \
            IU.raw2resizednorm_load(raw_dir=img_train_dir, img_shape=img_shape)
        print("\nAll data:")
        print(" x_data_all.shape = {0}\n".format(x_data_all.shape))

        # Split images to training and validation set
        x_data_train, x_data_test, index_train, index_test = \
            IU.split_train_test(x_data_all, ratio_train_test, seed)
        print("\nSplit data:")
        print("x_data_train.shape = {0}".format(x_data_train.shape))
        print("x_data_test.shape = {0}\n".format(x_data_test.shape))

        # Flatten data if necessary
        if flatten_before_encode:
            x_data_train = IU.flatten_img_data(x_data_train)
            x_data_test = IU.flatten_img_data(x_data_test)
            print("\nFlattened data:")
            print("x_data_train.shape = {0}".format(x_data_train.shape))
            print("x_data_test.shape = {0}\n".format(x_data_test.shape))

        # Set up architecture and compile model
        MODEL.set_arch(input_shape=x_data_train.shape[1:],
                       output_shape=x_data_train.shape[1:])
        MODEL.compile(loss=loss, optimizer=optimizer)
        MODEL.append_arch_report(dictfn)  # append to report

        # Train model
        MODEL.append_message_report(dictfn, "Start training")  # append to report
        MODEL.train(x_data_train, x_data_test,
                    n_epochs=n_epochs, batch_size=batch_size)
        MODEL.append_message_report(dictfn, "End training")  # append to report

        # Save model to file
        MODEL.save_model(dictfn)

        # Save reconstructions to file
        MODEL.plot_save_reconstruction(x_data_test, img_shape, dictfn, n_plot=10)

    else:

        # Generate naming conventions
        dictfn = MODEL.generate_naming_conventions(model_name, models_dir)

        # Load models
        MODEL.load_model(dictfn)
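
        # load_model is assumed to restore the Keras autoencoder, encoder,
        # and decoder from the files named in dictfn (presumably via
        # keras.models.load_model on each saved model; the actual
        # implementation lives in src/autoencoders/AE.py)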

        # Compile model
        MODEL.compile(loss=loss, optimizer=optimizer)

        # Save reconstructions to file
        if save_reconstruction_on_load_model:
            x_data_all, all_filenames = \
                IU.raw2resizednorm_load(raw_dir=img_train_dir, img_shape=img_shape)
            if flatten_before_encode:
                x_data_all = IU.flatten_img_data(x_data_all)
            MODEL.plot_save_reconstruction(x_data_all, img_shape, dictfn, n_plot=10)

    # ========================================
    #
    # Perform clustering recommendation
    #
    # ========================================

    # Load inventory images to memory (resizes when necessary)
    x_data_inventory, inventory_filenames = \
        IU.raw2resizednorm_load(raw_dir=img_inventory_dir, img_shape=img_shape)
    print("\nx_data_inventory.shape = {0}\n".format(x_data_inventory.shape))

    # Explicitly assign the loaded encoder
    encoder = MODEL.encoder

    # Encode our data, then flatten its encoding dimensions
    # We switch names for simplicity: inventory -> train, query -> test
    print("Encoding data and flattening its encoding dimensions...")
    if flatten_before_encode:  # Flatten the data before encoder prediction
        x_data_inventory = IU.flatten_img_data(x_data_inventory)

    x_train_kNN = encoder.predict(x_data_inventory)

    if flatten_after_encode:  # Flatten the data after encoder prediction
        x_train_kNN = IU.flatten_img_data(x_train_kNN)
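
    # For convAE the encoder output is a 4-D tensor, flattened here to
    # (N, 16); the encoding shape is evidently (2, 2, 4) for this model,
    # given the np.reshape in the latent-space analysis below. For simpleAE
    # the encoding is already flat.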
 
    print("\nx_train_kNN.shape = {0}\n".format(x_train_kNN.shape))
  
    # ========================================
    # Latent-space analysis (experimental):
    # PCA of the inventory encodings, then decode a chosen latent point.
    # NOTE: this block returns early, skipping the kNN recommendation below.
    # ========================================
    x_mean = np.mean(x_train_kNN, axis=0)
    x_stds = np.std(x_train_kNN, axis=0)
    x_cov = np.cov((x_train_kNN - x_mean).T)
    # Use eigh rather than eig: the covariance matrix is symmetric, so eigh
    # returns real eigenvalues (in ascending order) and real eigenvectors
    e, v = np.linalg.eigh(x_cov)

    # Save a bar plot of the eigenvalue spectrum (descending) to answer_dir
    e_sorted = np.sort(e)[::-1]
    plt.clf()
    plt.figure(figsize=(8, 4))
    plt.bar(np.arange(e_sorted.shape[0]), e_sorted, align='center')
    plt.savefig(os.path.join(answer_dir, "1.png"), bbox_inches='tight')

    # Decode a latent point: the mean encoding plus a linear combination of
    # eigenvectors with coefficients myev (all zeros here, so this decodes
    # the mean encoding itself)
    decoder = MODEL.decoder
    myev = np.zeros(16)
    x = x_mean + np.dot(v, myev)
    print("x.shape = {0}".format(x.shape))
    x = np.reshape(x, (2, 2, 4))  # back to the convAE encoding shape

    img = decoder.predict(np.expand_dims(x, axis=0))
    PU.save_image(np.squeeze(img, axis=0), os.path.join(answer_dir, "2.png"))
    return  # early exit: the kNN recommendation pipeline below is skipped

    # =================================
    # Train kNN model
    # =================================
    print("Performing kNN to locate nearby items to user centroid points...")
    EMB = KNearestNeighbours()  # initialize embedding kNN class
    EMB.compile(n_neighbors=n_neighbors, algorithm=algorithm, metric=metric)  # compile kNN model
    EMB.fit(x_train_kNN)  # fit kNN
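
    # The compile/fit/predict calls above are assumed to wrap
    # sklearn.neighbors.NearestNeighbors (consistent with the brute-force
    # and cosine settings); a minimal sketch of the equivalent direct usage:
    #
    #   from sklearn.neighbors import NearestNeighbors
    #   nbrs = NearestNeighbors(n_neighbors=n_neighbors,
    #                           algorithm=algorithm, metric=metric)
    #   nbrs.fit(x_train_kNN)
    #   distances, indices = nbrs.kneighbors(x_query)  # each (n_query, k)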


    # =================================
    # Perform kNN on query images
    # =================================

    # Read items in query folder
    print("Reading query images from query folder: {0}".format(query_dir))

    # Load query images to memory (resizes when necessary)
    x_data_query, query_filenames = \
        IU.raw2resizednorm_load(raw_dir=query_dir,
                                img_shape=img_shape)
    n_query = len(x_data_query)
    print("\nx_data_query.shape = {0}\n".format(x_data_query.shape))

    # Flatten the query images before encoding, if the model requires it
    if flatten_before_encode:
        x_data_query = IU.flatten_img_data(x_data_query)

    # Perform kNN on each query image
    for ind_query in range(n_query):

        # Encode query image (flattening afterwards if needed); the reshape
        # adds a batch dimension of 1 for encoder.predict
        newshape = (1,) + x_data_query[ind_query].shape
        x_query_i_use = x_data_query[ind_query].reshape(newshape)
        x_test_kNN = encoder.predict(x_query_i_use)
        query_filename = query_filenames[ind_query]

        name, tag = IU.extract_name_tag(query_filename)  # extract name and tag
        print("({0}/{1}) Performing kNN on query '{2}'...".format(ind_query+1, n_query, name))

        if flatten_after_encode:  # Flatten the data after encoder prediction
            x_test_kNN = IU.flatten_img_data(x_test_kNN)

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Compute distances and indices for recommendation
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        if recommendation_method == 1:  # kNN on the centroid of the query encodings

            # Compute the centroid of the query encoding vectors (equal weights)
            x_test_kNN_centroid = np.mean(x_test_kNN, axis=0)
            # Find the nearest neighbours to the centroid point
            distances, indices = EMB.predict(np.array([x_test_kNN_centroid]))

        elif recommendation_method == 2:  # kNN on all query encodings

            # Find the k nearest neighbours of each query encoding, then flatten the distances and indices
            distances, indices = EMB.predict(x_test_kNN)
            distances = distances.flatten()
            indices = indices.flatten()
            # Pick the k unique inventory indices with the shortest distances to any query encoding
            indices, distances = find_topk_unique(indices, distances, n_neighbors)
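
            # find_topk_unique (src/utilities/sorting.py) is assumed to keep
            # the n_neighbors unique inventory indices with the smallest
            # distances, returned as (1, k) row arrays to match the centroid
            # branch; a rough numpy sketch of that behaviour:
            #
            #   order = np.argsort(distances)
            #   seen, keep = set(), []
            #   for j in order:
            #       if indices[j] not in seen:
            #           seen.add(indices[j])
            #           keep.append(j)
            #       if len(keep) == n_neighbors:
            #           break
            #   indices = indices[keep][None, :]
            #   distances = distances[keep][None, :]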

        else:
            raise Exception("Invalid method for making recommendations")


        print("  x_test_kNN.shape = {0}".format(x_test_kNN.shape))
        print("  distances = {0}".format(distances))
        print("  indices = {0}\n".format(indices))

        # =============================================
        #
        # Output results
        #
        # =============================================
        if output_mode == 1:

            result_filename = os.path.join(answer_dir, "result_" + name + ".png")

            # Reshape to (n, ypixels, xpixels, channels); assumes RGB images
            x_query_plot = x_data_query[ind_query].reshape((-1, img_shape[0], img_shape[1], 3))
            x_answer_plot = x_data_inventory[indices].reshape((-1, img_shape[0], img_shape[1], 3))
            PU.plot_query_answer(x_query=x_query_plot,
                                 x_answer=x_answer_plot,
                                 filename=result_filename)

        elif output_mode == 2:

            # Make k recommendations by cloning the most similar inventory images into the answer folder
            print("Cloning k-recommended inventory images to answer folder '{0}'...".format(answer_dir))
            for i, (index, distance) in enumerate(zip(indices, distances)):
                print("\n({0}): index = {1}".format(i, index))
                print("({0}): distance = {1}\n".format(i, distance))

                for k_rec, ind in enumerate(index):

                    # Extract inventory filename
                    inventory_filename = inventory_filenames[ind]

                    # Extract answer filename
                    name, tag = IU.extract_name_tag(inventory_filename)
                    answer_filename = os.path.join(answer_dir, name + '.' + tag)

                    print("Cloning '{0}' to answer directory...".format(inventory_filename))
                    shutil.copy(inventory_filename, answer_filename)

        else:
            raise Exception("Invalid output mode given!")

# Driver
if __name__ == "__main__":
    main()