-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathamaz_imagenetInspect.py
98 lines (80 loc) · 3.36 KB
/
amaz_imagenetInspect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import pickle
import glob
import os
import numpy as np
# NOTE(review): '__file__' is a quoted string literal here, not the module's
# __file__ attribute, so realpath() resolves it against the current working
# directory. `current` / `dir_path` therefore hold the working directory plus
# "/", not the directory containing this script -- confirm that is intended.
current = dir_path = os.path.dirname(os.path.realpath('__file__')) + "/"
import amaz_util as amaz_Util
import amaz_augumentation
from os import system
from PIL import Image
from bs4 import BeautifulSoup as Soup
import cv2
import os.path
from multiprocessing import Pool
class ImageNetInspector(object):
    """Sanity-check an ILSVRC (ImageNet CLS-LOC) directory tree.

    The inspector reads the image-set list files under ``ImageSets/CLS-LOC/``
    and compares them with what is on disk under ``Data/CLS-LOC/``. All
    results are reported with ``print``; the public methods return ``None``.
    """

    def __init__(self, base_path="/home/codenext2/Downloads/ILSVRC/"):
        """Set up the dataset paths.

        Args:
            base_path: Root directory of the ILSVRC tree. A trailing path
                separator is added if it is missing.
        """
        # The paths below are built by plain string concatenation, so the
        # root must end with a separator. Joining with "" appends one only
        # when it is missing and leaves the default value unchanged.
        self.trainImageBasePath = os.path.join(base_path, "")
        self.annotationsPath = self.trainImageBasePath + "Annotations/CLS-LOC/"
        self.dataPath = self.trainImageBasePath + "Data/CLS-LOC/"
        self.imgSetsPath = self.trainImageBasePath + "ImageSets/CLS-LOC/"
        self.final_dataset_file = "imagenet.pkl"
        self.utility = amaz_Util.Utility()
        self.category_num = 0
        self.meta = []

    @staticmethod
    def _read_image_keys(list_path):
        """Return the first whitespace-separated token of each non-blank line."""
        # `with` closes the handle; blank lines are skipped because
        # "".split()[0] would raise IndexError.
        with open(list_path, "r") as handle:
            return [line.split()[0] for line in handle if line.strip()]

    @staticmethod
    def _exists_mask(paths):
        """Return a boolean array marking which of ``paths`` exist on disk."""
        # An explicit dtype keeps the array boolean (and `~` valid) even
        # when `paths` is empty.
        return np.array([os.path.exists(path) for path in paths], dtype=bool)

    def arrangement(self):
        """Record the training categories and print the image-set sizes.

        Stores the entries of ``<dataPath>/train/`` in ``self.meta`` and their
        count in ``self.category_num``, then prints how many records the
        ``train_loc.txt`` and ``val.txt`` lists contain.
        """
        # Every entry directly under train/ is counted as one category.
        self.meta = os.listdir(self.dataPath + "train/")
        self.category_num = len(self.meta)
        print("category_num: ", self.category_num)

        trainImgs = self._read_image_keys(self.imgSetsPath + "train_loc.txt")
        valImgs = self._read_image_keys(self.imgSetsPath + "val.txt")
        print("trainLength:", len(trainImgs))
        print("valLength:", len(valImgs))

    def imgExistsTest(self):
        """Print which images listed in the image sets are missing on disk.

        Reads ``train_cls.txt`` and ``val.txt``, maps each key to
        ``<dataPath>/<split>/<key>.JPEG`` and prints, for each split, the
        number of files that do not exist followed by their paths.
        """
        trainImgs = self._read_image_keys(self.imgSetsPath + "train_cls.txt")
        print("train_cls Record Nubmer: ", len(trainImgs))
        valImgs = self._read_image_keys(self.imgSetsPath + "val.txt")
        print("val Record Nubmer: ", len(valImgs))

        trainImgPaths = [self.dataPath + "train/" + key + ".JPEG" for key in trainImgs]
        valImgPaths = [self.dataPath + "val/" + key + ".JPEG" for key in valImgs]

        trainCheck = self._exists_mask(trainImgPaths)
        print(trainCheck)
        # np.where(cond) returns a 1-tuple of index arrays. The count of
        # missing files is the length of its first element, not of the tuple.
        missing_train = np.where(~trainCheck)
        print(missing_train)
        print("number_of_nonexist_train:", len(missing_train[0]))
        print(" --- non exist path --- ")
        print([trainImgPaths[ind] for ind in missing_train[0]])

        valCheck = self._exists_mask(valImgPaths)
        missing_val = np.where(~valCheck)[0]
        print("number_of_nonexist_val:", len(missing_val))
        print(" --- non exist path --- ")
        print([valImgPaths[ind] for ind in missing_val])
# if __name__ == "__main__":
# imageexists test on train
# imageexists test on test