csv_downloader.py
2.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import sys
import time
import urllib.request
import pandas as pd
from modules.utils import bcolors as bc
OID_URL = 'https://storage.googleapis.com/openimages/2018_04/'
def TTV(csv_dir, name_file, args_y):
'''
Manage error_csv and read the correct .csv file.
:param csv_dir: folder of the .csv files
:param name_file: name of the correct .csv file
:return: None
'''
CSV = os.path.join(csv_dir, name_file)
error_csv(name_file, csv_dir, args_y)
df_val = pd.read_csv(CSV)
return df_val
def error_csv(file, csv_dir, args_y):
'''
Check the presence of the required .csv files.
:param file: .csv file missing
:param csv_dir: folder of the .csv files
:return: None
'''
if not os.path.isfile(os.path.join(csv_dir, file)):
print(bc.FAIL + "Missing the {} file.".format(os.path.basename(file)) + bc.ENDC)
if args_y:
ans = 'y'
print(bc.OKBLUE + "Automatic download." + bc.ENDC)
else:
ans = input(bc.OKBLUE + "Do you want to download the missing file? [Y/n] " + bc.ENDC)
if ans.lower() == 'y':
folder = str(os.path.basename(file)).split('-')[0]
if folder != 'class':
FILE_URL = str(OID_URL + folder + '/' + file)
else:
FILE_URL = str(OID_URL + file)
FILE_PATH = os.path.join(csv_dir, file)
save(FILE_URL, FILE_PATH)
print('\n' + bc.OKBLUE + "File {} downloaded into {}.".format(file, FILE_PATH) + bc.ENDC)
else:
exit(1)
def save(url, filename):
'''
Download the .csv file.
:param url: Google url for download .csv files
:param filename: .csv file name
:return: None
'''
urllib.request.urlretrieve(url, filename, reporthook)
def reporthook(count, block_size, total_size):
'''
Print the progression bar for the .csv file download.
:param count:
:param block_size:
:param total_size:
:return:
'''
global start_time
if count == 0:
start_time = time.time()
return
duration = time.time() - start_time
progress_size = int(count * block_size)
speed = int(progress_size / ((1024 * duration) + 1e-5))
percent = int(count * block_size * 100 / (total_size + 1e-5))
sys.stdout.write("\r...%d%%, %d MB, %d KB/s, %d seconds passed" %
(percent, progress_size / (1024 * 1024), speed, duration))
sys.stdout.flush()