forked from SpeechColab/Leaderboard
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupload_wav.py
46 lines (35 loc) · 1.47 KB
/
upload_wav.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env python3
# doc: https://docs.azure.cn/zh-cn/storage/blobs/storage-quickstart-blobs-python?tabs=managed-identity%2Croles-azure-portal%2Csign-in-azure-cli
import codecs
import os
import sys
import time
from azure.storage.blob import BlobServiceClient
# Presumably an upper bound on retry attempts; nothing in the visible part of
# this script reads it -- TODO confirm whether it is still needed.
MAX_RETRY = 10

# The Azure Storage connection string is taken from the first line of a file
# named 'CONNECTION_STRING', resolved relative to the current working
# directory.  This read happens at import time, so the file must exist even
# when the script is only imported.
with open('CONNECTION_STRING', mode='r') as conn_file:
    first_line = conn_file.readline()
# Drop the trailing newline and any surrounding whitespace.
CONNECTION_STRING = first_line.strip()
def parse_wav_scp(lines):
    """Build a ``{wav_id: wav_path}`` mapping from the lines of a wav scp list.

    Every line is split on whitespace: the first field is the wav id and the
    second field is the path of the wav file.  Any further fields are ignored.
    When the same id occurs more than once, the last path wins.

    Args:
        lines: Iterable of text lines (for example an open text file).

    Returns:
        A dict mapping each wav id to its wav path, in first-seen order.

    Raises:
        ValueError: If a non-blank line has fewer than two fields.
    """
    wav_dict = {}
    for line_no, line in enumerate(lines, start=1):
        fields = line.split()
        if not fields:
            # Blank (or whitespace-only) lines carry no entry; skip them
            # instead of failing on a missing field.
            continue
        if len(fields) < 2:
            raise ValueError(
                "line {line_no}: expected '<wav_id> <wav_path>', got {line!r}".format(
                    line_no=line_no, line=line
                )
            )
        wav_dict[fields[0]] = fields[1]
    return wav_dict


if __name__ == "__main__":
    # Usage: upload_wav.py <wav_in_scp> <container_name>
    if len(sys.argv) != 3:
        sys.stderr.write("upload_wav.py <wav_in_scp> <container_name>\n")
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under ``python -S``).
        sys.exit(-1)

    container_name = sys.argv[2]

    # Get meta.  The context manager closes the scp file as soon as it has
    # been parsed, rather than leaving it open for the rest of the run.
    with codecs.open(sys.argv[1], 'r', 'utf8') as wav_in_scp:
        wav_dict = parse_wav_scp(wav_in_scp)

    # Create the BlobServiceClient object
    print("Create container {container_name}".format(container_name=container_name))
    blob_service_client = BlobServiceClient.from_connection_string(CONNECTION_STRING)

    # Create the container
    blob_service_client.create_container(container_name)
    # NOTE(review): presumably gives the service time to finish creating the
    # container before the first upload -- confirm this delay is still needed.
    time.sleep(5)

    # Upload wav.  Each file is stored under its base name, addressed through
    # "<container_name>/wav".
    # NOTE(review): two scp entries whose paths share a base name map to the
    # same blob name -- verify the inputs cannot collide.
    for wav_id, wav_path in wav_dict.items():
        blob_client = blob_service_client.get_blob_client(
            container=container_name+"/wav",
            blob=os.path.basename(wav_path),
        )
        with open(file=wav_path, mode="rb") as data:
            blob_client.upload_blob(data)
        print("Uploaded: {wav_id}\t{wav_path}".format(wav_id=wav_id, wav_path=wav_path) )