-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
40 lines (28 loc) · 1.53 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
import subprocess
import time
"""Batch driver: run the diarized-transcription pipeline on every new input.

For each entry in ``input_files_path`` that is not yet recorded in the time
log, ``nemo+fasterwhisper.py`` is launched as a child process and the
wall-clock runtime is appended to the log. The log doubles as the record of
what has already been processed, so an interrupted run can simply be restarted.
"""

# os.environ['OMP_NUM_THREADS'] = '16'

# customize the input and output paths
input_files_path = '/uufs/chpc.utah.edu/common/HIPAA/IRB_00083132/studydata/'
output_files_path = '/uufs/chpc.utah.edu/common/HIPAA/IRB_00083132/new_diarized_transcription/'

# One "<file name>: <seconds>" line per successfully processed input.
TIMELOG_PATH = 'timelog.txt'
# Script executed for each input; resolved relative to the working directory.
PIPELINE_SCRIPT = 'nemo+fasterwhisper.py'


def load_processed_files(log_path=TIMELOG_PATH):
    """Return the set of file names already recorded in the time log.

    Args:
        log_path: Path of the log written by :func:`main`.

    Returns:
        A set with the name part of every "<name>: <seconds>" line. The set is
        empty when the log does not exist yet (first run).
    """
    try:
        log = open(log_path, 'r')
    except FileNotFoundError:
        # No log yet simply means nothing has been processed so far.
        return set()

    processed = set()
    with log:
        for line in log:
            # Split on the LAST colon so names that contain ':' stay intact;
            # the runtime written after it never contains one.
            name, sep, _ = line.rpartition(':')
            if sep:
                processed.add(name)
    return processed


def pending_audio_files(input_dir, processed):
    """Return the entries of *input_dir* not in *processed*, sorted by name.

    Args:
        input_dir: Directory holding the inputs.
        processed: Collection of entry names to skip.
    """
    # Alternative resume strategy that was tried: derive the processed names
    # from the contents of the 'txt' output directory instead of the log.
    # To run a single file, return a fixed list here, e.g. ['16035'].
    return sorted(name for name in os.listdir(input_dir) if name not in processed)


def build_command(audio_file, input_dir=input_files_path, output_dir=output_files_path):
    """Build the argument list that runs the pipeline on one input.

    The file extension (if any) is dropped from *audio_file* for the input
    path and for both output paths, exactly as the command line was built
    before.

    Args:
        audio_file: Entry name as returned by ``os.listdir``.
        input_dir: Directory holding the inputs.
        output_dir: Directory containing the 'txt' and 'srt' sub-directories.

    Returns:
        A list of arguments suitable for ``subprocess.run`` (no shell).
    """
    stem = os.path.splitext(audio_file)[0]
    return [
        'python3', PIPELINE_SCRIPT,
        '--input_path', os.path.join(input_dir, stem),
        '--output_paths',
        os.path.join(output_dir, 'txt', stem),
        os.path.join(output_dir, 'srt', stem),
    ]


def main():
    """Process every input that is not in the time log yet."""
    audio_files = pending_audio_files(input_files_path, load_processed_files(TIMELOG_PATH))

    # Create both output directories once; exist_ok makes a prior check unnecessary.
    for sub_dir in ('txt', 'srt'):
        os.makedirs(os.path.join(output_files_path, sub_dir), exist_ok=True)

    for audio_file in audio_files:
        # apply pretrained pipeline and track runtime
        start = time.time()
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact.
        result = subprocess.run(build_command(audio_file))
        elapsed = round(time.time() - start, 6)

        if result.returncode != 0:
            # Do not log failures: a logged file is skipped on every later run.
            print(f'{audio_file}: pipeline exited with code {result.returncode}; '
                  'not logged, will be retried on the next run')
            continue

        with open(TIMELOG_PATH, 'a') as log:
            log.write(f'{audio_file}: {elapsed}\n')


if __name__ == '__main__':
    main()