Skip to content

Commit

Permalink
[fix] Fix code style check
Browse files Browse the repository at this point in the history
  • Loading branch information
lsrami committed May 22, 2024
1 parent 9197232 commit 2cdde9a
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
2 changes: 1 addition & 1 deletion examples/aishell-3/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
$data/lexicon.txt \
$dataset_dir/data_aishell3 \
$data/all.txt

# Compute spec length (optional, but recommended)
python tools/compute_spec_length.py \
$data/all.txt \
Expand Down
12 changes: 10 additions & 2 deletions tools/compute_spec_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,21 @@ def load_filepaths_and_text(filename, split="|"):
filepaths_and_text = [line.strip().split(split) for line in f]
return filepaths_and_text


def process_item(item):
    """Append an estimated spectrogram length to one metadata row.

    The row is kept only when its text has an acceptable number of
    whitespace-separated tokens. The bounds ``min_text_len`` and
    ``max_text_len``, as well as ``sampling_rate`` and ``hop_length``, are
    read from names defined outside this function.

    Args:
        item: List of fields for one entry. ``item[0]`` is the audio file
            path and ``item[2]`` is the text.

    Returns:
        The same ``item`` list with the estimated length appended, or
        ``None`` when the token count falls outside
        ``[min_text_len, max_text_len]``.
    """
    audiopath = item[0]
    # Probe the source rate first, exactly as before, so an unreadable audio
    # file still raises even for rows that would be filtered out.
    src_sampling_rate = sf.info(audiopath).samplerate
    tokens = item[2].strip().split()

    if not min_text_len <= len(tokens) <= max_text_len:
        return None

    # Estimate the frame count from the on-disk size instead of decoding the
    # audio: rescale the byte count to the target rate, then divide by
    # 2 * hop_length.
    # NOTE(review): the factor 2 presumably assumes 16-bit mono PCM, and the
    # byte count includes any file header -- confirm against the dataset.
    resampled_bytes = int(
        os.path.getsize(audiopath) * sampling_rate / src_sampling_rate
    )
    item.append(resampled_bytes // (2 * hop_length))
    return item


def main(in_file, out_file):
"""
Filter text & store spec lengths
Expand All @@ -34,7 +37,12 @@ def main(in_file, out_file):
audiopaths_sid_text = load_filepaths_and_text(in_file, split="|")

with ThreadPoolExecutor(max_workers=32) as executor:
results = list(tqdm(executor.map(process_item, audiopaths_sid_text), total=len(audiopaths_sid_text)))
results = list(
tqdm(
executor.map(process_item, audiopaths_sid_text),
total=len(audiopaths_sid_text),
)
)

# Filter out None results
results = [result for result in results if result is not None]
Expand Down

0 comments on commit 2cdde9a

Please sign in to comment.