forked from dylanswiggett/director-to-video
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvoice.py
executable file
·94 lines (76 loc) · 3.27 KB
/
voice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/python
import subprocess
import phonemes as ph
import ffmpeg_create_video as v
import ffmpeg_add_audio as a
import pipes
# Voices:
# 0: British Male
# 1:
#
#
# Each entry is (voice name, pitch, volume) -- all strings, passed verbatim
# as arguments to ./voice.sh (see generate_line).
voices = [
    ("en1", "50", "2"),
    ("us1", "50", "1"),
    ("us2", "50", "1"),
    ("us3", "50", "2")
]
# Lazy cache: mouth image filename -> (image, mask, mask2) tuple, filled by
# generate_line the first time each mouth shape is needed.
mouth_images = dict()
def generate_mouths(voice_num, phones, fps=24, length=None):
    # Convert a timed phoneme sequence into one mouth-image filename per
    # video frame.
    #
    # voice_num: unused here; kept for interface compatibility with callers.
    # phones:    list of (phoneme, duration_ms) tuples.
    # fps:       output video frame rate.
    # length:    optional target duration in seconds; when given, phoneme
    #            durations are scaled so the sequence fills exactly `length`.
    # Returns:   list of mouth image filenames, one per frame.
    framelen = 1000.0 / fps  # duration of one frame, in ms
    scale = 1.0
    if length:
        actuallen = float(sum(phone[1] for phone in phones)) / 1000.0  # to seconds
        # Fix: guard against ZeroDivisionError when phones is empty (or all
        # zero-duration) -- the original divided unconditionally.
        if actuallen > 0:
            scale = float(length) / actuallen
    totaloffset = 0
    curframeend = 0
    mouths = []
    for p, dur in phones:
        dur *= scale
        # Unknown phonemes fall back to the resting mouth shape.
        face = ph.phonemes[p] if p in ph.phonemes else ph.REST
        # Emit this face once for every frame boundary the phoneme spans.
        while totaloffset + dur >= curframeend:
            mouths.append(face)
            curframeend += framelen
        totaloffset += dur
    return mouths
# Given a voice number and a line of dialog, returns an array of tuples phoneme, length (ms)
# NOTE(review): the comment above describes the intermediate `phones` list;
# the function actually returns a list of (image, mask, mask2) tuples, one
# per video frame -- see the return value below.
def generate_line(voice_num, line, length=None):
    # Synthesize `line` with the chosen voice via ./voice.sh, parse its
    # "phoneme<TAB>duration_ms" output, and map the timing to per-frame mouth
    # image tuples. `length` (seconds, per generate_mouths' scaling) optionally
    # stretches the mouth timing to a target duration.
    voice, pitch, volume = voices[voice_num]
    # Apostrophes are stripped before synthesis (why: unexplained -- possibly
    # to sidestep quoting problems in voice.sh; TODO confirm).
    line = line.replace("'", "")
    # NOTE(review): pipes.quote() on a list-form argv element passes literal
    # quote characters through to voice.sh (check_output with a list does no
    # shell parsing) -- presumably voice.sh re-expands the argument in a
    # shell; verify against voice.sh.
    phonemes = subprocess.check_output(["./voice.sh", voice, pitch, volume, pipes.quote(line)])
    lines = phonemes.split("\n")
    phones = []
    for i in range(len(lines)):
        phon = lines[i].split("\t")
        if len(phon) < 2:
            # Skip blank or malformed lines in voice.sh's output.
            continue
        p, time = phon[0], phon[1]
        phones.append((p, int(time)))
    mouths = generate_mouths(voice_num, phones, length=length)
    mouth_img_list = []
    for mouth in mouths:
        # Load and cache each mouth's (image, mask, mask2) triple on first use.
        if not mouth in mouth_images:
            path = "mouths/" + mouth
            mask_path = "mouths/mask_" + mouth
            mask2_path = "mouths/mask2_" + mouth
            mouth_images[mouth] = (v.load_image(path), v.load_image(mask_path), v.load_image(mask2_path))
        mouth_img_list.append(mouth_images[mouth])
    if len(mouth_img_list) == 0:
        # Always return at least one frame (the resting mouth) so callers
        # have something to render.
        mouth_img_list.append(mouth_images["Rest.jpg"])
    return mouth_img_list
if __name__ == "__main__":
    # Demo: render a lip-synced video for a hard-coded line of dialog.
    # text = "Data says I am incapable of any feeling. I do not think that that is a correct statement, but the matter will require further analysis. Now I am simply talking for a long time, because I do not care if I experience emotion. I am rather more interested in whether or not my lips sync properly with my audio, since I do try to act human if possible."
    text = "Space, the final frontier. These are the voyages of the starship enterprise. Its ongoing mission: to explore strange new worlds. To seek out new life, and new civilizations. To boldly go where no one has gone before. Dooooo dooooo.... doooo, dooo dooo dooo doooooooooooooooooooo... dooooo, dooooo.... dooooo, dooo doooo doooo dooooooooooo..... dooooo doooooooo, dooooo dooooooo, dooo dooo dooo dooo doooooooo..... dooooooo..... dooo doooo, doooooooooo....... dooo, dooo, dooo, dooooooooooooooooooooooooooooooo"
    au = a.OutputAudio()
    # NOTE(review): this rebinds the module-level `mouth_images` cache dict to
    # the frame list returned by generate_line -- harmless here since the
    # cache is not used again, but the shadowing is confusing.
    mouth_images = generate_line(0, text)
    # Assumes voice.sh left its synthesized speech at tmp/tmp.wav -- TODO confirm.
    au.addAudio("tmp/tmp.wav", 0)
    # Stream raw frame bytes into ffmpeg's stdin to build the video track.
    pipe = subprocess.Popen(v.ffmpeg_create_video_command, stdin = subprocess.PIPE)
    for m in mouth_images:
        pipe.stdin.write(v.as_background_image(m).tostring())
    pipe.stdin.close()
    # Mux the video (presumably written to tmp/out.mp4 by the ffmpeg command
    # above -- verify against ffmpeg_create_video) with the audio track.
    au.combineWith("tmp/out.mp4", "final.mkv")