forked from dylanswiggett/director-to-video
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathffmpeg_create_video.py
executable file
·226 lines (205 loc) · 9.75 KB
/
ffmpeg_create_video.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
#!/usr/bin/python
import subprocess
import numpy
import cv2
import copy
import re
import star_trek_parse
import pick_voice as pv
import google_images as gi
import ffmpeg_add_audio as ffaa
import voice
from script import Dialog, StageDirection, ENTER, EXIT, BACKGROUND
# Output frame geometry: the height is fixed and the width is derived from
# the aspect ratio.
ASPECT_RATIO = 16.0 / 9.0
VERTICAL_RESOLUTION = 720
HORIZONTAL_RESOLUTION = int(VERTICAL_RESOLUTION * ASPECT_RATIO)

# Frame rate passed to ffmpeg with "-r".
FRAME_RATE = 24

# Output path without its extension; ".mp4" is appended in the command below.
VIDEO_FILENAME = 'tmp/out'

# Argument vector for the ffmpeg process that receives frames on stdin.
ffmpeg_create_video_command = (
    ['ffmpeg', '-y']  # -y: overwrite file if it exists
    # Input side: raw rgb24 frames of the configured size and rate, read
    # from stdin ("-i -").
    + ['-f', 'rawvideo', '-vcodec', 'rawvideo']
    + ['-s', '%dx%d' % (HORIZONTAL_RESOLUTION, VERTICAL_RESOLUTION)]
    + ['-pix_fmt', 'rgb24']
    + ['-r', '%d' % FRAME_RATE]
    + ['-i', '-']
    # Output side: no audio track, 4M video bitrate, mpeg4 codec.
    + ['-an', '-b:v', '4M', '-vcodec', 'mpeg4']
    + ['%s.mp4' % VIDEO_FILENAME]
)
def load_image(filename):
    """Read an image file and return it with its channels in RGB order.

    Args:
        filename: Path of the image file, passed to cv2.imread.

    Returns:
        The image returned by cv2.imread, converted with COLOR_BGR2RGB.

    Raises:
        IOError: If cv2.imread could not read the file.
    """
    bgr = cv2.imread(filename)
    if bgr is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of letting cvtColor
        # raise an unrelated-looking error.
        raise IOError('Could not read image file: %r' % (filename,))
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
def draw_image(src, dst, x, y, width=0, height=0):
    """Paste `src` into `dst` with its top-left corner at column x, row y.

    Args:
        src: Image array to paste.
        dst: Image array that is modified in place.
        x: Column in `dst` of the pasted region's left edge.
        y: Row in `dst` of the pasted region's top edge.
        width: Target width. When 0, `src` is pasted at its own size and
            `height` is ignored.
        height: Target height, used only when `width` is non-zero.

    Returns:
        None; `dst` is updated in place.
    """
    if width != 0:
        # An explicit size was requested: scale the source to it first.
        patch = cv2.resize(src, (width, height))
    else:
        # No size given: take the dimensions from the source itself.
        height, width = src.shape[0:2]
        patch = src
    dst[y:(y + height), x:(x + width)] = patch
def fit_character(char, width, height):
    """Resize an image to fit inside a width x height box, keeping its ratio.

    The image is scaled so that one of its dimensions matches the box and
    the other is no larger than the box.

    Args:
        char: Image array; only its first two shape entries (rows, columns)
            are read here.
        width: Box width in pixels.
        height: Box height in pixels.

    Returns:
        The resized image produced by cv2.resize.
    """
    char_height, char_width = char.shape[0:2]
    char_ratio = float(char_width) / float(char_height)
    fit_ratio = float(width) / float(height)
    if char_ratio > fit_ratio:
        # Image is relatively wider than the box: width is the limit.
        # Clamp to 1 so int() truncation never asks for a 0-pixel height.
        target = (width, max(1, int(width / char_ratio)))
    elif char_ratio < fit_ratio:
        # Image is relatively taller than the box: height is the limit.
        target = (max(1, int(height * char_ratio)), height)
    else:
        target = (width, height)
    return cv2.resize(char, target)
def draw_character(char, scene, x, y, width, height):
    """Draw a character image into a box of `scene`, in place.

    The image is first fitted to the box with fit_character, then placed
    horizontally centered and aligned to the bottom edge of the box.

    Args:
        char: Character image array.
        scene: Image array that is modified in place.
        x: Column of the box's left edge in `scene`.
        y: Row of the box's top edge in `scene`.
        width: Box width in pixels.
        height: Box height in pixels.
    """
    fit_image = fit_character(char, width, height)
    fit_height, fit_width = fit_image.shape[0:2]
    # Push the image down so its bottom touches the bottom of the box.
    y_offset = y + (height - fit_height)
    # Floor division keeps the offset an integer (a float is not a valid
    # slice index) regardless of which division semantics are in effect.
    x_offset = x + (width - fit_width) // 2
    scene[y_offset:(y_offset + fit_height),
          x_offset:(x_offset + fit_width)] = fit_image
def draw_mouth(mouth, character, x, y, width, height):
    """Blend a mouth image into `character`, in place.

    Args:
        mouth: Indexable holding three images. mouth[0] is used as the mouth
            picture; mouth[1] and mouth[2] are used as masks and are divided
            by 255 here (presumably 0..255 images with the same channel
            layout as mouth[0] -- TODO confirm against the voice module).
        character: Character image array, modified in place.
        x: Column of the target box's left edge in `character`.
        y: Row of the target box's top edge in `character`.
        width: Target box width in pixels.
        height: Target box height in pixels.
    """
    fit_image = fit_character(mouth[0], width, height)
    fit_mask = fit_character(mouth[1], width, height)
    fit_mask2 = fit_character(mouth[2], width, height)
    fit_height, fit_width = fit_image.shape[0:2]

    # Place the mouth slightly below the top of the box, horizontally
    # centered, then clamp so the region starts inside the character image.
    # Floor division keeps the offsets usable as slice indices.
    y_offset = y + fit_height // 6
    y_offset = max(0, min(y_offset, character.shape[0] - fit_height))
    x_offset = x + (width - fit_width) // 2
    x_offset = max(0, min(x_offset, character.shape[1] - fit_width))
    y0, y1 = y_offset, (y_offset + fit_height)
    x0, x1 = x_offset, (x_offset + fit_width)

    fit_mask = numpy.float32(fit_mask) / 255.0
    fit_mask2 = numpy.float32(fit_mask2) / 255.0
    char_region = numpy.float32(character[y0:y1, x0:x1])
    inverse_fit_mask = fit_mask * -1 + 1.0

    # Part of the character that is kept (weighted by the first mask).
    mul = cv2.multiply(char_region, fit_mask)

    # Per-channel mask-weighted mean of the kept pixels, scaled by 1/255.
    # A channel whose mask mean is zero gets 0.0 instead of dividing by zero.
    m1 = cv2.mean(mul)
    m2 = cv2.mean(fit_mask)
    avg = numpy.float32(
        [a / (b * 255.0) if b else 0.0 for a, b in zip(m1, m2)])

    # Constant-colour tint image. The shape is (rows, columns, channels) so
    # it matches fit_image even when the mouth image is not square.
    rgb = numpy.ones((fit_height, fit_width, 3), numpy.float32) * avg[:3]
    # Where the second mask is set, move the tint towards 0.8.
    rgb += (rgb * -1.0 + 0.8) * fit_mask2

    # Tint the mouth picture and keep it only where the first mask is clear.
    fit_image = cv2.multiply(numpy.float32(fit_image), rgb)
    fit_image = cv2.multiply(fit_image, inverse_fit_mask)
    character[y0:y1, x0:x1] = numpy.uint8(mul + fit_image)
def fit_dimensions(img, fit_width, fit_height):
    """Scale an image to cover a fit_width x fit_height area, then crop it.

    The image is resized, keeping its aspect ratio, so that it is at least
    as large as the requested area in both dimensions; the centered
    fit_width x fit_height window of the result is returned.

    Args:
        img: Image array; only its first two shape entries are read here.
        fit_width: Requested width in pixels.
        fit_height: Requested height in pixels.

    Returns:
        A slice of the resized image with the requested dimensions.
    """
    image_height, image_width = img.shape[0:2]
    image_ratio = float(image_width) / float(image_height)
    fit_ratio = float(fit_width) / float(fit_height)
    if image_ratio > fit_ratio:
        # Relatively wider than the target: match heights, overflow in width.
        target = (int(fit_height * image_ratio), fit_height)
    elif image_ratio < fit_ratio:
        # Relatively taller than the target: match widths, overflow in height.
        target = (fit_width, int(fit_width / image_ratio))
    else:
        target = (fit_width, fit_height)
    fit_image = cv2.resize(img, target)

    height, width = fit_image.shape[0:2]
    # Floor division keeps the crop offsets valid slice indices.
    y_offset = (height - fit_height) // 2
    x_offset = (width - fit_width) // 2
    return fit_image[y_offset:y_offset + fit_height,
                     x_offset:x_offset + fit_width]
def as_background_image(image):
    """Return `image` fitted to the output frame size.

    Delegates to fit_dimensions with HORIZONTAL_RESOLUTION as the width and
    VERTICAL_RESOLUTION as the height.
    """
    frame_size = (HORIZONTAL_RESOLUTION, VERTICAL_RESOLUTION)
    return fit_dimensions(image, *frame_size)
def draw_scene(background, characters_fg, characters_bg, speaking, mouth, first_line):
    """Compose one frame: background, characters, and the speaker's mouth.

    Args:
        background: Background image array; it is copied, not modified.
        characters_fg: Sequence of characters drawn in the lower part of
            the frame.
        characters_bg: Sequence of characters drawn in the upper part of
            the frame.
        speaking: The character currently speaking. Its `image` is copied
            and the mouth is drawn onto the copy; its `loc` is read for the
            mouth rectangle.
        mouth: Mouth images forwarded to draw_mouth.
        first_line: Not used by this function.

    Returns:
        The composed frame (a copy of `background` with characters drawn).
    """
    background = copy.copy(background)
    speaking_img = copy.copy(speaking.image)

    scale = 2
    if not speaking.loc:
        print("Error, could not find mouth location")
        x, y, w, h = 0, 0, 100, 100
    else:
        x, y, w, h = speaking.loc['mouth']
    # NOTE(review): the vertical shift uses w rather than h. This is kept
    # exactly as found because the placement may have been tuned that way
    # -- confirm before changing.
    draw_mouth(mouth, speaking_img,
               x - w // scale, y - w // scale, w * scale, h * scale)

    # The top 40% of the frame is reserved for background characters.
    background_space = int(0.4 * VERTICAL_RESOLUTION)

    # Background characters occupy every other slot so gaps separate them.
    # max(1, ...) only matters for an empty list, where the loop body never
    # runs anyway.
    dx_bg = HORIZONTAL_RESOLUTION // max(1, len(characters_bg) * 2 - 1)
    for i, character in enumerate(characters_bg):
        c_img = speaking_img if character == speaking else character.image
        draw_character(c_img, background,
                       dx_bg * (i * 2), 0, dx_bg, background_space)

    # Foreground characters share the full width in equal slots.
    dx_fg = HORIZONTAL_RESOLUTION // max(1, len(characters_fg))
    for i, character in enumerate(characters_fg):
        c_img = speaking_img if character == speaking else character.image
        draw_character(c_img, background,
                       dx_fg * i, background_space,
                       dx_fg, VERTICAL_RESOLUTION - background_space)

    return background
def _apply_stage_direction(stage_direction, characters_on_stage, characters_in_background):
    """Update both character lists in place for one stage direction."""
    for action, character in stage_direction.actions:
        if action == EXIT:
            if character in characters_on_stage:
                characters_on_stage.remove(character)
            if character in characters_in_background:
                characters_in_background.remove(character)
        elif action == ENTER:
            if character not in characters_on_stage:
                # A background character that enters moves to the stage.
                if character in characters_in_background:
                    characters_in_background.remove(character)
                characters_on_stage.append(character)
        elif action == BACKGROUND:
            if (character not in characters_in_background
                    and character not in characters_on_stage):
                characters_in_background.append(character)


def _draw_supertitles(frame, text):
    """Draw `text` onto `frame` as centered lines wrapped at spaces."""
    remaining = text
    baseline_y = 0
    while len(remaining) > 0:
        line = remaining
        while True:
            text_size = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2)
            if text_size[0][0] <= HORIZONTAL_RESOLUTION:
                break
            shorter = line.rsplit(' ', 1)[0]
            if not shorter or shorter == line:
                # A single word wider than the frame cannot be split any
                # further. Draw it as-is instead of looping forever.
                break
            line = shorter
        baseline_y += 50
        text_point = ((HORIZONTAL_RESOLUTION - text_size[0][0]) // 2, baseline_y)
        cv2.putText(frame, line, text_point,
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 0), 2)
        remaining = remaining[len(line):]


def create_video(script, max_scenes=4):
    """Render a parsed script to video frames and combine them with audio.

    Frames are written as raw bytes to the stdin of an ffmpeg process
    started with ffmpeg_create_video_command. Audio is accumulated in an
    ffaa.OutputAudio object, whose combineWith method is finally called with
    the encoded video path and 'movie.mkv'.

    Args:
        script: Parsed script. This function reads `script.characters`
            (a mapping whose values get a `voice` attribute assigned) and
            `script.scenes`.
        max_scenes: Maximum number of scenes to render from the start of
            the script. Defaults to 4, the limit that used to be hard-coded;
            pass None to render every scene.

    Raises:
        Exception: If a scene direction is neither a StageDirection nor a
            Dialog.
    """
    pipe = subprocess.Popen(ffmpeg_create_video_command, stdin=subprocess.PIPE)
    try:
        totalframes = 0
        audio_manager = ffaa.OutputAudio()

        for character in script.characters:
            script.characters[character].voice = pv.pick_voice(script, character)

        for scene in script.scenes[:max_scenes]:
            characters_on_stage = sorted(
                scene.characters, key=lambda character: character.name)
            characters_in_background = []
            setting_image = as_background_image(scene.setting.image)
            first_line = True

            for line in scene.directions:
                if isinstance(line, StageDirection):
                    _apply_stage_direction(
                        line, characters_on_stage, characters_in_background)
                    continue
                elif not isinstance(line, Dialog):
                    raise Exception('Line is not dialog or stage direction')

                text_full, character = line.text, line.character
                for text in re.split(r"[.,!:;?]+", text_full):
                    if len(text) == 0:
                        continue

                    # Begin hax to make voices line up: delay the audio by
                    # however far the video is ahead of it, minus the 0.1 s
                    # lead kept by the padding loop below.
                    off = float(totalframes) / FRAME_RATE - audio_manager.curlen()
                    off -= .1
                    if off < 0:
                        off = 0
                        print("THIS SHOULD ACTUALLY NEVER HAPPEN.")
                    # The first call's return value is unused. Presumably it
                    # is what produces 'tmp/tmp.wav', which is added next --
                    # TODO confirm against the voice module.
                    voice.generate_line(character.voice, text)
                    starttime = audio_manager.curlen() + off
                    audio_manager.addAudio('tmp/tmp.wav', off)
                    length = audio_manager.curlen() - starttime
                    mouths = voice.generate_line(character.voice, text, length=length)
                    # End hax

                    for mouth in mouths:
                        frame = draw_scene(setting_image, characters_on_stage,
                                           characters_in_background, character,
                                           mouth, first_line)
                        _draw_supertitles(frame, text)
                        # tobytes() returns the same bytes as the older
                        # tostring() alias.
                        pipe.stdin.write(frame.tobytes())
                        totalframes += 1

                    # Pad with the last mouth shape until the video leads
                    # the audio by at least 0.1 s (no supertitles here).
                    while (float(totalframes) / FRAME_RATE - audio_manager.curlen()) < .1:
                        frame = draw_scene(setting_image, characters_on_stage,
                                           characters_in_background, character,
                                           mouths[-1], first_line)
                        pipe.stdin.write(frame.tobytes())
                        totalframes += 1
                first_line = False
    finally:
        # Always close stdin and wait for ffmpeg, even when rendering
        # failed, so the child process is not left running.
        pipe.stdin.close()
        pipe.wait()

    audio_manager.combineWith('%s.mp4' % VIDEO_FILENAME, 'movie.mkv')
if __name__ == "__main__":
    # Script entry point: parse 'the-defector.txt' with star_trek_parse and
    # render the resulting script.
    create_video(star_trek_parse.parse('the-defector.txt'))