-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload_llff.py
399 lines (312 loc) · 15.4 KB
/
load_llff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
import numpy as np
# import os, imageio
import os
import imageio.v2 as imageio
########## Slightly modified version of LLFF data loading code
########## see https://github.com/Fyusion/LLFF for original
### 重新规范化图片的分辨率
### 将images文件夹里的图片缩小到原来的1/8并且保存到images_8文件夹中
# factors=[8]
def _minify(basedir, factors=[], resolutions=[]):
### 这一段判断文件夹data/nerf_llff_data/fern/images_8是否存在,若已存在则直接返回,不执行之后的代码了
needtoload = False
for r in factors:
# imgdir = data/nerf_llff_data/fern/images_8
imgdir = os.path.join(basedir, 'images_{}'.format(r))
if not os.path.exists(imgdir):
needtoload = True
for r in resolutions:
imgdir = os.path.join(basedir, 'images_{}x{}'.format(r[1], r[0]))
if not os.path.exists(imgdir):
needtoload = True
if not needtoload:
return
from shutil import copy
# check_output可以用于执行shell命令
from subprocess import check_output
### 这一段主要用于得到data/nerf_llff_data/fern/images文件夹里所有图片的地址,存在imgs里
# imgdir = data/nerf_llff_data/fern/images
imgdir = os.path.join(basedir, 'images')
# 得到images文件夹下所有图片的地址
imgs = [os.path.join(imgdir, f) for f in sorted(os.listdir(imgdir))]
imgs = [f for f in imgs if any([f.endswith(ex) for ex in ['JPG', 'jpg', 'png', 'jpeg', 'PNG']])]
# imgdir_orig = data/nerf_llff_data/fern/images
imgdir_orig = imgdir
# 得到当前进程的工作目录,wd=C:\Users\huang\Desktop\nerf-pytorch
wd = os.getcwd()
### 这一段将images文件夹里的图片缩小到原来的1/8并且保存到images_8文件夹中
for r in factors + resolutions:
if isinstance(r, int):
# name = images_8
name = 'images_{}'.format(r)
# resizearg = (100./8)% = 12.5%
resizearg = '{}%'.format(100./r)
else:
name = 'images_{}x{}'.format(r[1], r[0])
resizearg = '{}x{}'.format(r[1], r[0])
# imgdir = data/nerf_llff_data/fern/images_8
imgdir = os.path.join(basedir, name)
# 如果images_8已经存在,则不执行之后的代码
if os.path.exists(imgdir):
continue
print('Minifying', r, basedir)
# 创建文件夹data/nerf_llff_data/fern/images_8
os.makedirs(imgdir)
# check_output用于执行shell命令:cp data/nerf_llff_data/fern/images/* data/nerf_llff_data/fern/images_8
# 该指令将images文件夹里的所有图片复制到images_8文件夹中
check_output('cp {}/* {}'.format(imgdir_orig, imgdir), shell=True)
# ext = 'JPG'
ext = imgs[0].split('.')[-1]
# args = 'mogrify -resize 12.5% -format png *.JPG'
args = ' '.join(['mogrify', '-resize', resizearg, '-format', 'png', '*.{}'.format(ext)])
print(args)
# 改变当前目录到data/nerf_llff_data/fern/images_8
os.chdir(imgdir)
# 执行命令mogrify -resize 12.5% -format png *.JPG
# 该命令将jpg图片缩小到原来的12.5%并转化为png格式
check_output(args, shell=True)
# 改变当前目录到C:\Users\huang\Desktop\nerf-pytorch
os.chdir(wd)
if ext != 'png':
check_output('rm {}/*.{}'.format(imgdir, ext), shell=True)
print('Removed duplicates')
print('Done')
### 加载poses_bounds.npy,得到姿态矩阵poses,深度边界信息bds,所有读取的图片imgs
### 其中poses中高、宽和相机焦距都缩了8倍,图片的RGB值进行了归一化处理
def _load_data(basedir, factor=None, width=None, height=None, load_imgs=True):
# 读取poses_bounds.npy得到poses_arr:(N, 17), N为图片数量
# 其中,前15个为3*5姿态矩阵,后两个为远/近深度边界信息(视角到场景的最近和最远距离)
# 对于姿态矩阵,前3*3为旋转矩阵,中3*1为平移向量,(前中3*4矩阵为c2w矩阵)后3*1为图像的高height、宽度width和相机的焦距Focal
poses_arr = np.load(os.path.join(basedir, 'poses_bounds.npy'))
# poses:(N, 15)->(N, 3, 5)->(3, 5, N)
poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1,2,0])
# bds:(N, 2)->(2, N)
bds = poses_arr[:, -2:].transpose([1,0])
# 从images文件夹里取出第一张图片的地址,存在img0中
img0 = [os.path.join(basedir, 'images', f) for f in sorted(os.listdir(os.path.join(basedir, 'images'))) \
if f.endswith('JPG') or f.endswith('jpg') or f.endswith('png')][0]
# 得到img0的shape
sh = imageio.imread(img0).shape
sfx = ''
# factor是下采样倍数,默认是8
if factor is not None:
# sfx = _8
sfx = '_{}'.format(factor)
# 将images文件夹里的图片缩小到原来的1/8并且保存到images_8文件夹中
_minify(basedir, factors=[factor])
factor = factor
elif height is not None:
factor = sh[0] / float(height)
width = int(sh[1] / factor)
_minify(basedir, resolutions=[[height, width]])
sfx = '_{}x{}'.format(width, height)
elif width is not None:
factor = sh[1] / float(width)
height = int(sh[0] / factor)
_minify(basedir, resolutions=[[height, width]])
sfx = '_{}x{}'.format(width, height)
else:
factor = 1
# imgdir = 'data/nerf_llff_data/fern/images_8'
imgdir = os.path.join(basedir, 'images' + sfx)
if not os.path.exists(imgdir):
print( imgdir, 'does not exist, returning' )
return
# 得到images_8文件夹里每一张图片的地址
imgfiles = [os.path.join(imgdir, f) for f in sorted(os.listdir(imgdir)) if f.endswith('JPG') or f.endswith('jpg') or f.endswith('png')]
if poses.shape[-1] != len(imgfiles):
print( 'Mismatch between imgs {} and poses {} !!!!'.format(len(imgfiles), poses.shape[-1]) )
return
# 得到images_8文件夹里第一张图片的shape,用来代表所有图片的shape
sh = imageio.imread(imgfiles[0]).shape
# 将3*5姿态矩阵中代表图像高和宽的数据换成下采样8倍后的高和宽
poses[:2, 4, :] = np.array(sh[:2]).reshape([2, 1])
# 将3*5姿态矩阵中代表相机焦距的数据缩小8倍
# 因为图像下采样了8倍,所以相机参数也需要改变,其中c2w矩阵不变,相机焦距等比例缩放
poses[2, 4, :] = poses[2, 4, :] * 1./factor
if not load_imgs:
return poses, bds
def imread(f):
if f.endswith('png'):
return imageio.imread(f, ignoregamma=True)
else:
return imageio.imread(f)
# 把images_8文件夹里每个图片的RGB值都/255进行归一化
imgs = [imread(f)[...,:3]/255. for f in imgfiles]
# imgs:(h,w,3,N)
imgs = np.stack(imgs, -1)
print('Loaded image data', imgs.shape, poses[:,-1,0])
# poses:(3,5,N),已下采样8倍
# bds:(2,N)
# imgs:(h,w,3,N)
return poses, bds, imgs
def normalize(x):
return x / np.linalg.norm(x)
### view_matrix是一个构造相机矩阵的的函数
### 输入是相机的Z轴朝向、up轴的朝向(即相机平面朝上的方向Y)、以及相机中心,输出3*4的c2w矩阵
def viewmatrix(z, up, pos):
vec2 = normalize(z)
vec1_avg = up
# 用Y轴与Z轴叉乘得到X轴
vec0 = normalize(np.cross(vec1_avg, vec2))
vec1 = normalize(np.cross(vec2, vec0))
# 构造出3*4的c2w矩阵
m = np.stack([vec0, vec1, vec2, pos], 1)
return m
def ptstocam(pts, c2w):
tt = np.matmul(c2w[:3,:3].T, (pts-c2w[:3,3])[...,np.newaxis])[...,0]
return tt
### 输入多个相机的位姿,得到多个相机的平均位姿
def poses_avg(poses):
# 取出图像的h、w和相机的焦距信息,每个相机的焦距都是一样的
hwf = poses[0, :3, -1:]
# poses的第三列就是相机在世界坐标系中的坐标
# 计算所有相机的坐标的平均值,得到所有相机的中心坐标center
center = poses[:, :3, 3].mean(0)
# 对所有相机的Z轴求求和然后归一化得到vec2向量(方向向量相加其实等效于平均方向向量)
vec2 = normalize(poses[:, :3, 2].sum(0))
# 对所有的相机的Y轴求平均得到up向量
up = poses[:, :3, 1].sum(0)
# 利用viewmatrix得到所有相机的平均c2w矩阵,然后和hwf合并,得到3*5的相机姿态矩阵
c2w = np.concatenate([viewmatrix(vec2, up, center), hwf], 1)
return c2w
### 这个函数和模型训练没有关系,主要是用来生成一个相机轨迹用于新视角的合成
### 生成一个螺旋式的相机轨迹
def render_path_spiral(c2w, up, rads, focal, zdelta, zrate, rots, N):
render_poses = []
rads = np.array(list(rads) + [1.])
hwf = c2w[:,4:5]
for theta in np.linspace(0., 2. * np.pi * rots, N+1)[:-1]:
c = np.dot(c2w[:3,:4], np.array([np.cos(theta), -np.sin(theta), -np.sin(theta*zrate), 1.]) * rads)
z = normalize(c - np.dot(c2w[:3,:4], np.array([0,0,-focal, 1.])))
render_poses.append(np.concatenate([viewmatrix(z, up, c), hwf], 1))
return render_poses
### 中心化相机位姿(包括位置和朝向),使每个相机坐标系的XYZ轴朝向和世界坐标系保持一致
def recenter_poses(poses):
poses_ = poses+0
bottom = np.reshape([0,0,0,1.], [1,4])
# 输入多个相机的位姿,得到多个相机的平均位姿
c2w = poses_avg(poses)
# 给c2w底下添加上[0,0,0,1.],得到完整的4*4的c2w矩阵,此时的c2w矩阵是4*4
c2w = np.concatenate([c2w[:3,:4], bottom], -2)
bottom = np.tile(np.reshape(bottom, [1,1,4]), [poses.shape[0],1,1])
# 为poses底下也添加上[0,0,0,1.],此时的poses变为(N,4,4)
poses = np.concatenate([poses[:,:3,:4], bottom], -2)
# 用多个相机的平均位姿c2w的逆左乘poses,目的是使每个相机坐标系的朝向和世界坐标系保持一致
poses = np.linalg.inv(c2w) @ poses
poses_[:,:3,:4] = poses[:,:3,:4]
poses = poses_
return poses
#####################
def spherify_poses(poses, bds):
p34_to_44 = lambda p : np.concatenate([p, np.tile(np.reshape(np.eye(4)[-1,:], [1,1,4]), [p.shape[0], 1,1])], 1)
rays_d = poses[:,:3,2:3]
rays_o = poses[:,:3,3:4]
def min_line_dist(rays_o, rays_d):
A_i = np.eye(3) - rays_d * np.transpose(rays_d, [0,2,1])
b_i = -A_i @ rays_o
pt_mindist = np.squeeze(-np.linalg.inv((np.transpose(A_i, [0,2,1]) @ A_i).mean(0)) @ (b_i).mean(0))
return pt_mindist
pt_mindist = min_line_dist(rays_o, rays_d)
center = pt_mindist
up = (poses[:,:3,3] - center).mean(0)
vec0 = normalize(up)
vec1 = normalize(np.cross([.1,.2,.3], vec0))
vec2 = normalize(np.cross(vec0, vec1))
pos = center
c2w = np.stack([vec1, vec2, vec0, pos], 1)
poses_reset = np.linalg.inv(p34_to_44(c2w[None])) @ p34_to_44(poses[:,:3,:4])
rad = np.sqrt(np.mean(np.sum(np.square(poses_reset[:,:3,3]), -1)))
sc = 1./rad
poses_reset[:,:3,3] *= sc
bds *= sc
rad *= sc
centroid = np.mean(poses_reset[:,:3,3], 0)
zh = centroid[2]
radcircle = np.sqrt(rad**2-zh**2)
new_poses = []
for th in np.linspace(0.,2.*np.pi, 120):
camorigin = np.array([radcircle * np.cos(th), radcircle * np.sin(th), zh])
up = np.array([0,0,-1.])
vec2 = normalize(camorigin)
vec0 = normalize(np.cross(vec2, up))
vec1 = normalize(np.cross(vec2, vec0))
pos = camorigin
p = np.stack([vec0, vec1, vec2, pos], 1)
new_poses.append(p)
new_poses = np.stack(new_poses, 0)
new_poses = np.concatenate([new_poses, np.broadcast_to(poses[0,:3,-1:], new_poses[:,:3,-1:].shape)], -1)
poses_reset = np.concatenate([poses_reset[:,:3,:4], np.broadcast_to(poses[0,:3,-1:], poses_reset[:,:3,-1:].shape)], -1)
return poses_reset, new_poses, bds
def load_llff_data(basedir, factor=8, recenter=True, bd_factor=.75, spherify=False, path_zflat=False):
poses, bds, imgs = _load_data(basedir, factor=factor) # factor=8 downsamples original imgs by 8x
print('Loaded', basedir, bds.min(), bds.max())
# Correct rotation matrix ordering and move variable dim to axis 0
# 将poses的第0列取负数并与第一列交换顺序,目的是把llff下的相机坐标系变换到nerf中的相机坐标系
poses = np.concatenate([poses[:, 1:2, :], -poses[:, 0:1, :], poses[:, 2:, :]], 1)
# poses:(N,3,5)
poses = np.moveaxis(poses, -1, 0).astype(np.float32)
# imgs:(N,h,w,3)
imgs = np.moveaxis(imgs, -1, 0).astype(np.float32)
images = imgs
# bds:(N,2)
bds = np.moveaxis(bds, -1, 0).astype(np.float32)
# Rescale if bd_factor is provided
# sc是进行边界放缩的比例
# 深度边界信息和平移向量一起缩放sc倍,平移向量的含义是相机中心在世界坐标系下的坐标
sc = 1. if bd_factor is None else 1./(bds.min() * bd_factor)
poses[:,:3,3] *= sc
bds *= sc
if recenter:
# 使每个相机坐标系的XYZ轴朝向和世界坐标系保持一致
poses = recenter_poses(poses)
if spherify:
poses, render_poses, bds = spherify_poses(poses, bds)
else:
# 求recenter后的poses的一个平均poses
c2w = poses_avg(poses)
print('recentered', c2w.shape)
print(c2w[:3,:4])
## Get spiral
# Get average pose
up = normalize(poses[:, :3, 1].sum(0))
# Find a reasonable "focus depth" for this dataset
close_depth, inf_depth = bds.min()*.9, bds.max()*5.
dt = .75
mean_dz = 1./(((1.-dt)/close_depth + dt/inf_depth))
focal = mean_dz
# Get radii for spiral path
shrink_factor = .8
zdelta = close_depth * .2
tt = poses[:,:3,3] # ptstocam(poses[:3,3,:].T, c2w).T
rads = np.percentile(np.abs(tt), 90, 0)
c2w_path = c2w
N_views = 120
N_rots = 2
if path_zflat:
# zloc = np.percentile(tt, 10, 0)[2]
zloc = -close_depth * .1
c2w_path[:3,3] = c2w_path[:3,3] + zloc * c2w_path[:3,2]
rads[2] = 0.
N_rots = 1
N_views/=2
# Generate poses for spiral path
render_poses = render_path_spiral(c2w_path, up, rads, focal, zdelta, zrate=.5, rots=N_rots, N=N_views)
# render_poses:(120,3,5),共120个视角,每个视角的shape是(3,5)
render_poses = np.array(render_poses).astype(np.float32)
c2w = poses_avg(poses)
print('Data:')
print(poses.shape, images.shape, bds.shape)
# 计算每个相机中心与平均相机中心的距离
dists = np.sum(np.square(c2w[:3,3] - poses[:,:3,3]), -1)
# 获得测试集的id,dists最小的那个相机
i_test = np.argmin(dists)
print('HOLDOUT view is', i_test)
images = images.astype(np.float32)
poses = poses.astype(np.float32)
return images, poses, bds, render_poses, i_test
# if __name__ == "__main__":
# poses, bds, imgs = _load_data("data/nerf_llff_data/fern", factor=8)
# images, poses, bds, render_poses, i_test = load_llff_data("data/nerf_llff_data/fern", factor=8,
# recenter=True, bd_factor=.75,
# spherify=False)