models.py

import torch
from torch import nn, no_grad
from transformers import CLIPTokenizer, CLIPTextModel


class Encoder:
    """Frozen CLIP text encoder that turns prompts into conditioning vectors."""

    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
    model = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32")
    model.eval()

    @classmethod
    def encode_prompt(cls, prompts, device="cpu"):
        tokens = cls.tokenizer(prompts, padding=True, return_tensors="pt", truncation=True)
        cls.model.to(device)
        # Reassign: BatchEncoding.to returns the moved encoding.
        tokens = tokens.to(device)
        with no_grad():
            encoded = cls.model(**tokens)
        # pooler_output is a (batch, 512) embedding for clip-vit-base-patch32.
        return encoded.pooler_output
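
# Example usage (a minimal sketch; the prompt text below is illustrative):
#
#     embedding = Encoder.encode_prompt(["a photo of a red flower"])
#     embedding.shape  # torch.Size([1, 512])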


class Generator(nn.Module):
    """Upsamples a latent vector, conditioned on a CLIP prompt embedding,
    into a 3x128x128 image."""

    def __init__(self):
        super().__init__()
        self.main = nn.Sequential(
            # 8x8 -> 16x16
            nn.ConvTranspose2d(64 * 16 + 20, 64 * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 8),
            nn.LeakyReLU(0.2, True),
            # 16x16 -> 32x32
            nn.ConvTranspose2d(64 * 8, 64 * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 4),
            nn.LeakyReLU(0.2, True),
            # 32x32 -> 64x64
            nn.ConvTranspose2d(64 * 4, 64 * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 2),
            nn.LeakyReLU(0.2, True),
            # 64x64 -> 128x128
            nn.ConvTranspose2d(64 * 2, 64, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(64, 3, 4, 1, bias=False, padding="same"),
            nn.Tanh(),
        )
        # Projects the (N, 100, 1, 1) latent to 1024 feature maps of size 8x8.
        self.feature_maps = nn.Sequential(
            nn.ConvTranspose2d(100, 64 * 16, 8, 1, 0, bias=False),
            nn.BatchNorm2d(64 * 16),
            nn.LeakyReLU(0.2, True),
        )
        # Projects the (N, 512, 1, 1) prompt embedding to 20 maps of size 8x8.
        self.prompt_features = nn.Sequential(
            nn.ConvTranspose2d(512, 20, 8, 1, 0, bias=False),
            nn.BatchNorm2d(20),
            nn.LeakyReLU(0.2, True),
        )

    def forward(self, latent, prompt):
        # Accept the encoder's (N, 512) output as well as (N, 512, 1, 1);
        # the transposed conv in prompt_features needs a 4D input.
        prompt = prompt.view(prompt.size(0), -1, 1, 1)
        latent_features = self.feature_maps(latent)
        prompt_features = self.prompt_features(prompt)
        combined = torch.cat((latent_features, prompt_features), dim=1)
        return self.main(combined)
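
# Shape walkthrough for Generator.forward with batch size N (for reference):
#     latent (N, 100, 1, 1) --feature_maps-->    (N, 1024, 8, 8)
#     prompt (N, 512, 1, 1) --prompt_features--> (N, 20, 8, 8)
#     concat -> (N, 1044, 8, 8) --main-->        (N, 3, 128, 128)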


class Discriminator(nn.Module):
    """Scores images as real/fake and predicts one of 102 classes,
    both conditioned on the CLIP prompt embedding."""

    def __init__(self):
        super().__init__()
        self.image_features = nn.Sequential(
            # 128x128 -> 64x64
            nn.Conv2d(3, 64 * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # 64x64 -> 32x32
            nn.Conv2d(64 * 2, 64 * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # 32x32 -> 16x16
            nn.Conv2d(64 * 4, 64 * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # 16x16 -> 8x8
            nn.Conv2d(64 * 8, 64 * 16, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 16),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # Projects the (N, 512, 1, 1) prompt embedding to 20 maps of size 8x8.
        self.prompt_features = nn.Sequential(
            nn.ConvTranspose2d(512, 20, 8, 1, 0, bias=False),
            nn.BatchNorm2d(20),
            nn.LeakyReLU(0.2, True),
        )
        # Real/fake head: collapses the 8x8 combined maps to a single
        # sigmoid-squashed score.
        self.disc_out = nn.Sequential(
            nn.Conv2d(64 * 16 + 20, 1, 8, 1, 0, bias=False),
            nn.Sigmoid(),
        )
        # 8x8 -> 4x4 reduction feeding the auxiliary classifier.
        self.reduce_maps_classifier = nn.Sequential(
            nn.Conv2d(64 * 16 + 20, 128, 4, 2, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # Auxiliary classification head over 102 classes.
        self.classifier = nn.Sequential(
            nn.Linear(128 * 4 * 4, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 102),
            nn.Softmax(dim=1),
        )

    def forward(self, images, prompt):
        # Accept the encoder's (N, 512) output as well as (N, 512, 1, 1).
        prompt = prompt.view(prompt.size(0), -1, 1, 1)
        image_features = self.image_features(images)
        prompt_features = self.prompt_features(prompt)
        combined = torch.cat([image_features, prompt_features], dim=1)
        reduced = self.reduce_maps_classifier(combined)
        flat = torch.flatten(reduced, 1)
        classified = self.classifier(flat)
        discriminated = self.disc_out(combined)
        return discriminated, classified
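

if __name__ == "__main__":
    # Minimal smoke test (added for illustration; not part of the training
    # pipeline): push random inputs through both networks and print shapes.
    batch = 2
    latent = torch.randn(batch, 100, 1, 1)
    prompt = Encoder.encode_prompt(["a red flower", "a yellow flower"])
    generator = Generator()
    discriminator = Discriminator()
    fake_images = generator(latent, prompt)
    validity, class_probs = discriminator(fake_images, prompt)
    print(fake_images.shape)  # torch.Size([2, 3, 128, 128])
    print(validity.shape)     # torch.Size([2, 1, 1, 1])
    print(class_probs.shape)  # torch.Size([2, 102])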