# -*- coding:utf-8 -*-
from __future__ import print_function
import argparse
import sys
import pdb

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset

from functional import log_sum_exp
from Nets import Generator, Discriminator
from Datasets import *
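
# This script appears to implement the semi-supervised GAN of Salimans et al.,
# "Improved Techniques for Training GANs" (2016): the discriminator is a
# K-class classifier whose logits double as an implicit real/fake score, and
# the generator is trained by feature matching. It targets the pre-0.4 PyTorch
# API (Variable, volatile, TensorDataset.data_tensor), which the code below
# already assumes.
#
# Hypothetical invocation (the script name is an assumption):
#   python ImprovedGAN.py --cuda --epochs 10 --unlabel-weight 1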
class ImprovedGAN(object):
    def __init__(self, G, D, labeled, unlabeled, test, args):
        self.G = G
        self.D = D
        if args.cuda:
            self.G.cuda()
            self.D.cuda()
        self.labeled = labeled      # small labeled dataset (a TensorDataset)
        self.unlabeled = unlabeled  # large unlabeled dataset
        self.test = test            # held-out test dataset
        self.Doptim = optim.SGD(self.D.parameters(), lr=args.lr, momentum=args.momentum)
        self.Goptim = optim.Adam(self.G.parameters(), lr=args.lr)
        self.args = args
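    # Loss derivation (a sketch of the math the method below computes): the
    # discriminator emits K real-class logits l_1..l_K, and the fake class has
    # an implicit logit of 0. With Z = sum_k exp(l_k):
    #   p(real | x)           = Z / (1 + Z)
    #   supervised loss       = -l_y + log Z               (cross-entropy over real classes)
    #   unlabeled (real) loss = -log(Z / (1 + Z)) = softplus(log Z) - log Z
    #   fake loss             = -log(1 / (1 + Z)) = softplus(log Z)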
    def trainD(self, x_label, y, x_unlabel):
        x_label, x_unlabel, y = Variable(x_label), Variable(x_unlabel), Variable(y, requires_grad=False)
        if self.args.cuda:
            x_label, x_unlabel, y = x_label.cuda(), x_unlabel.cuda(), y.cuda()
        fake = self.G(x_unlabel.size()[0], cuda=self.args.cuda).view(x_unlabel.size())
        output_label = self.D(x_label, cuda=self.args.cuda)
        output_unlabel = self.D(x_unlabel, cuda=self.args.cuda)
        output_fake = self.D(fake, cuda=self.args.cuda)
        # log Z = log sum_k exp(l_k) for each of the three batches
        logz_label, logz_unlabel, logz_fake = log_sum_exp(output_label), log_sum_exp(output_unlabel), log_sum_exp(output_fake)
        prob_label = torch.gather(output_label, 1, y.unsqueeze(1))  # logit of the true class, l_y
        loss_supervised = -torch.mean(prob_label) + torch.mean(logz_label)
        loss_unsupervised = 0.5 * (-torch.mean(logz_unlabel) + torch.mean(F.softplus(logz_unlabel))  # real: -log Z/(1+Z)
                                   + torch.mean(F.softplus(logz_fake)))                              # fake: -log 1/(1+Z)
        loss = loss_supervised + self.args.unlabel_weight * loss_unsupervised
        acc = torch.mean((output_label.max(1)[1] == y).float())
        self.Doptim.zero_grad()
        loss.backward()
        if loss != loss:  # NaN check (NaN != NaN); drop into the debugger before stepping
            pdb.set_trace()
        self.Doptim.step()
        return loss_supervised.data.cpu().numpy(), loss_unsupervised.data.cpu().numpy(), acc.data.cpu().numpy()

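    # Generator update by feature matching (Salimans et al., sec. 3.1): push the
    # mean discriminator feature of generated samples toward the mean feature of
    # real unlabeled data, i.e. minimize mean((E[f(G(z))] - E[f(x)])^2), where
    # f is an intermediate layer of D exposed here via D(..., feature=True).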
    def trainG(self, x_unlabel):
        fake = self.G(x_unlabel.size()[0], cuda=self.args.cuda).view(x_unlabel.size())
        mom_gen = torch.mean(self.D(fake, feature=True, cuda=self.args.cuda), dim=0)
        mom_unlabel = torch.mean(self.D(Variable(x_unlabel), feature=True, cuda=self.args.cuda), dim=0)
        loss = torch.mean((mom_gen - mom_unlabel) ** 2)
        self.Goptim.zero_grad()
        loss.backward()
        self.Goptim.step()
        # NaN check on the generator's first layer after the update
        nan_mask = self.G.main[0].weight != self.G.main[0].weight
        if torch.sum(nan_mask.float()) > 0:
            pdb.set_trace()
        return loss.data.cpu().numpy()

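    # One epoch is one pass over the unlabeled set. Because the labeled set is
    # much smaller, it is tiled `times` times so that every unlabeled batch can
    # be paired with a labeled batch of the same size.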
    def train(self):
        assert len(self.unlabeled) > len(self.labeled)
        assert isinstance(self.labeled, TensorDataset)
        times = int(np.ceil(len(self.unlabeled) * 1. / len(self.labeled)))
        t1 = self.labeled.data_tensor.clone()
        t2 = self.labeled.target_tensor.clone()
        tile_labeled = TensorDataset(t1.repeat(times, 1, 1, 1), t2.repeat(times))
        for epoch in range(self.args.epochs):
            self.G.train()
            self.D.train()
            unlabel_loader1 = DataLoader(self.unlabeled, batch_size=self.args.batch_size, shuffle=True, drop_last=True)
            unlabel_loader2 = iter(DataLoader(self.unlabeled, batch_size=self.args.batch_size, shuffle=True, drop_last=True))
            label_loader = iter(DataLoader(tile_labeled, batch_size=self.args.batch_size, shuffle=True, drop_last=True))
            batch_num = loss_supervised = loss_unsupervised = loss_gen = accuracy = 0.
            for (unlabel1, _label1) in unlabel_loader1:
                batch_num += 1
                unlabel2, _label2 = next(unlabel_loader2)
                x, y = next(label_loader)
                if self.args.cuda:
                    x, y, unlabel1, unlabel2 = x.cuda(), y.cuda(), unlabel1.cuda(), unlabel2.cuda()
                ll, lu, acc = self.trainD(x, y, unlabel1)
                loss_supervised += ll
                loss_unsupervised += lu
                accuracy += acc
                # train G on an independently shuffled unlabeled batch
                lg = self.trainG(unlabel2)
                loss_gen += lg
                if (batch_num + 1) % self.args.log_interval == 0:
                    print('Training: %d / %d' % (batch_num + 1, len(unlabel_loader1)))
                    print('Eval: correct %d / %d, batch acc %.4f' % (self.eval(), len(self.test), acc))
                    # self.eval() switches the nets to eval mode; switch back before continuing
                    self.G.train()
                    self.D.train()
            loss_supervised /= batch_num
            loss_unsupervised /= batch_num
            loss_gen /= batch_num
            accuracy /= batch_num
            print("Epoch %d, loss_supervised = %.4f, loss_unsupervised = %.4f, loss_gen = %.4f, train acc = %.4f" % (epoch, loss_supervised, loss_unsupervised, loss_gen, accuracy))
            sys.stdout.flush()
            if (epoch + 1) % self.args.eval_interval == 0:
                print("Eval: correct %d / %d" % (self.eval(), len(self.test)))

    def predict(self, x):
        # volatile=True disables graph construction at inference time (pre-0.4 API)
        return torch.max(self.D(Variable(x, volatile=True), cuda=self.args.cuda), 1)[1].data
    def eval(self):
        self.G.eval()
        self.D.eval()
        # stack the whole test set into a single batch and count correct predictions
        d, l = [], []
        for (datum, label) in self.test:
            d.append(datum)
            l.append(label)
        x, y = torch.stack(d), torch.LongTensor(l)
        if self.args.cuda:
            x, y = x.cuda(), y.cuda()
        pred = self.predict(x)
        return torch.sum(pred == y)
    def draw(self, batch_size):
        self.G.eval()
        return self.G(batch_size, cuda=self.args.cuda)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Improved GAN')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.003, metavar='LR',
                        help='learning rate (default: 0.003)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--cuda', action='store_true', default=False,
                        help='enable CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--eval-interval', type=int, default=1, metavar='N',
                        help='how many epochs to wait before evaluating on the test set')
    parser.add_argument('--unlabel-weight', type=float, default=1., metavar='W',
                        help='weight of the unsupervised loss relative to the supervised loss')
    args = parser.parse_args()
    args.cuda = args.cuda and torch.cuda.is_available()
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)  # seed torch as well, so --seed actually controls init/sampling
    gan = ImprovedGAN(Generator(100), Discriminator(), MnistLabel(10), MnistUnlabel(), MnistTest(), args)
    gan.train()