I recently tried to run the official code from the repository "Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration" by Yang He. Everything went well when I pruned ResNet (the accuracy changes after pruning). However, whenever I prune VGG, the test accuracy stays exactly the same after pruning. That does not seem reasonable, but I cannot figure out what is wrong with the code. Why is the accuracy not changing? I have already tried everything I could think of to fix the problem, but nothing worked, so I decided to reach out to you for help. Thank you very much, and have a good day.
I expected the Mask to work, but this pruning step clearly has no effect. The code is shown below; I omitted the training and testing parts.
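For context, my understanding of what the mask is supposed to do is roughly the following. The snippet below is just my own minimal illustration with toy tensors (not the official code) of zeroing out one whole filter the way get_filter_codebook() and do_mask() do:

import torch

# Toy conv weight: 4 filters, each with a 3x3x3 kernel (out, in, kh, kw).
weight = torch.randn(4, 3, 3, 3)

# Pretend filter 2 was selected for pruning: build a flat 0/1 codebook
# and zero that filter's entire kernel, like get_filter_codebook()/do_mask().
codebook = torch.ones(weight.numel())
kernel_length = 3 * 3 * 3
codebook[2 * kernel_length:3 * kernel_length] = 0

masked = (weight.view(-1) * codebook).view(weight.size())
print(masked[2].abs().sum())  # prints 0, so filter 2 is fully zeroed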
import numpy as np
import torch
from scipy.spatial import distance


class Mask:
    def __init__(self, model):
        self.model_size = {}
        self.model_length = {}
        self.compress_rate = {}
        self.distance_rate = {}
        self.mat = {}
        self.model = model
        self.mask_index = []
        self.filter_small_index = {}
        self.filter_large_index = {}
        self.similar_matrix = {}
        self.norm_matrix = {}
        self.cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
    def get_codebook(self, weight_torch, compress_rate, length):
        # this method appears to be unused
        weight_vec = weight_torch.view(length)
        weight_np = weight_vec.cpu().numpy()
        weight_abs = np.abs(weight_np)
        weight_sort = np.sort(weight_abs)
        threshold = weight_sort[int(length * (1 - compress_rate))]
        weight_np[weight_np <= -threshold] = 1
        weight_np[weight_np >= threshold] = 1
        weight_np[weight_np != 1] = 0
        print("codebook done")
        return weight_np
    def get_filter_codebook(self, weight_torch, compress_rate, length):
        # print("Entering get_filter_codebook...")
        # the print statement above works properly
        codebook = np.ones(length)
        if len(weight_torch.size()) == 4:
            # print("Entering the main branch of get_filter_codebook...")
            # the print statement above works properly
            filter_pruned_num = int(weight_torch.size()[0] * (1 - compress_rate))
            weight_vec = weight_torch.view(weight_torch.size()[0], -1)
            norm2 = torch.norm(weight_vec, 2, 1)
            norm2_np = norm2.cpu().numpy()
            filter_index = norm2_np.argsort()[:filter_pruned_num]
            kernel_length = weight_torch.size()[1] * weight_torch.size()[2] * weight_torch.size()[3]
            for x in range(0, len(filter_index)):
                codebook[filter_index[x] * kernel_length: (filter_index[x] + 1) * kernel_length] = 0
            print("filter codebook done")
            # this print statement works properly
        else:
            pass
        return codebook
    def get_filter_index(self, weight_torch, compress_rate, length):
        # this method appears to be unused
        if len(weight_torch.size()) == 4:
            print("filter index PREPARE")
            filter_pruned_num = int(weight_torch.size()[0] * (1 - compress_rate))
            weight_vec = weight_torch.view(weight_torch.size()[0], -1)
            norm1 = torch.norm(weight_vec, 1, 1)
            norm1_np = norm1.cpu().numpy()
            norm2 = torch.norm(weight_vec, 2, 1)
            norm2_np = norm2.cpu().numpy()
            filter_small_index = []
            filter_large_index = []
            filter_large_index = norm2_np.argsort()[filter_pruned_num:]
            filter_small_index = norm2_np.argsort()[:filter_pruned_num]
            norm1_sort = np.sort(norm1_np)
            threshold = norm1_sort[int(weight_torch.size()[0] * (1 - compress_rate))]
            kernel_length = weight_torch.size()[1] * weight_torch.size()[2] * weight_torch.size()[3]
            print("filter index done")
        else:
            pass
        return filter_small_index, filter_large_index
    def get_filter_similar(self, weight_torch, compress_rate, distance_rate, length, dist_type="l2"):
        codebook = np.ones(length)
        if len(weight_torch.size()) == 4:
            filter_pruned_num = int(weight_torch.size()[0] * (1 - compress_rate))
            similar_pruned_num = int(weight_torch.size()[0] * distance_rate)
            weight_vec = weight_torch.view(weight_torch.size()[0], -1)
            if dist_type == "l2" or "cos":
                norm = torch.norm(weight_vec, 2, 1)
                norm_np = norm.cpu().numpy()
            elif dist_type == "l1":
                norm = torch.norm(weight_vec, 1, 1)
                norm_np = norm.cpu().numpy()
            filter_small_index = []
            filter_large_index = []
            filter_large_index = norm_np.argsort()[filter_pruned_num:]
            filter_small_index = norm_np.argsort()[:filter_pruned_num]
            indices = torch.LongTensor(filter_large_index).cuda()
            weight_vec_after_norm = torch.index_select(weight_vec, 0, indices).cpu().numpy()
            # for euclidean distance
            if dist_type == "l2" or "l1":
                similar_matrix = distance.cdist(weight_vec_after_norm, weight_vec_after_norm, 'euclidean')
            elif dist_type == "cos":  # for cos similarity
                similar_matrix = 1 - distance.cdist(weight_vec_after_norm, weight_vec_after_norm, 'cosine')
            similar_sum = np.sum(np.abs(similar_matrix), axis=0)
            similar_large_index = similar_sum.argsort()[similar_pruned_num:]
            similar_small_index = similar_sum.argsort()[:similar_pruned_num]
            similar_index_for_filter = [filter_large_index[i] for i in similar_small_index]
            kernel_length = weight_torch.size()[1] * weight_torch.size()[2] * weight_torch.size()[3]
            for x in range(0, len(similar_index_for_filter)):
                codebook[similar_index_for_filter[x] * kernel_length: (similar_index_for_filter[x] + 1) * kernel_length] = 0
            # print("similar index done")
            # the print statement above works properly
        else:
            pass
        return codebook
    def convert2tensor(self, x):
        x = torch.FloatTensor(x)
        return x

    def init_length(self):
        for index, (name, param) in enumerate(self.model.named_parameters()):
            self.model_size[index] = param.size()
        for index1 in self.model_size:
            for index2 in range(0, len(self.model_size[index1])):
                if index2 == 0:
                    self.model_length[index1] = self.model_size[index1][0]
                else:
                    self.model_length[index1] *= self.model_size[index1][index2]
    def init_rate(self, rate_norm_per_layer, rate_dist_per_layer, pre_cfg=True):
        cfg = [64, 64, 128, 128, 256, 256, 256, 256, 512, 512, 512, 512, 512, 512, 512, 512]
        cfg_index = 0
        for index, item in enumerate(self.model.named_parameters()):
            print(f"Layer {index} is in self.mask_index")
            # the print statement above works properly
            self.compress_rate[index] = 1
            self.distance_rate[index] = 1
            if len(item[1].size()) == 4:
                print(item[1].size())
                if not pre_cfg:
                    self.compress_rate[index] = rate_norm_per_layer
                    self.distance_rate[index] = rate_dist_per_layer
                    self.mask_index.append(index)
                    print(item[0], "self.mask_index", self.mask_index)
                else:
                    self.compress_rate[index] = rate_norm_per_layer
                    self.distance_rate[index] = 1 - cfg[cfg_index] / item[1].size()[0]
                    self.mask_index.append(index)
                    print(item[0], "self.mask_index", self.mask_index, cfg_index, cfg[cfg_index], item[1].size()[0],
                          self.distance_rate[index])
                    print("self.distance_rate", self.distance_rate)
                    cfg_index += 1
    def init_mask(self, rate_norm_per_layer, rate_dist_per_layer, dist_type):
        print("Entering init_mask...")
        # the print statement above works properly
        self.init_rate(rate_norm_per_layer, rate_dist_per_layer, pre_cfg=True)
        for index, (name, param) in enumerate(self.model.named_parameters()):
            if index in self.mask_index:
                self.mat[index] = self.get_filter_codebook(param.data, self.compress_rate[index],
                                                           self.model_length[index])
                self.mat[index] = self.convert2tensor(self.mat[index])
                self.mat[index] = self.mat[index].cuda()
                # print("Calling get_filter_similar...")
                # the print statement above works properly
                self.similar_matrix[index] = self.get_filter_similar(param.data, self.compress_rate[index],
                                                                     self.distance_rate[index],
                                                                     self.model_length[index], dist_type=dist_type)
                self.similar_matrix[index] = self.convert2tensor(self.similar_matrix[index])
                self.similar_matrix[index] = self.similar_matrix[index].cuda()
        print("mask Ready")
        # the print statement above works properly
    def do_mask(self):
        # print("Executing do_mask...")
        for index, (name, param) in enumerate(self.model.named_parameters()):
            if index in self.mask_index:
                a = param.data.view(self.model_length[index])
                b = a * self.mat[index]
                param.data = b.view(self.model_size[index])
        print("mask Done")
        # the print statement above works properly

    def do_similar_mask(self):
        for index, (name, param) in enumerate(self.model.named_parameters()):
            if index in self.mask_index:
                a = param.data.view(self.model_length[index])
                b = a * self.similar_matrix[index]
                param.data = b.view(self.model_size[index])
        print("mask similar Done")
        # the print statement above works properly

    def do_grad_mask(self):
        for index, (name, param) in enumerate(self.model.named_parameters()):
            if index in self.mask_index:
                a = param.grad.data.view(self.model_length[index])
                b = a * self.mat[index]
                b = b * self.similar_matrix[index]
                param.grad.data = b.view(self.model_size[index])
        print("grad zero Done")
        # the print statement above works properly

    def if_zero(self):
        for index, (name, param) in enumerate(self.model.named_parameters()):
            if index in self.mask_index:
                # if index == 0:
                a = param.data.view(self.model_length[index])
                b = a.cpu().numpy()
                # print("number of nonzero weight is %d, zero is %d" %
                #       (np.count_nonzero(b), len(b) - np.count_nonzero(b)))
m = Mask(model)
m.init_length()
print("-*-" * 10 + "\n\tone epoch begin\n" + "-*-" * 10)
print("remaining ratio of pruning : Norm is %f" % args.rate_norm)
print("reducing ratio of pruning : Distance is %f" % args.rate_dist)
print("total remaining ratio is %f" % (args.rate_norm - args.rate_dist))

# Test before pruning
val_acc_1, val_los_1 = test(1, model, ewc, test_loader)
print(" accu before is: %.3f %%" % val_acc_1)

# Test after a single pruning pass
m.model = model
m.init_mask(args.rate_norm, args.rate_dist, args.dist_type)
m.do_mask()
m.do_similar_mask()
model = m.model
model.cuda()
val_acc_2, val_los_2 = test(2, model, ewc, test_loader)
print(" accu after is: %s %%" % val_acc_2)

# Pruning during training
# I guess this is where the problem occurs: the test accuracy remains the same after applying the Mask.
# However, all of the print statements fire as expected, so I don't know what the problem is
# (see the sanity check right after this listing).
best_prec1 = 0.
for epoch in range(args.epochs):
    train_acc, train_los = train(epoch, model, ewc, train_loader)
    prec1, test_los_mid = test(epoch, model, ewc, test_loader)
    if epoch % args.epoch_prune == 0 or epoch == args.epochs - 1:
        m.model = model
        m.if_zero()
        m.init_mask(args.rate_norm, args.rate_dist, args.dist_type)
        m.do_mask()
        m.do_similar_mask()
        m.if_zero()
        model = m.model
        model = model.cuda()
        val_acc, val_los = test(epoch, model, ewc, test_loader)

    is_best = val_acc > best_prec1
    best_prec1 = max(prec1, best_prec1)
    state = {
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_prec1': best_prec1,
        'optimizer': optimizer.state_dict()
        # 'cfg': model.cfg
    }
    torch.save(state, args.path + ('pruned_%s_%s%s_r%s.pth.tar' % ('cifar10', 'vgg', 19, 0.5)))

    # measure elapsed time
    # epoch_time.update(time.time() - start_time)
    start_time = time.time()
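This is the sanity check I refer to in the comment above. It is my own addition (not from the official repo): it just counts how many weights in each masked conv layer are actually zero, so I can see whether do_mask()/do_similar_mask() changed anything at all.

# My own sanity check (not part of the official code): count zeroed weights
# in every masked conv layer after pruning, to see if the mask did anything.
for index, (name, param) in enumerate(m.model.named_parameters()):
    if index in m.mask_index:
        flat = param.data.view(-1)
        zeros = int((flat == 0).sum().item())
        print("%s: %d of %d weights are zero" % (name, zeros, flat.numel()))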
Here is the VGG network:
import torch.nn as nn


def weights_init(m):
    # print('=> weights init')
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        # nn.init.normal_(m.weight, 0, 0.1)
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, nn.Linear):
        # nn.init.xavier_normal(m.weight)
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm2d):
        # Note that BN's running_var/mean are
        # already initialized to 1 and 0 respectively.
        if m.weight is not None:
            m.weight.data.fill_(1.0)
        if m.bias is not None:
            m.bias.data.zero_()
defaultcfg = {
    11: [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512],
    13: [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512],
    16: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512],
    19: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512],
}
class VGG(nn.Module):
    def __init__(self, dataset='cifar10', depth=19, init_weights=True, cfg=None, affine=True, batchnorm=True):
        super(VGG, self).__init__()
        if cfg is None:
            cfg = defaultcfg[depth]
        self._AFFINE = affine
        self.feature = self.make_layers(cfg, batchnorm, dataset)
        self.dataset = dataset
        if dataset == 'cifar10' or dataset == 'cinic-10':
            num_classes = 10
        elif dataset == 'cifar100':
            num_classes = 100
        elif dataset == 'tiny_imagenet':
            num_classes = 200
        elif dataset == 'mnist':
            num_classes = 10
        else:
            raise NotImplementedError("Unsupported dataset " + dataset)
        self.classifier = nn.Linear(cfg[-1], num_classes)
        if init_weights:
            self.apply(weights_init)
        # if pretrained:
        #     model.load_state_dict(model_zoo.load_url(model_urls['vgg11_bn']))

    def make_layers(self, cfg, batch_norm=False, dataset='mnist'):
        layers = []
        in_channels = 1 if dataset == 'mnist' else 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v, affine=self._AFFINE), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        return nn.Sequential(*layers)
    def forward(self, x):
        x = self.feature(x)
        if self.dataset == 'tiny_imagenet':
            x = nn.AvgPool2d(4)(x)
        elif self.dataset == 'mnist':
            x = nn.AvgPool2d(2)(x)
        else:
            x = nn.AvgPool2d(2)(x)
        x = x.view(x.size(0), -1)
        y = self.classifier(x)
        return y
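For completeness, this is how I would check that the cfg list hard-coded in init_rate() lines up with the conv weights returned by named_parameters() for this VGG. It is my own debugging snippet, not part of the official code:

# My own check (not official code): list every 4-D parameter so its
# out-channel count can be compared against the cfg list in init_rate().
vgg = VGG(dataset='cifar10', depth=19)
for index, (name, param) in enumerate(vgg.named_parameters()):
    if len(param.size()) == 4:
        print(index, name, tuple(param.size()))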
The link to the official code: https://github.com/he-y/filter-pruning-geometric-median