The tensor of images needs to be a fixed size, (N, C, H, W), for an image in the tensor with size (H', W') and another with (H, W)

Question

I am trying to train a model on a specific dataset, the CAMUS dataset, but I am new to PyTorch, so I am a little bit confused. I used the CAMUS dataset code from https://www.kaggle.com/code/sontungtran/camus-eda . When I run the part of the code that displays the images, I see that some images have different sizes from the others, so when I get to the step that trains the model and batch the data with a batch size, I always run into a problem.

So i used this code

class CamusIterator(Dataset):
    """Dataset over the per-patient folders of the CAMUS echocardiography data.

    Every item is a dictionary describing one patient: the 2CH/4CH ED and ES
    frames, the two full sequences, the parsed ``Info_2CH.cfg`` /
    ``Info_4CH.cfg`` files and, for the training split only, the four
    ground-truth (``*_gt``) arrays.

    Arrays are returned exactly as read from disk, so their height and width
    can differ from patient to patient. Bringing them to a common size
    (resize / pad / crop) is left to the transforms passed to the constructor.
    """

    # Suffix stems of the image files read for every patient, in read order.
    _IMAGE_KEYS = (
        '2CH_ED',
        '2CH_ES',
        '4CH_ED',
        '4CH_ES',
        '2CH_sequence',
        '4CH_sequence',
    )
    # Ground-truth files that are read only when data_type == 'train'.
    _GT_KEYS = (
        '2CH_ED_gt',
        '2CH_ES_gt',
        '4CH_ED_gt',
        '4CH_ES_gt',
    )

    def __init__(
        self,
        data_type,
        global_transforms=None,
        augment_transforms=None,
        data_file=None,
    ):
        """Select the split folder and snapshot its patient directories.

        Args:
            data_type: ``'train'`` or ``'test'``; selects the folder to read.
            global_transforms: callables applied first, each invoked as
                ``transform(data, data_type)`` and expected to return the
                data dictionary. ``None`` means no transforms.
            augment_transforms: callables applied afterwards, each invoked as
                ``transform(data)``. ``None`` means no transforms.
            data_file: accepted for backward compatibility only; the value is
                ignored because the folder is derived from ``data_type``.

        Raises:
            ValueError: if ``data_type`` is neither ``'train'`` nor ``'test'``.
        """
        super().__init__()

        train_file = '/content/drive/MyDrive/Echo_Segmentation/tries/training_set'
        test_file = '/content/drive/MyDrive/Echo_Segmentation/tries/testing_set'

        if data_type == 'train':
            data_file = train_file
        elif data_type == 'test':
            data_file = test_file
        else:
            raise ValueError('Wrong data_type for CamusIterator')

        self.data_type = data_type
        self.data_file = data_file
        # os.listdir returns entries in arbitrary order; sorting once here
        # gives a stable index -> patient mapping and avoids hitting the
        # filesystem again on every __len__ / __getitem__ call.
        self.images = sorted(os.listdir(data_file))
        # None instead of a shared mutable default list.
        self.global_transforms = [] if global_transforms is None else global_transforms
        self.augment_transforms = [] if augment_transforms is None else augment_transforms

    def __read_image(self, patient_file, suffix):
        """Read ``<data_file>/<patient>/<patient><suffix>`` as a float32 array."""
        image_file = '{}/{}/{}'.format(self.data_file, patient_file, patient_file + suffix)
        # # Stolen from a StackOverflow answer
        # # https://stackoverflow.com/questions/37290631/reading-png-raw-format-in-python
        image = sitk.GetArrayFromImage(sitk.ReadImage(image_file, sitk.sitkFloat32))
        #size = [1232,748]
        # plt.figure(figsize=(20,16))
        # plt.gray()
        # plt.subplots_adjust(0,0,1,1,0.01,0.01)
        # for i in range(image.shape[0]):
        #     plt.subplot(5,6,i+1), plt.imshow(image[i]), plt.axis('off')
        #     # use plt.savefig(...) here if you want to save the images as .jpg, e.g.
        #     plt.savefig('image.jpg')
        # #plt.show()
        # Debug output: the shape of every array that is read.
        print(image.shape)
        return image

    def __read_info(self, data_file):
        """Parse a ``key: value`` text file into a dictionary of strings.

        Blank lines are skipped, and only the first ``': '`` on a line
        separates key from value, so values may themselves contain ``': '``.

        Raises:
            ValueError: if a non-blank line contains no ``': '`` separator.
        """
        info = {}
        with open(data_file, 'r') as f:
            for raw_line in f:
                line = raw_line.strip('\n')
                if not line.strip():
                    continue
                info_type, separator, info_details = line.partition(': ')
                if not separator:
                    raise ValueError(
                        'Malformed line in {}: {!r}'.format(data_file, line)
                    )
                info[info_type] = info_details
        return info

    def __len__(self):
        """Return the number of patient folders found at construction time."""
        return len(self.images)

    def __getitem__(self, index):
        """Load every file of the ``index``-th patient and apply the transforms."""
        patient_file = self.images[index]

        # Keys are inserted in the order: patient, images, ground truth
        # (training split only), info dictionaries.
        data = {'patient': patient_file}
        for key in self._IMAGE_KEYS:
            data[key] = self.__read_image(patient_file, '_{}.mhd'.format(key))

        if self.data_type == 'train':
            for key in self._GT_KEYS:
                data[key] = self.__read_image(patient_file, '_{}.mhd'.format(key))

        # Dictionaries of infos
        data['info_2CH'] = self.__read_info(
            '{}/{}/{}'.format(self.data_file, patient_file, 'Info_2CH.cfg')
        )
        data['info_4CH'] = self.__read_info(
            '{}/{}/{}'.format(self.data_file, patient_file, 'Info_4CH.cfg')
        )

        # Transforms
        for transform in self.global_transforms:
            data = transform(data, self.data_type)
        for transform in self.augment_transforms:
            data = transform(data)

        return data

    def __iter__(self):
        """Yield every patient record in index order."""
        for i in range(len(self)):
            yield self[i]

and then after this code i used:

def display_image(patient,fields):
    """Show the requested entries of one patient record side by side.

    ``patient`` is indexed by each name in ``fields``; every selected array is
    squeezed and drawn in grayscale in its own subplot of a single-row figure,
    titled with the field name.
    """
    #fields=['2CH_ED', '2CH_ES', '4CH_ED', '4CH_ES','2CH_ED_gt', '2CH_ES_gt', '4CH_ED_gt', '4CH_ES_gt']
    figure = plt.figure(figsize=(40, 60))
    column_count = len(fields)
    for position, field_name in enumerate(fields, start=1):
        axes = figure.add_subplot(1, column_count, position)
        picture = patient[field_name]
        # The pyplot calls act on the subplot that was just added.
        plt.axis('off')
        plt.imshow(picture.squeeze(), cmap='gray')
        axes.set_title(field_name)
    figure.tight_layout()
    plt.show()

# Display only the first record of train_file: the loop stops after one pass.
for patient in train_file:
    # Only the ground-truth masks are listed; the image fields '2CH_ED',
    # '2CH_ES', '4CH_ED' and '4CH_ES' can be added to show them as well.
    fields = ['2CH_ED_gt', '2CH_ES_gt', '4CH_ED_gt', '4CH_ES_gt']
    display_image(patient, fields)
    break

and I get a result like this (I will not include all the values I get, because many of them are the same):

(1, 779, 417)
(1, 908, 551)
(1, 779, 417)
(1, 908, 582)
(1, 843, 512)
(1, 908, 582)
(1, 1168, 708)

So the images in my dataset have different (H, W). How can I change that? I have tried some things, as you can see from the commented-out lines (this is just an example), but I can't figure out the right solution. Should I handle the images and the masks separately somehow, and if so, how do I do that?

Or padding. All are standard approaches for training vision models — DerekG, Mar 09 '23 at 14:52
@Ivan Yes, I have tried this in different places in my code, but nothing changes. The last thing I am currently trying: I removed global_transforms and augment_transforms and put a single transform variable in their place, which can take as a value, for example, train_transform = A.Compose([A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH)]), with IMAGE_HEIGHT = 160 and IMAGE_WIDTH = 240, but a little later in the code it gives me this error: KeyError: 'You have to pass data to augmentations as named arguments, for example: aug(image=image)' — Dimgai, Mar 09 '23 at 15:18
@DerekG i am not sure if i have done this. Now that you mention this i will search it, but i do not understand why padding will help solve this problem. — Dimgai, Mar 09 '23 at 15:20
Padding: tensor 1 is of size [1,y1,x1]. Tensor 2 is of size [1,y2,x2]. Select y3 s.t. y3 > y1 and y3 > y2. Select x3 s.t. x3 > x1 and x3 > x2. Now pad both tensors to shape [1,y3,x3]. You can now stack the tensors together into [2,1,y3,x3]. Padding offers the advantage over cropping that the entire image is still shown for each image. Cropping offers the advantage that no filler pixels (generally random or black) are added. Resizing offers both advantages but changes the scale and possibly the aspect ratio, and introduces pixel interpolation which may not be desirable for your learning task — DerekG, Mar 09 '23 at 16:03
So of course, padding, cropping and resizing ALL solve your problem by making the dimensions of each image the same — DerekG, Mar 09 '23 at 16:04
And your response to @Ivan doesn't indicate that the method does not work at a theoretical level, but rather an implementation level (i.e. it works if implemented correctly) — DerekG, Mar 09 '23 at 16:05

The tensor of images needs to be a fixed size, (N, C, H, W), for an image in the tensor with size (H', W') and another with (H, W)

0 Answers0