0

This is the input file which is of excel format:

A B C D
APP 1 210101 8.1
APP 2 210102 8.2
APP 3 210103 8.3
Data 4 210104 8.4
Data 5 210105 8.5
Data 6 210106 8.6

How to generate output files of excel in below way using Perl?

output file 1 (contains only data from rows of app):

A B C
1 210101 8.1
2 210102 8.2
3 210103 8.3

Output file 2 (contains only data from rows of data):

A B C
4 210104 8.4
5 210105 8.5
6 210106 8.6
18-454
  • 3
  • 2
  • You can probably use [`Spreadsheet::ParseXLSX`](https://metacpan.org/pod/Spreadsheet::ParseXLSX) to read the input file, then use [`Excel::Writer::XLSX`](https://metacpan.org/pod/Excel::Writer::XLSX) to write the two ouput files. If you want to preserve the formatting in the original document you could try [`Excel::CloneXLSX::Format`](https://metacpan.org/pod/Excel::CloneXLSX::Format) – Håkon Hægland Dec 21 '21 at 08:22
  • [Here](https://stackoverflow.com/a/68428108/2173773) is an example – Håkon Hægland Dec 21 '21 at 08:30
  • Thank you for sharing your comments Hakon. Here I have given the example for only two output files. But if there are many rows instead of just app and data how we can parse and generate the output files based on the rows – 18-454 Dec 21 '21 at 08:44

1 Answers1

1

Here is an example of how you can split the input file into multiple output files based on the value in column 0 of the input file:

package Main;
use v5.22.0;         # experimental signatures requires perl >= 5.22
use feature qw(say);
use strict;
use warnings;
use experimental qw(signatures);
use Spreadsheet::ParseXLSX;
use Excel::CloneXLSX::Format qw(translate_xlsx_format);
use Excel::Writer::XLSX;

{
    my $self = Main->new(
        input_file    => 'input.xlsx',
        output_prefix => 'out',
    );
    my $worksheet = $self->scan_input_file();
    $self->open_output_files();
    my ( $row_min, $row_max ) = $worksheet->row_range();
    my $col0 = 0; # column number that contains the save type
    my @cols_to_save = (1..3);
    for my $row ( $row_min .. $row_max ) {
        my $cell = $worksheet->get_cell( $row, $col0 );
        my $save_type = $cell->unformatted();
        my $row = $self->get_row($worksheet, $row, \@cols_to_save);
        $self->save_row( $save_type, $row);
    }
    $self->close_output_files();
    say "Done.";
}


sub close_output_files( $self  ) {
    for my $file (keys %{$self->{files}}) {
        my $workbook = $self->{files}{$file}{workbook};
        $workbook->close();
    }
}

sub save_row( $self, $save_type, $cells ) {
    my $file = $self->{save_types}{$save_type};
    my $workbook = $self->{files}{$file}{workbook};
    my $worksheet = $self->{files}{$file}{worksheet};
    my $row = $self->{files}{$file}{row};
    my $col = 0;
    for my $cell (@$cells) {
        my $fmt = $cell->get_format();
        my $fmt_props  = translate_xlsx_format( $fmt );
        my $new_format = $workbook->add_format(%$fmt_props);
        my $value = $cell->unformatted() || '';
        $worksheet->write($row, $col, $value, $new_format);
        $col++;
    }
    $self->{files}{$file}{row}++;
}

sub get_row( $self, $worksheet, $row, $cols_to_save ) {
    my @row;
    for my $col (@$cols_to_save) {
        my $cell = $worksheet->get_cell( $row, $col );
        push @row, $cell;
    }
    return \@row;
}

sub new( $class, %args ) { bless \%args, $class }

sub scan_input_file( $self ) {
    my $parser = Spreadsheet::ParseXLSX->new;
    my $workbook = $parser->parse($self->{input_file});
    my $worksheet = $workbook->worksheet(0);
    my ( $row_min, $row_max ) = $worksheet->row_range();

    my %save_types;
    for my $row ( $row_min .. $row_max ) {
        my $col0 = 0;
        my $cell0 = $worksheet->get_cell( $row, $col0 );
        my $save_type = $cell0->unformatted();
        $save_types{$save_type} = 1;
    }
    $self->{save_types} = \%save_types;
    return $worksheet;
}

sub open_output_files( $self  ) {
    my $save_types = $self->{save_types};
    my $prefix = $self->{output_prefix};
    my %file_info;
    for my $type (keys %$save_types) {
        my $fn = $prefix . "_" . $type . '.xlsx';
        $save_types->{$type} = $fn;
        my $workbook = Excel::Writer::XLSX->new( $fn );
        my $worksheet = $workbook->add_worksheet();
        $file_info{$fn} = {
            workbook  => $workbook,
            worksheet => $worksheet,
            row       => 0,  # current row number
        };
    }
    $self->{files} = \%file_info;
}
Håkon Hægland
  • 39,012
  • 21
  • 81
  • 174