0

I am trying to create a script that extracts certain fields from the output of autorep -j $i -q and prints the value of each of those fields. When the script is executed it asks the user to input a JOBNAME or %SEARCHSTRING%, and autorep -j $i -q then returns the job details in the format below:

    /tmp $ autorep -j Test_jobA -q
    
    
insert_job: Test_jobA   job_type: CMD
command: echo
machine: machinename
owner: owner
permission:
date_conditions: 1
run_calendar: Autosys_Calendar 
start_times: "09:09"
description: "test discription"
std_out_file: "/tmp/Test_jobA.out"
std_err_file: "/tmp/Test_jobA.err"
alarm_if_fail: 1
alarm_if_terminated: 1


insert_job: Test_JobB   job_type: CMD
command: echo
machine: machinename
owner: owner
permission:
date_conditions: 1
days_of_week: mo,tu,we,th,fr 
start_times: "21:05"
description: "test discription"
std_out_file: "/tmp/Test_JobB.out"
std_err_file: "/tmp/Test_JobB.err"
alarm_if_fail: 1
alarm_if_terminated: 1

insert_job: Test_JobC  job_type: BOX
command: echo
machine: machinename
owner: owner
permission:
date_conditions: 0
description: "test discription"
std_out_file: "/tmp/Test_JobC.out"
std_err_file: "/tmp/Test_JobC.err"
alarm_if_fail: 1
alarm_if_terminated: 1

insert_job: Test_JobD   job_type: CMD
command: echo
machine: machinename
owner: owner
permission:
date_conditions: 1
days_of_week: su 
start_times: "08:50"
description: "test discription"
std_out_file: "/tmp/Test_JobD.out"
std_err_file: "/tmp/Test_JobD.err"
alarm_if_fail: 1
alarm_if_terminated: 1

insert_job: Test_JobE   job_type: CMD
command: echo
machine: machinename
owner: owner
permission:
date_conditions: 1
days_of_week: su 
start_times: "08:20"
description: "test discription"
std_out_file: "/tmp/Test_JobE.out"
std_err_file: "/tmp/Test_JobE.err"
alarm_if_fail: 1
alarm_if_terminated: 1


insert_job: Test_JobF   job_type: CMD
command: echo
machine: machinename
owner: owner
permission:
date_conditions: 1
days_of_week: all 
start_mins: 0,10,20,30,40,50
description: "test discription"
std_out_file: "/tmp/Test_JobF.out"
std_err_file: "/tmp/Test_JobF.err"
alarm_if_fail: 1
alarm_if_terminated: 1

So as you can see, if a job has date_conditions: 0 then it may or may not have condition: in it, and it won't have days_of_week:, start_mins:, run_window: or run_calendar:. If a job has date_conditions: 1 then it may or may not have days_of_week:, start_mins:, run_window: or run_calendar:.

I have the below script that does the filtering well enough:

#!/bin/bash
#
# Prompt for an AutoSys JOB_NAME (or %SEARCHSTRING%), summarise the matching
# job definitions printed by `autorep -j <name> -q`, and save them as a CSV.
#
# Output files:
#   $TXT - one space-separated line per job: name, type, date_conditions and
#          then whichever of condition / days_of_week / run_calendar /
#          start_times / start_mins the job defines, in the order autorep
#          prints them.
#   $CSV - the same lines below a header row, with every run of spaces
#          turned into a comma.
#
# NOTE: attributes a job does not define are skipped rather than padded, so
# the CSV columns of different jobs do not necessarily line up with the
# header row.

TXT=/tmp/test1.txt
CSV=/tmp/test1.csv

echo "Enter the JOB_NAME or %SEARCHSTRING%"

# Only a single answer is wanted, hence the `break` after the first line read.
while read -r i; do
  awk '
    # First line of a job definition: close the previous row, start a new one.
    /^insert_job/ {
      if (flag) printf "\n"
      printf "%s %s", $2, $4
      flag = 1
    }
    # Every further value is preceded by one blank so fields stay separated.
    /^date_conditions/ { printf " %s", $2 }
    /^condition:|^days_of_week:|^run_calendar:|^start_times:|^start_mins:/ { printf " %s", $2 }
    # Terminate the last row so the file ends with a newline.
    END { if (flag) printf "\n" }
  ' < <(autorep -j "$i" -q) > "$TXT"
  break
done

# Build the CSV only when autorep actually returned at least one job.
if [ -s "$TXT" ]; then
  {
    echo "job_name,job_type,Date_Conditions,condition,days_of_week,start_times,Start_mins"
    cat -- "$TXT"
  } | sed 's/ \+/,/g' > "$CSV"
else
  echo "Please check the %SEARCHSTRING% or JOB_NAME"
fi

The While loop in the above script gives me the below output:

Test_jobA CMD 1 Autosys_Calendar "09:09"
Test_JobB CMD 1 mo,tu,we,th,fr "21:05"
Test_JobC BOX 0
Test_JobD CMD 1 su "08:50"
Test_JobE CMD 1 su "08:20"
Test_JobF CMD 1 all 0,10,20,30,40,50

The IF condition in the above script is used for converting the output of the While loop into a .csv file, but as the output is not linear (jobs have different sets of fields), I am getting the wrong data in the wrong columns.

Is there any way I can smooth it out?

EDIT: The required output for the CSV file:

I am looking for the output below: if a field is missing from a job, then "NA" should be printed instead of an empty field so that the .csv formatting stays aligned.

job_name     job_type  date_conditions   condition run_calendar       days_of_week    start_times    start_mins
Test_jobA      CMD             1            NA     Autosys_Calendar     NA             "09:09"        NA
Test_JobB      CMD             1            NA          NA              mo,tu,we,th,fr "21:05"        NA
Test_JobC      BOX             0            NA          NA              NA             NA             NA
Test_JobD      CMD             1            NA          NA              su             "08:50"        NA
Test_JobE      CMD             1            NA          NA              su             "08:20"        NA
Test_JobF      CMD             1            NA          NA              all             NA          0,10,20,30,40,50
NecroCoder
  • 15
  • 4
  • Please provide an example of the output structure you want. – Itération 122442 Dec 15 '22 at 07:42
  • @Itération i have made the edits to my query – NecroCoder Dec 15 '22 at 08:10
  • That required output is not a CSV format as it doesn't have commas between fields and the data values in the 2nd and subsequent lines are centered under the first line header values. It also has an unquoted field containing commas, `mo,tu,we,th,fr`. Do you want CSV output or do you want the format you show? If you want CSV then show CSV. If you want the format you show then explain what the white space is between the fields (tabs and/or blanks or something else and how many of each). – Ed Morton Dec 15 '22 at 12:20
  • Do you REALLY want `date_conditions` converted to `Date_Conditions` and`start_mins` to `Start_mins` for the output but all of the other fields names to left all lower case? If not then please fix that in your expected output too. You cannot get the output you show from the input you show as the output contains 6 rows of `Test_jobA` (inconsistent casing again) and `Test_JobB`, etc. when the input only contains 2 records named `Test_Job` and `Test_Job2`. Make sure the expected output you show is **exactly** the output you expect given the input you provide otherwise we can't use it to test with. – Ed Morton Dec 15 '22 at 12:25
  • @EdMorton The column names doesnt matter, they can be kept as they are displayed in the job..Basically i want this data to be exported to into a excel file and the space between is a blank space. – NecroCoder Dec 15 '22 at 12:31
  • Yes, they do matter as we need to produce them. If they can remain as in the input then that's the simplest thing to implement so show that. The space shown in your question is not 1 blank. You would find CSVs easier for Excel to handle than blank separated fields as you can just double-click on a CSV and Excel knows how to read/write it. Please fix your provided expected output to be the exact output you expect from the input you provided so it demonstrates your needs and we can copy/paste your input and output to test a potential solution with. – Ed Morton Dec 15 '22 at 12:34
  • @EdMorton I have added the complete test data for you to refer to and have modified the column names as required. I have mentioned in my query what kind of data I get. The `While loop` gives the above output and I wish to get the one I have mentioned at the end. If you look at my `AWK` query, it adds a single blank space after every output. – NecroCoder Dec 15 '22 at 12:57
  • I'm sorry, I'm really struggling to believe you want the output format shown in your question as it's not CSV or any other format easily parsable by other tools and frankly it just doesn't make sense. I think you're trying to graphically show an idea of what you want with a bunch of extra spacing to make it clearer but we'd be FAR better off if you just posted exactly the output you want. – Ed Morton Dec 16 '22 at 01:01
  • I see in the expected output in [your followup question](https://stackoverflow.com/q/74867891/1745001) that you actually do want CSV output, not the output you show in this question. – Ed Morton Dec 20 '22 at 22:00

2 Answers

0

The following awk script might be an option for you. You can redirect the output to a .xlsx file or pipe it to column -t for column-formatted output.

#!/bin/bash
#
# Print one tab-separated summary row per job found in `autorep -q` output.
#
# Usage: ./script [job_file]
#   job_file  file holding the autorep output (default: job.dat)
#
# Columns: job_name, job_type, date_conditions, condition, run_calendar,
# days_of_week, start_times, start_mins. An attribute that a job does not
# define is reported as NA. A row is emitted when the alarm_if_terminated
# line of a job is reached, so a job lacking that line produces no row.

data_file="${1:-job.dat}"

awk '
    # Text following the "tag:" prefix of the current line, with surrounding
    # white space removed. Unlike $2 this keeps values that contain blanks.
    function value(    v) {
        v = $0
        sub(/^[[:space:]]*[^:]+:[[:space:]]*/, "", v)
        sub(/[[:space:]]+$/, "", v)
        return v
    }
    BEGIN {
        OFS = "\t"
        print "job_name", "job_type", "date_conditions", "condition", "run_calendar", "days_of_week", "start_times", "start_mins"
    }
    # A new job starts: remember its name/type and reset the optional fields.
    $1 == "insert_job:" {
        jn = $2; jt = $4
        dc = c = rc = dow = st = sm = "NA"
    }
    $1 == "date_conditions:" { dc  = value() }
    $1 == "condition:"       { c   = value() }
    $1 == "run_calendar:"    { rc  = value() }
    $1 == "days_of_week:"    { dow = value() }
    $1 == "start_times:"     { st  = value() }
    $1 == "start_mins:"      { sm  = value() }
    # The alarm_if_terminated line closes a job definition: print its row.
    /_if_terminated/ { print jn, jt, dc, c, rc, dow, st, sm }
' "$data_file"

Assuming all of your job data is contained in a file named job.dat, here is sample output:

./script job.dat | column -t
job_name   job_type  date_conditions  condition  run_calendar      days_of_week    start_times  start_mins
Test_jobA  CMD       1                NA         Autosys_Calendar  NA              "09:09"      NA
Test_JobB  CMD       1                NA         NA                mo,tu,we,th,fr  "21:05"      NA
Test_JobC  BOX       0                NA         NA                NA              NA           NA
Test_JobD  CMD       1                NA         NA                su              "08:50"      NA
Test_JobE  CMD       1                NA         NA                su              "08:20"      NA
Test_JobF  CMD       1                NA         NA                all             NA           0,10,20,30,40,50

Alternative version of script for generating a .csv output:

#!/bin/bash
#
# Convert `autorep -q` job definitions into CSV, one fully quoted row per job.
#
# Usage: ./script job_file
#   job_file  file holding the autorep output; standard input is read when
#             it is omitted or given as "-"
#
# Columns: job_name, job_type, date_conditions, condition, run_calendar,
# days_of_week, start_times, start_mins. An attribute that a job does not
# define is reported as "NA". A row is emitted when the alarm_if_terminated
# line of a job is reached, so a job lacking that line produces no row.

data_file="${1:--}"

awk '
    # Text following the "tag:" prefix of the current line, with surrounding
    # white space removed. Unlike $2 this keeps values that contain blanks.
    function value(    v) {
        v = $0
        sub(/^[[:space:]]*[^:]+:[[:space:]]*/, "", v)
        sub(/[[:space:]]+$/, "", v)
        return v
    }
    # Quote s as a CSV field: drop one pair of surrounding double quotes,
    # double any embedded ones, then wrap the result in double quotes.
    function csv(s) {
        gsub(/^"|"$/, "", s)
        gsub(/"/, "\"\"", s)
        return "\"" s "\""
    }
    BEGIN {
        OFS = ","
        n = split("job_name job_type date_conditions condition run_calendar days_of_week start_times start_mins", hdr, " ")
        for (i = 1; i <= n; i++) {
            printf "%s%s", csv(hdr[i]), (i < n ? OFS : ORS)
        }
    }
    # A new job starts: remember its name/type and reset the optional fields.
    $1 == "insert_job:" {
        jn = csv($2); jt = csv($4)
        dc = c = rc = dow = st = sm = csv("NA")
    }
    $1 == "date_conditions:" { dc  = csv(value()) }
    $1 == "condition:"       { c   = csv(value()) }
    $1 == "run_calendar:"    { rc  = csv(value()) }
    $1 == "days_of_week:"    { dow = csv(value()) }
    $1 == "start_times:"     { st  = csv(value()) }
    $1 == "start_mins:"      { sm  = csv(value()) }
    # The alarm_if_terminated line closes a job definition: print its row.
    /_if_terminated/ { print jn, jt, dc, c, rc, dow, st, sm }
' "$data_file"

CSV output:

"job_name","job_type","date_conditions","condition","run_calendar","days_of_week","start_times","start_mins"
"Test_jobA","CMD","1","NA","Autosys_Calendar","NA","09:09","NA"
"Test_JobB","CMD","1","NA","NA","mo,tu,we,th,fr","21:05","NA"
"Test_JobC","BOX","0","NA","NA","NA","NA","NA"
"Test_JobD","CMD","1","NA","NA","su","08:50","NA"
"Test_JobE","CMD","1","NA","NA","su","08:20","NA"
"Test_JobF","CMD","1","NA","NA","all","NA","0,10,20,30,40,50"

Modified option to NOT rely on '_if_terminated' for output:

#!/bin/bash
#
# Convert `autorep -q` job definitions into CSV, one fully quoted row per job,
# without depending on which attribute happens to end a job definition.
#
# Usage: ./script job_file
#   job_file  file holding the autorep output; standard input is read when
#             it is omitted or given as "-"
#
# Columns: job_name, job_type, date_conditions, condition, run_calendar,
# days_of_week, start_times, start_mins. An attribute that a job does not
# define is reported as "NA".

data_file="${1:--}"

awk '
    # Text following the "tag:" prefix of the current line, with surrounding
    # white space removed. Unlike $2 this keeps values that contain blanks.
    function value(    v) {
        v = $0
        sub(/^[[:space:]]*[^:]+:[[:space:]]*/, "", v)
        sub(/[[:space:]]+$/, "", v)
        return v
    }
    # Quote s as a CSV field: drop one pair of surrounding double quotes,
    # double any embedded ones, then wrap the result in double quotes.
    function csv(s) {
        gsub(/^"|"$/, "", s)
        gsub(/"/, "\"\"", s)
        return "\"" s "\""
    }
    # Print the row of the job collected so far, if there is one. Tracking
    # this with a flag (rather than NR) keeps blank lines ahead of the first
    # job, or an input without jobs, from producing a row of empty fields.
    function emit() {
        if (have_job) {
            print jn, jt, dc, c, rc, dow, st, sm
        }
    }
    BEGIN {
        OFS = ","
        n = split("job_name job_type date_conditions condition run_calendar days_of_week start_times start_mins", hdr, " ")
        for (i = 1; i <= n; i++) {
            printf "%s%s", csv(hdr[i]), (i < n ? OFS : ORS)
        }
    }
    # A new job starts: flush the previous one, then reset all fields.
    $1 == "insert_job:" {
        emit()
        have_job = 1
        jn = csv($2); jt = csv($4)
        dc = c = rc = dow = st = sm = csv("NA")
    }
    $1 == "date_conditions:" { dc  = csv(value()) }
    $1 == "condition:"       { c   = csv(value()) }
    $1 == "run_calendar:"    { rc  = csv(value()) }
    $1 == "days_of_week:"    { dow = csv(value()) }
    $1 == "start_times:"     { st  = csv(value()) }
    $1 == "start_mins:"      { sm  = csv(value()) }
    # The last job has no following insert_job line, so flush it here.
    END { emit() }
' "$data_file"
j_b
  • 1,975
  • 3
  • 8
  • 14
  • your script works to some extent. i tried directing the output to a `.xlsx` file but once i move that file to my desktop and open it, i get a popup saying the file is corrupted. so i end up copying the data from the `.dat` file to a excel. any way to directly move the data to a `.csv` file? – NecroCoder Dec 15 '22 at 18:26
  • Updated answer with version of script for generating CSV output. Further, the .xlsx output opens fine in google sheets, so you could also export from Google sheets to excel format. – j_b Dec 15 '22 at 19:03
  • Is it possible that instead of `/_if_terminated/{printf "%s,%s,%s,%s,%s,%s,%s,%s\n", jn, jt, dc, c, rc, dow, st, sm}` we can use something else. what i am trying here is that there are many jobs that dont end at `_if_terminated`. so i want to use this script for any job no matter what that job ends with. – NecroCoder Jan 03 '23 at 15:00
  • Yes, it is possible. I added another version to my answer per your comment/question. – j_b Jan 03 '23 at 15:30
  • this is working nicely, thanks. But only issue is that this script is adding a blank line at the start of the data. and also it would be great if you could explain what does the line `if (NR>1) {printf "%s,%s,%s,%s,%s,%s,%s,%s\n", jn, jt, dc, c, rc, dow, st, sm} jn="\""$2"\""; jt="\""$4"\""; dc="\"NA\""; c="\"NA\""; rc="\"NA\""; dow="\"NA\""; st="\"NA\""; sm="\"NA\"" }` does, just trying to understand. – NecroCoder Jan 03 '23 at 16:25
  • I cannot reproduce the 'blank line at the start of the data' with the sample data you provided in your question. The `if (NR>1)....` line of code skips printing for the first data line. – j_b Jan 03 '23 at 17:07
0

I think you'll be much happier with this script that outputs CSV as a starting point rather than a script that'd produce the output you say you want:

$ cat tst.awk
# Convert "tag: value" job definitions (as printed by autorep -q) to CSV.
#
# Every non-blank line contributes one tag/value pair to the current record;
# an insert_job line starts a new record and may carry a second pair
# (job_type) after the job name. Each record is printed as one row of quoted
# fields, in the order its tags were first seen.
#
# The header row is built from the tags of the first record only; later
# records print their own values in their own tag order, so records with a
# different set of tags do not line up with that header.
BEGIN { OFS="," }
!NF { next }

# Start of a record. The job name may be followed by more text on the same
# line or be the last thing on it.
match($0,/^[[:space:]]*insert_job:[[:space:]]*[^[:space:]]+[[:space:]]*/) {
    prt()
    delete tag2val
    numTags = 0
    set_tag2val(substr($0,RSTART,RLENGTH))
    # Leave only what follows the job name to be handled as a normal pair.
    $0 = substr($0,RSTART+RLENGTH)
}
# NF guards against the empty remainder of a bare "insert_job: name" line.
NF { set_tag2val($0) }
END { prt() }

# Split str at its first colon into a tag and a value (both trimmed) and
# store the pair, remembering the order in which new tags appear.
function set_tag2val(str,       tag,val) {
    gsub(/^[[:space:]]+|[[:space:]]+$/,"",str)
    tag = val = str
    sub(/[[:space:]]*:.*/,"",tag)
    sub(/[^:]*:[[:space:]]*/,"",val)
    if ( !(tag in tag2val) ) {
        tags[++numTags] = tag
    }
    tag2val[tag] = val
}

# Print the current record (if any) as a CSV row, preceded by the header row
# the first time a non-empty record is printed.
function prt(   tagNr,tag,val) {
    if ( numTags && !doneHdr++ ) {
        for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
            tag = tags[tagNr]
            printf "\"%s\"%s", tag, (tagNr<numTags ? OFS : ORS)
        }
    }
    for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
        tag = tags[tagNr]
        val = tag2val[tag]
        # Drop the quotes that surround the input value, then double any
        # quote left inside it so the field remains valid CSV.
        gsub(/^"|"$/,"",val)
        gsub(/"/,"\"\"",val)
        printf "\"%s\"%s", val, (tagNr<numTags ? OFS : ORS)
    }
}

$ awk -f tst.awk file
"insert_job","job_type","command","machine","owner","permission","date_conditions","run_calendar","start_times","description","std_out_file","std_err_file","alarm_if_fail","alarm_if_terminated"
"Test_jobA","CMD","echo","machinename","owner","","1","Autosys_Calendar","09:09","test discription","/tmp/Test_jobA.out","/tmp/Test_jobA.err","1","1"
"Test_JobB","CMD","echo","machinename","owner","","1","mo,tu,we,th,fr","21:05","test discription","/tmp/Test_JobB.out","/tmp/Test_JobB.err","1","1"
"Test_JobC","BOX","echo","machinename","owner","","0","test discription","/tmp/Test_JobC.out","/tmp/Test_JobC.err","1","1"
"Test_JobD","CMD","echo","machinename","owner","","1","su","08:50","test discription","/tmp/Test_JobD.out","/tmp/Test_JobD.err","1","1"
"Test_JobE","CMD","echo","machinename","owner","","1","su","08:20","test discription","/tmp/Test_JobE.out","/tmp/Test_JobE.err","1","1"
"Test_JobF","CMD","echo","machinename","owner","","1","all","0,10,20,30,40,50","test discription","/tmp/Test_JobF.out","/tmp/Test_JobF.err","1","1"
Ed Morton
  • 188,023
  • 17
  • 78
  • 185