30

I have written an awk script that converts a distributor flatfile into a CSV importable into Magento. This file is semi-colon delimited.

It is not putting quotes around each field like the importer requires. It works fairly well, but is causing some issues on the data import without the enclosing double quotes. I spent a couple hours trying to figure out how to add this to the existing script, without much luck. Any help would be greatly appreciated - I am pretty new to AWK.

Current Output

store;websites;attribute_set;type;category_ids;sku;has_options;name;meta_title;meta_description;image;small_image;thumbnail;url_key;url_path;config_attributes;custom_design;page_layout;options_container;country_of_manufacture;msrp_enabled;msrp_display_actual_price_type;gift_message_available;rsr_pn;manufacturer_pn;price;special_price;cost;weight;msrp;status;visibility;manufacturer;enable_googlecheckout;tax_class_id;is_recurring;description;short_description;meta_keyword;custom_layout_update;news_from_date;news_to_date;special_from_date;special_to_date;custom_design_from;custom_design_to;qty;min_qty;use_config_min_qty;is_qty_decimal;backorders;use_config_backorders;min_sale_qty;use_config_min_sale_qty;max_sale_qty;use_config_max_sale_qty;is_in_stock;low_stock_date;notify_stock_qty;use_config_notify_stock_qty;manage_stock;use_config_manage_stock;stock_status_changed_auto;use_config_qty_increments;qty_increments;use_config_enable_qty_inc;enable_qty_increments;is_decimal_divided;stock_status_changed_automatically;use_config_enable_qty_increments;product_name;store_id;product_type_id;product_status_changed;product_changed_websites;gallery;related;upsell;crosssell;tier_prices;associated;bundle_options;grouped;group_price_price;downloadable_options;super_attribute_pricing;product_tags
admin;base;Default;simple;2,35,36;844802016148;0;5.11 HOLSTER SHIRT L WHITE;;;/5/1/511-40011-010-L_1.jpg;/5/1/511-40011-010-L_1.jpg;/5/1/511-40011-010-L_1.jpg;511-40011-010-L;511-40011-010-L.html;;;No layout updates;Block after Info Column;;Use config;Use config;No;511-40011-010-L;40011;74.99;;48.00;5;74.99;Enabled;Catalog, Search;5.11 Tactical;Yes;Taxable Goods;No;5.11 Tactical Short Sleeve Shirt L White Holster Shirt Crew 40011;5.11 Tactical Short Sleeve Shirt L White Holster Shirt Crew 40011;;;;;;;;;0;0;1;0;0;1;1;1;0;1;1;;;1;0;1;0;1;0;1;0;0;0;1;5.11 HOLSTER SHIRT L WHITE;0;simple;;;;;;;;;;;;;;

Desired Output

"store";"websites";"attribute_set";"type";"category_ids";"sku";"has_options";"name";"meta_title";"meta_description";"image";"small_image";"thumbnail";"url_key";"url_path";"config_attributes";"custom_design";"page_layout";"options_container";"country_of_manufacture";"msrp_enabled";"msrp_display_actual_price_type";"gift_message_available";"rsr_pn";"manufacturer_pn";"price";"special_price";"cost";"weight";"msrp";"status";"visibility";"manufacturer";"enable_googlecheckout";"tax_class_id";"is_recurring";"description";"short_description";"meta_keyword";"custom_layout_update";"news_from_date";"news_to_date";"special_from_date";"special_to_date";"custom_design_from";"custom_design_to";"qty";"min_qty";"use_config_min_qty";"is_qty_decimal";"backorders";"use_config_backorders";"min_sale_qty";"use_config_min_sale_qty";"max_sale_qty";"use_config_max_sale_qty";"is_in_stock";"low_stock_date";"notify_stock_qty";"use_config_notify_stock_qty";"manage_stock";"use_config_manage_stock";"stock_status_changed_auto";"use_config_qty_increments";"qty_increments";"use_config_enable_qty_inc";"enable_qty_increments";"is_decimal_divided";"stock_status_changed_automatically";"use_config_enable_qty_increments";"product_name";"store_id";"product_type_id";"product_status_changed";"product_changed_websites";"gallery";"related";"upsell";"crosssell";"tier_prices";"associated";"bundle_options";"grouped";"group_price_price";"downloadable_options";"super_attribute_pricing";"product_tags"
"admin";"base";"Default";"simple";"2,35,36";"844802016148";"0";"5.11 HOLSTER SHIRT L WHITE";"";"";"/5/1/511-40011-010-L_1.jpg";"/5/1/511-40011-010-L_1.jpg";"/5/1/511-40011-010-L_1.jpg";"511-40011-010-L";"511-40011-010-L.html";"";"";"No layout updates";"Block after Info Column";"";"Use config";"Use config";"No";"511-40011-010-L";"40011";"74.99";"";"48.00";"5";"74.99";"Enabled";"Catalog, Search";"5.11 Tactical";"Yes";"Taxable Goods";"No";"5.11 Tactical Short Sleeve Shirt L White Holster Shirt Crew 40011";"5.11 Tactical Short Sleeve Shirt L White Holster Shirt Crew 40011";"";"";"";"";"";"";"";"";"0";"0";"1";"0";"0";"1";"1";"1";"0";"1";"1";"";"";"1";"0";"1";"0";"1";"0";"1";"0";"0";"0";"1";"5.11 HOLSTER SHIRT L WHITE";"0";"simple";"";"";"";"";"";"";"";"";"";"";"";"";"";"

Script - rsrimport.awk

#!/bin/awk -f
# ----------------------------------------------------------------------------------------
# Copyright (c) 2012 - 2013 John Steensen <john.steensen@live.com>
# All rights reserved. No warranty, explicit or implicit, provided.
# ----------------------------------------------------------------------------------------
# AWK Processing
# Updated 03DEC2012@1552 MST
# ----------------------------------------------------------------------------------------
# Warnings/Dependancy Notes
# AWK
# ----------------------------------------------------------------------------------------
BEGIN {
FS=";";
OFS=";";
CATEGORY="47";
IMAGE="imagepathfail";
URLKEY="urlkeyfail";
URLPATH="urlpathfail";
print "store", "websites", "attribute_set", "type", "category_ids", "sku", "has_options", "name", "image", "small_image", "thumbnail", "url_key", "url_path", "page_layout", "options_container", "msrp_enabled", "msrp_display_actual_price_type", "gift_message_available", "rsr_pn", "manufacturer_pn", "price", "cost", "weight", "msrp", "manufacturer", "status", "is_recurring", "visibility", "enable_googlecheckout", "tax_class_id", "description", "short_description", "qty", "min_qty", "use_config_min_qty", "is_qty_decimal", "backorders", "use_config_backorders", "min_sale_qty", "use_config_min_sale_qty", "max_sale_qty", "use_config_max_sale_qty", "is_in_stock", "notify_stock_qty", "use_config_notify_stock_qty", "manage_stock", "use_config_manage_stock", "stock_status_changed_auto", "use_config_qty_increments", "qty_increments", "use_config_enable_qty_inc", "enable_qty_increments", "is_decimal_divided", "stock_status_changed_automatically", "use_config_enable_qty_increments", "product_name", "store_id", "product_type_id";
}
{
# DEFINE CATEGORY
if ($4=="1") CATEGORY="2,3,4";
else if ($4=="2") CATEGORY="2,3,7";
else if ($4=="3") CATEGORY="2,3,8";
else if ($4=="4") CATEGORY="2,3,22,23";
else if ($4=="5") CATEGORY="2,3,5";
else if ($4=="7") CATEGORY="2,3,6";
else if ($4=="8") CATEGORY="2,27,28";
else if ($4=="9") CATEGORY="2,27,29";
else if ($4=="10") CATEGORY="2,9,13";
else if ($4=="11") CATEGORY="2,9,14";
else if ($4=="12") CATEGORY="2,35,38";
else if ($4=="13") CATEGORY="2,9,16";
else if ($4=="14") CATEGORY="2,35,37";
else if ($4=="15") CATEGORY="2,19,21";
else if ($4=="16") CATEGORY="2,9,15";
else if ($4=="17") CATEGORY="2,9,16";
else if ($4=="18") CATEGORY="2,19,20";
else if ($4=="20") CATEGORY="2,27,33";
else if ($4=="21") CATEGORY="2,9,17";
else if ($4=="22") CATEGORY="2,3,22,24";
else if ($4=="23") CATEGORY="2,3,22,25";
else if ($4=="24") CATEGORY="2,9,13";
else if ($4=="25") CATEGORY="2,40,43";
else if ($4=="26") CATEGORY="2,40,44";
else if ($4=="27") CATEGORY="2,3,22,26";
else if ($4=="28") CATEGORY="2,27,31";
else if ($4=="29") CATEGORY="2,27,32";
else if ($4=="30") CATEGORY="2,27,30";
else if ($4=="31") CATEGORY="2,27,34";
else if ($4=="32") CATEGORY="2,9,11";
else if ($4=="33") CATEGORY="2,35,36";
else if ($4=="34") CATEGORY="2,9,10";
else if ($4=="35") CATEGORY="2,9,18";
else if ($4=="36") CATEGORY="2,40,42";
else if ($4=="38") CATEGORY="2,40,41";
else if ($4=="39") CATEGORY="2,40,45";
else if ($4=="40") CATEGORY="2,35,39";
else if ($4=="41") CATEGORY="2,9,12";
else if ($4=="43") CATEGORY="2,9,12";
else if ($4=="01") CATEGORY="2,3,4";
else if ($4=="02") CATEGORY="2,3,7";
else if ($4=="03") CATEGORY="2,3,8";
else if ($4=="04") CATEGORY="2,3,22,23";
else if ($4=="05") CATEGORY="2,3,5";
else if ($4=="07") CATEGORY="2,3,6";
else if ($4=="08") CATEGORY="2,27,28";
else if ($4=="09") CATEGORY="2,27,29";
else CATEGORY="47";
# DEFINE IMAGE WITH PATH.
IMAGE="/5/1/"$1"_1.jpg";
# DEFINE URL KEY
URLKEY=$1;
# DEFINE URL PATH
URLPATH=$1".html";
print "admin", "base", "Default", "simple", CATEGORY, $1, "0", $3, IMAGE, IMAGE, IMAGE, URLKEY, URLPATH, "No layout updates", "Block after Info Column", "Use config", "Use config", "No", $1, $12, $6, $7, $8, $6, $11, "Enabled", "No", "Catalog, Search", "Yes", "Taxable Goods", $14, $14, $9, "0", "1", "0", "0", "1", "1", "1", "0", "1", "1", "0", "1", "0", "1", "0", "1", "0", "1", "0", "0", "0", "1", $3, "0", "simple";
}
END {}
John Steensen
  • 357
  • 1
  • 4
  • 9

6 Answers6

28

If you want:

add this to the existing script.

You can insert additional \"\" in each argument of print like this:

print "\"admin\"", "\"base\"", ...

Edited:

Yes, perhaps seting OFS is better solution:

BEGIN { OFS="\";\""; } ... print "\"admin", ...., "simple\"";

Nikolai Popov
  • 5,397
  • 1
  • 17
  • 18
24
awk '{for (i=1;i<=NF;i++) $i="\""$i"\""}1' FS=";" OFS=";" input
jaypal singh
  • 74,723
  • 23
  • 102
  • 147
5

To add quotes around the entries you could use a simple AWK loop:

Script - simple_loop.awk

BEGIN {FS=";"}
{
  for(i=1;i<NF;i++){
       printf("\"%s\";", $i); 
  }
  printf("\"%s\"\n",$NF);
}

For instance

echo "admin;base;5.11 HOLSTER SHIRT L WHITE;;" | awk -f simple_loop.awk

Should output

"admin";"base";"5.11 HOLSTER SHIRT L WHITE";"";""
Dima Chubarov
  • 16,199
  • 6
  • 40
  • 76
  • Works perfect, however I did come up with a way to keep it inside the original script, versus adding a second operation. Thanks for the input though! – John Steensen Jan 21 '13 at 20:21
  • @JohnSteensen When reading the question for the first time I expected to see an example of input data and an example of expected output and actually mistook your "current output" sample for the input data. Seeing the three samples together would have helped me and possibly others to quickly understand the question. – Dima Chubarov Jan 22 '13 at 04:58
  • Definitely. I really appreciate the input and will keep that in mind in the future. My apologizes. – John Steensen Jan 22 '13 at 16:43
5

In this case I would use a sed expression instead of AWK.

If your data is in a file called data.txt, you can get it writing:

sed "s/;/\";\"/g;s/^/\"/;s/$/\"/" data.txt

That will print the result to the std output, but if you want to replace the content of the file just use sed -i this way:

sed -i "s/;/\";\"/g;s/^/\"/;s/$/\"/" data.txt

And that is all !!

Explanation: The sed expression consists in three sed commands separated by ";" that you can run separately:

sed "s/;/\";\"/g

It makes a substitution (that is what means the first "s"), then the "/" (the default separator), ";" that is what we want to replace. Then the second separator "/", and the replacement: \";\" It is a sequence: escaped quote, a semicolon and a escaped quote. So with this command we will replace semicolons ; by ";". The last /g means that each ; will be replaced (not only the first smicolon).

If the input was a;b;c after this running the first command it will be a";"b";"c.

Now we need to add quotes in the beginning (^ in a regular expression) and in the end ($). So that is what it means:

sed "s/^/\"/" // the first quote

And

sed "s/$/\"/" // the last quote

Getting the desired output:

"a";"b";"c"
arutaku
  • 5,937
  • 1
  • 24
  • 38
  • So add it to the bash script versus modifying the AWK output? (Curious why that would be preferable) – John Steensen Jan 20 '13 at 18:51
  • Yes, you use `sed` as a bash command instead of `AWK` command. You can run `sed` and the output will be the replacement in the std output. If you want to replace the content of the file just use `sed -i` (it will not produce any output to the std). If you have any doubt about the `sed` expression, I can explain it more in detail, just let me know ;-) – arutaku Jan 20 '13 at 19:06
  • Yes, actually I am very curious about what means what in the description. Doesn't quite look to be regular expressions. – John Steensen Jan 20 '13 at 19:17
  • I have edited my answer, explaining the `sed` commands step by step. You can run them separately to check its result. Check it !! – arutaku Jan 20 '13 at 19:55
  • I went with the AWK, as it let me relatively keep things language consistant. Thanks for the explanation - I'm using it as a diving off point into better familiarizing myself with SED. – John Steensen Jan 21 '13 at 20:20
1

Let me refactor your program a bit:

/#!/bin/awk -f
BEGIN {
    FS=";";
    OFS="\";\"";
    IMAGE="imagepathfail";
    URLKEY="urlkeyfail";
    URLPATH="urlpathfail";
    # DEFINE CATEGORY
    CATEGORY["1"] ="2,3,4";
    CATEGORY["2"] ="2,3,7";
    CATEGORY["3"] ="2,3,8";
    CATEGORY["4"] ="2,3,22,23";
    CATEGORY["5"] ="2,3,5";
    CATEGORY["7"] ="2,3,6";
    CATEGORY["8"] ="2,27,28";
    CATEGORY["9"] ="2,27,29";
    CATEGORY["10"]="2,9,13";
    CATEGORY["11"]="2,9,14";
    CATEGORY["12"]="2,35,38";
    CATEGORY["13"]="2,9,16";
    CATEGORY["14"]="2,35,37";
    CATEGORY["15"]="2,19,21";
    CATEGORY["16"]="2,9,15";
    CATEGORY["17"]="2,9,16";
    CATEGORY["18"]="2,19,20";
    CATEGORY["20"]="2,27,33";
    CATEGORY["21"]="2,9,17";
    CATEGORY["22"]="2,3,22,24";
    CATEGORY["23"]="2,3,22,25";
    CATEGORY["24"]="2,9,13";
    CATEGORY["25"]="2,40,43";
    CATEGORY["26"]="2,40,44";
    CATEGORY["27"]="2,3,22,26";
    CATEGORY["28"]="2,27,31";
    CATEGORY["29"]="2,27,32";
    CATEGORY["30"]="2,27,30";
    CATEGORY["31"]="2,27,34";
    CATEGORY["32"]="2,9,11";
    CATEGORY["33"]="2,35,36";
    CATEGORY["34"]="2,9,10";
    CATEGORY["35"]="2,9,18";
    CATEGORY["36"]="2,40,42";
    CATEGORY["38"]="2,40,41";
    CATEGORY["39"]="2,40,45";
    CATEGORY["40"]="2,35,39";
    CATEGORY["41"]="2,9,12";
    CATEGORY["43"]="2,9,12";
    CATEGORY["01"]="2,3,4";
    CATEGORY["02"]="2,3,7";
    CATEGORY["03"]="2,3,8";
    CATEGORY["04"]="2,3,22,23";
    CATEGORY["05"]="2,3,5";
    CATEGORY["07"]="2,3,6";
    CATEGORY["08"]="2,27,28";
    CATEGORY["09"]="2,27,29";
    # header
    print "store", "websites", "attribute_set", "type", "category_ids", "sku", "has_options", "name", "image", "small_image", "thumbnail", "url_key", "url_path", "page_layout", "options_container", "msrp_enabled", "msrp_display_actual_price_type", "gift_message_available", "rsr_pn", "manufacturer_pn", "price", "cost", "weight", "msrp", "manufacturer", "status", "is_recurring", "visibility", "enable_googlecheckout", "tax_class_id", "description", "short_description", "qty", "min_qty", "use_config_min_qty", "is_qty_decimal", "backorders", "use_config_backorders", "min_sale_qty", "use_config_min_sale_qty", "max_sale_qty", "use_config_max_sale_qty", "is_in_stock", "notify_stock_qty", "use_config_notify_stock_qty", "manage_stock", "use_config_manage_stock", "stock_status_changed_auto", "use_config_qty_increments", "qty_increments", "use_config_enable_qty_inc", "enable_qty_increments", "is_decimal_divided", "stock_status_changed_automatically", "use_config_enable_qty_increments", "product_name", "store_id", "product_type_id";
}

function getCategory(val) {
    return (val in CATEGORY) ? CATEGORY[val] : "47";
}

{
    # DEFINE IMAGE WITH PATH.
    IMAGE="/5/1/"$1"_1.jpg";
    # DEFINE URL KEY
    URLKEY=$1;
    # DEFINE URL PATH
    URLPATH=$1".html";
    print "\" "admin", "base", "Default", "simple", getCategory($4), $1, "0", $3, IMAGE, IMAGE, IMAGE, URLKEY, URLPATH, "No layout updates", "Block after Info Column", "Use config", "Use config", "No", $1, $12, $6, $7, $8, $6, $11, "Enabled", "No", "Catalog, Search", "Yes", "Taxable Goods", $14, $14, $9, "0", "1", "0", "0", "1", "1", "1", "0", "1", "1", "0", "1", "0", "1", "0", "1", "0", "1", "0", "0", "0", "1", $3, "0", "simple" "\"";
}
glenn jackman
  • 238,783
  • 38
  • 220
  • 352
0

In my opinion, we could use printf (formated output) and double quote is obtain with \" into format string.

e.g.

gawk 'BEGIN{print "WKT,punto";}{printf "\"LINESTRING Z (%f %f 0,%f %f 0)\",\"%d"\n",$3,$2,$4,$5,$1}' Frecce_geoloc_12-24.txt

output:

                  $3         $2           $4         $5            $1

"LINESTRING Z (-72.319686 -50.609328 0,-50.609309 -72.319499 0)","6582"

"LINESTRING Z (-72.319245 -50.609215 0,-50.609195 -72.319052 0)","6583"

"LINESTRING Z (-72.318799 -50.609101 0,-50.609081 -72.318607 0)","6584"

"LINESTRING Z (-72.318366 -50.608990 0,-50.608969 -72.318169 0)","6585"

natan
  • 11
  • 3