0

I like to export large amount of text data from db to file. The characterset in db is UTF8. The excepted result in the file is ISO8859P2 or MSWIN1250.

My db settings:

SELECT * FROM v$nls_parameters;
1   NLS_LANGUAGE    HUNGARIAN   0
2   NLS_TERRITORY   HUNGARY 0
9   NLS_CHARACTERSET    UTF8    0
10  NLS_SORT    HUNGARIAN   0
16  NLS_NCHAR_CHARACTERSET  UTF8    0
17  NLS_COMP    BINARY  0
18  NLS_LENGTH_SEMANTICS    CHAR    0
19  NLS_NCHAR_CONV_EXCP FALSE   0

select * from nls_database_parameters;
1   NLS_RDBMS_VERSION   12.1.0.2.0
2   NLS_NCHAR_CONV_EXCP FALSE
15  NLS_NCHAR_CHARACTERSET  UTF8
16  NLS_CHARACTERSET    UTF8
19  NLS_TERRITORY   AMERICA
20  NLS_LANGUAGE    AMERICAN

select * from nls_session_parameters;
1   NLS_LANGUAGE    HUNGARIAN
2   NLS_TERRITORY   HUNGARY
9   NLS_SORT    HUNGARIAN
15  NLS_COMP    BINARY
16  NLS_LENGTH_SEMANTICS    CHAR
17  NLS_NCHAR_CONV_EXCP FALSE

The file created at server directory (linux). I haven't more information for linux characterset settings.

The PLSQL code:

DECLARE
   v_fh     UTL_FILE.FILE_TYPE;
   v_eol    VARCHAR2(2);
   v_eollen PLS_INTEGER;
   CURSOR cur_sql IS

SELECT T3.ID_RESULT
      ,T3.column1 
FROM   table1
WHERE  id_result = 999999
  ;
   "ID_RESULT" DBMS_SQL.NUMBER_TABLE;
   "column1" DBMS_SQL.VARCHAR2A;

BEGIN
   EXECUTE IMMEDIATE 'ALTER SESSION SET NLS_DATE_FORMAT = ''YYYY.MM.DD HH24:MI:SS''';
   v_eol := CHR(13)||CHR(10);
   v_eollen := LENGTH(v_eol);
   v_fh := UTL_FILE.FOPEN('REP_DIR','result_test.csv','W', 32000);

   OPEN cur_sql;
   LOOP
      FETCH cur_sql
      BULK COLLECT INTO "ID_RESULT",
                        "column1",
                        LIMIT 1000;
      IF "ID_RESULT".COUNT > 0 THEN
         FOR i IN "ID_RESULT".FIRST .. "ID_RESULT".LAST LOOP

           UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8ISO8859P2','UTF8'));
           UTL_FILE.PUT_nchar(v_fh, v_eol);

           UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8MSWIN1250','UTF8'));
           UTL_FILE.PUT(v_fh, v_eol);

           UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8ISO8859P2'));
           UTL_FILE.PUT(v_fh, v_eol);

           UTL_FILE.PUT(v_fh, CONVERT("column1"(i),'EE8MSWIN1250'));
           UTL_FILE.PUT(v_fh, v_eol);

           UTL_FILE.PUT(v_fh, "column1"(i));
           UTL_FILE.PUT(v_fh, v_eol);             

           UTL_FILE.PUT(v_fh, utl_raw.cast_to_varchar2(utl_raw.convert(utl_raw.cast_to_raw("column1"(i) ),'HUNGARIAN_HUNGARY.EE8MSWIN1250', 'ENGLISH_UNITED KINGDOM.UTF8')));
           UTL_FILE.PUT(v_fh, v_eol);               

           UTL_FILE.fflush(v_fh);
         END LOOP;
      END IF;
      EXIT WHEN cur_sql%NOTFOUND;
   END LOOP;
   CLOSE cur_sql;
   UTL_FILE.FCLOSE(v_fh);
EXCEPTION
   WHEN 
        .........
      RAISE;
END;

The original value in db: value in hexa

The result in Notepad++ (encode in UTF8):

Csere Lajosn

Csere Lajosn

Csere Lajosn

Csere Lajosn

Csere Lajosné

Csere Lajosn

The result in Notepad++ (encode in ANSI, char set: windows-1250):

Csere Lajosn

Csere Lajosn

Csere Lajosn

Csere Lajosn

Csere Lajosné

Csere Lajosn

The result in Notepad++ (encode in ANSI, char set: iso-8859-2):

Csere Lajosn

Csere Lajosn

Csere Lajosn

Csere Lajosn

Csere LajosnĂŠ

Csere Lajosn

When i use CONVERT i lost "é" character. How can i do convert the string from UTF8 to ANSI?

Thanks, Zoltan

1 Answers1

0

This is from mine toolbox package - try it/tweak. Works with different code pages in-out.

function dump_dsv_fast(p_query in varchar2
                  ,p_filename in varchar2
                  ,p_dir in varchar2 default c_DEFAULT_DIRECTORY
                  ,p_separator in varchar2 default ';'
                  ,p_text_qualifier in varchar2 default ''
                  ,p_header in boolean default true
                  ,p_eol_format in varchar2 default chr(13) || chr(10)
                  ,p_characterset in varchar2 default 'EE8MSWIN1250'
                  ,p_write_mode in varchar2 default 'WB') return number


  /****************************************************
  Formats:
  AL32UTF8        --> full utf
  WE8MSWIN1252    --> no pl chars
  WE8MSWIN1250    --> pl ansi
  Limitations:   this one is faster filing buffer once

  * 2014/05/13 --> change to raw (end line character)
  * 2016/02/12 --> text qualifier
               --> header on/off
               --> line length adjust
               --> text q
  * 2016/03/09 --> codepage
  *****************************************************/
 is
  l_output      utl_file.file_type;
  l_theCursor   integer default dbms_sql.open_cursor;
  l_columnValue varchar2(32760);
  l_status      integer;
  l_separator   varchar2(10);
  l_colCnt      number default 0;
  l_descTbl     dbms_sql.desc_tab;
  l_cnt         number default 0;
  l_buffer      raw(32767) := null;
begin
  l_output := utl_file.fopen(p_dir, p_filename, p_write_mode, 32760);
  dbms_sql.parse(l_theCursor, p_query, dbms_sql.native);
  if p_header
  then
    dbms_sql.describe_columns(l_theCursor, l_colCnt, l_descTbl);
    l_separator := '';
    for i in 1 .. l_colCnt
    loop
      if p_characterset = 'EE8MSWIN1250'
      then
        utl_file.put_raw(l_output,
                         utl_raw.cast_to_raw(l_separator ||
                                              p_text_qualifier || l_descTbl(i)
                                              .col_name || p_text_qualifier));
      else
        utl_file.put_raw(l_output,
                         utl_i18n.string_to_raw(l_separator ||
                                                 p_text_qualifier || l_descTbl(i)
                                                 .col_name ||
                                                 p_text_qualifier,
                                                 p_characterset));
      end if;
      dbms_sql.define_column(l_theCursor, i, l_columnValue, 32760);
      l_separator := p_separator;
    end loop;
    utl_file.put_raw(l_output, utl_raw.cast_to_raw(p_eol_format));
  end if;
  for i in 1 .. 255
  loop
    begin
      dbms_sql.define_column(l_theCursor, i, l_columnValue, 32760);
      l_colCnt := i;
    exception
      when others then
        if (sqlcode = -1007)
        then
          exit;
        else
          raise;
        end if;
    end;
  end loop;

  dbms_sql.define_column(l_theCursor, 1, l_columnValue, 32760);

  l_status := dbms_sql.execute(l_theCursor);

  loop
    exit when(dbms_sql.fetch_rows(l_theCursor) <= 0);
    l_separator := '';
    for i in 1 .. l_colCnt
    loop
      dbms_sql.column_value(l_theCursor, i, l_columnValue);
      if p_characterset = 'EE8MSWIN1250'
      then
        l_buffer := l_buffer ||
                    utl_raw.cast_to_raw(l_separator || p_text_qualifier ||
                                        l_columnValue || p_text_qualifier);
      else
        l_buffer := l_buffer ||
                    utl_i18n.string_to_raw(l_separator || p_text_qualifier ||
                                           l_columnValue ||
                                           p_text_qualifier, p_characterset);
      end if;
      l_separator := p_separator;
    end loop;
    utl_file.put_raw(l_output,
                     l_buffer || utl_raw.cast_to_raw(p_eol_format));
    l_buffer := '';
    l_cnt    := l_cnt + 1;
  end loop;
  dbms_sql.close_cursor(l_theCursor);

  utl_file.fclose(l_output);
  return l_cnt;
end dump_dsv_fast;
jareeq
  • 311
  • 2
  • 9