0

I have a SQL table:

 CREATE TABLE "text_files"
( "FILE_NAME" VARCHAR2(4000 BYTE),
"FILE_CONTENT" CLOB
);

I am trying to run the following query:

WITH rsqfc ( file_content, line, rn, max_rn, nm1, prv, depth ) AS (
  SELECT file_content,
         REGEXP_SUBSTR( file_content, '.+?(' || CHR(10) || '|$)', 1, 1 ),
         1,
         REGEXP_COUNT( file_content, '.+?(' || CHR(10) || '|$)' ) - 1,
         CASE SUBSTR( file_content, 1, 4 ) WHEN 'NM1*' THEN 1 ELSE 0 END,
         CASE SUBSTR( file_content, 1, 4 ) WHEN 'PRV*' THEN 1 ELSE 0 END,
         CASE SUBSTR( file_content, 1, 4 ) WHEN 'NM1*' THEN 1 WHEN 'PRV*' THEN 2 ELSE 0 END
  FROM   text_files
  UNION ALL
  SELECT file_content,
         REGEXP_SUBSTR( file_content, '.+?(' || CHR(10) || '|$)', 1, rn + 1 ),
         rn + 1,
         max_rn,
         CASE SUBSTR( REGEXP_SUBSTR( file_content, '.+?(' || CHR(10) || '|$)', 1, rn + 1 ), 1, 4 ) WHEN 'NM1*' THEN nm1 + 1 ELSE nm1 END,
         CASE SUBSTR( REGEXP_SUBSTR( file_content, '.+?(' || CHR(10) || '|$)', 1, rn + 1 ), 1, 4 ) WHEN 'PRV*' THEN prv + 1 ELSE prv END,
         CASE SUBSTR( REGEXP_SUBSTR( file_content, '.+?(' || CHR(10) || '|$)', 1, rn + 1 ), 1, 4 ) WHEN 'NM1*' THEN 1 WHEN 'PRV*' THEN 2 ELSE depth END
  FROM   rsqfc
  WHERE  rn < max_rn
)
SELECT CASE depth
       WHEN 2 THEN '2010A' || CHR( 64 + prv )
       WHEN 1 THEN '1000' || CHR( 64 + nm1 )
       WHEN 0 THEN '000'
       END AS "LOOP",
       line
FROM   rsqfc;

However it gives

ORA-00932: inconsistent datatypes:expected CLOB got CHAR error

I understand that we cannot use CLOB for regex and where clause. But the content in CLOB is more than 4000 characters and when I try to alter table with VARCHAR2(4000) field and copy CLOB to the new field, it doesn't copy.

It gives me

ORA-22835: Buffer too small for CLOB to CHAR or BLOB to RAW conversion (actual: 4436, maximum: 4000)

I cannot break the text into multiple fields. I need the whole text in 1 field.

Any help would be appreciated.

hotfix
  • 3,376
  • 20
  • 36
dang
  • 2,342
  • 5
  • 44
  • 91
  • Not a solution but it is discussed here as well https://stackoverflow.com/questions/7357999/displaying-clob-column-with-4000-characters . Must it be SQL or can your use pl/sql? – F.Madsen Mar 15 '19 at 08:31
  • It should be SQL. Is it possible to tweak my query to support it? – dang Mar 15 '19 at 08:43
  • I don't thing you will be able to get more than 4000 chars in one column. pl/sql would give you 32K. – F.Madsen Mar 15 '19 at 08:46
  • But you could still create a pl/sql function and call that from your select statement. The function would do your regexp handling and you can still get the data using a select statement as long as you dont have to return more than 4000 bytes. Would that be ok? – F.Madsen Mar 15 '19 at 08:55
  • Sure, let's try that. – dang Mar 15 '19 at 08:59
  • By the way, you might regret naming the table `"text_files"`. If you just name it the standard way as `text_files` you won't be forced to repeat the exact quotes and lowercase every time you refer to it. – William Robertson Mar 16 '19 at 09:22

3 Answers3

1

Use a pipelined function:

Oracle Setup - Test Data:

CREATE TABLE text_files (
  FILE_NAME    VARCHAR2(4000 BYTE),
  FILE_CONTENT CLOB
);

INSERT INTO text_files VALUES (
  'test1',
  'ISA*00*AUTHORIZAT*00*SECURITY I*ZZ*000000060000000*ZZ*000000010000000*110705*1132*^*00501*110705001*0*T*:~
GS*HC*00000006*00000001*20110705*113253*110705001*X*005010X222A1~
ST*837*0021*005010X222~
BHT*0019*00*244579*20061015*1023*CH~
NM1*41*2*PREMIER BILLING SERVICE*****46*TGJ23~
PER*IC*JERRY*TE*3055552222*EX*231~
NM1*40*2*KEY INSURANCE COMPANY*****46*66783JJT~
HL*1**20*1~
PRV*BI*PXC*203BF0100Y~
NM1*85*2*BEN KILDARE SERVICE*****XX*9876543210~
N3*234 SEAWAY ST~
N4*MIAMI*FL*33111~'
);

Oracle Setup - Pipelined Function:

CREATE TYPE line_detail AS OBJECT(
  line_no NUMBER(38,0),
  line    CLOB,
  loop    VARCHAR2(6)
);
/

CREATE TYPE line_detail_array AS TABLE OF line_detail;
/

CREATE FUNCTION get_Line_Detail(
  file_content IN CLOB
) RETURN line_detail_array PIPELINED
AS
  p_line       CLOB;
  p_loop       VARCHAR2(6) := '000';
  p_substr     CHAR(4);
  p_line_no    PLS_INTEGER := 0;
  p_line_start PLS_INTEGER := 1;
  p_line_end   PLS_INTEGER;
  p_line_len   PLS_INTEGER;
  p_length     PLS_INTEGER;
  p_nm1        PLS_INTEGER := 0;
  p_prv        PLS_INTEGER := 0;
BEGIN
  IF file_content IS NULL THEN
    RETURN;
  END IF;

  p_length := DBMS_LOB.GETLENGTH( file_content );

  LOOP
    p_line       := EMPTY_CLOB();
    p_line_no    := p_line_no + 1;
    p_substr     := DBMS_LOB.SUBSTR( file_content, 4, p_line_start );
    IF p_substr = 'PRV*' THEN
      p_prv := p_prv + 1;
      p_loop := '2010A' || CHR( 64 + p_prv );
    ELSIF p_substr = 'NM1*' THEN
      p_nm1 := p_nm1 + 1;
      p_loop := '1000' || CHR( 64 + p_nm1 );
    END IF;

    p_line_end   := DBMS_LOB.INSTR( file_content, CHR(10), p_line_start );
    IF p_line_end = 0 THEN
      p_line_end := p_length;
    END IF;

    LOOP
      EXIT WHEN p_line_start > p_line_end;
      p_line_len := LEAST( 4000, p_line_end - p_line_start + 1 );
      p_line := p_line || DBMS_LOB.SUBSTR( file_content, p_line_len, p_line_start );
      p_line_start := p_line_start + p_line_len;
    END LOOP;
    PIPE ROW( line_detail( p_line_no, p_line, p_loop ) );
    EXIT WHEN p_line_end >= p_length;
  END LOOP;
END;
/

Query:

SELECT c.*
FROM   text_files t
       CROSS JOIN TABLE( get_Line_Detail( t.file_content ) ) c;

Output:

LINE_NO | LINE                                                                                                           | LOOP  
------: | :------------------------------------------------------------------------------------------------------------- | :-----
      1 | ISA*00*AUTHORIZAT*00*SECURITY I*ZZ*000000060000000*ZZ*000000010000000*110705*1132*^*00501*110705001*0*T*:~<br> | 000   
      2 | GS*HC*00000006*00000001*20110705*113253*110705001*X*005010X222A1~<br>                                          | 000   
      3 | ST*837*0021*005010X222~<br>                                                                                    | 000   
      4 | BHT*0019*00*244579*20061015*1023*CH~<br>                                                                       | 000   
      5 | NM1*41*2*PREMIER BILLING SERVICE*****46*TGJ23~<br>                                                             | 1000A 
      6 | PER*IC*JERRY*TE*3055552222*EX*231~<br>                                                                         | 1000A 
      7 | NM1*40*2*KEY INSURANCE COMPANY*****46*66783JJT~<br>                                                            | 1000B 
      8 | HL*1**20*1~<br>                                                                                                | 1000B 
      9 | PRV*BI*PXC*203BF0100Y~<br>                                                                                     | 2010AA
     10 | NM1*85*2*BEN KILDARE SERVICE*****XX*9876543210~<br>                                                            | 1000C 
     11 | N3*234 SEAWAY ST~<br>                                                                                          | 1000C 
     12 | N4*MIAMI*FL*33111~                                                                                             | 1000C 

db<>fiddle here

MT0
  • 143,790
  • 11
  • 59
  • 117
0

Assuming that no individual line of the file will be more than 4000 bytes long then:

Oracle Setup:

CREATE TABLE text_files (
  FILE_NAME    VARCHAR2(4000 BYTE),
  FILE_CONTENT CLOB
);

INSERT INTO text_files VALUES (
  'test1',
  'ISA*00*AUTHORIZAT*00*SECURITY I*ZZ*000000060000000*ZZ*000000010000000*110705*1132*^*00501*110705001*0*T*:~
GS*HC*00000006*00000001*20110705*113253*110705001*X*005010X222A1~
ST*837*0021*005010X222~
BHT*0019*00*244579*20061015*1023*CH~
NM1*41*2*PREMIER BILLING SERVICE*****46*TGJ23~
PER*IC*JERRY*TE*3055552222*EX*231~
NM1*40*2*KEY INSURANCE COMPANY*****46*66783JJT~
HL*1**20*1~
PRV*BI*PXC*203BF0100Y~
NM1*85*2*BEN KILDARE SERVICE*****XX*9876543210~
N3*234 SEAWAY ST~
N4*MIAMI*FL*33111~'
);

Query:

WITH rsqfc ( file_name, file_content, line_no, line, line_end, nm1, prv, depth ) AS (
  SELECT file_name,
         file_content,
         1,
         CASE
         WHEN DBMS_LOB.INSTR( file_content, CHR(10) ) = 0
         THEN DBMS_LOB.SUBSTR( file_content, DBMS_LOB.GETLENGTH( file_content ), 1 )
         ELSE DBMS_LOB.SUBSTR( file_content, DBMS_LOB.INSTR( file_content, CHR(10) ), 1 )
         END,
         DBMS_LOB.INSTR( file_content, CHR(10) ),
         CASE DBMS_LOB.SUBSTR( file_content, 4, 1 ) WHEN 'NM1*' THEN 1 ELSE 0 END,
         CASE DBMS_LOB.SUBSTR( file_content, 4, 1 ) WHEN 'PRV*' THEN 1 ELSE 0 END,
         CASE DBMS_LOB.SUBSTR( file_content, 4, 1 ) WHEN 'NM1*' THEN 1 WHEN 'PRV*' THEN 2 ELSE 0 END
  FROM   text_files
  UNION ALL
  SELECT file_name,
         file_content,
         line_no + 1,
         CASE
         WHEN DBMS_LOB.INSTR( file_content, CHR(10), line_end + 1 ) = 0
         THEN DBMS_LOB.SUBSTR(
                file_content,
                DBMS_LOB.GETLENGTH( file_content ) - line_end,
                line_end + 1
              )
         ELSE DBMS_LOB.SUBSTR(
                file_content,
                DBMS_LOB.INSTR( file_content, CHR(10), line_end + 1 ) - line_end,
                line_end + 1
              )
         END,
         DBMS_LOB.INSTR( file_content, CHR(10), line_end + 1 ),
         CASE DBMS_LOB.SUBSTR( file_content, 4, line_end + 1 ) WHEN 'NM1*' THEN nm1 + 1 ELSE nm1 END,
         CASE DBMS_LOB.SUBSTR( file_content, 4, line_end + 1 ) WHEN 'PRV*' THEN prv + 1 ELSE prv END,
         CASE DBMS_LOB.SUBSTR( file_content, 4, line_end + 1 ) WHEN 'NM1*' THEN 1 WHEN 'PRV*' THEN 2 ELSE depth END
  FROM   rsqfc
  WHERE  line_end > 0
)
SELECT line_no,
       CASE depth
       WHEN 2 THEN '2010A' || CHR( 64 + prv )
       WHEN 1 THEN '1000' || CHR( 64 + nm1 )
       WHEN 0 THEN '000'
       END AS "LOOP",
       line
FROM   rsqfc;

Output:

LINE_NO | LOOP   | LINE                                                                                                          
------: | :----- | :-------------------------------------------------------------------------------------------------------------
      1 | 000    | ISA*00*AUTHORIZAT*00*SECURITY I*ZZ*000000060000000*ZZ*000000010000000*110705*1132*^*00501*110705001*0*T*:~<br>
      2 | 000    | GS*HC*00000006*00000001*20110705*113253*110705001*X*005010X222A1~<br>                                         
      3 | 000    | ST*837*0021*005010X222~<br>                                                                                   
      4 | 000    | BHT*0019*00*244579*20061015*1023*CH~<br>                                                                      
      5 | 1000A  | NM1*41*2*PREMIER BILLING SERVICE*****46*TGJ23~<br>                                                            
      6 | 1000A  | PER*IC*JERRY*TE*3055552222*EX*231~<br>                                                                        
      7 | 1000B  | NM1*40*2*KEY INSURANCE COMPANY*****46*66783JJT~<br>                                                           
      8 | 1000B  | HL*1**20*1~<br>                                                                                               
      9 | 2010AA | PRV*BI*PXC*203BF0100Y~<br>                                                                                    
     10 | 1000C  | NM1*85*2*BEN KILDARE SERVICE*****XX*9876543210~<br>                                                           
     11 | 1000C  | N3*234 SEAWAY ST~<br>                                                                                         
     12 | 1000C  | N4*MIAMI*FL*33111~                                                                                            

db<>fiddle here

MT0
  • 143,790
  • 11
  • 59
  • 117
-2

Then here is a stub for it..

create or replace function get_depth(sFileName in VARCHAR2)
return varchar2 as
    sBuffer varchar2(32000);
    iResult integer := 0;
begin
    --  
    for rec in (select file_content from "text_files" where file_name = sFileName) loop
          sBuffer := dbms_lob.substr(rec.file_content,32000);
          exit;
    end loop;
    if sBuffer is not null then
    -- Your regexp on sBuffert



    end if;
    return iResult;
end get_depth;

And the select:

select get_depth('myfile') depth from dual;
F.Madsen
  • 702
  • 4
  • 6