How to parse a specific part of string that starts with a specific str to the first space in Oracle

Question

I have dataset like this:

SELECT 
    1 as text_id,
    'The first is A.ACCOUNT_ID and the second one is B.IDENTITY_NO and third one is plate_number .' as full_text
FROM DUAL
UNION
SELECT 
    2,
    'The first is ARC.PREV_RECORD and the second one is ARC.NEXT_RECORD .'
FROM DUAL

I should parse all the phrases starts with " is " ; ends with first space character for each row.

So the result that I want to achieve from full_text is:

text_id	parsed_part
1	A.ACCOUNT_ID
1	B.IDENTITY_NO
1	plate_number
2	ARC.PREV_RECORD
2	ARC.NEXT_RECORD

It could be less or more than 3 phrases so the row count of result could be change.

I tried to reverse text first and find the part between " si " and space but couldn't succeed

reverse(regexp_substr(reverse(full_text), ' si ([^_]*) ',1, 1))

Littlefoot · Answer 1 · 2022-01-28T06:08:55.157

Based on what you posted, see if this helps.

Sample data

SQL> with test as
  2  (SELECT
  3      1 as text_id,
  4      'The first is A.ACCOUNT_ID and the second one is B.IDENTITY_NO and third one is plate_number .' as full_text
  5  FROM DUAL
  6  UNION
  7  SELECT
  8      2,
  9      'The first is ARC.PREV_RECORD and the second one is ARC.NEXT_RECORD .'
 10  FROM DUAL
 11  )

Query itself

 12  select text_id,
 13    ltrim(regexp_substr(full_text, 'is \S+', 1, column_value), 'is ') parsed_part
 14  from test cross join
 15    table(cast(multiset(select level from dual
 16                        connect by level <= regexp_count(full_text, ' is ')
 17                       ) as sys.odcinumberlist))
 18  order by text_id, column_value;

   TEXT_ID PARSED_PART
---------- --------------------
         1 A.ACCOUNT_ID
         1 B.IDENTITY_NO
         1 plate_number
         2 ARC.PREV_RECORD
         2 ARC.NEXT_RECORD

SQL>

regexp_substr searches for the is string followed by two words split by a dot
ltrim removes leading is

`\w+.\w+` should be `\S+`, because the requirement was "ends with first space character" — astentx, Jan 27 '22 at 23:16

score 0 · Answer 2 · answered Jan 27 '22 at 21:08

Although Littlefoot beat me to it, here's my approach:

WITH tbl(text_id, full_text) AS (
  SELECT 1, 'The first is A.ACCOUNT_ID and the second one is B.IDENTITY_NO and third one is plate_number .' FROM DUAL UNION ALL
  SELECT 2, 'The first is ARC.PREV_RECORD and the second one is ARC.NEXT_RECORD .' FROM DUAL
)
SELECT text_id,  
       REGEXP_SUBSTR(full_text, ' is (.*?)( |$)', 1, LEVEL, NULL, 1) parsed_part
FROM tbl
CONNECT BY LEVEL <= REGEXP_COUNT(full_text, ' is .*?( |$)')
  AND PRIOR text_id = text_id
  AND PRIOR SYS_GUID() IS NOT NULL; 


   TEXT_ID PARSED_PART         
---------- --------------------
         1 A.ACCOUNT_ID        
         1 B.IDENTITY_NO       
         1 plate_number        
         2 ARC.PREV_RECORD     
         2 ARC.NEXT_RECORD     

5 rows selected.

score 0 · Accepted Answer · answered Jan 27 '22 at 21:28

With a lateral join on the unfolding of the dotted words via a connect by.

WITH DATA AS (
SELECT 1 as textid, 'The first is A.ACCOUNT_ID and the second one is B.IDENTITY_NO and third one is plate_number .' as full_text FROM DUAL
UNION ALL SELECT 2, 'The first is ARC.PREV_RECORD and the second one is ARC.NEXT_RECORD .' FROM DUAL
) 
SELECT t.textid, w.word
FROM DATA t
CROSS JOIN LATERAL (
  SELECT level AS lvl, REGEXP_SUBSTR(full_text, ' is ([A-Z._]+)',1, LEVEL, 'i', 1) AS word 
  FROM DUAL
  CONNECT BY LEVEL <= REGEXP_COUNT(full_text, ' is ([A-Z._]+)', 1, 'i')
) w
ORDER BY t.textid;

TEXTID | WORD           
-----: | :--------------
     1 | A.ACCOUNT_ID   
     1 | B.IDENTITY_NO  
     1 | plate_number   
     2 | ARC.PREV_RECORD
     2 | ARC.NEXT_RECORD

db<>fiddle here

score 0 · Answer 4 · answered Jan 27 '22 at 23:20

You do not need to use (slow) regular expressions and can do it with simple string functions in a recursive sub-query:

WITH bounds (text_id, full_text, start_pos, end_pos) AS (
  SELECT text_id,
         full_text,
         INSTR(full_text, ' is ', 1),
         INSTR(full_text, ' ', INSTR(full_text, ' is ', 1) + 4 )
  FROM   table_name
  WHERE  INSTR(full_text, ' is ', 1) > 0
UNION ALL
  SELECT text_id,
         full_text,
         INSTR(full_text, ' is ', end_pos),
         INSTR(full_text, ' ', INSTR(full_text, ' is ', end_pos) + 4 )
  FROM   bounds
  WHERE  start_pos > 0
  AND    end_pos > 0
)
SEARCH DEPTH FIRST BY text_id SET order_rn
SELECT text_id,
       SUBSTR(full_text, start_pos + 4, end_pos - start_pos - 4) AS parsed_part
FROM   bounds
WHERE  start_pos > 0
AND    end_pos > 0;

Which, for the sample data:

CREATE TABLE table_name (text_id, full_text) AS
SELECT 1,
       'The first is A.ACCOUNT_ID and the second one is B.IDENTITY_NO and third one is plate_number .'
FROM   DUAL
UNION ALL
SELECT 2,
       'The first is ARC.PREV_RECORD and the second one is ARC.NEXT_RECORD .'
FROM   DUAL

Outputs:

TEXT_ID PARSED_PART

1 A.ACCOUNT_ID

1 B.IDENTITY_NO

1 plate_number

2 ARC.PREV_RECORD

2 ARC.NEXT_RECORD

db<>fiddle here

How to parse a specific part of string that starts with a specific str to the first space in Oracle

4 Answers4