2

I want to split the string by space and with special characters if any. Ex: For expressing mobile switching center (signal strength).

Currently I am using the regex to split the string and I am not able to achieve both space and special characters split.

insert into tmp(word)
    select     regexp_substr('For expressing mobile switching center 
    (signal strength).', '(.*?)([[:space:]]|$)', 1, level, null, 1 ) as token
    from       dual
    connect by level <= regexp_count('For expressing mobile switching center (signal strength).', '[[:space:]/:]+') + 1

CREATE TABLE TMP(WORD VARCHAR2(4000));

Current Output: For
expressing
mobile
switching
center
(signal
strength).

Expected Output: For
expressing
mobile
switching
center
(
signal
strength
)
.

Updated Code:

insert into tmp(word)
select     regexp_substr('For expressing mobile switching center (signal strength).', '(.*?)([[:space:]()]|$)', 1, level, null, 1 ) as token
from       dual
connect by level <= regexp_count('For expressing mobile switching center (signal strength).', '(.*?)([[:space:]()]|$)')+ 1

Result:
For
expressing
mobile
switching
center
(null)
signal
strength
.
(null)
(null)
ABY
  • 393
  • 2
  • 11
  • you need to include `()` in you character class, [`Regex Demo`](https://regex101.com/r/BcdAKM/1/) – Code Maniac Sep 13 '19 at 11:50
  • I tried this and updated the code above. Looks like I still need to modify. Could you please take a look? – ABY Sep 13 '19 at 12:02

3 Answers3

0

This is DB2 but hopefully you can translate it:

with data (s) as (values
('For expressing mobile switching center (signal strength).')
),
     tally (n) as (
select row_number() over (order by 1)
from   (values (0),(0),(0),(0),(0),(0),(0),(0)) x (n)
cross  join (values (0),(0),(0),(0),(0),(0),(0),(0)) y (n)
)
select regexp_substr(s,'([A-Za-z]+|\(|\)|\.)', 1, n)
from   data
cross  join tally 
where  n <= regexp_count(s,'([A-Za-z]+|\(|\)|\.)')
user2398621
  • 86
  • 1
  • 3
0

I tweaked your regexp so it's searching for (a) a parenthesis character or (b) a series of characters which aren't spaces or parentheses.

select     regexp_substr('For expressing mobile switching center (signal strength).', '[()]|[^[:space:]()]+', 1, level, null) as token
from       dual
connect by level <= regexp_count('For expressing mobile switching center (signal strength).', '[()]|[^[:space:]()]+');
kfinity
  • 8,581
  • 1
  • 13
  • 20
0

This will split at any character that is not alpha or numeric:

WITH
    aset AS( SELECT 'abc def;ghi|hkl () 0W-' AS tobesplit FROM DUAL ),
    splitup ( VALUE, rest ) AS
        (SELECT SUBSTR( tobesplit
                      , 1
                      , REGEXP_INSTR( tobesplit || ' ',  '[^a-zA-Z0-9]' ) - 1 )
              , SUBSTR( tobesplit, REGEXP_INSTR( tobesplit || ' ',  '[^a-zA-Z0-9]' ) + 1 ) || ' '
           FROM aset
         UNION ALL
         SELECT SUBSTR( rest
                      , 1
                      , REGEXP_INSTR( rest,  '[^a-zA-Z0-9]' ) - 1 )
              , SUBSTR( rest, REGEXP_INSTR( rest || ' ',  '[^a-zA-Z0-9]' ) + 1 )
           FROM splitup
          WHERE rest IS NOT NULL)
SELECT value 
  FROM splitup where value is not null;
Brian Leach
  • 2,025
  • 1
  • 11
  • 14