You can use:
SELECT TRIM(
REGEXP_SUBSTR(
LOWER(value),
-- Leading space
' '
-- Dosage form
|| '((caps|tab|powd|syr|aero|liq|susp)[a-z]*|solution|lotion|spray)'
-- Optional trailing punctuation then "extended release"
|| '([^a-z0-9]*extended release)?'
)
) AS dosage_form
FROM table_name;
Which, for your sample data:
CREATE TABLE table_name (value) AS
SELECT 'Concerta 18 mg/24 hr oral tablet, extended release' FROM DUAL UNION ALL
SELECT 'glipiZIDE 10 mg oral tablet, extended release' FROM DUAL UNION ALL
SELECT 'Adderall XR 10 mg oral capsule, extended release' FROM DUAL;
Outputs:
DOSAGE_FORM |
tablet, extended release |
tablet, extended release |
capsule, extended release |
Alternatively, you can list all the dosage forms in a table:
CREATE TABLE dosage_forms (form) AS
SELECT 'capsule' FROM DUAL UNION ALL
SELECT 'capslet' FROM DUAL UNION ALL
SELECT 'caps' FROM DUAL UNION ALL
SELECT 'tablet' FROM DUAL UNION ALL
SELECT 'powder' FROM DUAL UNION ALL
SELECT 'syringe' FROM DUAL UNION ALL
SELECT 'aerosol' FROM DUAL UNION ALL
SELECT 'liquid' FROM DUAL UNION ALL
SELECT 'suspension' FROM DUAL UNION ALL
SELECT 'solution' FROM DUAL UNION ALL
SELECT 'lotion' FROM DUAL UNION ALL
SELECT 'spray' FROM DUAL;
CREATE TABLE dosage_form_modifiers (form_modifier) AS
SELECT 'extended release' FROM DUAL;
then use the query (from Oracle 12):
SELECT *
FROM table_name t
LEFT OUTER JOIN LATERAL (
SELECT REGEXP_SUBSTR(
t.value,
'( |^)'
|| '('
|| form
|| form_modifier
|| ')',
1,
1,
'i',
2
) AS match
FROM dosage_forms df
CROSS JOIN (
SELECT '[^a-z0-9]*' || form_modifier AS form_modifier
FROM dosage_form_modifiers
UNION ALL
SELECT NULL FROM DUAL
) dfm
ORDER BY
LENGTH(match) DESC NULLS LAST,
match NULLS LAST
FETCH FIRST ROW ONLY
)
ON (1 = 1)
db<>fiddle here