I am trying to extract everything to the left of pin_delim. This works fine for id 1,2,3,6 but fails for 5. id 4 is expected to return no results. I can figure out how to stop pin_delim spliting on a dash -?
Any ideas on how to fix this and why is - being treated like a space?
I always want the last pin_delim string to the left.
The university of de pere,delaware,de,usa
RETURN 'The university of de pere,delaware,'
NOT 'The university of '
saint-vincent-de-paul,delaware,de,usa
RETURN 'saint-vincent-de-paul,delaware,de'
NOT 'saint-vincent-'
I have tried the following
WITH t2 AS (
WITH t1 AS (
SELECT 1 id,'middleDEword French,de, Polynesia.' pin_senetence,'de' pin_delimiter,'middleDEword French,' expected
UNION ALL
SELECT 2 id, 'Saint-Vincent-de-Paul,de,usa','de','Saint-Vincent-de-Paul,'
UNION ALL
SELECT 3 id,'HopiDEtal-de Saint Vincent de Paul,de,usa','de','HopiDEtal-de Saint Vincent de Paul,'
UNION ALL
SELECT 4 id,'middleDEword French, Polynesia.' pin_snetence,'de','middleDEword French, Polynesia.'
UNION ALL
SELECT 5 id,'Saint-Vincent-de-Paul,usa','de','Saint-Vincent-de-Paul,usa'
UNION ALL
SELECT 6 id,'HopiDEtal-de Saint DE Vincent de Paul,usa','de','HopiDEtal-de Saint DE Vincent ')
SELECT id,
pin_senetence,
pin_delimiter,
expected,
LENGTH(pin_delimiter) substr_sec_param,
LENGTH(REGEXP_EXTRACT(pin_senetence, CONCAT( r'(?i)\b', r'^(.*)', pin_delimiter, r'\b.*'))) AS pos
FROM t1
WHERE 1 = 1
AND REGEXP_CONTAINS(pin_senetence, CONCAT( r'(?i)\b', r'^(.*)', pin_delimiter, r'\b.*')))
SELECT id,
pin_senetence,
SUBSTR(pin_senetence, pos + 1, substr_sec_param) substr_val,
SUBSTR(pin_senetence, 1, pos) actual,
expected,
IF(SUBSTR(pin_senetence, 1, pos) = expected, TRUE, FALSE) is_correct,
pos,
substr_sec_param
FROM t2;
|id |pin_senetence |substr_val|actual |expected |is_correct|pos|substr_sec_param|
|---|-----------------------------------------|----------|-----------------------------------|-----------------------------------|----------|---|----------------|
|1 |middleDEword French,de, Polynesia. |de |middleDEword French, |middleDEword French, |TRUE |20 |2 |
|2 |Saint-Vincent-de-Paul,de,usa |de |Saint-Vincent-de-Paul, |Saint-Vincent-de-Paul, |TRUE |22 |2 |
|3 |HopiDEtal-de Saint Vincent de Paul,de,usa|de |HopiDEtal-de Saint Vincent de Paul,|HopiDEtal-de Saint Vincent de Paul,|TRUE |35 |2 |
|5 |Saint-Vincent-de-Paul,usa |de |Saint-Vincent- |Saint-Vincent-de-Paul,usa |FALSE |14 |2 |
|6 |HopiDEtal-de Saint DE Vincent de Paul,usa|de |HopiDEtal-de Saint DE Vincent |HopiDEtal-de Saint DE Vincent |TRUE |30 |2 |