It seems you need:
# Write the extracted value only on rows whose text contains '('.
# Rows outside the mask are not touched (NaN if 'RegionName' is a new column).
mask = df['text'].str.contains('(', regex=False)
pattern = r'(.*?)\s*[\(\[]+.*[\n]*'
df.loc[mask, 'RegionName'] = (
    df.loc[mask, 'text'].str.extract(pattern, expand=False)
)
Or:
# Replace the text with its extracted part only where the mask is True;
# rows outside the mask keep their original text.
mask = df['text'].str.contains('(', regex=False)
extracted = df['text'].str.extract(r'(.*?)\s*[\(\[]+.*[\n]*', expand=False)
df['RegionName'] = df['text'].mask(mask, extracted)
Or:
# Use the extracted value only where the mask is True; every other row gets a
# fallback value such as 'aaa' (a column like df['col'] works as well).
mask = df.text.str.contains('(', regex=False)
# pandas validates that `expand` is a real bool, so pass False rather than 0.
extracted = df['text'].str.extract(r'(.*?)\s*[\(\[]+.*[\n]*', expand=False)
df['RegionName'] = np.where(mask, extracted, 'aaa')
For a better understanding, here is a small example:
# Sample frame: a single 'text' column mixing '(', '[' and '{' prefixes.
samples = [' (1', '(', '4', '[7', '{8', '{7', ' [1']
df = pd.DataFrame({'text': samples})
print(df)
text
0 (1
1 (
2 4
3 [7
4 {8
5 {7
6 [1
# One boolean mask per bracket type (literal match, not regex).
text = df['text']
mask1 = text.str.contains('(', regex=False)
mask2 = text.str.contains('{', regex=False)
mask3 = text.str.contains('[', regex=False)

# Build the result from the lowest priority upwards, so that mask1 wins over
# mask2, mask2 wins over mask3, and 4 is the value when nothing matches.
codes = np.where(mask3, 2, 4)
codes = np.where(mask2, 3, codes)
df['d'] = np.where(mask1, 1, codes)
print(df)
text d
0 (1 1
1 ( 1
2 4 4
3 [7 2
4 {8 3
5 {7 3
6 [1 2
Another, more complicated sample:
# Second demo: each bracket type selects a different extraction pattern.
df = pd.DataFrame({'text': [' (1', '(', '4', '[ur', '{dFd', '{fGf', ' [io']})
print(df)

text = df['text']
mask1 = text.str.contains('(', regex=False)
mask2 = text.str.contains('{', regex=False)
mask3 = text.str.contains('[', regex=False)

# Candidate values, computed for every row; the masks decide which one is used.
digits = text.str.extract(r'(\d+)', expand=False)
capitals = text.str.extract(r'([A-Z]+)', expand=False)
uo_chars = text.str.extract('([uo])+', expand=False)

# Build the result from the lowest priority upwards, so that mask1 wins over
# mask2, mask2 wins over mask3, and 4 is the value when nothing matches.
parsed = np.where(mask3, uo_chars, 4)
parsed = np.where(mask2, capitals, parsed)
df['parsed'] = np.where(mask1, digits, parsed)
print(df)
text parsed
0 (1 1
1 ( NaN
2 4 4
3 [ur u
4 {dFd F
5 {fGf G
6 [io o