0

I have a table like this (first column contains id adn second column is varchar which contains string (some formulas)) -

column_id       column_formula
4686         4686 = 4684 - 4685 
4687         4687 = ( 4681 / 1.205 / 4684 * 1000 ) 
4717         4717 = ( 4711 + 4712 + 4713 + 4714 + 4715 + 4716 )/6 
4719         4719 = abs( 4716 - 4715 ) 
4787         4787 = max(max(max(max(max( 4780 , 4781 ), 4782 ), 4783 ), 4784 ), 4785 ) - min(min(min(min(min( 4780 , 4781 ), 4782 ), 4783 ), 4784 ), 4785 ) 

Now need some query which will give an output as follows -

col1    col2
4686    4684      
4686    4685      
4687    4681 
4687    4684 
4717    4711
4717    4712

and so on.. (only numbers except the numbers which are less than 4 digits like 1.205 or so .. and 1000)

Its very difficult to use patindex on this one

Can any1 give me a solution on this

omkar patade
  • 1,442
  • 9
  • 34
  • 66

2 Answers2

1

You need to have a function for splitting comma-delimited strings into separate rows. Here is the DelimitedSplit8K function by Jeff Moden:

CREATE FUNCTION [dbo].[DelimitedSplit8K](
    @pString NVARCHAR(4000), @pDelimiter NCHAR(1)
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL 
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)
,E2(N) AS (SELECT 1 FROM E1 a, E1 b)
,E4(N) AS (SELECT 1 FROM E2 a, E2 b)
,cteTally(N) AS(
    SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
)
,cteStart(N1) AS(
    SELECT 1 UNION ALL 
    SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(
SELECT 
    s.N1,
    ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
SELECT 
    ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
    Item       = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l

The idea is to REPLACE first all mathematical characters: +, -, *, /, (, ), = with a comma ,. This is needed so we can split the operands later. Next call the splitter function to split the row using comma , as the delimiter. Once the split is done, filter all numeric values using NOT LIKE '[^0-9]%' AND Item <> ''. You want to INSERT the results of this to a Temp Table. Then, you SELECT from that Temp Table with the appropriate WHERE clause:

;WITH cteSanitized AS(
    SELECT
        t.Column_Id,
        Item = 
            REPLACE(
                REPLACE(
                    REPLACE(
                        REPLACE(
                            REPLACE(
                                REPLACE(
                                    REPLACE(
                                        SUBSTRING(t.Column_Formula, CHARINDEX('=', t.Column_Formula), LEN(t.Column_Formula) - CHARINDEX('=', t.Column_Formula) + 1)
                                        ,'+',','
                                    ), '-',','
                                ), '/',','
                            ), '*',','
                        ), ')', ',)'
                    ), '(',','
                ), '=',','
            )
    FROM Test t
)
,CteSplitted AS(
    SELECT
        s.Column_Id,
        ItemNumber,
        Item = LTRIM(RTRIM(x.Item))
    FROM cteSanitized s
    CROSS APPLY dbo.DelimitedSplit8K(s.Item, ',') x
)
SELECT 
    Column_Id,
    ItemNumber,
    Item = CONVERT(NUMERIC, ITEM)
INTO #TempTable
FROM CteSplitted
WHERE 
    Item  NOT LIKE '[^0-9]%'
    AND Item <> ''

SELECT DISTINCT
    Col1 = Column_Id,
    Col2 = Item
FROM #TempTable
WHERE Item > 1000

DROP TABLE #TempTable

RESULT

Col1        Col2
----------- ---------
4686        4684
4686        4685
4687        4681
4687        4684
4717        4711
4717        4712
4717        4713
4717        4714
4717        4715
4717        4716
4719        4715
4719        4716
4787        4780
4787        4781
4787        4782
4787        4783
4787        4784
4787        4785
Felix Pamittan
  • 31,544
  • 7
  • 41
  • 67
1

Here is solution without user defined function. It should be much more faster:

DECLARE @t TABLE ( ID INT, F NVARCHAR(MAX) )

INSERT  INTO @t
VALUES  ( 4686, '4686 = 4684 - 4685' ),
        ( 4687, '4687 = ( 4681 / 1.205 / 4684 * 1000 )' ),
        ( 4717, '4717 = ( 4711 + 4712 + 4713 + 4714 + 4715 + 4716 )/6' ),
        ( 4719, '4719 = abs( 4716 - 4715 ) ' ),
        ( 4787,
          '4787 = max(max(max(max(max( 4780 , 4781 ), 4782 ), 4783 ), 4784 ), 4785 ) - min(min(min(min(min( 4780 , 4781 ), 4782 ), 4783 ), 4784 ), 4785 )' )

DECLARE @chars TABLE ( ID INT, c NVARCHAR(MAX) )
INSERT  INTO @chars
VALUES  ( 1, ' ' ),
        ( 2, '(' ),
        ( 3, ')' ),
        ( 4, '/' ),
        ( 5, '*' ),
        ( 6, '+' ),
        ( 7, '-' ),
        ( 8, 'max' ),
        ( 9, 'min' ),
        ( 10, 'abs' ),
        ( 11, '=' )

DECLARE @count INT
SELECT  @count = COUNT(*) FROM    @chars;

WITH    recursion
          AS ( SELECT   t.ID ,
                        REPLACE(F, ' ', ',') + ',' AS F ,
                        1 AS CharID
               FROM     @t t
               UNION ALL
               SELECT   t.ID ,
                        REPLACE(t.F, c.c, ',') AS F ,
                        t.CharID + 1 AS CharID
               FROM     recursion t
                        JOIN @chars c ON c.ID = t.CharID + 1
               WHERE    t.CharID < @count
             ),
        commastrings
          AS ( SELECT   ID ,
                        STUFF(F, LEN(F), 1, '') AS F
               FROM     ( SELECT    ID ,
                                    REPLACE(REPLACE(REPLACE(F, ',', '{}'), '}{', ''), '{}', ',') AS F
                          FROM      recursion
                          WHERE     CharID = @count
                        ) a
             ),
        final ( ID, Number, rght, idx )
          AS ( SELECT   t.ID ,
                        LEFT(t.F, CHARINDEX(',', t.F) - 1) ,
                        SUBSTRING(t.F, CHARINDEX(',', t.F) + 1, LEN(t.F)) ,
                        0
               FROM     commastrings t
               UNION ALL
               SELECT   c.id ,
                        CASE WHEN CHARINDEX(',', c.rght) = 0 THEN c.rght
                             ELSE LEFT(c.rght, CHARINDEX(',', c.rght) - 1)
                        END ,
                        CASE WHEN CHARINDEX(',', c.rght) > 0
                             THEN SUBSTRING(c.rght, CHARINDEX(',', c.rght) + 1, DATALENGTH(c.rght))
                             ELSE ''
                        END ,
                        idx + 1
               FROM     final c
               WHERE   LENGTH(c.rght) > 0
             )
    SELECT DISTINCT
            ID ,
            Number
    FROM    final
    WHERE   idx <> 0
            AND CAST(Number AS DECIMAL(20, 10)) > 1000
Giorgi Nakeuri
  • 35,155
  • 8
  • 47
  • 75