0
select      
    a.COUNTY_FIPS
    ,COUNT(e.PROPERTY_ID) as house_count
    ,AVG(cast(e.AVM_FINAL_VALUE as bigint)) as avg_avm
    ,max(cast(e.AVM_FINAL_VALUE as bigint)) as max_avm
    ,min(cast(e.AVM_FINAL_VALUE as bigint)) as min_avm
from 
    RAW_Equity e
left join 
    (SELECT           
         SA_PROPERTY_ID, MM_FIPS_STATE_CODE, MM_FIPS_MUNI_CODE, 
         CASE 
            WHEN MM_FIPS_STATE_CODE < 10 
              THEN '0' + CAST(MM_FIPS_STATE_CODE as VARCHAR) 
              ELSE CAST(MM_FIPS_STATE_CODE as VARCHAR) 
         END
         + CASE  
             WHEN MM_FIPS_MUNI_CODE < 10 
               THEN '00' + CAST(MM_FIPS_MUNI_CODE as VARchar)
             WHEN MM_FIPS_MUNI_CODE < 100 
               THEN '0' + CAST(MM_FIPS_MUNI_CODE as VARchar)
             ELSE CAST(MM_FIPS_MUNI_CODE as VARchar) 
         END AS COUNTY_FIPS
     FROM  
         RAW_Address) a ON a.SA_PROPERTY_ID = e.PROPERTY_ID
where 
    AVM_CONFIDENCE_SCORE >= 70
group by 
    a.COUNTY_FIPS

Is there any way I can improve the performance of this query? Schema for both the tables are shown below. I am was thinking about creating non clustered index on AVM_CONFIDENCE_SCORE but I think it will only going to increase the query time. Any help will be greatly appreciated.

RAWADDRESS table:

CREATE TABLE [dbo].[RAW_Address]
(
[SA_PROPERTY_ID] [int] NOT NULL,
[SA_SCM_ID] [int] NOT NULL,
[MM_STATE_CODE] [varchar](2) NOT NULL,
[MM_MUNI_NAME] [varchar](24) NOT NULL,
[MM_FIPS_STATE_CODE] [tinyint] NOT NULL,
[MM_FIPS_MUNI_CODE] [smallint] NOT NULL,
[MM_FIPS_COUNTY_NAME] [varchar](35) NOT NULL,
[SA_SITE_HOUSE_NBR] [varchar](20) NULL,
[SA_SITE_FRACTION] [varchar](10) NULL,
[SA_SITE_DIR] [varchar](2) NULL,
[SA_SITE_STREET_NAME] [varchar](40) NULL,
[SA_SITE_SUF] [varchar](4) NULL,
[SA_SITE_POST_DIR] [varchar](2) NULL,
[SA_SITE_UNIT_PRE] [varchar](10) NULL,
[SA_SITE_UNIT_VAL] [varchar](6) NULL,
[SA_SITE_CITY] [varchar](30) NULL,
[SA_SITE_STATE] [varchar](2) NOT NULL,
[SA_SITE_ZIP] [int] NULL,
[SA_SITE_PLUS_4] [smallint] NULL,
[SA_SITE_CRRT] [varchar](4) NULL,
[SA_MAIL_HOUSE_NBR] [varchar](20) NULL,
[SA_MAIL_FRACTION] [varchar](10) NULL,
[SA_MAIL_DIR] [varchar](2) NULL,
[SA_MAIL_STREET_NAME] [varchar](50) NULL,
[SA_MAIL_SUF] [varchar](4) NULL,
[SA_MAIL_POST_DIR] [varchar](2) NULL,
[SA_MAIL_UNIT_PRE] [varchar](10) NULL,
[SA_MAIL_UNIT_VAL] [varchar](6) NULL,
[SA_MAIL_CITY] [varchar](50) NULL,
[SA_MAIL_STATE] [varchar](2) NULL,
[SA_MAIL_ZIP] [int] NULL,
[SA_MAIL_PLUS_4] [smallint] NULL,
[SA_MAIL_CRRT] [varchar](4) NULL,
[SA_SITE_MAIL_SAME] [varchar](1) NULL
) ON [PRIMARY]

RAW Equity table:

CREATE TABLE [dbo].[RAW_Equity]
(
[PROPERTY_ID] [int] NOT NULL,
[SCM_ID] [int] NOT NULL,
[MM_STATE_CODE] [varchar](2) NOT NULL,
[MM_MUNI_NAME] [varchar](24) NOT NULL,
[MM_FIPS_STATE_CODE] [int] NOT NULL,
[MM_FIPS_MUNI_CODE] [int] NOT NULL,
[MM_FIPS_COUNTY_NAME] [varchar](35) NOT NULL,
[AVM_FINAL_VALUE] [int] NULL,
[AVM_LOW_VALUE] [int] NULL,
[AVM_HIGH_VALUE] [int] NULL,
[AVM_CONFIDENCE_SCORE] [int] NULL,
[FINAL_VALUE] [float] NULL,
[FIRST_POSITION_SR_UNIQUE_ID] [int] NULL,
[FIRST_POSITION_LOAN_DATE] [int] NULL,
[FIRST_POSITION_DOC_NBR] [varchar](20) NULL,
[FIRST_POSITION_LOAN_VAL] [int] NULL,
[FIRST_POSITION_LENDER_CODE] [int] NULL,
[FIRST_POSITION_LNDR_LAST_NAME] [varchar](50) NULL,
[FIRST_POSITION_LNDR_FIRST_NAME] [varchar](50) NULL,
[FIRST_POSITION_LENDER_TYPE] [varchar](1) NULL,
[FIRST_POSITION_LOAN_TYPE] [varchar](1) NULL,
[FIRST_POSITION_INTEREST_RATE_TYPE] [varchar](1) NULL,
[FIRST_POSITION_ESTIMATED_INTEREST_RATE] [float] NULL,
[FIRST_POSITION_LNDR_CREDIT_LINE] [varchar](1) NULL,
[FIRST_POSITION_MODELED_MORTGAGE_TYPE] [varchar](1) NULL,
[SECOND_POSITION_SR_UNIQUE_ID] [int] NULL,
[SECOND_POSITION_LOAN_DATE] [int] NULL,
[SECOND_POSITION_DOC_NBR] [varchar](20) NULL,
[SECOND_POSITION_LOAN_VAL] [int] NULL,
[SECOND_POSITION_LENDER_CODE] [int] NULL,
[SECOND_POSITION_LNDR_LAST_NAME] [varchar](50) NULL,
[SECOND_POSITION_LNDR_FIRST_NAME] [varchar](50) NULL,
[SECOND_POSITION_LENDER_TYPE] [varchar](1) NULL,
[SECOND_POSITION_LOAN_TYPE] [varchar](1) NULL,
[SECOND_POSITION_INTEREST_RATE_TYPE] [varchar](1) NULL,
[SECOND_POSITION_ESTIMATED_INTEREST_RATE] [float] NULL,
[SECOND_POSITION_LNDR_CREDIT_LINE] [varchar](1) NULL,
[SECOND_POSITION_MODELED_MORTGAGE_TYPE] [varchar](1) NULL,
[THIRD_POSITION_SR_UNIQUE_ID] [int] NULL,
[THIRD_POSITION_LOAN_DATE] [int] NULL,
[THIRD_POSITION_DOC_NBR] [varchar](20) NULL,
[THIRD_POSITION_LOAN_VAL] [int] NULL,
[THIRD_POSITION_LENDER_CODE] [int] NULL,
[THIRD_POSITION_LNDR_LAST_NAME] [varchar](50) NULL,
[THIRD_POSITION_LNDR_FIRST_NAME] [varchar](50) NULL,
[THIRD_POSITION_LENDER_TYPE] [varchar](1) NULL,
[THIRD_POSITION_LOAN_TYPE] [varchar](1) NULL,
[THIRD_POSITION_INTEREST_RATE_TYPE] [varchar](1) NULL,
[THIRD_POSITION_ESTIMATED_INTEREST_RATE] [float] NULL,
[THIRD_POSITION_LNDR_CREDIT_LINE] [varchar](1) NULL,
[THIRD_POSITION_MODELED_MORTGAGE_TYPE] [varchar](1) NULL,
[TOTAL_OUTSTANDING_LOANS] [bigint] NULL,
[LTV] [int] NULL,
[AVAILABLE_EQUITY] [int] NULL,
[LENDABLE_EQUITY] [int] NULL,
[PROCESS_ID] [int] NOT NULL,
[FILLER] [varchar](4) NULL,
CONSTRAINT [PK_RAW_Equity] PRIMARY KEY CLUSTERED 
(
[PROPERTY_ID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY =    OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
marc_s
  • 732,580
  • 175
  • 1,330
  • 1,459
python
  • 4,403
  • 13
  • 56
  • 103

3 Answers3

0

I would put an index on:

Table: RAW_Equity

Columns: PROPERTY_ID, AVM_CONFIDENCE_SCORE

and

Table: RAW_Address

Columns: SA_PROPERTY_ID

Include: MM_FIPS_STATE_CODE,MM_FIPS_MUNI_CODE

Community
  • 1
  • 1
Tab Alleman
  • 31,483
  • 7
  • 36
  • 52
0

Your query can be simplified, which will likely make it faster:

select 
     REPLICATE('0',2-LEN(RTRIM(a.MM_FIPS_STATE_CODE))) 
         + RTRIM(a.MM_FIPS_STATE_CODE)
         + REPLICATE('0',3-LEN(RTRIM(a.MM_FIPS_MUNI_CODE))) 
         + RTRIM(a.MM_FIPS_MUNI_CODE) 
         AS COUNTY_FIPS
    ,COUNT(e.PROPERTY_ID) as house_count
    ,AVG(cast(e.AVM_FINAL_VALUE as bigint)) as avg_avm
    ,max(cast(e.AVM_FINAL_VALUE as bigint)) as max_avm
    ,min(cast(e.AVM_FINAL_VALUE as bigint)) as min_avm
from 
    RAW_Equity e
    left join RAW_Address a 
          ON a.SA_PROPERTY_ID = e.PROPERTY_ID
where 
    e.AVM_CONFIDENCE_SCORE >= 70
group by 
    a.MM_FIPS_STATE_CODE, a.MM_FIPS_MUNI_CODE

If RAW_Address doesn't already have a CLUSTERED INDEX with SA_PROPERTY_ID as the first key of the index, then this may help:

CREATE INDEX IX_RAW_Address_SA_PROPERTY_ID ON RAW_Address(SA_PROPERTY_ID) 
INCLUDE (MM_FIPS_STATE_CODE, MM_FIPS_MUNI_CODE)
Diego
  • 18,035
  • 5
  • 62
  • 66
0

You could try this way :- (willing to know, how much this one, helpful to you)

Declare @result Table
(
     RowId              Int Identity(1,1) Primary Key
    ,COUNTY_FIPS        Varchar(100)
    ,MM_FIPS_STATE_CODE Int
    ,MM_FIPS_MUNI_CODE  Int
    ,house_count        Int
    ,avg_avm            Int
    ,max_avm            Int
    ,min_avm            Int
)

Insert Into @result(MM_FIPS_STATE_CODE,MM_FIPS_MUNI_CODE,house_count,avg_avm,max_avm,min_avm)
Select   a.MM_FIPS_STATE_CODE
        ,a.MM_FIPS_MUNI_CODE
        ,Count(e.PROPERTY_ID) as house_count
        ,Avg(Cast(e.AVM_FINAL_VALUE as bigint)) as avg_avm
        ,Max(Cast(e.AVM_FINAL_VALUE as bigint)) as max_avm
        ,Min(Cast(e.AVM_FINAL_VALUE as bigint)) as min_avm
From    RAW_Equity As e With (Nolock)
        Left Join RAW_Address As a With (Nolock) On e.PROPERTY_ID = a.SA_PROPERTY_ID
Where   e.AVM_CONFIDENCE_SCORE >= 70
Group by a.MM_FIPS_STATE_CODE
        ,a.MM_FIPS_MUNI_CODE

Update  r
Set     r.COUNTY_FIPS = REPLICATE('0',2-LEN(RTRIM(r.MM_FIPS_STATE_CODE))) + RTRIM(r.MM_FIPS_STATE_CODE) + REPLICATE('0',3-LEN(RTRIM(r.MM_FIPS_MUNI_CODE))) + RTRIM(r.MM_FIPS_MUNI_CODE)
From    @result As r

Select   r.COUNTY_FIPS
        ,r.house_count
        ,r.avg_avm
        ,r.max_avm
        ,r.min_avm
From    @result As r

1st try without any index, and after that create clustered index as mentioned and try AGAIN the above same query

CREATE INDEX IX_RAW_Address_SA_PROPERTY_ID ON RAW_Address(SA_PROPERTY_ID)
Mihir Shah
  • 948
  • 10
  • 17
  • Dude this is awesome :) query time is 12 mins and initially it was 23mins. Could you explain me your query? – python Jul 09 '15 at 13:58
  • our local database tables and #table will remain on Disk, while '@table' variable will be reside on Ram, So, 1st I stored all necessary fields along with applied filter (where clause) in '@table' variable, so once all the disk operation will be finished, then string concatenation operation perform on limited filters data which are reside in Ram by update statement, and final retrieve final result from '@table' variable. – Mihir Shah Jul 10 '15 at 04:11