Thursday, March 15, 2018

Generic Query to find a relevant column....

I wanted to find out how well populated my name columns were in a particular database... so here's a query I ran against the system tables:

/************
TO DO: 
Note that you'll need to update the Total and PCT logic to be dynamic as well... Or sum by row number and update based on RowNBR
*/

Use Projects /*database name specific for where tables live */
GO
---------------------------------------
--STEP1: Create table to insert analysis output using Dynamic SQL
---------------------------------------

IF (object_id ('MasterReference.TestData') is not null)
BEGIN
DROP TABLE MasterReference.TestData
END

create table MasterReference.TestData
(RowNBR int
,ColumnNM varchar(255)
,TableNM varchar(255)
,CNT int
,PCT decimal(35,2)
,TotalRowCNT int
,LoadDTS datetime2
)
create clustered index RowNBR on MasterReference.TestData(RowNBR)

---------------------------------
-- STEP 2:  Insert ColumnNames
--------------------------------

insert into MasterReference.TestData (RowNBR, ColumnNM, TableNM, TotalRowCNT)

(

/* Need permissions to run against sys tables */
SELECT     
    RowNBR = row_number() over(order by sch.name+'.'+o.name+'.'+sa.name)
    ,sa.Name as ColumnNM
    ,FullTableNM = sch.name+'.'+o.name
    ,TotalRowCNT = ddps.row_count
FROM            sys.indexes i
INNER JOIN sys.objects o ON i.OBJECT_ID = o.OBJECT_ID
INNER JOIN sys.dm_db_partition_stats ddps ON i.OBJECT_ID = ddps.OBJECT_ID AND i.index_id = ddps.index_id
INNER JOIN sys.schemas sch on o.schema_id = sch.schema_id
INNER JOIN (Select object_id, ColumnCount = count(1) from sys.all_columns group by object_id) c
   on o.object_id = c.object_id
INNER JOIN sys.all_columns sa
    ON i.OBJECT_ID = sa.OBJECT_ID
WHERE i.index_id < 2
AND o.is_ms_shipped = 0
and sa.NAME like '%NM%' and o.name not like '%backup%'
AND (case when sch.name = 'MasterReference' and o.name = 'TestData' then 'Exclude' else 'Keep' end) = 'Keep'
)

---------------------------------------
--STEP3: Setup Loop piece
---------------------------------------

declare @count int; --NumberOfLoops
set @count = 0


while @count < (select MAX(RowNBR) from MasterReference.TestData) -- this is the total number of times it should loop
begin

set @count = @count + 1


declare @ColumnName varchar(100)
set @ColumnName = (Select ColumnNM from MasterReference.TestData where RowNBR = @count)

declare @TableName varchar(500)
set @TableName = (Select TableNM from MasterReference.TestData where RowNBR = @count)


--Select @ColumnName
--Select @TableName

--end
------------------------------------
-- Dynamic SQL Setup
------------------------------------
declare @SQL varchar(max)
set @SQL = ''

set @SQL  =
'
insert into MasterReference.TestData (RowNBR, ColumnNM, TableNM, CNT, LoadDTS)
Select a.RowNBR
,b.ColumnPopulation as ColumnNM
,a.TableNM
,b.CNT
,LoadDTS = getdate()
FROM MasterReference.TestData a
INNER JOIN (
SELECT
ColumnNM as ColumnPopulation
,'+''''+@TableName+''''+' as TableNM 
,SUBSTRING(ColumnNM, 1, (PATINDEX('+''''+'%[:]%'+''''+', ColumnNM + '+''''+' '+''''+')-1))  as ColumnNM
,CNT
FROM
(
SELECT
ColumnNM = '+''''+@ColumnName+''''+'+'+''''+': Populated'+''''+'
,count(1) as CNT
FROM '+@TableName+'
WHERE '+@ColumnName+' is not null

UNION ALL

SELECT
ColumnNM = '+''''+@ColumnName+''''+'+'+''''+': NULL'+''''+'
,count(1) as CNT
FROM '+@TableName+'
WHERE '+@ColumnName+' is null
) as VT
) b
ON  a.ColumnNM = b.ColumnNM
AND a.TableNM = b.TableNM
WHERE a.RowNBR = '+CAST( @count as varchar)



--Select @SQL

exec (@SQL)

end

----------------
-- STEP  : Update PCT
----------------


Update a
set TotalRowCNT =  b.TotalRowCNT
FROM MasterReference.TestData a
INNER JOIN (Select DISTINCT TotalRowCNT, TableNM from MasterReference.TestData WHERE TotalRowCNT is not null) b
on a.TableNM = b.TableNM

DELETE MasterReference.TestData
WHERE LoadDTS is null

Update MasterReference.TestData
set PCT =  case when TotalRowCNT <> 0 then cast(((CNT*1.000)/TotalRowCNT)*100  as decimal(35,2)) else NULL end



Select * from MasterReference.TestData



/* COPY QUERY BELOW INTO ANOTHER QUERY WINDOW TO WATCH IT LOAD:
Select * from MasterReference.TestData
WHERE CNT is not null
order by RowNBR desc
*/

No comments:

Post a Comment