Thursday, March 15, 2018

Database tables row count with column count

SELECT     
SchemaNM = sch.name
,TableNM = o.name
--,FullTableNM = sch.name+'.'+o.name,
,RowCNT = ddps.row_count
,ColumnCNT = c.ColumnCount
FROM            sys.indexes i
INNER JOIN sys.objects o ON i.OBJECT_ID = o.OBJECT_ID
INNER JOIN sys.dm_db_partition_stats ddps ON i.OBJECT_ID = ddps.OBJECT_ID AND i.index_id = ddps.index_id
INNER JOIN sys.schemas sch on o.schema_id = sch.schema_id
INNER JOIN (Select object_id, ColumnCount = count(1) from sys.all_columns group by object_id) c
   on o.object_id = c.object_id
WHERE i.index_id < 2
AND o.is_ms_shipped = 0
ORDER BY ddps.row_count desc

1.00011e+006 fix PATINDEX and POWER summary.sql

It appears that someone loaded an extra from a vendor system into Excel and then loaded it into a SQL database... Means that we ended up with scientific notation in a cost/dollar amount column.  As such, a previously numeric datatype column became varchar and broke some reports...

Here's how I fixed it (so many years of doing SQL and this was the first time I used the POWER function!):

SELECT
PatientID
...
,PriorTotalCostAMT =
CASE WHEN CTE.PRIOR_TOTAL_COSTS is not null
THEN CAST(CAST(PriorTotalCostAMT_1 as decimal(38,6))*Power(10,PriorTotalCostAMT_2) as numeric(28,4))
                  ELSE cast(Costs.PRIOR_TOTAL_COSTS as numeric(28,4)) END
...
FROM CostData Costs
...
LEFT JOIN
(
            SELECT
                PRIOR_TOTAL_COSTS
                ,PriorTotalCostAMT_1 = SUBSTRING(PRIOR_TOTAL_COSTS, 1, (PATINDEX('%[e+]%', PRIOR_TOTAL_COSTS + ' ')-1))
                ,PriorTotalCostAMT_2 = SUBSTRING(PRIOR_TOTAL_COSTS, (PATINDEX('%[e+]%', PRIOR_TOTAL_COSTS + ' ')+2),
LEN(PRIOR_TOTAL_COSTS))
            FROM CostData
            WHERE PRIOR_TOTAL_COSTS like '%e%'
                )  CTE

            ON Costs.PRIOR_TOTAL_COSTS = CTE.PRIOR_TOTAL_COSTS


However, even easier is the string function:  STR(Prior_TOTAL_COSTS)

Generic Query to find a relevant column....

I wanted to find out how well populated my name columns were in a particular database... so here's a query I ran against the system tables:

/************
TO DO: 
Note that you'll need to update the Total and PCT logic to be dynamic as well... Or sum by row number and update based on RowNBR
*/

Use Projects /*database name specific for where tables live */
GO
---------------------------------------
--STEP1: Create table to insert analysis output using Dynamic SQL
---------------------------------------

IF (object_id ('MasterReference.TestData') is not null)
BEGIN
DROP TABLE MasterReference.TestData
END

create table MasterReference.TestData
(RowNBR int
,ColumnNM varchar(255)
,TableNM varchar(255)
,CNT int
,PCT decimal(35,2)
,TotalRowCNT int
,LoadDTS datetime2
)
create clustered index RowNBR on MasterReference.TestData(RowNBR)

---------------------------------
-- STEP 2:  Insert ColumnNames
--------------------------------

insert into MasterReference.TestData (RowNBR, ColumnNM, TableNM, TotalRowCNT)

(

/* Need permissions to run against sys tables */
SELECT     
    RowNBR = row_number() over(order by sch.name+'.'+o.name+'.'+sa.name)
    ,sa.Name as ColumnNM
    ,FullTableNM = sch.name+'.'+o.name
    ,TotalRowCNT = ddps.row_count
FROM            sys.indexes i
INNER JOIN sys.objects o ON i.OBJECT_ID = o.OBJECT_ID
INNER JOIN sys.dm_db_partition_stats ddps ON i.OBJECT_ID = ddps.OBJECT_ID AND i.index_id = ddps.index_id
INNER JOIN sys.schemas sch on o.schema_id = sch.schema_id
INNER JOIN (Select object_id, ColumnCount = count(1) from sys.all_columns group by object_id) c
   on o.object_id = c.object_id
INNER JOIN sys.all_columns sa
    ON i.OBJECT_ID = sa.OBJECT_ID
WHERE i.index_id < 2
AND o.is_ms_shipped = 0
and sa.NAME like '%NM%' and o.name not like '%backup%'
AND (case when sch.name = 'MasterReference' and o.name = 'TestData' then 'Exclude' else 'Keep' end) = 'Keep'
)

---------------------------------------
--STEP3: Setup Loop piece
---------------------------------------

declare @count int; --NumberOfLoops
set @count = 0


while @count < (select MAX(RowNBR) from MasterReference.TestData) -- this is the total number of times it should loop
begin

set @count = @count + 1


declare @ColumnName varchar(100)
set @ColumnName = (Select ColumnNM from MasterReference.TestData where RowNBR = @count)

declare @TableName varchar(500)
set @TableName = (Select TableNM from MasterReference.TestData where RowNBR = @count)


--Select @ColumnName
--Select @TableName

--end
------------------------------------
-- Dynamic SQL Setup
------------------------------------
declare @SQL varchar(max)
set @SQL = ''

set @SQL  =
'
insert into MasterReference.TestData (RowNBR, ColumnNM, TableNM, CNT, LoadDTS)
Select a.RowNBR
,b.ColumnPopulation as ColumnNM
,a.TableNM
,b.CNT
,LoadDTS = getdate()
FROM MasterReference.TestData a
INNER JOIN (
SELECT
ColumnNM as ColumnPopulation
,'+''''+@TableName+''''+' as TableNM 
,SUBSTRING(ColumnNM, 1, (PATINDEX('+''''+'%[:]%'+''''+', ColumnNM + '+''''+' '+''''+')-1))  as ColumnNM
,CNT
FROM
(
SELECT
ColumnNM = '+''''+@ColumnName+''''+'+'+''''+': Populated'+''''+'
,count(1) as CNT
FROM '+@TableName+'
WHERE '+@ColumnName+' is not null

UNION ALL

SELECT
ColumnNM = '+''''+@ColumnName+''''+'+'+''''+': NULL'+''''+'
,count(1) as CNT
FROM '+@TableName+'
WHERE '+@ColumnName+' is null
) as VT
) b
ON  a.ColumnNM = b.ColumnNM
AND a.TableNM = b.TableNM
WHERE a.RowNBR = '+CAST( @count as varchar)



--Select @SQL

exec (@SQL)

end

----------------
-- STEP  : Update PCT
----------------


Update a
set TotalRowCNT =  b.TotalRowCNT
FROM MasterReference.TestData a
INNER JOIN (Select DISTINCT TotalRowCNT, TableNM from MasterReference.TestData WHERE TotalRowCNT is not null) b
on a.TableNM = b.TableNM

DELETE MasterReference.TestData
WHERE LoadDTS is null

Update MasterReference.TestData
set PCT =  case when TotalRowCNT <> 0 then cast(((CNT*1.000)/TotalRowCNT)*100  as decimal(35,2)) else NULL end



Select * from MasterReference.TestData



/* COPY QUERY BELOW INTO ANOTHER QUERY WINDOW TO WATCH IT LOAD:
Select * from MasterReference.TestData
WHERE CNT is not null
order by RowNBR desc
*/

Thursday, November 12, 2009

Loop to create a Date Dimension

Alas, I continue to find loops and cursors a challenge so I thought I'd try it out with the basics... the syntax below can be used to create a date dimension on any server; it does not reference any database tables.

NOTE: Additional columns could be added by using numerous other SQL datepart (etc.) functions.

/******************************************************

* FOR DEVELOPERS who have the rights to build a table

*******************************************************

* For testing purposes, I like to create my own schema;

* the syntax below could be done as a temp table as well

* (scroll towards the bottom of this SQL).

*******************************************************/

/* SETUP: I like to create my own schema for testing...

*

* create schema epriddyTEST

*

* --drop table epriddyTEST.dimDate

* -- create table epriddyTEST.dimDate

* -- (FullDate datetime,

* -- Year int,

* -- DayOfWeek varchar(20))

* -- create clustered index FullDateIndex on epriddyTEST.dimDate(FullDate)

********************************************************/

--I like to make the calculations relative to today

declare @Today datetime

set @Today = convert(datetime,convert(char(10),getdate(),101))

---------------------------------------

--STEP1: set up the date range desired

---------------------------------------

declare @StartDate datetime

set @StartDate = dateadd(yy,-5,@Today) -- 5 years ago today

set @StartDate = '01/01/'+cast((datepart(yy,@StartDate)) as varchar) -- starting Jan. 1st

declare @EndDate datetime

set @EndDate = dateadd(yy,5,@Today) -- 5 years from now today

set @EndDate = '12/31/'+cast((datepart(yy,@EndDate)) as varchar) -- ending Dec. 31st

--USE QUERIES BELOW TO DOUBLE-CHECK Range Selected...

--Select Today = @Today, StartDate = @StartDate, EndDate = @EndDate

--select datediff(d,@StartDate,@EndDate) -- number of loops

---------------------------------------

--STEP2: Setup Loop piece

---------------------------------------

declare @count int; --NumberOfLoops

set @count = -1

while @count < (select datediff(d,@StartDate,@EndDate)) -- this is the total number of times it should loop

begin

set @count = @count + 1

insert into epriddyTEST.dimDate (FullDate, [Year],[DayOfWeek])

Select FullDate = @StartDate+@count

,[Year] = datepart(yy,(@StartDate+@count ))

,[DayOfWeek]=datename(weekday,(@StartDate+@count ))

end

Select * from epriddyTEST.dimDate

--All done!

/******************************************************

* FOR ANALYSTS and Others who may not have permissions

* to create permanent tables; here's a temp table option

*******************************************************/

--I like to make the calculations relative to today

declare @Today datetime

set @Today = convert(datetime,convert(char(10),getdate(),101))

---------------------------------------

--STEP1: set up the date range desired

---------------------------------------

declare @StartDate datetime

set @StartDate = dateadd(yy,-5,@Today) -- 5 years ago today

set @StartDate = '01/01/'+cast((datepart(yy,@StartDate)) as varchar) -- starting Jan. 1st

declare @EndDate datetime

set @EndDate = dateadd(yy,5,@Today) -- 5 years from now today

set @EndDate = '12/31/'+cast((datepart(yy,@EndDate)) as varchar) -- ending Dec. 31st

--USE QUERIES BELOW TO DOUBLE-CHECK Range Selected...

--Select Today = @Today, StartDate = @StartDate, EndDate = @EndDate

--select datediff(d,@StartDate,@EndDate) -- number of loops

---------------------------------------

--STEP2: Setup Loop piece

---------------------------------------

declare @count int; --NumberOfLoops

set @count = -1

IF (object_id ('tempdb.dbo.#dimDate') is not null)

BEGIN

DROP TABLE #dimDate

END

create table #dimDate

(FullDate datetime,

[Year] int,

[DayOfWeek] varchar(20),

MonthNum int,

MonthName varchar(20))

create clustered index FullDateIndex on #dimDate(FullDate) -- I always create a index if I have to use a temp table

while @count < (select datediff(d,@StartDate,@EndDate)) -- this is the total number of times it should loop

begin

set @count = @count + 1

insert into #dimDate (FullDate, [Year],[DayOfWeek],MonthNum, MonthName)

Select FullDate = @StartDate+@count

,[Year] = datepart(yy,(@StartDate+@count ))

,[DayOfWeek]=datename(weekday,(@StartDate+@count ))

,MonthNum = datepart(mm,(@StartDate+@count ))

,[MonthName] = DATENAME(month, (@StartDate+@count ))

end

Select * from #dimDate

--All Done!

Monday, November 2, 2009

I'm not dead...

Sorry that I haven't posted in quite awhile. I've got a list of items I want to add to this blog but have been pushing through our OLAP cube conversion project and just haven't posted anything! Good news: We're almost done converting our 2000 DTS and Analysis Services cubes to the new 2005 Integration Services & Analysis Services packages (sweet baby!)....

... just wanted to let you know that I haven't abandoned this post altogether; just busy with development work ;-)

Monday, August 10, 2009

Checking for and dropping temp tables

IF (object_id ('tempdb.dbo.#fact_stg') is not null)

BEGIN

DROP TABLE #fact_stg

END


Again, me with the syntax ;-)

Wednesday, August 5, 2009

Proper case function

Many thanks to kselvia who posted this at: http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=37760

I'm adding it here in case the forum I found this on loses the posting:

"

CREATE FUNCTION dbo.fCapFirst(@input NVARCHAR(4000)) RETURNS NVARCHAR(4000)

AS

BEGIN

DECLARE @position INT

WHILE IsNull(@position,Len(@input)) > 1

SELECT @input = Stuff(@input,IsNull(@position,1),1,upper(substring(@input,IsNull(@position,1),1))), @position = charindex(' ',@input,IsNull(@position,1)) + 1

RETURN (@input)

END

It's not as sophisticated as the others but it's about 3 times as fast:

select dbo.fCapFirst(Lower(Column)) From MyTable

"

. . . Totally awesome!