/* */ NTILE ranking functions in SQL Server |SQL Works

Tuesday, October 9, 2012

NTILE ranking functions in SQL Server

Following the previous post on the ROW_NUMBER function, lets look at other rank type functions in SQL Server and how you can use them.
Books Online does a good job of explaining the syntax, so I will link to it here and not go into too much detail beside to place this link. Ranking functions via MSDN

So, now that you have the syntax, where would you use these functions? what kind of questions can they help you answer with your data? Here is a scenario, suppose your boss asks you to apply the 10-80-10 rule (placing employees into performance groups comprising 10% (high) 80% (medium) 10% (low) ) to a group of employees every week, so they can evaluate who is performing above and below expectations. You could use any of the ranking functions to achieve this, but the quartile function works the best because it will always divide the group of employees into the number of quartiles you specify, so if people come and go you wont have to worry about counting how many are in each rank to determine over all percentile. Example below.

Here is the test data first:

CREATE TABLE dbo.EmployeePerf
(
employeeid INT NOT NULL,
employeename VARCHAR(25) NOT NULL,
sales INT,
timeperiod INT
)

INSERT INTO employeeperf VALUES (1, 'Tony', 100, 1)
INSERT INTO employeeperf VALUES (1, 'Tony', 300, 2)
INSERT INTO employeeperf VALUES (1, 'Tony', 200, 3)
INSERT INTO employeeperf VALUES (1, 'Tony', 150, 4)
INSERT INTO employeeperf VALUES (1, 'Meg', 900, 1)
INSERT INTO employeeperf VALUES (1, 'Meg', 400, 2)
INSERT INTO employeeperf VALUES (1, 'Meg', 500, 3)
INSERT INTO employeeperf VALUES (1, 'Meg', 850, 4)
INSERT INTO employeeperf VALUES (1, 'Ben', 100, 1)
INSERT INTO employeeperf VALUES (1, 'Ben', 40, 2)
INSERT INTO employeeperf VALUES (1, 'Ben', 50, 3)
INSERT INTO employeeperf VALUES (1, 'Ben', 90, 4)
INSERT INTO employeeperf VALUES (1, 'Jim', 1000, 1)
INSERT INTO employeeperf VALUES (1, 'Jim', 3000, 2)
INSERT INTO employeeperf VALUES (1, 'Jim', 2000, 3)
INSERT INTO employeeperf VALUES (1, 'Jim', 1500, 4)
INSERT INTO employeeperf VALUES (1, 'Abigail', 90, 1)
INSERT INTO employeeperf VALUES (1, 'Abigail', 40, 2)
INSERT INTO employeeperf VALUES (1, 'Abigail', 500, 3)
INSERT INTO employeeperf VALUES (1, 'Abigail', 80, 4)
INSERT INTO employeeperf VALUES (1, 'Mike', 1000, 1)
INSERT INTO employeeperf VALUES (1, 'Mike', 840, 2)
INSERT INTO employeeperf VALUES (1, 'Mike', 350, 3)
INSERT INTO employeeperf VALUES (1, 'Mike', 190, 4)
INSERT INTO employeeperf VALUES (1, 'John', 10, 1)
INSERT INTO employeeperf VALUES (1, 'John', 30, 2)
INSERT INTO employeeperf VALUES (1, 'John', 20, 3)
INSERT INTO employeeperf VALUES (1, 'John', 10, 4)
INSERT INTO employeeperf VALUES (1, 'Deb', 500, 1)
INSERT INTO employeeperf VALUES (1, 'Deb', 400, 2)
INSERT INTO employeeperf VALUES (1, 'Deb', 200, 3)
INSERT INTO employeeperf VALUES (1, 'Deb', 180, 4)
INSERT INTO employeeperf VALUES (1, 'Peter', 100, 1)
INSERT INTO employeeperf VALUES (1, 'Peter', 740, 2)
INSERT INTO employeeperf VALUES (1, 'Peter', 650, 3)
INSERT INTO employeeperf VALUES (1, 'Peter', 890, 4)
INSERT INTO employeeperf VALUES (1, 'Bill', 230, 1)
INSERT INTO employeeperf VALUES (1, 'Bill', 450, 2)
INSERT INTO employeeperf VALUES (1, 'Bill', 245, 3)
INSERT INTO employeeperf VALUES (1, 'Bill', 180, 4)
INSERT INTO employeeperf VALUES (1, 'Julie', 50, 1)
INSERT INTO employeeperf VALUES (1, 'Julie', 40, 2)
INSERT INTO employeeperf VALUES (1, 'Julie', 20, 3)
INSERT INTO employeeperf VALUES (1, 'Julie', 10, 4)
INSERT INTO employeeperf VALUES (1, 'Paul', 1000, 1)
INSERT INTO employeeperf VALUES (1, 'Paul', 700, 2)
INSERT INTO employeeperf VALUES (1, 'Paul', 600, 3)
INSERT INTO employeeperf VALUES (1, 'Paul', 590, 4)


Now to divide these employees up into the requested 10-80-10 we can apply the NTILE() function like this:

SELECT employeeid, employeename, sales, timeperiod, Ntile(10) OVER (ORDER BY sales) AS Quartile
  FROM employeeperf 
  WHERE timeperiod = 1

This gives us our results, the employees group in 10% increments by sales for the time period we care about, but what if you don't want to re-run it for each time period, and you only want to display those in the top and bottom 10% brackets? A common need for performance management, lets change the code to the following:

;WITH salescte 
AS 
( 
SELECT employeeid, employeename, sales, timeperiod, 
     Ntile(10) OVER (partition BY timeperiod ORDER BY sales) AS Quartile  -- added PARTITION BY
  FROM employeeperf 
  --WHERE TimePeriod = 1    <-- removed to include all periods 
) 

SELECT employeeid, employeename, sales, timeperiod, quartile 
  FROM salescte 
  WHERE quartile IN (1, 10) 
    ORDER BY timeperiod, quartile ASC


And the results are exactly what we wanted, the top and bottom 10%

NTILE ranking function results
My random test data is a bit skewed, as you can see some sales are radically different, if you were in a more normalized environment the number of people in each quartile would be more even. Hope this is useful.

2 comments:

  1. Was just looking for a decription of how NTILE can be used to show the bottom 10 percentile. Great explanation, thank you so much!

    ReplyDelete