/* ISC 561 GROUP BY Through the use of built-in functions, calculations and subqueries, we can create an alternative set of values for an attribute in a GROUP BY clause. we can GROUP BY, for example 1. Date/Time attributes 2. String manipulations on character data 3. Calculations on numeric data (if convertable into categories) The most common (in my experience) involves extracting portions of a datetime attribute */ /* The following three queries are for illustration only. You don't have access to the SqlExerciseSystem database so you can't run these... */ USE SqlExerciseSystem --List the top ten error messages by students in the SQL Exercise System --I could use this information to help me refine my lesson plans and lecture SELECT ErrorNumber, ErrorCount, Description FROM (SELECT TOP 10 ErrorNumber, COUNT(*) AS ErrorCount FROM tblExerciseResponse WHERE ErrorNumber IS NOT NULL AND ErrorNumber <> 0 GROUP BY ErrorNumber ORDER BY COUNT(*) DESC) AS vwTopTenErrors, (SELECT Error, Description FROM Master.dbo.sysMessages) AS vwSystemMessages WHERE vwTopTenErrors.ErrorNumber=vwSystemMessages.Error ORDER BY ErrorCount DESC /* And check out the parallelism in the query! My server has a dual processor. */ /* When do students tend to use the sqlexercise system by hour of day? Let's say I want to know the best time of the day to conduct maintenance on the server. This would be the time of day when usage tends to be smallest. */ --SELECT * FROM tblExerciseResponse SELECT DATEPART(hour, SubmitTime) AS HOUR, COUNT(*) AS SubmitCountToTal FROM tblExerciseResponse GROUP BY DATEPART(hour, SubmitTime) ORDER BY DATEPART(hour, SubmitTime) SELECT TOP 10 * FROM tblExerciseResponse --Is our course any different from the long-term norm? SELECT DATEPART(hour, SubmitTime) AS HOUR, COUNT(*) AS SubmitCountISC561 FROM tblExerciseResponse WHERE PersonID IN(SELECT PersonID FROM tblRoster WHERE ClassID = 44) GROUP BY DATEPART(hour, SubmitTime) ORDER BY DATEPART(hour, SubmitTime) SELECT vwTotal.Hour, SubmitCountTotal, SubmitCountISC561 FROM (SELECT DATEPART(hour, SubmitTime) AS HOUR, COUNT(*) AS SubmitCountTotal FROM tblExerciseResponse GROUP BY DATEPART(hour, SubmitTime)) AS vwTotal, (SELECT DATEPART(hour, SubmitTime) AS HOUR, COUNT(*) AS SubmitCountISC561 FROM tblExerciseResponse WHERE PersonID IN(SELECT PersonID FROM tblRoster WHERE ClassID = 44) GROUP BY DATEPART(hour, SubmitTime)) AS vw561 WHERE vwTotal.Hour=vw561.Hour ORDER BY vwTotal.Hour --The above result can be pasted into Excel and graphs generated very easily. --Or we can use the result in a reporting engine. /* Day of the week? Sunday = 1, Saturday = 7 Is there a pattern of usage by day of week? For our class so far? */ SELECT DATEPART(weekday,SubmitTime) AS DayOfWeek, COUNT(*) AS ResponseCount FROM tblExerciseResponse, tblExercise WHERE PersonID IN(SELECT PersonID FROM tblRoster WHERE ClassID IN(44)) AND tblExerciseResponse.ExerciseID = tblExercise.ExerciseID-- AND --tblExerciseResponse.SubmitTime BETWEEN '2008-08-20 00:00:00.000' AND '2008-09-20 00:00:00.000' GROUP BY DATEPART(weekday,SubmitTime) ORDER BY DATEPART(weekday,SubmitTime) --Frequency distributions --How many students have correctly answered each exercise so far? --That is, produce a frequency distribution of correct responses by exercise. /* Let's do some examples from Northwind. We can look for time-based patterns in orders for the Northwind database. Is there a day-of-the-week pattern for when orders ship? That is, is there a day of the week for which fewer or more orders are shipped, say on Friday? */ USE Northwind --Inspect table SELECT * FROM Orders --Product the weekday part SELECT ShippedDate, DATEPART(WeekDay, ShippedDate) AS WeekDayPart FROM Orders ORDER BY WeekDayPart --Add the GROUP BY SELECT DATEPART(WeekDay, ShippedDate) AS WeekDayPart, COUNT(*) As DayCnt FROM Orders GROUP BY DATEPART(WeekDay, ShippedDate) --Order the output SELECT DATEPART(WeekDay, ShippedDate) AS WeekDayPart, COUNT(*) As DayCnt FROM Orders GROUP BY DATEPART(WeekDay, ShippedDate) ORDER BY WeekDayPart --Note we can ORDER BY an alias but not GROUP BY --Remove Null values SELECT DATEPART(WeekDay, ShippedDate) AS WeekDayPart, COUNT(*) As DayCnt FROM Orders WHERE ShippedDate IS NOT NULL GROUP BY DATEPART(WeekDay, ShippedDate) ORDER BY WeekDayPart --Sunday = 1, Saturday = 7 --How to print the actual day of the week? --CASE statement in the SELECT clause SELECT DATEPART(WeekDay, ShippedDate) AS WeekDayPart, COUNT(*) As DayCnt, DayOfTheWeek = CASE WHEN DATEPART(WeekDay, ShippedDate) = 1 THEN 'Sunday' WHEN DATEPART(WeekDay, ShippedDate) = 2 THEN 'Monday' WHEN DATEPART(WeekDay, ShippedDate) = 3 THEN 'Tuesday' WHEN DATEPART(WeekDay, ShippedDate) = 4 THEN 'Wednesday' WHEN DATEPART(WeekDay, ShippedDate) = 5 THEN 'Thursday' WHEN DATEPART(WeekDay, ShippedDate) = 6 THEN 'Friday' WHEN DATEPART(WeekDay, ShippedDate) = 7 THEN 'Saturday' ELSE 'Error' END FROM Orders WHERE ShippedDate IS NOT NULL GROUP BY DATEPART(WeekDay, ShippedDate) ORDER BY WeekDayPart /* But, you know, this is not very easy to read and makes our SQL too complex. A better solution would be to create a user-defined function. Essentially we take our CASE statement, parameterize the attributes and add a SELECT statement (return). */ --Parameterize the date attribute, initialize it to --the system time for testing... DECLARE @InputDate DATETIME SET @InputDate = GETDATE() SELECT DayOfTheWeek = CASE WHEN DATEPART(WeekDay, @InputDate) = 1 THEN 'Sunday' WHEN DATEPART(WeekDay, @InputDate) = 2 THEN 'Monday' WHEN DATEPART(WeekDay, @InputDate) = 3 THEN 'Tuesday' WHEN DATEPART(WeekDay, @InputDate) = 4 THEN 'Wednesday' WHEN DATEPART(WeekDay, @InputDate) = 5 THEN 'Thursday' WHEN DATEPART(WeekDay, @InputDate) = 6 THEN 'Friday' WHEN DATEPART(WeekDay, @InputDate) = 7 THEN 'Saturday' ELSE 'Error' END /* Now put this inside a FUNCTION declaration */ CREATE FUNCTION fnDayOfWeek (@InputDate DATETIME) RETURNS NVARCHAR(50) AS BEGIN --Return variable DECLARE @DayOfTheWeek AS NVARCHAR(50) --SELECT CASE statement here RETURN (@DayOfTheWeek) END --DROP FUNCTION fnDayOfWeek CREATE FUNCTION fnDayOfWeek (@InputDate DATETIME) RETURNS NVARCHAR(50) AS BEGIN DECLARE @DayOfTheWeek AS NVARCHAR(50) SELECT @DayOfTheWeek = CASE WHEN DATEPART(WeekDay, @InputDate) = 1 THEN 'Sunday' WHEN DATEPART(WeekDay, @InputDate) = 2 THEN 'Monday' WHEN DATEPART(WeekDay, @InputDate) = 3 THEN 'Tuesday' WHEN DATEPART(WeekDay, @InputDate) = 4 THEN 'Wednesday' WHEN DATEPART(WeekDay, @InputDate) = 5 THEN 'Thursday' WHEN DATEPART(WeekDay, @InputDate) = 6 THEN 'Friday' WHEN DATEPART(WeekDay, @InputDate) = 7 THEN 'Saturday' ELSE 'Error' END RETURN (@DayOfTheWeek) END --Now replace the CASE statement with a function call SELECT ShippedDate, dbo.fnDayOfWeek(ShippedDate) AS WeekDayPart FROM Orders WHERE dbo.fnDayOfWeek(ShippedDate) <> 'Error' SELECT dbo.fnDayOfWeek(ShippedDate) AS WeekDayPart, COUNT(*) As DayCnt FROM Orders GROUP BY dbo.fnDayOfWeek(ShippedDate) ORDER BY WeekDayPart /* Are there seasonal patterns? That is, across the entire year? */ SELECT DATEPART(Month, OrderDate) AS WeekDayPart, COUNT(*) As DayCnt FROM Orders GROUP BY DATEPART(Month, OrderDate) ORDER BY WeekDayPart /* We can GROUP BY the results of a string manipulation Create a distribution of counts for area codes for USA customers Use SUBSTRING (expression, start, length) To strip out the Area Code portion of the phone number */ SELECT * FROM Customers SELECT Phone, SUBSTRING (Phone, 2, 3) AS AreaCode FROM Customers WHERE Country = 'USA' SELECT SUBSTRING (Phone, 2, 3) AS AreaCode, COUNT(*) AS AreaCodeCnt FROM Customers WHERE Country = 'USA' GROUP BY SUBSTRING (Phone, 2, 3) /* We can GROUP BY the result of a calculation. The Chief Financial Officer just called. Their group needs a distribution of revenue for all orders in increments of $100 (USD). For example, in the sample data below, 37 orders had revenue below $100. 48 orders were between $100 and $199. RevenueInHundreds Cnt 0 37 1 48 2 45 3 49 4 55 5 42 6 44 7 31 : : and so on */ --Generate revenue result SELECT OrderID, UnitPrice*(1-Discount)*Quantity AS OrderRevenue, * FROM [Order Details] --SUM revenue by order SELECT OrderID, SUM(UnitPrice*(1-Discount)*Quantity) AS OrderRevenue FROM [Order Details] GROUP BY OrderID /* This will convert revenue into a categorical variable. Divide Revenue into 100 increments, that is, divide by 100 CAST(OrderRevenue AS INT)/100 AS Revenue */ SELECT OrderID, CAST(OrderRevenue AS INT)/100 AS Revenue, OrderRevenue FROM (SELECT OrderID, SUM(UnitPrice*(1-Discount)*Quantity) AS OrderRevenue FROM [Order Details] GROUP BY OrderID) AS vwOrderRevenue --Compute counts for each increment SELECT CAST(OrderRevenue AS INT)/100 AS Revenue, COUNT(*) AS FreqDist FROM (SELECT OrderID, SUM(UnitPrice*(1-Discount)*Quantity) AS OrderRevenue FROM [Order Details] GROUP BY OrderID) AS vwOrderRevenue GROUP BY (CAST(OrderRevenue AS INT)/100) ORDER BY Revenue --Generate a graph in Excel SELECT EmployeeID, LastName FROM Employees EXCEPT SELECT EmployeeID, LastName FROM Employees SELECT LastName, EmployeeID FROM Employees EXCEPT SELECT EmployeeID, LastName FROM Employees SELECT EmployeeID, LastName FROM Employees EXCEPT SELECT EmployeeID AS somealias, LastName FROM Employees