/* CIS 324 CHAPTER 7 GROUP BY I */ /* Processing groups of rows. Grouping Data pp. 260 Common uses of GROUP BY: frequency distributions, looking for patterns SELECT attribute list FROM table/view/SQL expression WHERE condition(s) GROUP BY attribute list HAVING condition ORDER BY attribute list [ASC | DESC] */ /* Examples of pattern analyis in the NorthWind Database... Seasonal patterns in the Northwind database? Suppose the marketing people want to target periods of low volume for a special promotional campaign. */ USE Northwind SELECT * FROM Orders SELECT TOP 100 DATEPART(dayofyear,OrderDate) AS DayOfYear, * FROM Orders ORDER BY DayOfYear --Once I have spot checked a few values SELECT DATEPART(dayofyear,OrderDate) AS DayOfYear, COUNT(*) As OrderCountByDay FROM Orders GROUP BY DATEPART(dayofyear,OrderDate) ORDER BY DayOfYear --the unit of analysis seems a bit too small. --Let's try month SELECT DATEPART(Month,OrderDate) AS Month, COUNT(*) As OrderCountByDay FROM Orders GROUP BY DATEPART(Month,OrderDate) ORDER BY DATEPART(Month,OrderDate) SELECT DATEPART(Month,OrderDate) AS Month, COUNT(*) As OrderCountByDay, SUM(Freight) AS SumFreight, AVG(Freight) As AvgFreight FROM Orders GROUP BY DATEPART(Month,OrderDate) ORDER BY DATEPART(Month,OrderDate) /* SELECT attribute list FROM table/view/SQL expression WHERE condition(s) GROUP BY attribute list HAVING condition ORDER BY attribute list [ASC | DESC] GROUP BY attribute is similar to a control variable in a nested loop or "control break" algorithm. Aggregate functions act on the innermost GROUP BY attribute SELECT attribute1, attribute2, aggregate FROM table GROUP BY attribute1, attribute2 Aggregate is for attribute2 only I usually write the query in the same order as the syntax: code a little, test a lot... SELECT FROM WHERE GROUP BY HAVING ORDER BY */ select * from orders /* The director of Human Resources just called. It is time to work out this year's employee bonuses. First, she wanted to know how many orders Nancy Davolio (EmployeeID 1) placed in 1997? */ --Build incrementally SELECT * FROM Orders ORDER BY EmployeeID SELECT EmployeeID, '' AS OrderCount FROM Orders SELECT EmployeeID, '' AS OrderCount, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 1 SELECT EmployeeID, '' AS OrderCount, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 1 AND YEAR(OrderDate) = 1997 --This throws an exception because no GROUP BY SELECT EmployeeID, COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 1 AND YEAR(OrderDate) = 1997 SELECT COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 1 AND YEAR(OrderDate) = 1997 /* Well, actually what she really wants is a list of the top 3 sellers. That is, who were three employees with the most orders for 1997. You could run our query above for all the employees one by one. */ SELECT COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 1 AND YEAR(OrderDate) = 1997 SELECT COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 2 AND YEAR(OrderDate) = 1997 SELECT COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 3 AND YEAR(OrderDate) = 1997 SELECT COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 4 AND YEAR(OrderDate) = 1997 SELECT COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 5 AND YEAR(OrderDate) = 1997 --We could combine the result with UNION SELECT COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 1 AND YEAR(OrderDate) = 1997 UNION SELECT COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 2 AND YEAR(OrderDate) = 1997 UNION SELECT COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 3 AND YEAR(OrderDate) = 1997 UNION SELECT COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 4 AND YEAR(OrderDate) = 1997 UNION SELECT COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE EmployeeID = 5 AND YEAR(OrderDate) = 1997 ORDER BY OrderCount --The better solution is to use a GROUP BY clause --Delete the EmployeeID = n predicate SELECT EmployeeID, COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE YEAR(OrderDate) = 1997 GROUP BY EmployeeID SELECT EmployeeID, COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE YEAR(OrderDate) = 1997 GROUP BY EmployeeID ORDER BY OrderCount DESC SELECT TOP 3 EmployeeID, COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE YEAR(OrderDate) = 1997 GROUP BY EmployeeID ORDER BY OrderCount DESC /* What is wrong with this query? */ SELECT EmployeeID, OrderDate, COUNT(*) AS OrderCount--, YEAR(OrderDate) AS OrderYear FROM Orders WHERE YEAR(OrderDate) = 1997 GROUP BY EmployeeID SELECT * FROM Orders WHERE EmployeeID = 5 AND YEAR(OrderDate) = 1997 --Another testing technique I use is to implement ORDER BY first /* The only attributes permitted in the SELECT statement are aggregate functions (COUNT, SUM, MAX...) and the GROUP BY attributes Server: Msg 8120, Level 16, State 1, Line 1 Column 'attribute' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause. */ --You can't put OrderDate, for example /* Count the number of products in a given category */ SELECT * FROM Products SELECT * FROM Products ORDER BY CategoryID SELECT CategoryID, COUNT(*) AS CategoryCount FROM Products GROUP BY CategoryID SELECT CategoryID, COUNT(*) AS CategoryCount FROM Products GROUP BY CategoryID ORDER BY CategoryCount