/* Subqueries in the HAVING clause Uncorrelated SELECT attributelist FROM table1 WHERE condition list GROUP BY attribute list HAVING attribute [comparison operator] (SELECT attribute FROM table2 WHERE condition list) Correlated SELECT attributelist FROM table1 WHERE condition list GROUP BY attribute list HAVING attribute [comparison operator] (SELECT attribute FROM table2 WHERE tabl1.Pk=tabl2.Fk AND condition list) */ /* List SupplierID and AvgUnitPrice of products for each Supplier whose AvgUnitPrice is greater than the average unitprice for all products. */ Use Northwind --Compute the average SELECT AVG(UnitPrice) AS AvgUnitPrice FROM Products --Construct the GROUP BY SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products GROUP BY SupplierID ORDER BY AvgUnitPrice --Filter groups with HAVING SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products GROUP BY SupplierID HAVING AVG(UnitPrice) > 28.8663 ORDER BY AvgUnitPrice --Replace literal with uncorrelated subquery SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products GROUP BY SupplierID HAVING AVG(UnitPrice) > (SELECT AVG(UnitPrice) AS AvgUnitPrice FROM Products) ORDER BY AvgUnitPrice SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products GROUP BY SupplierID HAVING AVG(UnitPrice) > (SELECT UnitPrice AS AvgUnitPrice FROM Products) /* Easy... but how about a harder one? Find the average UnitPrice of Products for each Supplier who supplies more than three products. Could solve this in the FROM clause SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products, (subquery) AS SupplierCnt3 WHERE Products.SupplierID=SupplierCnt3.SupplierID GROUP BY SupplierID OR SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products GROUP BY SupplierID HAVING 3 < (subquery) */ --Let's do the FROM (view) solution first SELECT * FROM Products --Which suppliers have more than three? SELECT SupplierID, COUNT(*) AS SupplierCnt FROM Products GROUP BY SupplierID ORDER BY SupplierCnt --Restrict rows with the HAVING clause SELECT SupplierID, COUNT(*) AS SupplierCnt FROM Products GROUP BY SupplierID HAVING COUNT(*) > 3 --Insert the uncorrelated subquery in the FROM clause SELECT SupplierCnt3.SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products, (SELECT SupplierID, COUNT(*) AS SupplierCnt FROM Products GROUP BY SupplierID HAVING COUNT(*) > 3) AS SupplierCnt3 WHERE Products.SupplierID=SupplierCnt3.SupplierID GROUP BY SupplierCnt3.SupplierID --Same solution in the WHERE clause SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products WHERE Products.SupplierID IN(SELECT SupplierID FROM Products GROUP BY SupplierID HAVING COUNT(*) > 3) GROUP BY SupplierID /* Correlated version A subquery in the HAVING clause will have to be correlated. We have to run the inner query once for each supplier to determine whether to print (SELECT) that group. Start with the outer query. */ SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products GROUP BY SupplierID /* At this point, we can restrict groups by determining if a supplier has more than 3 products. That is... */ SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products GROUP BY SupplierID HAVING 3 < (subquery) /* Where the subquery looks up the COUNT(*) for each Supplier. */ /* Build subquery We have to add a pk=fk reference in the subquery We can test first with a hardcoded value */ --Test it for supplier 1 SELECT COUNT(*) AS SupplierCnt FROM Products WHERE SupplierID = 2 /* Supplier 1 only has 3 products, so the group for Supplier 1 should not be returned in the output. */ /* Insert the subquery with the hardcoded value just to test. It should return no rows because 3 < 3 evaluates to False. */ SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products GROUP BY SupplierID HAVING 3 < (SELECT COUNT(*) AS SupplierCnt FROM Products WHERE SupplierID = 1) /* If we tried it for supplier 2, we would print all groups because 3 < 4 evaluates to True */ SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products GROUP BY SupplierID HAVING 3 < (SELECT COUNT(*) AS SupplierCnt FROM Products WHERE SupplierID = 2) /* Now we add the Pk=Fk comparison with the table in the outer query and it should work. */ SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products GROUP BY SupplierID HAVING 3 < (SELECT COUNT(*) AS SupplierCnt FROM Products AS ProductsInner WHERE ProductsInner.SupplierID = Products.SupplierID) SELECT SupplierCnt3.SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products, (SELECT SupplierID, COUNT(*) AS SupplierCnt FROM Products GROUP BY SupplierID HAVING COUNT(*) > 3) AS SupplierCnt3 WHERE Products.SupplierID=SupplierCnt3.SupplierID GROUP BY SupplierCnt3.SupplierID SELECT SupplierID, AVG(UnitPrice) AS AvgUnitPrice FROM Products WHERE Products.SupplierID IN(SELECT SupplierID FROM Products GROUP BY SupplierID HAVING COUNT(*) > 3) GROUP BY SupplierID /* Here is a more involved example. Let's say the folks in sales at Northwind are considering an annaul rebate program for customers who consistently place orders over a certain dollar amount, say $700. They want to issue an end-of-year credit to these customers. The credit is computed as (NumberOfOrdersPlaced*15). So a customer who placed 10 orders during say 1998, would receive a credit of $150. List CustomerID and CreditAmount for all customers for whom all orders placed during 1998 exceeded $700 of revenue. Where to start? It sounds like the Customers table will be driving this query. Can we compute the CreditAmount for ALL customers and then worry about filtering for the >700 for all orders placed? Probably... */ SELECT Customers.CustomerID, (COUNT(*)*15) AS CreditAmount FROM Customers, Orders WHERE Customers.CustomerID = Orders.CustomerID GROUP BY Customers.CustomerID /* That computes it for all customers for all years. Add the year predicate. */ SELECT Customers.CustomerID, (COUNT(*)*15) AS CreditAmount FROM Customers, Orders WHERE Customers.CustomerID = Orders.CustomerID AND YEAR(OrderDate) = 1998 GROUP BY Customers.CustomerID /* But sales only wants to give the credit to those customers for whom all orders placed during the year were more than 700 in revenue. If we could compute this value for each customer, we could put this in a HAVING clause and filter groups. */ SELECT Customers.CustomerID, (COUNT(*)*15) AS CreditAmount FROM Customers, Orders WHERE Customers.CustomerID = Orders.CustomerID AND YEAR(OrderDate) = 1998 GROUP BY Customers.CustomerID HAVING 700 < (subquery that looks up the revenue for current Customers.CustomerID) /* So let's start on the subquery. Revenue per order is calculated in Northwind as [Order Details].UnitPrice * [Order Details].Quantity * (1-[Order Details].Discount) summed over all line items in [Order Details] for a given Order. */ --This computes the revenue for all orders in the Northwind Database SELECT OrderID, SUM(UnitPrice * Quantity * (1-Discount)) AS Revenue FROM [Order Details] AS OrderDetail GROUP BY OrderID /* To restrict it to only orders for a given year and customer we'll have to join to the Orders table */ SELECT Orders.OrderID, SUM(UnitPrice * Quantity * (1-Discount)) AS Revenue FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID GROUP BY Orders.OrderID --Now add Year predicate SELECT Orders.OrderID, SUM(UnitPrice * Quantity * (1-Discount)) AS Revenue FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND YEAR(OrderDate) = 1998 GROUP BY Orders.OrderID /* And in preparation for making a correlated subquery, let's test our query by hardcoding a specific value for a customer */ SELECT * FROM Customers SELECT Orders.OrderID, SUM(UnitPrice * Quantity * (1-Discount)) AS Revenue FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND YEAR(OrderDate) = 1998 AND CustomerID = 'CHOPS' GROUP BY Orders.OrderID /* So... 'CHOPS' had three orders that year and all of them are above 700 in revenue. But what about ALFKI? Two are, but one order is not. ALFKI should not get a credit because one of the orders does not exceed 700. */ SELECT Orders.OrderID, SUM(UnitPrice * Quantity * (1-Discount)) AS Revenue FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND YEAR(OrderDate) = 1998 AND CustomerID = 'ALFKI' GROUP BY Orders.OrderID /* Remember what we are trying to get to. We want a subquery that filters groups using the HAVING clause (see below). */ SELECT Customers.CustomerID, (COUNT(*)*15) AS CreditAmount FROM Customers, Orders WHERE Customers.CustomerID = Orders.CustomerID AND YEAR(OrderDate) = 1998 GROUP BY Customers.CustomerID HAVING 700 < (subquery that looks up the revenue for current Customers.CustomerID) /* I think there are at least two ways to do this. One would be to use the ALL operator as in HAVING 700 < ALL(subque... Another approach would be to determine the "lowest" revenue and check if that is above 700. If the lowest is above 700, then we know all other orders will be too! So let's filter the 'ALFKI' result to retrieve the MIN revenue */ SELECT Orders.OrderID, SUM(UnitPrice * Quantity * (1-Discount)) AS Revenue FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND YEAR(OrderDate) = 1998 AND CustomerID = 'ALFKI' GROUP BY Orders.OrderID SELECT MIN(UnitPrice) FROM Products SELECT MIN(Revenue) AS LowestRevenue FROM (SELECT Orders.OrderID, SUM(UnitPrice * Quantity * (1-Discount)) AS Revenue FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND YEAR(OrderDate) = 1998 AND CustomerID = 'ALFKI' GROUP BY Orders.OrderID) AS MinRevenue /* That seemed to work. Let's try it in the outer query. */ SELECT Customers.CustomerID, (COUNT(*)*15) AS CreditAmount FROM Customers, Orders WHERE Customers.CustomerID = Orders.CustomerID AND YEAR(OrderDate) = 1998 GROUP BY Customers.CustomerID HAVING 700 < (SELECT MIN(Revenue) AS LowestRevenue FROM (SELECT Orders.OrderID, SUM(UnitPrice * Quantity * (1-Discount)) AS Revenue FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND YEAR(OrderDate) = 1998 AND CustomerID = 'CHOPS' GROUP BY Orders.OrderID) AS MinRevenue) /* This returned no rows because 700 < 471 evaluated to False. If we tried it with 'CHOPS' we should get all rows. Now make it correlated by adding the Pk=Fk comparison. */ SELECT Customers.CustomerID, (COUNT(*)*15) AS CreditAmount FROM Customers, Orders WHERE Customers.CustomerID = Orders.CustomerID AND YEAR(OrderDate) = 1998 GROUP BY Customers.CustomerID HAVING 700 < (SELECT MIN(Revenue) AS LowestRevenue FROM (SELECT Orders.OrderID, SUM(UnitPrice * Quantity * (1-Discount)) AS Revenue FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND YEAR(OrderDate) = 1998 AND CustomerID = Customers.CustomerID GROUP BY Orders.OrderID) AS MinRevenue) /* If we wanted to use the ALL operator, we wouldn't need to find the MIN(). */ SELECT Customers.CustomerID, (COUNT(*)*15) AS CreditAmount FROM Customers, Orders WHERE Customers.CustomerID = Orders.CustomerID AND YEAR(OrderDate) = 1998 GROUP BY Customers.CustomerID HAVING 700 < ALL(SELECT SUM(UnitPrice * Quantity * (1-Discount)) AS Revenue FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND YEAR(OrderDate) = 1998 AND CustomerID = Customers.CustomerID GROUP BY Orders.OrderID) /* I had to remove the OrderID attribute and the outer query that identified the MIN value. */ /* Either formulation is rather complex and difficult to understand. But I submit that even so, it is better than writing programming logic in an application (all things being equal of course). I would parameterize the string literals in the query so that it could be used for any year and any cut-off value. */