/* ISC 561 OUTER JOINS --The generic syntax for OUTER joins is: --2 tables SELECT Table1.Attribute, Table2.Attribute... FROM Table1, Table2 WHERE Table1.PrimaryKey*=Table2.ForeignKey AND condition list -- *= means LEFT OUTER JOIN SELECT Table1.Attribute, Table2.Attribute, Table3.Attribute... FROM Table1 LEFT OUTER JOIN Table2 ON Table1.PrimaryKey=Table2.ForeignKey AND condition list LEFT and RIGHT refer to the position of the table in the join statement, i.e., ALL rows from the table on the LEFT of the OUTER JOIN statement */ USE Northwind /* The sales staff need a report that lists all categories with all products in that category that have a unitprice greater than 45. If a category doesn't have a product listed, print "no product currently available" in place of the ProductName. Put 0 for the Unitprice. List CategoryID, CategoryName, ProductName, and UnitPrice for products with a unitprice greater than 45. Order the output by CategoryID, ProductName */ SELECT Categories.CategoryID, CategoryName, ProductName, Products.UnitPrice FROM Categories, Products WHERE Categories.CategoryID=Products.CategoryID AND UnitPrice > 45 ORDER BY Categories.CategoryID, ProductName /* This result shows the categories and products alright, but what about category 2 and 5? How to include them in the result? The problem is, there is no corresponding FK for rows where UnitPrice is > 45. Specifically these two FKs. */ SELECT CategoryID, CategoryName FROM Categories WHERE CategoryID NOT IN(SELECT CategoryID FROM Products WHERE Products.UnitPrice > 45) /* The expression Categories.CategoryID=Products.CategoryID in the WHERE clause eliminates products in category 2 and 5 and therefore there are no matching FK values when Products is joined to Categories. */ /* In order to get rows where the PK=FK AND predicate condition was false, we have to write an OUTER JOIN */ -- *= OR =* SELECT Categories.CategoryID, CategoryName, ProductName, Products.UnitPrice FROM Categories, Products WHERE Categories.CategoryID*=Products.CategoryID AND UnitPrice > 45 ORDER BY Categories.CategoryID, ProductName /* We can write this using the LEFT OUTER RIGHT JOIN syntax */ SELECT Categories.CategoryID, CategoryName, ProductName, Products.UnitPrice FROM Categories LEFT OUTER JOIN Products ON Categories.CategoryID=Products.CategoryID WHERE Products.UnitPrice > 45 ORDER BY Categories.CategoryID, ProductName /* Hmmm... that didn't work right. My understanding is SQL Server executes the WHERE predicate first. Probably for performance reasons. But this results in an unintended result. For SQL Server, you have to put the >45 as an AND (conjunct) to the PK=FK comparison. I think this varies by dbms... */ SELECT Categories.CategoryID, CategoryName, ProductName, Products.UnitPrice FROM Categories LEFT OUTER JOIN Products ON Categories.CategoryID=Products.CategoryID AND Products.UnitPrice > 45 ORDER BY Categories.CategoryID, ProductName /* Replace NULL values */ SELECT Categories.CategoryID, CategoryName, ISNULL(ProductName, 'no product currently available') AS ProductName, ISNULL(UnitPrice, 0.0) AS UnitPrice FROM Categories, Products WHERE Categories.CategoryID*=Products.CategoryID AND UnitPrice > 45 ORDER BY Categories.CategoryID, ProductName SELECT Categories.CategoryID, CategoryName, ISNULL(ProductName, 'no product currently available') AS ProductName, ISNULL(UnitPrice, 0.0) AS UnitPrice FROM Categories LEFT OUTER JOIN Products ON Categories.CategoryID=Products.CategoryID AND Products.UnitPrice > 45 ORDER BY Categories.CategoryID, ProductName /* We'll go over the issue of interpretation of NULL in a later section. But the above technique is one way of removing the potential ambiguity of NULL values. As Kreonke points out, NULLs can mean 1. Not appropriate 2. Not known 3. Not provided or missing (an input error) -9999 */ /* First-week January report. List all product names and the total quantity ordered for the first week of January 1997. [ORDERDATE BETWEEN '1997-01-01' AND '1997-01-07'] */ /* In order to compute the quantity ordered we have to join the Orders and Order Details table. The count is in Order Details and the time stamp is in Orders. We would GROUP BY ProductID in the Order Details table to derive the SUM value. */ SELECT ProductID, SUM(Quantity) AS ProductQty FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND OrderDate BETWEEN '1997-01-01' AND '1997-01-07' GROUP BY ProductID /* Per our previous examples, we would use the above query as a subquery or candidate for a view and join the subquery to the Products table in an outer query to retreive non-aggregate attributes. */ SELECT ProductName, ProductQty FROM Products, (SELECT ProductID, SUM(Quantity) AS ProductQty FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND OrderDate BETWEEN '1997-01-01' AND '1997-01-07' GROUP BY ProductID) AS vwProductQty WHERE Products.ProductID=vwPRoductQty.ProductID --This result does not include all products --Retrieve all products with an outer join -- *= SELECT ProductName, ProductQty FROM Products, (SELECT ProductID, SUM(Quantity) AS ProductQty FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND OrderDate BETWEEN '1997-01-01' AND '1997-01-07' GROUP BY ProductID) AS vwProductQty WHERE Products.ProductID*=vwPRoductQty.ProductID --LEFT OUTER JOIN SELECT ProductName, ProductQty FROM Products LEFT OUTER JOIN (SELECT ProductID, SUM(Quantity) AS ProductQty FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND OrderDate BETWEEN '1997-01-01' AND '1997-01-07' GROUP BY ProductID) AS vwProductQty ON Products.ProductID=vwPRoductQty.ProductID --Replace NULL with 0 SELECT ProductName, ISNULL(ProductQty, 0) AS ProductQty FROM Products, (SELECT ProductID, SUM(Quantity) AS ProductQty FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND OrderDate BETWEEN '1997-01-01' AND '1997-01-07' GROUP BY ProductID) AS vwProductQty WHERE Products.ProductID*=vwPRoductQty.ProductID SELECT ProductName, ISNULL(ProductQty, 0) AS ProductQty FROM Products LEFT OUTER JOIN (SELECT ProductID, SUM(Quantity) AS ProductQty FROM Orders, [Order Details] AS OrderDetail WHERE Orders.OrderID=OrderDetail.OrderID AND OrderDate BETWEEN '1997-01-01' AND '1997-01-07' GROUP BY ProductID) AS vwProductQty ON Products.ProductID=vwPRoductQty.ProductID --One more? /* The sales group has detected a shipping anomaly with recent orders shipped to 'Helsinki'. They want to know if we could send them a list of all employees and their homephone and the last date they shipped an order to shipcity 'Helsinki'. If an employee hasn't shipped an order to 'Helsinki', print 'NA'. */ /* It seems that the outer query is a list of employees and their phone numbers. */ SELECT LastName, HomePhone, 'somedate' AS LastShippedDate FROM Employees /* The generic syntax would be... */ SELECT LastName, HomePhone, 'somedate' AS LastShippedDate FROM Employees RIGHT OUTER JOIN (subquery) ON Employees.Pk=Subquery.Fk /* A simplier request would have been to list the above result and "all" the ship dates. */ SELECT LastName, HomePhone, 'somedate' AS LastShippedDate, ShippedDate FROM Employees, Orders WHERE Employees.EmployeeID=Orders.EmployeeID AND Orders.ShipCity='Helsinki' ORDER BY LastName, ShippedDate DESC /* Then we could have simply done an OUTER JOIN */ SELECT LastName, HomePhone, 'somedate' AS LastShippedDate, ShippedDate FROM Employees, Orders WHERE Employees.EmployeeID*=Orders.EmployeeID AND Orders.ShipCity='Helsinki' ORDER BY LastName, ShippedDate DESC SELECT LastName, HomePhone, 'somedate' AS LastShippedDate, ShippedDate FROM Employees LEFT OUTER JOIN Orders ON Employees.EmployeeID=Orders.EmployeeID AND Orders.ShipCity='Helsinki' ORDER BY LastName, ShippedDate DESC /* But the request was for a aggregate value: MAX(ShippedDate) Can we simply add the GROUP BY? Yes, but I prefer to write this as a subquery rather than adding non-aggregate attributes to the GROUP BY */ SELECT LastName, HomePhone, MAX(ShippedDate) AS LastShippedDate FROM Employees, Orders WHERE Employees.EmployeeID*=Orders.EmployeeID AND Orders.ShipCity='Helsinki' GROUP BY LastName, HomePhone SELECT LastName, HomePhone, MAX(ShippedDate) AS LastShippedDate FROM Employees LEFT OUTER JOIN Orders ON Employees.EmployeeID=Orders.EmployeeID AND Orders.ShipCity='Helsinki' GROUP BY LastName, HomePhone /* I prefer to write an inner subquery that returns aggregrate statistics by employee and then use an OUTER JOIN to the Employees table. Let's write a generic time-based aggregate "view" for ShipCity='Helsinki' */ SELECT EmployeeID, MAX(ShippedDate) AS LastShippedDate, MIN(ShippedDate) AS FirstShippedDate, AVG(DATEDIFF(DAY, OrderDate, ShippedDate)) AS AvgTimeToShip FROM Orders WHERE ShipCity='Helsinki' GROUP BY EmployeeID /* In our current report, we only need the LastShippedDate. */ SELECT LastName, HomePhone, LastShippedDate FROM Employees, (SELECT EmployeeID, MAX(ShippedDate) AS LastShippedDate, MIN(ShippedDate) AS FistShippedDate, AVG(DATEDIFF(DAY, OrderDate, ShippedDate)) AS AvgTimeToShip FROM Orders WHERE ShipCity='Helsinki' GROUP BY EmployeeID) AS vwHelsinkiOrders WHERE Employees.EmployeeID=vwHelsinkiOrders.EmployeeID /* And now to make it an OUTER JOIN */ SELECT LastName, HomePhone, LastShippedDate FROM Employees, (SELECT EmployeeID, MAX(ShippedDate) AS LastShippedDate, MIN(ShippedDate) AS FistShippedDate, AVG(DATEDIFF(DAY, OrderDate, ShippedDate)) AS AvgTimeToShip FROM Orders WHERE ShipCity='Helsinki' GROUP BY EmployeeID) AS vwHelsinkiOrders WHERE Employees.EmployeeID*=vwHelsinkiOrders.EmployeeID SELECT LastName, HomePhone, LastShippedDate FROM Employees LEFT OUTER JOIN (SELECT EmployeeID, MAX(ShippedDate) AS LastShippedDate, MIN(ShippedDate) AS FistShippedDate, AVG(DATEDIFF(DAY, OrderDate, ShippedDate)) AS AvgTimeToShip FROM Orders WHERE ShipCity='Helsinki' GROUP BY EmployeeID) AS vwHelsinkiOrders ON Employees.EmployeeID=vwHelsinkiOrders.EmployeeID ORDER BY LastName --Replace NULL with 'NA' SELECT LastName, HomePhone, ISNULL(CAST(LastShippedDate AS NVARCHAR), 'NA') AS LastShippedDate FROM Employees, (SELECT EmployeeID, MAX(ShippedDate) AS LastShippedDate, MIN(ShippedDate) AS FistShippedDate, AVG(DATEDIFF(DAY, OrderDate, ShippedDate)) AS AvgTimeToShip FROM Orders WHERE ShipCity='Helsinki' GROUP BY EmployeeID) AS vwHelsinkiOrders WHERE Employees.EmployeeID*=vwHelsinkiOrders.EmployeeID ORDER BY LastName SELECT LastName, HomePhone, ISNULL(CAST(LastShippedDate AS NVARCHAR), 'NA') AS LastShippedDate FROM Employees LEFT OUTER JOIN (SELECT EmployeeID, MAX(ShippedDate) AS LastShippedDate, MIN(ShippedDate) AS FistShippedDate, AVG(DATEDIFF(DAY, OrderDate, ShippedDate)) AS AvgTimeToShip FROM Orders WHERE ShipCity='Helsinki' GROUP BY EmployeeID) AS vwHelsinkiOrders ON Employees.EmployeeID=vwHelsinkiOrders.EmployeeID ORDER BY LastName