/* Normalization inclass demos These examples came from previous CIS 324 exams. The normalization portion of our exam will be similar to these. Again, you are not responsible for the SQL syntax on the exam. I would create these tables in the Northwind Database */ USE Northwind --DROP TABLE tblStudentEnroll CREATE TABLE tblStudentEnroll ( Jnumber CHAR(9), --identifies a user CRN INT, --identifies a class section LastName NVARCHAR(50), Grade NVARCHAR(2) ) /*Highlight all INSERT statements as one block or run one at a time. */ INSERT INTO tblStudentEnroll (Jnumber, CRN, LastName, Grade) VALUES('J00123456',12345 ,'Pardue','A') INSERT INTO tblStudentEnroll (Jnumber, CRN, LastName, Grade) VALUES('J00988885',12345 ,'Smith','B') INSERT INTO tblStudentEnroll (Jnumber, CRN, LastName, Grade) VALUES('J00111111',98765 ,'Angst','C') INSERT INTO tblStudentEnroll (Jnumber, CRN, LastName, Grade) VALUES('J00123456',54321 ,'Pardue','D') SELECT * FROM tblStudentEnroll /* 1. Identify the primary key 2. Identify the dependent attributes (dependency diagram or tokenized table) 3. Draw an ERD or the new relations that normalizes tblStudentEnroll to 3NF 4. Describe an update, delete, and insert anomaly for tblStudentEnroll 5. Explain why tblStudentEnroll is not normalized to 3NF */ --1. Identify the primary key (minimal set of attributes that uniquely identifies the row) --Jnumber? No SELECT DISTINCT COUNT(*), Jnumber FROM tblStudentEnroll GROUP BY Jnumber --CRN? No SELECT DISTINCT COUNT(*), CRN FROM tblStudentEnroll GROUP BY CRN SELECT * FROM tblStudentEnroll --Jnumber and CRN? Yes. SELECT DISTINCT COUNT(*), Jnumber, CRN FROM tblStudentEnroll GROUP BY Jnumber, CRN --2. Identify the dependent attributes (dependency diagram or tokenized table) /* Generic form of query SELECT COUNT(DISTINCT dependent), determinant FROM table(s) GROUP BY determinant HAVING COUNT(DISTINCT dependent) > 1 */ -- Jnumber --> LastName? yes SELECT COUNT(DISTINCT LastName) AS DistinctValues, Jnumber FROM tblStudentEnroll GROUP BY Jnumber HAVING COUNT(DISTINCT Jnumber) > 1 SELECT * FROM tblStudentEnroll -- Jnumber --> Grade? No SELECT COUNT(DISTINCT Grade) AS DistinctValues, Jnumber FROM tblStudentEnroll GROUP BY Jnumber HAVING COUNT(DISTINCT Jnumber) > 1 -- CRN --> Grade? No SELECT COUNT(DISTINCT Grade) AS DistinctValues, CRN FROM tblStudentEnroll GROUP BY CRN HAVING COUNT(DISTINCT CRN) > 1 --3. Draw an ERD or the new relations that normalizes tblStudentEnroll to 3NF -- We can create versions of the base tables using views CREATE VIEW tblStudentEnrollNew AS SELECT Jnumber, CRN, Grade FROM tblStudentEnroll CREATE VIEW tblStudent AS SELECT DISTINCT Jnumber, LastName FROM tblStudentEnroll --Now we have two tables that are in a 1:M relationship --tblStudentEnrollNew is actually a bridge entity SELECT * FROM tblStudent SELECT * FROM tblStudentEnrollNew --The views can be joined to produce the original result SELECT * FROM tblStudent, tblStudentEnrollNew WHERE tblStudent.Jnumber=tblStudentEnrollNew.Jnumber --PK=FK --4. Describe an update, delete, and insert anomaly for tblStudentEnroll SELECT * FROM tblStudentEnroll --Update anomaly UPDATE tblStudentEnroll SET LastName = 'Pardee' WHERE CRN = 12345 AND Jnumber = 'J00123456' --We now have an inconsistency in the database. --Reverse the change... UPDATE tblStudentEnroll SET LastName = 'Pardue' WHERE CRN = 12345 AND Jnumber = 'J00123456' SELECT * FROM tblStudentEnroll --Insert anomaly INSERT INTO tblStudentEnroll (Jnumber, LastName) VALUES ('J00222222', 'Leopold') --This adds a student before they register for a class. --There is now a NULL in the composite primary key (Jnumber, CRN) --Reverse the change... DELETE tblStudentEnroll WHERE Jnumber = 'J00222222' --Delete anomaly DELETE tblStudentEnroll WHERE CRN = 98765 SELECT * FROM tblStudentEnroll --We have potentially deleted all reference to the student --Angst. --Reverse change INSERT INTO tblStudentEnroll (Jnumber, CRN, LastName, Grade) VALUES('J00111111',98765 ,'Angst','C') --This problem doesn't go away if we add an identity field (surrogate key) ALTER TABLE tblStudentEnroll ADD StudentEnrollID INT IDENTITY(1,1) PRIMARY KEY SELECT * FROM tblStudentEnroll --The surrogate key StudentEnrollID makes each row unique, but it doesn't prevent --a developer from introducing anomalies. /* 5. Explain why tblStudentEnroll is not in 3NF tblStudentEnroll is not in 3NF because it is not in 2NF. Normal forms are like concentric circles. A relation that is in 3NF must be in 2NF which must be in 1NF. Conversely, if a relation isn't in 1NF, it isn't in either 2NF or 3NF. tblStudentEnroll has a partial dependency. Jnumber --> LastName Lastname is not dependent on the whole key and nothing but the key. Lastname is determined by Jnumber only. The problem with this relation is that it is combining data from two entities. tblStudentEnroll is really an intersection table between Student and Class. */ /* 1. Identify the primary key 2. Identify the dependent attributes (dependency diagram or tokenized table) 3. Draw an ERD or the new relations that normalizes tblMemberVisit to 3NF 4. Describe an update, delete, and insert anomaly for tblMemberVisit 5. Explain why tblMemeberVisit is not in 3NF */ --DROP TABLE tblMemberVisit CREATE TABLE tblMemberVisit ( ServerLogID INT, MemberID INT, MemberLastName NVARCHAR(50), LoginTime DATETIME, LogoutTime DATETIME ) INSERT INTO tblMemberVisit (ServerLogID, MemberID, MemberLastName, LoginTime, LogoutTime) VALUES (1454, 23, 'Davis', '2005-11-05 13:09', '2005-11-05 14:00') INSERT INTO tblMemberVisit (ServerLogID, MemberID, MemberLastName, LoginTime, LogoutTime) VALUES (1455, 58, 'Luce', '2005-11-05 13:09', '2005-11-05 16:31') INSERT INTO tblMemberVisit (ServerLogID, MemberID, MemberLastName, LoginTime, LogoutTime) VALUES (1456, 23, 'Davis', '2005-11-06 1:00', '2005-11-06 4:00') INSERT INTO tblMemberVisit (ServerLogID, MemberID, MemberLastName, LoginTime, LogoutTime) VALUES (1457, 58, 'Luce', '2005-11-06 3:10', '2005-11-06 4:27') INSERT INTO tblMemberVisit (ServerLogID, MemberID, MemberLastName, LoginTime, LogoutTime) VALUES (1458, 62, 'McGann', '2005-11-07 16:08', '2005-11-07 17:00') SELECT * FROM tblMemberVisit --1. Identify the primary key (minimal set of attributes that uniquely identifies the row) -- ServerLogID? Yes SELECT DISTINCT COUNT(*), ServerLogID FROM tblMemberVisit GROUP BY ServerLogID -- MemberID? No SELECT DISTINCT COUNT(*), MemberID FROM tblMemberVisit GROUP BY MemberID --ServerLogID, MemberID? Yes, but not a minimal set of attributes. This would be true of all combinations with VisitID. SELECT DISTINCT COUNT(*), ServerLogID, MemberID FROM tblMemberVisit GROUP BY ServerLogID, MemberID --2. Identify the dependent attributes (dependency diagram or tokenized table) --Generic form of query SELECT COUNT(DISTINCT dependent), determinant FROM table(s) GROUP BY determinant HAVING COUNT(DISTINCT dependent) > 1 --Since we have a single attribute primary key, we know it is in 2NF. Look for transitive relationships. --However, the MemberID, LoginTime is unique. This would suggest we look for partial dependencies. --It would depend on what VisitID is. If it is the log number from the web server or a surrogate. But --either way we get the same result. -- MemberID --> MemberLastName? Yes SELECT COUNT(DISTINCT MemberLastName), MemberID FROM tblMemberVisit GROUP BY MemberID HAVING COUNT(DISTINCT MemberLastName) > 1 SELECT * FROM tblMemberVisit --3. Draw an ERD or the new relations that normalizes tblMemberVisit to 3NF CREATE VIEW tblMember AS SELECT DISTINCT MemberID, MemberLastName FROM tblMemberVisit CREATE VIEW tblMemberVisitNew AS SELECT ServerLogID, MemberID, LoginTime, LogoutTime FROM tblMemberVisit SELECT * FROM tblMember SELECT * FROM tblMemberVisitNew --And join it back together if necessary SELECT * FROM tblMember, tblMemberVisitNew WHERE tblMember.MemberID=tblMemberVisitNew.MemberID --PK=FK --4. Describe an update, delete, and insert anomaly for tblMemberVisit --Update SELECT * FROM tblMemberVisit UPDATE tblMemberVisit SET MemberLastName = 'Davison' WHERE ServerLogID = 1454 --This statement introduces an inconsistency in the database. What is Davis' real name? --Reverse change... UPDATE tblMemberVisit SET MemberLastName = 'Davis' WHERE ServerLogID = 1454 --Delete DELETE tblMemberVisit WHERE ServerLogID = 1457 --Potentially deleted any reference to the member 'McGann' --Reverse change... INSERT INTO tblMemberVisit (ServerLogID, MemberID, MemberLastName, LoginTime, LogoutTime) VALUES (1457, 62, 'McGann', '2005-11-07 16:08', '2005-11-07 17:00') INSERT INTO tblMemberVisit (ServerLogID, MemberID, MemberLastName, LoginTime, LogoutTime) VALUES (1457, 58, 'Luce', '2005-11-06 3:10', '2005-11-06 4:27') SELECT * FROM tblMemberVisit --Insert INSERT INTO tblMemberVisit (MemberID, MemberLastName) VALUES (135, 'Pardue') --This statement just introduced a NULL for the primary key --Reverse change... DELETE tblMemberVisit WHERE MemberID = 135 /* 5. Explain why tblMemberVisit is not in 3NF tblMemberVisit is not in 3NF because it contains a transitive dependency. MemberID --> MemberLastName MemberLastname is dependent on a non-key attribute. The problem with this relation is that it is combining data from two entities. Member data such as lastname should be in the member table. Data regarding visits to a website should be in the visit table. */ --Write the SQL to create the new tables for tblStudentEnroll /* The functional dependency Jnumber --> LastName suggests there is another theme included in the relation. That is, a Student theme or entity. */ SELECT * FROM tblStudentEnroll --Create a new table for the second theme --DROP TABLE tblStudent SELECT DISTINCT Jnumber, LastName INTO tblStudent FROM tblStudentEnroll SELECT Jnumber, LastName FROM tblStudent --Add a surrogate primary key to our new relation ALTER TABLE tblStudent ADD StudentID INT IDENTITY(1,1) PRIMARY KEY SELECT StudentID, Jnumber, LastName FROM tblStudent --Create a FK in tblStudentEnroll ALTER TABLE tblStudentEnroll ADD StudentID INT NULL SELECT StudentID, Jnumber, LastName FROM tblStudent SELECT * FROM tblStudentEnroll --Link the two tables by inserting the PK value into the FK UPDATE tblStudentEnroll SET tblStudentEnroll.StudentID = tblStudent.StudentID --SELECT * FROM tblStudentEnroll, tblStudent WHERE tblStudentEnroll.Jnumber=tblStudent.Jnumber SELECT * FROM tblStudentEnroll --3D. Create a referential integrity constraint ALTER TABLE tblStudentEnroll ADD CONSTRAINT fk_StudentID FOREIGN KEY (StudentID) REFERENCES tblStudent(StudentID) --Cleanup. Remove copied columns from tblStudentEnroll --Very very dangerous operation! ALTER TABLE tblStudentEnroll DROP COLUMN Jnumber, LastName SELECT StudentEnrollID, StudentID, CRN, Grade FROM tblStudentEnroll SELECT * FROM tblStudent, tblStudentEnroll WHERE tblStudent.StudentID=tblStudentEnroll.StudentID --Write the SQL to create the new tables /* The functional dependency Jnumber --> LastName suggests there is another theme included in the relation. That is, a Student theme or entity. */ SELECT * FROM tblMemberVisit --Create a new table for the second theme --DROP TABLE tblMember SELECT DISTINCT MemberID, MemberLastName INTO tblMember FROM tblMemberVisit SELECT MemberID, MemberLastName FROM tblMember /* The situation here for the surrogate key is a little different. Do we dispense with the old value or create a new set? We can't modify MemberID to be an IDENITY attribute using T-SQL. for example: */ ALTER TABLE tblMember ALTER COLUMN MemberID IDENTITY(1,1) PRIMARY KEY /* But we can change it in Enterprise manager... And so when we insert a new member, the sequence picks up where the last value left off. */ INSERT INTO tblMember (MemberLastName) VALUES('Landry') SELECT MemberID, MemberLastName FROM tblMember /* But we do need to add a Primary Key constraint */ ALTER TABLE tblMember ADD CONSTRAINT CnstMember_PK PRIMARY KEY (MemberID) /* If we use MemberID as the primary key, we don't need to create a foreign key in tblMemberVisite because it is already there. */ SELECT ServerLogID, MemberID, LoginTime, FROM tblMemberVisit --Create a FK in tblStudentEnroll --ALTER TABLE tblMemberVisit --ADD MemberID INT NULL --Nor do we need to Link the two tables by inserting the PK value into the FK UPDATE tblMemberVisit SET tblMemberVisit.MemberID = tblMember.MemberID FROM tblMemberVisit, tblMember WHERE tblMemberVisit.PreviousPK=tblMember.PrevousPK --3D. Create a referential integrity constraint ALTER TABLE tblMemberVisit ADD CONSTRAINT fk_MemberID FOREIGN KEY (MemberID) REFERENCES tblMember(MemberID) --Cleanup. Remove copied columns --Very very dangerous operation! ALTER TABLE tblMemberVisit DROP COLUMN MemberLastName SELECT * FROM tblMemberVisit --And then demonstrate result with JOIN CREATE VIEW vwMemberVisit AS SELECT ServerLogID, tblMemberVisit.MemberID, tblMember.MemberLastName, LoginTime, LogoutTime FROM tblMember, tblMemberVisit WHERE tblMember.MemberID=tblMemberVisit.MemberID SELECT * FROM vwMemberVisit INSERT INTO vwMemberVisit (...) values (...)