/* CIS 324 CHAPTER 7 GROUP BY I */ /* Processing groups of rows. Grouping Data pp. 260 Common uses of GROUP BY: frequency distributions, looking for patterns SELECT attribute list FROM table/view/SQL expression WHERE condition(s) GROUP BY attribute list HAVING condition ORDER BY attribute list [ASC | DESC] */ /* Note: you don't have the SqlExerciseSystem database on your local machine and so you won't be able to run the SqlExerciseSystem queries. The avi accompanying this script (GROUP BY intro) contains video of these queries. */ USE SqlExerciseSystem --Frequency Distributions --List the top ten error messages by students in the SQL Exercise System --I could use this information to help me refine my lesson plans and lecture SELECT ErrorNumber, ErrorCount, Description FROM (SELECT TOP 10 ErrorNumber, COUNT(*) AS ErrorCount FROM tblExerciseResponse WHERE ErrorNumber IS NOT NULL AND ErrorNumber <> 0 GROUP BY ErrorNumber ORDER BY COUNT(*) DESC) AS vwTopTenErrors, (SELECT Error, Description FROM Master.dbo.sysMessages) AS vwSystemMessages WHERE vwTopTenErrors.ErrorNumber=vwSystemMessages.Error ORDER BY ErrorCount DESC SELECT * FROM Master.dbo.sysMessages --The above result can be pasted into Excel and graphs generated very easily. --Or we can use the result in a reporting engine. --Frequency distributions /* How many students have correctly answered each exercise so far? That is, produce a frequency distribution of correct responses by exercise. In this case, this is such a frequent operation, that I encapsulated the query into a stored procedure and parameterized the query. Our ClassID = 30 this semester. */ EXEC GetExerciseProgress 30, '2007-01-01 00:00:00.000', 21 ALTER PROC GetExerciseProgress @ClassID INT, @BeginDate DATETIME = NULL, @EnrollmentCount INT AS BEGIN SELECT AssignmentShortName, ExerciseShortName, (SELECT COUNT(*) AS ResponseCount FROM tblExerciseResponse WHERE PersonID IN(SELECT PersonID FROM tblRoster WHERE ClassID=@ClassID) AND tblExerciseResponse.ExerciseID = tblExercise.ExerciseID AND tblExerciseResponse.SubmitTime > @BeginDate) AS ResponseCount, (SELECT COUNT(DISTINCT PersonID) AS ResponseCount FROM tblExerciseResponse WHERE Correct = 1 AND PersonID IN(SELECT PersonID FROM tblRoster WHERE ClassID=@ClassID) AND tblExerciseResponse.ExerciseID = tblExercise.ExerciseID AND tblExerciseResponse.SubmitTime > @BeginDate) AS CorrectCount, CAST((SELECT COUNT(DISTINCT PersonID) AS ResponseCount FROM tblExerciseResponse WHERE Correct = 1 AND PersonID IN(SELECT PersonID FROM tblRoster WHERE ClassID=@ClassID) AND tblExerciseResponse.ExerciseID = tblExercise.ExerciseID AND tblExerciseResponse.SubmitTime > @BeginDate)AS REAL)/@EnrollmentCount*100 AS PercentOfClassCorrect FROM tblAssignment, tblAssignmentExercise, tblExercise WHERE tblAssignment.AssignmentID=tblAssignmentExercise.AssignmentID AND tblExercise.ExerciseID=tblAssignmentExercise.ExerciseID AND tblAssignment.ClassID = @ClassID END /* Pattern or Trend Analysis When do students tend to use the sqlexercise system by hour of day? Let's say I want to know the best time of the day to conduct maintenance on the server. This would be the time of day when usage tends to be smallest. --SELECT TOP 100 * FROM tblExerciseResponse --Two ways to pull out the hour: hour() or datepart */ SELECT DATEPART(hour, SubmitTime) AS HOUR, COUNT(*) AS SubmitCount FROM tblExerciseResponse GROUP BY DATEPART(hour, SubmitTime) ORDER BY DATEPART(hour, SubmitTime) SELECT DATEPART(Weekday, SubmitTime) AS HOUR, COUNT(*) AS SubmitCount FROM tblExerciseResponse GROUP BY DATEPART(Weekday, SubmitTime) ORDER BY DATEPART(Weekday, SubmitTime) /* Day of the week? Sunday = 1, Saturday = 7 Last semester in CIS 324 Is there a pattern of usage by day of week? */ SELECT DATEPART(weekday,SubmitTime) AS DayOfWeek, COUNT(*) AS ResponseCount FROM tblExerciseResponse, tblExercise WHERE PersonID IN(SELECT PersonID FROM tblRoster WHERE ClassID IN(28, 29)) AND tblExerciseResponse.ExerciseID = tblExercise.ExerciseID AND tblExerciseResponse.SubmitTime BETWEEN '2006-09-01 00:00:00.000' AND '2007-01-01 00:00:00.000' GROUP BY DATEPART(weekday,SubmitTime) ORDER BY DATEPART(weekday,SubmitTime) --Day of the week? Sunday = 1, Saturday = 7 --What about this semester? Is it different with an online course? SELECT DATEPART(weekday,SubmitTime) AS DayOfYear, COUNT(*) AS ResponseCount FROM tblExerciseResponse, tblExercise WHERE PersonID IN(SELECT PersonID FROM tblRoster WHERE ClassID IN(30)) AND tblExerciseResponse.ExerciseID = tblExercise.ExerciseID AND tblExerciseResponse.SubmitTime > '2007-01-01 00:00:00.000' GROUP BY DATEPART(weekday,SubmitTime) ORDER BY DATEPART(weekday,SubmitTime) /* We can analyze patterns over time, say the entire semester */ SELECT DATEPART(dayofyear,SubmitTime) AS DayOfYear, COUNT(*) AS ResponseCount FROM tblExerciseResponse, tblExercise WHERE PersonID IN(SELECT PersonID FROM tblRoster WHERE ClassID IN(28, 29)) AND tblExerciseResponse.ExerciseID = tblExercise.ExerciseID AND tblExerciseResponse.SubmitTime BETWEEN '2006-09-01 00:00:00.000' AND '2007-01-01 00:00:00.000' GROUP BY DATEPART(dayofyear,SubmitTime) ORDER BY DATEPART(dayofyear,SubmitTime) --What about our class? SELECT DATEPART(dayofyear,SubmitTime) AS DayOfYear, COUNT(*) AS ResponseCount FROM tblExerciseResponse, tblExercise WHERE PersonID IN(SELECT PersonID FROM tblRoster WHERE ClassID IN(30)) AND tblExerciseResponse.ExerciseID = tblExercise.ExerciseID AND tblExerciseResponse.SubmitTime > '2007-01-01 00:00:00.000' GROUP BY DATEPART(dayofyear,SubmitTime) ORDER BY DATEPART(dayofyear,SubmitTime)