Before the client took the last ditch effort of just restarting SQL Server, I checked traces. There were 9 user traces collecting a ton of trace events.
I manually killed them all and suddenly performance returned to normal. Phew! Crisis averted.
As a follow up from that issue, I created a script that stops and then deletes all user traces. We are discussing setting up a job to run this script periodically to keep traces at bay and I am educating the group on proper trace discipline (and Extended Events).
-- Turn off (## rows returned) messages
SET NOCOUNT ON;
-- Set the stage by declaring working variables
DECLARE @SQLcmd VARCHAR(1024),
@TraceID SMALLINT
-- Create a temp table to hold the results of the trace detail
CREATE TABLE #TraceDetail
(TraceID SMALLINT,
Property SMALLINT,
Value SQL_VARIANT
)
-- Populate the temp table with only user trace data (TraceID 1 is the default system trace)
-- In our environment, TraceID #2 is our monitoring software, so we actually have WHERE TraceID > 2
INSERT INTO #TraceDetail (TraceID, Property, Value)
SELECT * FROM ::fn_trace_getinfo(NULL) WHERE TraceID > 1
-- Check to see if there is anything in the table, and if so, start stopping and killing the traces
WHILE (SELECT COUNT(*) FROM #TraceDetail) > 1
BEGIN
-- Just grab the first ID in the table
SELECT TOP 1 @TraceID = TraceID FROM #TraceDetail
-- Build the command to stop (status = 0) and then delete (status = 2) the trace
SELECT @SQLcmd = 'EXEC sp_trace_setstatus @traceid = ' + CONVERT(VARCHAR(3), @TraceID) + ', @status = 0;
EXEC sp_trace_setstatus @traceid = ' + CONVERT(VARCHAR(3), @TraceID) + ', @status = 2;'
-- Execute the command
EXEC (@SQLcmd)
-- Report what we killed
PRINT 'Killed TraceID ' + CONVERT(VARCHAR(3), @TraceID)
-- Delete all entries for that TraceID (typically 5 rows each)
DELETE FROM #TraceDetail WHERE TraceID = @TraceID
END
-- Drop the temp table
DROP TABLE #TraceDetail