Writing complex analytical queries with self-joins and subqueries? Window functions perform calculations across rows without grouping.
Common Window Function Patterns:
-- Basic structure
SELECT
column1,
column2,
WINDOW_FUNCTION() OVER (
PARTITION BY partition_column
ORDER BY sort_column
ROWS/RANGE BETWEEN frame_start AND frame_end
) AS calculated_column
FROM table_name;
-- Available functions:
-- Ranking: ROW_NUMBER(), RANK(), DENSE_RANK(), NTILE()
-- Analytics: LEAD(), LAG(), FIRST_VALUE(), LAST_VALUE(), NTH_VALUE()
-- Aggregates: SUM(), AVG(), COUNT(), MIN(), MAX() (as window functions)
Real-World E-commerce Analytics:
-- Customer purchasing behavior analysis
SELECT
CustomerId,
OrderDate,
TotalAmount,
-- Running total per customer
SUM(TotalAmount) OVER (
PARTITION BY CustomerId
ORDER BY OrderDate
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS CustomerLifetimeValue,
-- Average order value per customer
AVG(TotalAmount) OVER (
PARTITION BY CustomerId
) AS AvgOrderValue,
-- Difference from previous order
TotalAmount - LAG(TotalAmount, 1) OVER (
PARTITION BY CustomerId
ORDER BY OrderDate
) AS AmountChangeFromPrevious,
-- Days since last order
DATEDIFF(DAY,
LAG(OrderDate, 1) OVER (
PARTITION BY CustomerId
ORDER BY OrderDate
),
OrderDate
) AS DaysSinceLastOrder,
-- Rank orders by amount per customer
RANK() OVER (
PARTITION BY CustomerId
ORDER BY TotalAmount DESC
) AS OrderRankByAmount,
-- Percent of customer's total spending
TotalAmount * 100.0 / SUM(TotalAmount) OVER (
PARTITION BY CustomerId
) AS PercentOfCustomerTotal,
-- Moving average of last 3 orders
AVG(TotalAmount) OVER (
PARTITION BY CustomerId
ORDER BY OrderDate
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS MovingAvg3Orders
FROM Orders
WHERE OrderDate >= DATEADD(MONTH, -12, GETDATE())
ORDER BY CustomerId, OrderDate;
Advanced: Gap and Island Problems
-- Find continuous date ranges (islands)
WITH DateEvents AS (
SELECT
UserId,
LoginDate,
-- Mark start of new island when gap > 1 day
CASE WHEN DATEDIFF(DAY,
LAG(LoginDate) OVER (PARTITION BY UserId ORDER BY LoginDate),
LoginDate
) > 1
THEN 1
ELSE 0
END AS IslandStart
FROM UserLogins
),
IslandGroups AS (
SELECT
UserId,
LoginDate,
-- Create island groups
SUM(IslandStart) OVER (
PARTITION BY UserId
ORDER BY LoginDate
) AS IslandId
FROM DateEvents
)
SELECT
UserId,
MIN(LoginDate) AS IslandStartDate,
MAX(LoginDate) AS IslandEndDate,
DATEDIFF(DAY, MIN(LoginDate), MAX(LoginDate)) + 1 AS IslandLengthDays,
COUNT(*) AS LoginsInIsland
FROM IslandGroups
GROUP BY UserId, IslandId
HAVING COUNT(*) >= 3 -- Only islands with 3+ consecutive logins
ORDER BY UserId, IslandStartDate;
-- Result: Identifies streaks of consecutive daily logins
