SQL is deceptively simple—basic queries are easy, but solving complex business problems requires advanced patterns. From self-joins to window functions, mastering these techniques transforms how you approach data challenges. This guide covers essential advanced SQL patterns from a senior developer's perspective.
Why Advanced SQL Matters
Mastering advanced SQL enables:
- Performance: Move logic to the database where it's fastest
- Simplicity: Replace application code with single queries
- Reliability: Database-level operations are atomic
- Analysis: Complex reporting without data export
- Efficiency: Reduce network round-trips
Correlated Subqueries
Get Top N Per Group
Find the top 15% of sales per region and week:
-- MS Access/MySQL pattern
SELECT *
FROM sales_by_shops_weekly AS sbs
LEFT JOIN shops_count_weekly AS sc
ON sbs.group_id = sc.group_id
AND sbs.week = sc.week
WHERE (
SELECT COUNT(*)
FROM sales_by_shops_weekly AS sbs2
WHERE sbs2.sold > sbs.sold
AND sbs2.week = sbs.week
AND sbs2.group_id = sbs.group_id
) < sc.shops_count * 15 / 100;
This pattern: 1. Counts how many rows have higher sales than the current row 2. Keeps rows where that count is less than 15% of total shops 3. Effectively returns the top 15% performers
Find Rows Without Matching Related Records
-- Find customers with no orders
SELECT c.*
FROM customers c
WHERE NOT EXISTS (
SELECT 1
FROM orders o
WHERE o.customer_id = c.id
);
-- Alternative using LEFT JOIN
SELECT c.*
FROM customers c
LEFT JOIN orders o ON o.customer_id = c.id
WHERE o.id IS NULL;
Window Functions
Window functions perform calculations across related rows without collapsing them.
Running Totals
SELECT
order_date,
amount,
SUM(amount) OVER (ORDER BY order_date) AS running_total,
SUM(amount) OVER (
PARTITION BY customer_id
ORDER BY order_date
) AS customer_running_total
FROM orders;
Ranking Within Groups
SELECT
department,
employee_name,
salary,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank,
DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dense_rank,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS row_num
FROM employees;
Differences:
RANK(): Gaps after ties (1, 1, 3)DENSE_RANK(): No gaps (1, 1, 2)ROW_NUMBER(): Unique numbers (1, 2, 3)
Comparing to Previous/Next Rows
SELECT
sale_date,
amount,
LAG(amount, 1) OVER (ORDER BY sale_date) AS prev_amount,
LEAD(amount, 1) OVER (ORDER BY sale_date) AS next_amount,
amount - LAG(amount, 1) OVER (ORDER BY sale_date) AS change_from_prev
FROM daily_sales;
First/Last in Group
SELECT DISTINCT
department,
FIRST_VALUE(employee_name) OVER (
PARTITION BY department
ORDER BY hire_date
) AS first_hire,
LAST_VALUE(employee_name) OVER (
PARTITION BY department
ORDER BY hire_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_hire
FROM employees;
Common Table Expressions (CTEs)
CTEs improve readability and enable recursive queries.
Basic CTE
WITH monthly_totals AS (
SELECT
DATE_TRUNC('month', order_date) AS month,
SUM(amount) AS total
FROM orders
GROUP BY DATE_TRUNC('month', order_date)
),
monthly_growth AS (
SELECT
month,
total,
LAG(total) OVER (ORDER BY month) AS prev_total,
(total - LAG(total) OVER (ORDER BY month)) /
LAG(total) OVER (ORDER BY month) * 100 AS growth_pct
FROM monthly_totals
)
SELECT * FROM monthly_growth
WHERE growth_pct > 10;
Recursive CTE for Hierarchies
-- Employee hierarchy
WITH RECURSIVE employee_tree AS (
-- Base case: top-level managers
SELECT
id,
name,
manager_id,
1 AS level,
name AS path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case: employees with managers
SELECT
e.id,
e.name,
e.manager_id,
et.level + 1,
et.path || ' > ' || e.name
FROM employees e
INNER JOIN employee_tree et ON e.manager_id = et.id
)
SELECT * FROM employee_tree ORDER BY path;
Recursive CTE for Date Ranges
-- Generate date series
WITH RECURSIVE date_series AS (
SELECT DATE '2024-01-01' AS date
UNION ALL
SELECT date + INTERVAL '1 day'
FROM date_series
WHERE date < DATE '2024-12-31'
)
SELECT
ds.date,
COALESCE(SUM(o.amount), 0) AS daily_total
FROM date_series ds
LEFT JOIN orders o ON DATE(o.order_date) = ds.date
GROUP BY ds.date
ORDER BY ds.date;
Character Encoding Recovery
When data has encoding issues (common with Cyrillic or Asian text):
Recover cp1251 from UTF-8 Mojibake
-- MySQL: Fix garbled cp1251 text stored as UTF-8
SELECT
id,
CAST(
CONVERT(
CAST(CONVERT(text_column USING cp1251) AS BINARY)
USING utf8
)
AS CHAR CHARACTER SET cp1251
) COLLATE cp1251_general_ci AS fixed_text
FROM broken_table;
-- Update the data
UPDATE broken_table
SET text_column = CAST(
CONVERT(
CAST(CONVERT(text_column USING cp1251) AS BINARY)
USING utf8
)
AS CHAR CHARACTER SET cp1251
) COLLATE cp1251_general_ci
WHERE text_column LIKE '%Ð%'; -- Pattern for mojibake
Pivoting Data
Static Pivot
-- PostgreSQL/MySQL
SELECT
product_id,
SUM(CASE WHEN month = 1 THEN amount ELSE 0 END) AS jan,
SUM(CASE WHEN month = 2 THEN amount ELSE 0 END) AS feb,
SUM(CASE WHEN month = 3 THEN amount ELSE 0 END) AS mar,
SUM(CASE WHEN month = 4 THEN amount ELSE 0 END) AS apr
FROM sales
GROUP BY product_id;
-- SQL Server with PIVOT
SELECT product_id, [1] AS jan, [2] AS feb, [3] AS mar, [4] AS apr
FROM (
SELECT product_id, month, amount
FROM sales
) AS source
PIVOT (
SUM(amount) FOR month IN ([1], [2], [3], [4])
) AS pivot_table;
Unpivot (Columns to Rows)
-- Turn columns into rows
SELECT product_id, 'jan' AS month, jan AS amount FROM monthly_sales
UNION ALL
SELECT product_id, 'feb' AS month, feb AS amount FROM monthly_sales
UNION ALL
SELECT product_id, 'mar' AS month, mar AS amount FROM monthly_sales;
-- PostgreSQL with VALUES
SELECT
product_id,
month,
amount
FROM monthly_sales
CROSS JOIN LATERAL (
VALUES
('jan', jan),
('feb', feb),
('mar', mar)
) AS unpivot(month, amount);
JSON Operations
PostgreSQL JSON Queries
-- Query JSON data
SELECT
id,
data->>'name' AS name,
data->>'email' AS email,
(data->>'age')::int AS age
FROM users
WHERE data->>'status' = 'active';
-- Query nested JSON
SELECT
data->'address'->>'city' AS city,
data->'address'->>'zip' AS zip
FROM users;
-- Aggregate into JSON
SELECT
department,
jsonb_agg(jsonb_build_object(
'name', name,
'salary', salary
)) AS employees
FROM employees
GROUP BY department;
MySQL JSON
SELECT
id,
JSON_EXTRACT(data, '$.name') AS name,
JSON_EXTRACT(data, '$.address.city') AS city
FROM users
WHERE JSON_EXTRACT(data, '$.active') = true;
Gap and Island Detection
Find consecutive sequences:
WITH numbered AS (
SELECT
sale_date,
amount,
sale_date - (ROW_NUMBER() OVER (ORDER BY sale_date))::int AS grp
FROM daily_sales
WHERE amount > 1000 -- High-sales days
),
islands AS (
SELECT
grp,
MIN(sale_date) AS start_date,
MAX(sale_date) AS end_date,
COUNT(*) AS streak_length
FROM numbered
GROUP BY grp
)
SELECT * FROM islands
WHERE streak_length >= 5 -- At least 5 consecutive days
ORDER BY start_date;
Median Calculation
-- PostgreSQL
SELECT
department,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary) AS median_salary
FROM employees
GROUP BY department;
-- MySQL 8+
SELECT
department,
AVG(salary) AS avg_salary,
(
SELECT AVG(sub.salary)
FROM (
SELECT salary,
ROW_NUMBER() OVER (ORDER BY salary) AS rn,
COUNT(*) OVER () AS cnt
FROM employees e2
WHERE e2.department = employees.department
) sub
WHERE sub.rn IN (FLOOR((cnt + 1) / 2), CEIL((cnt + 1) / 2))
) AS median_salary
FROM employees
GROUP BY department;
Deduplication Patterns
Keep Latest Record
-- Using ROW_NUMBER
WITH ranked AS (
SELECT *,
ROW_NUMBER() OVER (
PARTITION BY email
ORDER BY created_at DESC
) AS rn
FROM users
)
DELETE FROM users
WHERE id IN (
SELECT id FROM ranked WHERE rn > 1
);
-- Using self-join
DELETE u1 FROM users u1
INNER JOIN users u2
ON u1.email = u2.email
AND u1.created_at < u2.created_at;
Key Takeaways
- Window functions: Powerful for analytics without subqueries
- CTEs for readability: Break complex queries into steps
- Recursive CTEs: Handle hierarchies and series generation
- Correlated subqueries: Row-by-row calculations when needed
- Encoding awareness: Database charset matters
- Test performance: EXPLAIN ANALYZE before production
Advanced SQL patterns often replace hundreds of lines of application code with a single query—invest in mastering them, and your data handling will transform.