Using postgres i have a query that is taking 7 seconds to run,
first, i will give some context ,
i have a table called courses with ~600 rows
create table code:
CREATE TABLE public.courses
(
id integer NOT NULL DEFAULT nextval('courses_id_seq'::regclass),
name character varying(255) NOT NULL,
text text NOT NULL,
formation_type integer NOT NULL,
status smallint NOT NULL,
slug character varying(255) NOT NULL,
course_type integer NOT NULL DEFAULT 1,
CONSTRAINT courses_pkey PRIMARY KEY (id)
USING INDEX TABLESPACE data_x01
)
WITH (
OIDS=FALSE
)
a table called
course_prices with 1.5Million rows
create table code:
CREATE TABLE public.course_prices
(
id integer NOT NULL DEFAULT nextval('course_prices_id_seq'::regclass),
course integer NOT NULL,
institution integer NOT NULL,
modality integer NOT NULL,
shift integer NOT NULL,
price numeric(10,2) NOT NULL,
status smallint NOT NULL,
duration integer,
CONSTRAINT course_prices_pkey PRIMARY KEY (id)
USING INDEX TABLESPACE data_x01
)
WITH (
OIDS=FALSE
)
TABLESPACE data_app01;
CREATE INDEX course_prices_course_idx3
ON public.course_prices
USING btree
(course);
CREATE INDEX course_prices_course_institution_idx
ON public.course_prices
USING btree
(course, institution);
CREATE INDEX idx_course_prices_composite
ON public.course_prices
USING btree
(course, modality);
and a table called course_price_promo
with 1.5Million rows
create table:
-- Table: public.course_price_promo
-- DROP TABLE public.course_price_promo;
CREATE TABLE public.course_price_promo
(
id integer NOT NULL DEFAULT nextval('course_price_promo_id_seq'::regclass),
course_price integer NOT NULL,
price numeric(10,2),
begin_date date NOT NULL,
end_date date NOT NULL,
status integer NOT NULL DEFAULT 1,
CONSTRAINT course_price_promo_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
ALTER TABLE public.course_price_promo
OWNER TO u_app01;
CREATE INDEX course_price_promo_course_price_begin_date_end_date_price_idx
ON public.course_price_promo
USING btree
(course_price, begin_date, end_date, price);
urse_price_promo_course_price_idx;
CREATE INDEX course_price_promo_course_price_idx
ON public.course_price_promo
USING btree
(course_price);
for now i guess this informations is enough, and the query that im running is:
SELECT
"courses"."id",
"courses"."name",
"courses"."slug",
"courses"."course_type",
"modalities"."name" AS "modality",
"formation_types"."name" AS "formation_type",
"course_modality_content"."duration" AS "duration",
LEAST(MIN(course_prices.price), MIN(course_price_promo.price)) AS min_price
FROM
"courses"
INNER JOIN
"course_prices" ON "course_prices"."course" = "courses"."id"
INNER JOIN
"modalities" ON "course_prices"."modality" = "modalities"."id"
INNER JOIN
"formation_types" ON "courses"."formation_type" = "formation_types"."id"
INNER JOIN
"course_modality_content" ON "course_modality_content"."course" = "courses"."id"
AND "course_modality_content"."modality" = "modalities"."id"
LEFT JOIN
"course_price_promo" ON "course_price_promo"."course_price" = "course_prices"."id"
AND "course_price_promo"."status" = '1'
AND "course_price_promo"."begin_date" <= '2024-06-07'
AND "course_price_promo"."end_date" >= '2024-06-07'
WHERE
"courses"."status" = '1'
AND "course_prices"."status" = '1'
GROUP BY
"courses"."id",
"modalities"."name",
"formation_types"."name",
"course_modality_content"."id"
ORDER BY
"courses"."name" ASC
LIMIT
12
the query plan:
"Sort (cost=718919.76..722498.36 rows=1431440 width=114) (actual time=7363.629..7363.647 rows=502 loops=1)"
" Sort Key: courses.name"
" Sort Method: quicksort Memory: 121kB"
" -> GroupAggregate (cost=445126.82..484491.42 rows=1431440 width=114) (actual time=5651.304..7361.781 rows=502 loops=1)"
" Group Key: courses.id, modalities.name, formation_types.name, course_modality_content.id"
" -> Sort (cost=445126.82..448705.42 rows=1431440 width=114) (actual time=5650.097..6837.582 rows=1668599 loops=1)"
" Sort Key: courses.id, modalities.name, formation_types.name, course_modality_content.id"
" Sort Method: external merge Disk: 208552kB"
" -> Hash Join (cost=97433.10..210698.48 rows=1431440 width=114) (actual time=629.076..2497.610 rows=1668599 loops=1)"
" Hash Cond: ((course_prices.course = courses.id) AND (course_prices.modality = modalities.id))"
" -> Hash Left Join (cost=96962.92..189000.87 rows=1668599 width=20) (actual time=627.370..2002.865 rows=1668599 loops=1)"
" Hash Cond: (course_prices.id = course_price_promo.course_price)"
" -> Seq Scan on course_prices (cost=0.00..47749.99 rows=1668599 width=18) (actual time=0.004..289.424 rows=1668599 loops=1)"
" -> Hash (cost=75375.36..75375.36 rows=1241885 width=10) (actual time=626.563..626.563 rows=1242761 loops=1)"
" Buckets: 131072 Batches: 16 Memory Usage: 4360kB"
" -> Seq Scan on course_price_promo (cost=0.00..75375.36 rows=1241885 width=10) (actual time=54.529..438.553 rows=1242761 loops=1)"
" Filter: ((begin_date <= '2024-06-07'::date) AND (end_date >= '2024-06-07'::date))"
" Rows Removed by Filter: 425838"
" -> Hash (cost=459.93..459.93 rows=683 width=114) (actual time=1.693..1.693 rows=681 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 108kB"
" -> Hash Join (cost=190.91..459.93 rows=683 width=114) (actual time=0.591..1.451 rows=681 loops=1)"
" Hash Cond: (course_modality_content.modality = modalities.id)"
" -> Hash Join (cost=189.86..449.49 rows=683 width=103) (actual time=0.578..1.287 rows=681 loops=1)"
" Hash Cond: (courses.id = course_modality_content.course)"
" -> Hash Join (cost=1.18..250.71 rows=654 width=81) (actual time=0.019..0.579 rows=654 loops=1)"
" Hash Cond: (courses.formation_type = formation_types.id)"
" -> Seq Scan on courses (cost=0.00..240.54 rows=654 width=74) (actual time=0.001..0.244 rows=654 loops=1)"
" -> Hash (cost=1.08..1.08 rows=8 width=15) (actual time=0.009..0.009 rows=8 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 9kB"
" -> Seq Scan on formation_types (cost=0.00..1.08 rows=8 width=15) (actual time=0.004..0.004 rows=8 loops=1)"
" -> Hash (cost=179.97..179.97 rows=697 width=22) (actual time=0.550..0.550 rows=685 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 46kB"
" -> Seq Scan on course_modality_content (cost=0.00..179.97 rows=697 width=22) (actual time=0.002..0.440 rows=685 loops=1)"
" -> Hash (cost=1.02..1.02 rows=2 width=11) (actual time=0.008..0.008 rows=2 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 9kB"
" -> Seq Scan on modalities (cost=0.00..1.02 rows=2 width=11) (actual time=0.003..0.003 rows=2 loops=1)"
"Planning time: 1.652 ms"
"Execution time: 7405.942 ms"
what i realized is that, when i increase work_mem to 500MB, the query plan, instead of using GroupAggregate, uses the HashAggregate, and the execution time decreases to 3.5s, but i dont really know the consequences of it.
some things I wanted to know are,
1- my joins are all generating a Seq Scan. Are there any index changes I could make to generate an Index Scan, or in this situation would it not make sense?
2- “Are there any changes to the query that could be made to improve performance?”
lcs_ is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.
2