I want do fast update in my GP table, and i created index in source table, to improve perfomance but it work very slow, i test same code in Postgres and see that it work very fast and use index when performing subquery, why GP not use index in subquery to improve perfomance??? i also tested with SET enable_indexonlyscan = on;
result was same
-- Drop tables if they exist
DROP TABLE IF EXISTS public.source_table;
DROP TABLE IF EXISTS public.target_table;
-- Create source_table
CREATE TABLE public.source_table (
id SERIAL,
dummy_col1 TEXT
)
DISTRIBUTED BY (id);
-- Create target_table
CREATE TABLE public.target_table (
id SERIAL,
dummy_col1 TEXT
)
DISTRIBUTED BY (id);
-- Populate source_table with random data
INSERT INTO public.source_table (dummy_col1)
SELECT md5(random()::text) AS dummy_col1
FROM generate_series(1, 10000) AS id;
-- Populate target_table with random data
INSERT INTO public.target_table (dummy_col1)
SELECT md5(random()::text) AS dummy_col1
FROM generate_series(1, 10000) AS id;
-- Create an index on the id column of source_table
CREATE INDEX idx_source_table ON public.source_table (id);
-- Example SELECT query joining source_table and target_table on id
SELECT *
FROM public.source_table s
JOIN public.target_table t ON s.id = t.id;
-- Update target_table's dummy_col1 with values from source_table where ids match
UPDATE public.target_table t
SET dummy_col1 = (
SELECT s.dummy_col1
FROM public.source_table s
WHERE t.id = s.id
)
WHERE EXISTS (
SELECT 1
FROM public.source_table s
WHERE t.id = s.id
);
GP QUERY PLAN
POSTGRES QUERY PLAN