Query runs into an Endless Loop


#1

Hi,
I’m using MemSQL running a benchmark, ‘Star Schema Benchmark’ (SSB). I find three queries in SSB(2.2, 2.3, 3.3) can not be executed by MemSQL in single node( one leaf and one master aggregator in single node). It seems that the query runs into an endless loop. Here are some details:

Memory: 100GB +
CPU: 32 cores
Partitions: 16
SSB scale factor: 20
Memsql version: 6.7.

schema:

CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER,
C_NAME VARCHAR(25) NOT NULL,
C_ADDRESS VARCHAR(40) NOT NULL,
C_CITY VARCHAR(10) NOT NULL,
C_NATION VARCHAR(15) NOT NULL,
C_REGION VARCHAR(12) NOT NULL,
C_PHONE VARCHAR(15) NOT NULL,
C_MKTSEGMENT VARCHAR(10) NOT NULL);

CREATE TABLE DATES ( D_DATEKEY INTEGER,
D_DATE VARCHAR(18) NOT NULL,
D_DAYOFWEEK VARCHAR(18) NOT NULL,
D_MONTH VARCHAR(9) NOT NULL,
D_YEAR INTEGER NOT NULL,
D_YEARMONTHNUM INTEGER,
D_YEARMONTH VARCHAR(7) NOT NULL,
D_DAYNUMINWEEK INTEGER,
D_DAYNUMINMONTH INTEGER,
D_DAYNUMINYEAR INTEGER,
D_MONTHNUMINYEAR INTEGER,
D_WEEKNUMINYEAR INTEGER,
D_SELLINGSEASON VARCHAR(12) NOT NULL,
D_LASTDAYINWEEKFL INTEGER,
D_LASTDAYINMONTHFL INTEGER,
D_HOLIDAYFL INTEGER,
D_WEEKDAYFL INTEGER);

CREATE TABLE PART ( P_PARTKEY INTEGER,
P_NAME VARCHAR(22) NOT NULL,
P_MFGR VARCHAR(6) NOT NULL,
P_CATEGORY VARCHAR(7) NOT NULL,
P_BRAND VARCHAR(9) NOT NULL,
P_COLOR VARCHAR(11) NOT NULL,
P_TYPE VARCHAR(25) NOT NULL,
P_SIZE INTEGER NOT NULL,
P_CONTAINER VARCHAR(10) NOT NULL);

CREATE TABLE SUPPLIER ( S_SUPPKEY INTEGER,
S_NAME VARCHAR(25) NOT NULL,
S_ADDRESS VARCHAR(25) NOT NULL,
S_CITY VARCHAR(10) NOT NULL,
S_NATION VARCHAR(15) NOT NULL,
S_REGION VARCHAR(12) NOT NULL,
S_PHONE VARCHAR(15) NOT NULL);

CREATE TABLE LINEORDER ( LO_ORDERKEY BIGINT,
LO_LINENUMBER BIGINT,
LO_CUSTKEY INTEGER NOT NULL,
LO_PARTKEY INTEGER NOT NULL,
LO_SUPPKEY INTEGER NOT NULL,
LO_ORDERDATE INTEGER NOT NULL,
LO_ORDERPRIOTITY VARCHAR(15) NOT NULL,
LO_SHIPPRIOTITY INTEGER,
LO_QUANTITY BIGINT,
LO_EXTENDEDPRICE BIGINT,
LO_ORDTOTALPRICE BIGINT,
LO_DISCOUNT BIGINT,
LO_REVENUE BIGINT,
LO_SUPPLYCOST BIGINT,
LO_TAX BIGINT,
LO_COMMITDATE INTEGER NOT NULL,
LO_SHIPMODE VARCHAR(10) NOT NULL);

Load data:
load data infile ‘/dbgen/lineorder.tbl’ into table LINEORDER fields terminated by ‘|’ lines terminated by ‘|\n’;

Queries:
q2.2
SELECT SUM(LO_REVENUE), D_YEAR, P_BRAND
FROM LINEORDER, DATES, PART, SUPPLIER
WHERE LO_ORDERDATE = D_DATEKEY
AND LO_PARTKEY = P_PARTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND P_BRAND BETWEEN ‘MFGR#2221’
AND ‘MFGR#2228’
AND S_REGION = ‘ASIA’
GROUP BY D_YEAR, P_BRAND
ORDER BY D_YEAR, P_BRAND;

q2.3
SELECT SUM(LO_REVENUE), D_YEAR, P_BRAND
FROM LINEORDER, DATES, PART, SUPPLIER
WHERE LO_ORDERDATE = D_DATEKEY
AND LO_PARTKEY = P_PARTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND P_BRAND= ‘MFGR#2239’
AND S_REGION = ‘EUROPE’
GROUP BY D_YEAR, P_BRAND
ORDER BY D_YEAR, P_BRAND;

q3.3
SELECT C_CITY, S_CITY, D_YEAR, SUM(LO_REVENUE) AS REVENUE
FROM CUSTOMER, LINEORDER, SUPPLIER, DATES
WHERE LO_CUSTKEY = C_CUSTKEY
AND LO_SUPPKEY = S_SUPPKEY
AND LO_ORDERDATE = D_DATEKEY
AND (C_CITY=‘UNITED KI1’ OR C_CITY=‘UNITED KI5’)
AND (S_CITY=‘UNITED KI1’ OR S_CITY=‘UNITED KI5’)
AND D_YEAR >= 1992
AND D_YEAR >= 1997
GROUP BY C_CITY, S_CITY, D_YEAR
ORDER BY D_YEAR ASC, REVENUE DESC;

Really need help here!
Thanks.


#2

This has been resolved. Because we forgot to analyze the table, so that MemSQL generated an awful execution plan.


#3

Looks like you resolved your initial issue, but if you’re running the Star Schema Benchmark, I would recommend using columnstore. This will be significantly faster. Here’s an example schema.

create table lineorder
(LO_ORDERKEY int,
LO_LINENUMBER int,
LO_CUSTKEY int,
LO_PARTKEY int,
LO_SUPPKEY int,
LO_ORDERDATE int,
LO_ORDERPRIORITY char(15), 
LO_SHIPPRIORITY char(1),
LO_QUANTITY int,
LO_EXTENDEDPRICE int,
LO_ORDTOTALPRICE int,
LO_DISCOUNT int,
LO_REVENUE int,
LO_SUPPLYCOST int,
LO_TAX int,
LO_COMMITDATE int,
LO_SHIPMODE char(10),
KEY lodatesort (LO_ORDERDATE,LO_PARTKEY,LO_SUPPKEY) USING CLUSTERED COLUMNSTORE,
SHARD KEY (LO_PARTKEY)
);


create table part(
P_PARTKEY int,
P_NAME varchar(22), 
P_MFGR char(6),
P_CATEGORY char(7),
P_BRAND1 char(9),
P_COLOR varchar(11),
P_TYPE varchar(25),
P_SIZE int,
P_CONTAINER char(10),
KEY (P_PARTKEY) using clustered columnstore,
SHARD KEY (P_PARTKEY)
);

create REFERENCE table supplier(
S_SUPPKEY int, 
S_NAME char(25),
S_ADDRESS varchar(25),
S_CITY char(10),
S_NATION char(15),
S_REGION char(12),
S_PHONE char(15),
KEY (S_SUPPKEY) using clustered columnstore
);

create reference table customer(
C_CUSTKEY int,
C_NAME varchar(25),
C_ADDRESS varchar(25),
C_CITY char(10),
C_NATION char(15),
C_REGION char(12),
C_PHONE char(15),
C_MKTSEGMENT char(10),
KEY custsort (C_CUSTKEY,C_REGION,C_NATION,C_CITY) USING CLUSTERED COLUMNSTORE
);

create REFERENCE table date_dim(
D_DATEKEY int,
D_DATE char(18),
D_DAYOFWEEK char(10),
D_MONTH char(9),
D_YEAR int,
D_YEARMONTHNUM int,
D_YEARMONTH char(7),
D_DAYNUMINWEEK int,
D_DAYNUMINMONTH int,
D_DAYNUMINYEAR int,
D_MONTHNUMINYEAR int,
D_WEEKNUMINYEAR int,
D_SELLINGSEASON char(12),
D_LASTDAYINWEEKFL char(1),
D_LASTDAYINMONTHFL char(1),
D_HOLIDAYFL char(1),
D_WEEKDAYFL char(1),
PRIMARY KEY (D_DATEKEY)
);