How to model a Data Warehouse for OLAP queries?


#1

Hi guys,
I am having doubts about how I should model a DW on Sales in MemSQL. I did the logical modeling of it, I rode it in MemSQL Studio but I do not know if this is the correct way to do it. I also had trouble putting more than one FOREIGN SHARD KEY into the fact table, so I removed it.
Thanks any suggestions.

CREATE DATABASE DW;
USE DW;

CREATE TABLE DIM_CUSTOMER (
  C_CUSTOMERKEY        INTEGER       NOT NULL AUTO_INCREMENT,
  C_NAME               VARCHAR(100)  CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  C_DOCUMENT           VARCHAR(14)   CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  C_ADDRESS            VARCHAR(100)  CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  C_PHONE              VARCHAR(20)   CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  C_EMAIL              VARCHAR(50)   CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  C_LATITUDE           DOUBLE        NOT NULL,
  C_LONGITUDE          DOUBLE        NOT NULL,
  PRIMARY KEY (C_CUSTOMERKEY)
);

CREATE TABLE DIM_LOCAL (
  L_LOCALKEY            INTEGER       NOT NULL AUTO_INCREMENT,
  L_STATE               VARCHAR(2)    CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  L_CODCITY             VARCHAR(8)    CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  L_CITY                VARCHAR(50)   CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  L_DISTRICT            VARCHAR(50)   CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  PRIMARY KEY (L_LOCALKEY)
);

CREATE TABLE DIM_PRODUCT (
  P_PRODUCTKEY          INTEGER       NOT NULL AUTO_INCREMENT,
  P_IDPRODUCT           INTEGER       NOT NULL,
  P_PRODUCT             VARCHAR(100)  CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  PRIMARY KEY (P_PRODUCTKEY)
);

CREATE TABLE DIM_TIME (
  T_TIMEKEY             INTEGER       NOT NULL,
  T_TIMESTAMP           TIMESTAMP     NOT NULL,
  T_DAYNUMBERWEEK       INTEGER       NOT NULL,
  T_DAYNUMBERMONTH      INTEGER       NOT NULL,
  T_DAYNUMBERYEAR       INTEGER       NOT NULL,
  T_WEEKID              INTEGER       NOT NULL,
  T_WEEKNUMBERYEAR      INTEGER       NOT NULL,
  T_MONTHID             INTEGER       NOT NULL,
  T_MONTHNAME           VARCHAR(15)   CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  T_MONTHNUMBERYEAR     INTEGER       NOT NULL,
  T_YEARID              INTEGER       NOT NULL,
  T_YEARNAME            INTEGER       NOT NULL,
  PRIMARY KEY (T_TIMEKEY)
);

CREATE TABLE FACT_SALES (
  F_TIMEKEY             INTEGER       NOT NULL,
  F_CUSTOMERKEY         INTEGER       NOT NULL,
  F_LOCALKEY            INTEGER       NOT NULL,
  F_PRODUCTKEY          INTEGER       NOT NULL,
  F_QUANTITY            INTEGER       NOT NULL,
  F_UNITARYVALUE        DECIMAL(18,2) NOT NULL,
  F_TOTALVALUE          DECIMAL(18,2) NOT NULL,
  F_DISTANCE            FLOAT,
  PRIMARY KEY (F_TIMEKEY,F_CUSTOMERKEY,F_LOCALKEY,F_PRODUCTKEY),
  KEY (F_TIMEKEY),
  KEY (F_PRODUCTKEY),
  KEY (F_CUSTOMERKEY),
  KEY (F_LOCALKEY)
);

#2

Don’t bother with foreign shard keys. MemSQL can infer foreign shard key relationships reasonably well on its own.

The one quick thing I would recommend is changing your fact tables to be columnstore tables (https://docs.memsql.com/concepts/v6.5/columnstore/). Depending on how much data your storing and your query workload, columnstore tables are usually a better choice for fact tables (they’re stored on disk compressed and extremely fast to table scan (https://www.memsql.com/blog/memsql-processing-shatters-trillion-rows-per-second-barrier/)