ClickHouse VS Hive

CK官网
HIVE官网

建表语句

  • CK MergeTree
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]  
(  
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],  
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],  
...  
INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,  
INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2  
) ENGINE = MergeTree()  

ORDER BY expr 
[PARTITION BY expr]  
[PRIMARY KEY expr]  
[SAMPLE BY expr]  
[SETTINGS name=value, ...]

demo:

CREATE TABLE tutorial.hits_v1
(
    `WatchID` UInt64,
    `JavaEnable` UInt8,
    `Title` String,
    `GoodEvent` Int16,
    `EventTime` DateTime,
    `EventDate` Date,
    `CounterID` UInt32,
    `ClientIP` UInt32 comment '客户IP',
    `ClientIP6` FixedString(16),
    `RegionID` UInt32,
    `UserID` UInt64,
    `CounterClass` Int8,
    `OS` UInt8,
    `UserAgent` UInt8,
    `URL` String,
    `Referer` String,
    `URLDomain` String,
    `RefererDomain` String,
    `Refresh` UInt8,
    `IsRobot` UInt8,
    `RefererCategories` Array(UInt16),
    `URLCategories` Array(UInt16),
    `URLRegions` Array(UInt32),
    `RefererRegions` Array(UInt32),
    `ResolutionWidth` UInt16,
    `ResolutionHeight` UInt16,
    `ResolutionDepth` UInt8,
    `FlashMajor` UInt8,
    `FlashMinor` UInt8,
    `FlashMinor2` String,
    `NetMajor` UInt8,
    `NetMinor` UInt8,
    `UserAgentMajor` UInt16,
    `UserAgentMinor` FixedString(2),
    `CookieEnable` UInt8,
    `JavascriptEnable` UInt8,
    `IsMobile` UInt8,
    `MobilePhone` UInt8,
    `MobilePhoneModel` String,
    `Params` String,
    `IPNetworkID` UInt32,
    `TraficSourceID` Int8,
    `SearchEngineID` UInt16,
    `SearchPhrase` String,
    `AdvEngineID` UInt8,
    `IsArtifical` UInt8,
    `WindowClientWidth` UInt16,
    `WindowClientHeight` UInt16,
    `ClientTimeZone` Int16,
    `ClientEventTime` DateTime,
    `SilverlightVersion1` UInt8,
    `SilverlightVersion2` UInt8,
    `SilverlightVersion3` UInt32,
    `SilverlightVersion4` UInt16,
    `PageCharset` String,
    `CodeVersion` UInt32,
    `IsLink` UInt8,
    `IsDownload` UInt8,
    `IsNotBounce` UInt8,
    `FUniqID` UInt64,
    `HID` UInt32,
    `IsOldCounter` UInt8,
    `IsEvent` UInt8,
    `IsParameter` UInt8,
    `DontCountHits` UInt8,
    `WithHash` UInt8,
    `HitColor` FixedString(1),
    `UTCEventTime` DateTime,
    `Age` UInt8,
    `Sex` UInt8,
    `Income` UInt8,
    `Interests` UInt16,
    `Robotness` UInt8,
    `GeneralInterests` Array(UInt16),
    `RemoteIP` UInt32,
    `RemoteIP6` FixedString(16),
    `WindowName` Int32,
    `OpenerName` Int32,
    `HistoryLength` Int16,
    `BrowserLanguage` FixedString(2),
    `BrowserCountry` FixedString(2),
    `SocialNetwork` String,
    `SocialAction` String,
    `HTTPError` UInt16,
    `SendTiming` Int32,
    `DNSTiming` Int32,
    `ConnectTiming` Int32,
    `ResponseStartTiming` Int32,
    `ResponseEndTiming` Int32,
    `FetchTiming` Int32,
    `RedirectTiming` Int32,
    `DOMInteractiveTiming` Int32,
    `DOMContentLoadedTiming` Int32,
    `DOMCompleteTiming` Int32,
    `LoadEventStartTiming` Int32,
    `LoadEventEndTiming` Int32,
    `NSToDOMContentLoadedTiming` Int32,
    `FirstPaintTiming` Int32,
    `RedirectCount` Int8,
    `SocialSourceNetworkID` UInt8,
    `SocialSourcePage` String,
    `ParamPrice` Int64,
    `ParamOrderID` String,
    `ParamCurrency` FixedString(3),
    `ParamCurrencyID` UInt16,
    `GoalsReached` Array(UInt32),
    `OpenstatServiceName` String,
    `OpenstatCampaignID` String,
    `OpenstatAdID` String,
    `OpenstatSourceID` String,
    `UTMSource` String,
    `UTMMedium` String,
    `UTMCampaign` String,
    `UTMContent` String,
    `UTMTerm` String,
    `FromTag` String,
    `HasGCLID` UInt8,
    `RefererHash` UInt64,
    `URLHash` UInt64,
    `CLID` UInt32,
    `YCLID` UInt64,
    `ShareService` String,
    `ShareURL` String,
    `ShareTitle` String,
    `ParsedParams` Nested(
        Key1 String,
        Key2 String,
        Key3 String,
        Key4 String,
        Key5 String,
        ValueDouble Float64),
    `IslandID` FixedString(16),
    `RequestNum` UInt32,
    `RequestTry` UInt8
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID)
SETTINGS index_granularity = 8192


  • Hive
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name    -- (Note: TEMPORARY available in Hive 0.14.0 and later)
  [(col_name data_type [COMMENT col_comment], ... [constraint_specification])]
  [COMMENT table_comment]
  [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
  [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
  [SKEWED BY (col_name, col_name, ...)                  -- (Note: Available in Hive 0.10.0 and later)]
     ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
     [STORED AS DIRECTORIES]
  [
   [ROW FORMAT row_format] 
   [STORED AS file_format]
     | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]  -- (Note: Available in Hive 0.6.0 and later)
  ]
  [LOCATION hdfs_path]
  [TBLPROPERTIES (property_name=property_value, ...)]   -- (Note: Available in Hive 0.6.0 and later)
 
 
 
[AS select_statement];   -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
 
 
 
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
  LIKE existing_table_or_view_name
  [LOCATION hdfs_path];
 
data_type
  : primitive_type
  | array_type
  | map_type
  | struct_type
  | union_type  -- (Note: Available in Hive 0.7.0 and later)
 
primitive_type
  : TINYINT
  | SMALLINT
  | INT
  | BIGINT
  | BOOLEAN
  | FLOAT
  | DOUBLE
  | DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
  | STRING
  | BINARY      -- (Note: Available in Hive 0.8.0 and later)
  | TIMESTAMP   -- (Note: Available in Hive 0.8.0 and later)
  | DECIMAL     -- (Note: Available in Hive 0.11.0 and later)
  | DECIMAL(precision, scale)  -- (Note: Available in Hive 0.13.0 and later)
  | DATE        -- (Note: Available in Hive 0.12.0 and later)
  | VARCHAR     -- (Note: Available in Hive 0.12.0 and later)
  | CHAR        -- (Note: Available in Hive 0.13.0 and later)
 
array_type
  : ARRAY < data_type >
 
map_type
  : MAP < primitive_type, data_type >
 
struct_type
  : STRUCT < col_name : data_type [COMMENT col_comment], ...>
 
union_type
   : UNIONTYPE < data_type, data_type, ... >  -- (Note: Available in Hive 0.7.0 and later)
 
row_format
  : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
        [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
        [NULL DEFINED AS char]   -- (Note: Available in Hive 0.13 and later)
  | SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
 
file_format:
  : SEQUENCEFILE
  | TEXTFILE    -- (Default, depending on hive.default.fileformat configuration)
  | RCFILE      -- (Note: Available in Hive 0.6.0 and later)
  | ORC         -- (Note: Available in Hive 0.11.0 and later)
  | PARQUET     -- (Note: Available in Hive 0.13.0 and later)
  | AVRO        -- (Note: Available in Hive 0.14.0 and later)
  | INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
 
constraint_specification:
  : [, PRIMARY KEY (col_name, ...) DISABLE NOVALIDATE ]
    [, CONSTRAINT constraint_name FOREIGN KEY (col_name, ...) REFERENCES table_name(col_name, ...) DISABLE NOVALIDATE


demo

CREATE  TABLE IF NOT EXISTS default.records
(
    exer_recore_id  BIGINT,
    user_id int,
    channel tinyint,
    item_id int,
    question_id int,
    `type` tinyint,
    question_num int,
    orgin tinyint,
    orgin_id int,
    cate_id int,
    is_hf tinyint,
    user_answer string,
    correct_answer string,
    is_correct tinyint comment "正确与否",
    dutration int,
    record_id int,
    subject_1 int,
    subject_2 int,
    subject_3 int,
    status tinyint,
    is_signed tinyint,
    create_time int,
    practice_level_1 string,
    practice_level_2 string,
    practice_level_3 string,
    update_time int
)
comment "做题记录"
CLUSTERED BY (exer_recore_id) into 4 buckets
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
      "separatorChar" = ",",
      "quoteChar"     = "\\"",
      "escapeChar"    = "\\\\"        
    )
  • MYSQL TO CLICKHOUSE
CREATE TABLE tiku.test
ENGINE = MergeTree
ORDER BY (question_id,subject_1)
PARTITION BY toYYYYMM(toDate(create_time))
PRIMARY KEY question_id
SETTINGS index_granularity=8192
AS
SELECT *
FROM mysql('p**.aliyuncs.com:3306', 'tiku_practice_1', 't_exer_record_0', '**', '***') 

select database,table,partition,partition_id,name,path from system.parts where table='test'

res:


┌─database─┬─table─┬─partition─┬─partition_id─┬─name───────────┬─path─────────────────────────────────────────────────┐
│ tiku     │ test  │ 201809    │ 201809       │ 201809_1_1_0   │ /clickhouse/data/data/data/tiku/test/201809_1_1_0/   │
│ tiku     │ test  │ 201809    │ 201809       │ 201809_13_13_0 │ /clickhouse/data/data/data/tiku/test/201809_13_13_0/ │
│ tiku     │ test  │ 201810    │ 201810       │ 201810_2_2_0   │ /clickhouse/data/data/data/tiku/test/201810_2_2_0/   │
│ tiku     │ test  │ 201810    │ 201810       │ 201810_14_14_0 │ /clickhouse/data/data/data/tiku/test/201810_14_14_0/ │
│ tiku     │ test  │ 201811    │ 201811       │ 201811_3_3_0   │ /clickhouse/data/data/data/tiku/test/201811_3_3_0/   │
│ tiku     │ test  │ 201811    │ 201811       │ 201811_17_17_0 │ /clickhouse/data/data/data/tiku/test/201811_17_17_0/ │
│ tiku     │ test  │ 201812    │ 201812       │ 201812_4_4_0   │ /clickhouse/data/data/data/tiku/test/201812_4_4_0/   │
│ tiku     │ test  │ 201812    │ 201812       │ 201812_16_16_0 │ /clickhouse/data/data/data/tiku/test/201812_16_16_0/ │
│ tiku     │ test  │ 201901    │ 201901       │ 201901_5_5_0   │ /clickhouse/data/data/data/tiku/test/201901_5_5_0/   │
│ tiku     │ test  │ 201901    │ 201901       │ 201901_15_15_0 │ /clickhouse/data/data/data/tiku/test/201901_15_15_0/ │

数据类型

架构

tips

  • CK mergetree 引擎
    必须要设置 order by 字段
    primary key 必须是order by 的字段

修改表名

HIVE:
alter table record rename to t_record;
Clickhosue
REANEM table record to t_record

查看建表语句

describe t_record

show create table t_record

你可能感兴趣的:(hive,clickhouse)