在 CentOS 上安装 ClickHouse

添加 ClickHouse 的 yum 仓库(Altinity 在 packagecloud 上提供的源)

curl -s https://packagecloud.io/install/repositories/altinity/clickhouse/script.rpm.sh | sudo bash

查看 ClickHouse 的 rpm 包(以下输出是在安装完成后截取的,所以已经出现 Installed Packages 一栏)

[root@ipa clickhouse-server]# sudo yum list 'clickhouse*'

Loaded plugins: fastestmirror
Loading mirror speeds from cached hostfile
Installed Packages
clickhouse-client.x86_64                                                            19.9.2.4-1.el7                                                       @Altinity_clickhouse
clickhouse-common-static.x86_64                                                     19.9.2.4-1.el7                                                       @Altinity_clickhouse
clickhouse-server.x86_64                                                            19.9.2.4-1.el7                                                       @Altinity_clickhouse
clickhouse-server-common.x86_64                                                     19.9.2.4-1.el7                                                       @Altinity_clickhouse
Available Packages
clickhouse-compressor.x86_64                                                        1.1.54336-3.el7                                                      Altinity_clickhouse
clickhouse-debuginfo.x86_64                                                         19.9.2.4-1.el7                                                       Altinity_clickhouse
clickhouse-mysql.noarch                                                             0.0.20180319-1                                                       Altinity_clickhouse
clickhouse-odbc.x86_64                                                              1.0.0.20190611-1                                                     Altinity_clickhouse
clickhouse-test.x86_64

安装clickhouse

sudo yum install -y clickhouse-server clickhouse-client

验证安装

[root@ipa clickhouse-server]#  sudo yum list installed 'clickhouse*'

Loaded plugins: fastestmirror
Loading mirror speeds from cached hostfile
Installed Packages
clickhouse-client.x86_64                                                             19.9.2.4-1.el7                                                      @Altinity_clickhouse
clickhouse-common-static.x86_64                                                      19.9.2.4-1.el7                                                      @Altinity_clickhouse
clickhouse-server.x86_64                                                             19.9.2.4-1.el7                                                      @Altinity_clickhouse
clickhouse-server-common.x86_64                                                      19.9.2.4-1.el7                                                      @Altinity_clickhouse
[root@ipa clickhouse-server]#

启动

sudo /etc/init.d/clickhouse-server restart

使用客户端连接

[root@ipa clickhouse-server]# clickhouse-client

ClickHouse client version 19.9.2.4.
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 19.9.2 revision 54421.

ipa.haohaozhu.hadoop :)

下载测试数据

新建下载脚本(vi download.sh),写入以下内容:

#!/usr/bin/env bash
# Download the monthly "On-Time Reporting Carrier On-Time Performance" zip
# archives from transtats.bts.gov into the current directory, one file per
# (year, month) pair for the years 1987-2018.
#
# The loop is best-effort: a failed wget (for example a month that has no
# archive on the server) does not stop the remaining downloads.
# Brace ranges are a bash feature, hence the explicit bash shebang.

for year in {1987..2018}; do
  for month in {1..12}; do
    wget "https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_${year}_${month}.zip"
  done
done

chmod +x download.sh
./download.sh
建表

-- Flight on-time performance table queried in the tests that follow. \
-- The engine is declared with explicit clauses instead of the legacy form \
-- MergeTree(FlightDate, (Year, FlightDate), 8192): monthly partitions on \
-- FlightDate, sorting key (Year, FlightDate), index granularity 8192. \
-- Every line ends with a backslash so that the whole statement can be \
-- pasted into an interactive clickhouse-client session as one query. \
CREATE TABLE ontime ( \
  Year UInt16, \
  Quarter UInt8, \
  Month UInt8, \
  DayofMonth UInt8, \
  DayOfWeek UInt8, \
  FlightDate Date, \
  UniqueCarrier FixedString(7), \
  AirlineID Int32, \
  Carrier FixedString(2), \
  TailNum String, \
  FlightNum String, \
  OriginAirportID Int32, \
  OriginAirportSeqID Int32, \
  OriginCityMarketID Int32, \
  Origin FixedString(5), \
  OriginCityName String, \
  OriginState FixedString(2), \
  OriginStateFips String, \
  OriginStateName String, \
  OriginWac Int32, \
  DestAirportID Int32, \
  DestAirportSeqID Int32, \
  DestCityMarketID Int32, \
  Dest FixedString(5), \
  DestCityName String, \
  DestState FixedString(2), \
  DestStateFips String, \
  DestStateName String, \
  DestWac Int32, \
  CRSDepTime Int32, \
  DepTime Int32, \
  DepDelay Int32, \
  DepDelayMinutes Int32, \
  DepDel15 Int32, \
  DepartureDelayGroups String, \
  DepTimeBlk String, \
  TaxiOut Int32, \
  WheelsOff Int32, \
  WheelsOn Int32, \
  TaxiIn Int32, \
  CRSArrTime Int32, \
  ArrTime Int32, \
  ArrDelay Int32, \
  ArrDelayMinutes Int32, \
  ArrDel15 Int32, \
  ArrivalDelayGroups Int32, \
  ArrTimeBlk String, \
  Cancelled UInt8, \
  CancellationCode FixedString(1), \
  Diverted UInt8, \
  CRSElapsedTime Int32, \
  ActualElapsedTime Int32, \
  AirTime Int32, \
  Flights Int32, \
  Distance Int32, \
  DistanceGroup UInt8, \
  CarrierDelay Int32, \
  WeatherDelay Int32, \
  NASDelay Int32, \
  SecurityDelay Int32, \
  LateAircraftDelay Int32, \
  FirstDepTime String, \
  TotalAddGTime String, \
  LongestAddGTime String, \
  DivAirportLandings String, \
  DivReachedDest String, \
  DivActualElapsedTime String, \
  DivArrDelay String, \
  DivDistance String, \
  Div1Airport String, \
  Div1AirportID Int32, \
  Div1AirportSeqID Int32, \
  Div1WheelsOn String, \
  Div1TotalGTime String, \
  Div1LongestGTime String, \
  Div1WheelsOff String, \
  Div1TailNum String, \
  Div2Airport String, \
  Div2AirportID Int32, \
  Div2AirportSeqID Int32, \
  Div2WheelsOn String, \
  Div2TotalGTime String, \
  Div2LongestGTime String, \
  Div2WheelsOff String, \
  Div2TailNum String, \
  Div3Airport String, \
  Div3AirportID Int32, \
  Div3AirportSeqID Int32, \
  Div3WheelsOn String, \
  Div3TotalGTime String, \
  Div3LongestGTime String, \
  Div3WheelsOff String, \
  Div3TailNum String, \
  Div4Airport String, \
  Div4AirportID Int32, \
  Div4AirportSeqID Int32, \
  Div4WheelsOn String, \
  Div4TotalGTime String, \
  Div4LongestGTime String, \
  Div4WheelsOff String, \
  Div4TailNum String, \
  Div5Airport String, \
  Div5AirportID Int32, \
  Div5AirportSeqID Int32, \
  Div5WheelsOn String, \
  Div5TotalGTime String, \
  Div5LongestGTime String, \
  Div5WheelsOff String, \
  Div5TailNum String \
) ENGINE = MergeTree \
PARTITION BY toYYYYMM(FlightDate) \
ORDER BY (Year, FlightDate) \
SETTINGS index_granularity = 8192;

导入之前下载的数据。这里只下载了 1987-2000 年区间的数据,压缩前(未压缩)的文件大小约为 28 GB;ClickHouse 服务器配置为 2 核 CPU、8 GB 内存。

# Load every zip archive in the current directory into the ontime table.
# For each archive: print its name, stream the contained CSV to stdout,
# remove every ".00" substring (e.g. "12.00" becomes "12"), and insert the
# result into ClickHouse using the CSVWithNames format.
for archive in *.zip; do
  # If the glob matched nothing, the literal '*.zip' is left in place; skip it.
  [ -e "$archive" ] || continue
  echo "$archive"
  unzip -cq "$archive" '*.csv' \
    | sed 's/\.00//g' \
    | clickhouse-client --host=localhost --query="INSERT INTO ontime FORMAT CSVWithNames"
done

SQL 测试:

测试一

ipa.haohaozhu.hadoop :) select count(*) from ontime;

SELECT count(*)
FROM ontime

┌──count()─┐
│ 62077338 │
└──────────┘

1 rows in set. Elapsed: 0.887 sec. Processed 62.08 million rows, 62.08 MB (69.96 million rows/s., 69.96 MB/s.)

测试二

ipa.haohaozhu.hadoop :) SELECT DayOfWeek, count(*) AS c  FROM ontime  GROUP BY DayOfWeek  ORDER BY c DESC;

SELECT
    DayOfWeek,
    count(*) AS c
FROM ontime
GROUP BY DayOfWeek
ORDER BY c DESC

┌─DayOfWeek─┬───────c─┐
│         2 │ 9107705 │
│         1 │ 9090812 │
│         3 │ 9088271 │
│         4 │ 9049647 │
│         5 │ 9036578 │
│         7 │ 8585662 │
│         6 │ 8118663 │
└───────────┴─────────┘

7 rows in set. Elapsed: 0.231 sec. Processed 62.08 million rows, 62.08 MB (268.31 million rows/s., 268.31 MB/s.)

测试三

ipa.haohaozhu.hadoop :) SELECT DayOfWeek, count(*) AS c\
:-] FROM ontime\
:-] WHERE DepDelay>10 \
:-] GROUP BY DayOfWeek\
:-] ORDER BY c DESC;

SELECT
    DayOfWeek,
    count(*) AS c
FROM ontime
WHERE DepDelay > 10
GROUP BY DayOfWeek
ORDER BY c DESC

┌─DayOfWeek─┬───────c─┐
│         5 │ 2008505 │
│         4 │ 1885211 │
│         3 │ 1645889 │
│         7 │ 1611706 │
│         1 │ 1534372 │
│         2 │ 1496814 │
│         6 │ 1372759 │
└───────────┴─────────┘

7 rows in set. Elapsed: 3.044 sec. Processed 62.08 million rows, 259.86 MB (20.39 million rows/s., 85.37 MB/s.)

ipa.haohaozhu.hadoop :)

测试四

ipa.haohaozhu.hadoop :) SELECT Origin, count(*) AS c\
:-] FROM ontime\
:-] WHERE DepDelay>10 \
:-] GROUP BY Origin\
:-] ORDER BY c DESC\
:-] LIMIT 10;

SELECT
    Origin,
    count(*) AS c
FROM ontime
WHERE DepDelay > 10
GROUP BY Origin
ORDER BY c DESC
LIMIT 10

┌─Origin─┬──────c─┐
│ ORD    │ 751029 │
│ ATL    │ 604497 │
│ DFW    │ 578965 │
│ LAX    │ 435993 │
│ PHX    │ 405636 │
│ STL    │ 363985 │
│ DEN    │ 349509 │
│ SFO    │ 338541 │
│ PIT    │ 331315 │
│ CLT    │ 327095 │
└────────┴────────┘

10 rows in set. Elapsed: 3.210 sec. Processed 62.08 million rows, 306.09 MB (19.34 million rows/s., 95.35 MB/s.)

测试五

ipa.haohaozhu.hadoop :) SELECT Carrier, count(*)\
:-] FROM ontime\
:-] WHERE DepDelay>10 \
:-] GROUP BY Carrier\
:-] ORDER BY count(*) DESC;

SELECT
    Carrier,
    count(*)
FROM ontime
WHERE DepDelay > 10
GROUP BY Carrier
ORDER BY count(*) DESC

┌─Carrier─┬─count()─┐
│ DL      │ 1801581 │
│ UA      │ 1685842 │
│ US      │ 1683872 │
│ WN      │ 1487246 │
│ AA      │ 1307232 │
│ NW      │  898815 │
│ CO      │  854443 │
│ TW      │  580275 │
│ HP      │  463263 │
│ AS      │  267853 │
│ PI      │  209439 │
│ EA      │  155349 │
│ AL      │   99515 │
│ PA      │   36805 │
│ PS      │   17672 │
│ ML      │    5033 │
│ AQ      │    1021 │
└─────────┴─────────┘

17 rows in set. Elapsed: 1.287 sec. Processed 62.08 million rows, 271.43 MB (48.25 million rows/s., 210.96 MB/s.)

测试六

ipa.haohaozhu.hadoop :) SELECT Carrier, c, c2, c*100/c2 as c3\
:-] FROM\
:-] (\
:-]     SELECT\
:-]         Carrier,\
:-]         count(*) AS c\
:-]     FROM ontime\
:-]     WHERE DepDelay>10\
:-]     GROUP BY Carrier\
:-] )\
:-] ANY INNER JOIN\
:-] (\
:-]     SELECT\
:-]         Carrier,\
:-]         count(*) AS c2\
:-]     FROM ontime\
:-]     GROUP BY Carrier\
:-] ) USING Carrier\
:-] ORDER BY c3 DESC;

SELECT
    Carrier,
    c,
    c2,
    (c * 100) / c2 AS c3
FROM
(
    SELECT
        Carrier,
        count(*) AS c
    FROM ontime
    WHERE DepDelay > 10
    GROUP BY Carrier
)
ANY INNER JOIN
(
    SELECT
        Carrier,
        count(*) AS c2
    FROM ontime
    GROUP BY Carrier
) USING (Carrier)
ORDER BY c3 DESC

┌─Carrier─┬───────c─┬───────c2─┬─────────────────c3─┐
│ PI      │  209439 │   873957 │ 23.964451340283333 │
│ WN      │ 1487246 │  6769119 │ 21.971042317323718 │
│ AL      │   99515 │   455873 │ 21.829544631947932 │
│ UA      │ 1685842 │  7946749 │  21.21423490285147 │
│ PS      │   17672 │    83617 │  21.13445830393341 │
│ US      │ 1683872 │  8492064 │  19.82877189809215 │
│ HP      │  463263 │  2339125 │ 19.804969807085982 │
│ AS      │  267853 │  1421935 │ 18.837218297601506 │
│ TW      │  580275 │  3125082 │  18.56831276747298 │
│ DL      │ 1801581 │ 10211113 │ 17.643336235726704 │
│ CO      │  854443 │  4970900 │  17.18889939447585 │
│ EA      │  155349 │   919785 │ 16.889707920872812 │
│ ML      │    5033 │    31123 │  16.17132024547762 │
│ AA      │ 1307232 │  8335280 │ 15.683120423069171 │
│ NW      │  898815 │  5812478 │ 15.463542399644352 │
│ PA      │   36805 │   278102 │ 13.234352863337913 │
│ AQ      │    1021 │    11036 │  9.251540413193187 │
└─────────┴─────────┴──────────┴────────────────────┘

17 rows in set. Elapsed: 0.973 sec. Processed 124.15 million rows, 395.59 MB (127.64 million rows/s., 406.69 MB/s.)

测试七

ipa.haohaozhu.hadoop :) SELECT Carrier, avg(DepDelay>10)*100 AS c3\
:-] FROM ontime\
:-] GROUP BY Carrier\
:-] ORDER BY Carrier;

SELECT
    Carrier,
    avg(DepDelay > 10) * 100 AS c3
FROM ontime
GROUP BY Carrier
ORDER BY Carrier ASC

┌─Carrier─┬─────────────────c3─┐
│ AA      │ 15.683120423069171 │
│ AL      │ 21.829544631947932 │
│ AQ      │  9.251540413193187 │
│ AS      │ 18.837218297601506 │
│ CO      │ 17.188899394475847 │
│ DL      │ 17.643336235726704 │
│ EA      │ 16.889707920872812 │
│ HP      │ 19.804969807085985 │
│ ML      │  16.17132024547762 │
│ NW      │  15.46354239964435 │
│ PA      │  13.23435286333791 │
│ PI      │ 23.964451340283333 │
│ PS      │  21.13445830393341 │
│ TW      │  18.56831276747298 │
│ UA      │ 21.214234902851466 │
│ US      │  19.82877189809215 │
│ WN      │ 21.971042317323715 │
└─────────┴────────────────────┘

17 rows in set. Elapsed: 0.697 sec. Processed 62.08 million rows, 372.47 MB (89.11 million rows/s., 534.67 MB/s.)

测试八(查询语句与测试六相同)

ipa.haohaozhu.hadoop :) SELECT Carrier, c, c2, c*100/c2 as c3\
:-] FROM\
:-] (\
:-]     SELECT\
:-]         Carrier,\
:-]         count(*) AS c\
:-]     FROM ontime\
:-]     WHERE DepDelay>10\
:-]     GROUP BY Carrier\
:-] )\
:-] ANY INNER JOIN\
:-] (\
:-]     SELECT\
:-]         Carrier,\
:-]         count(*) AS c2\
:-]     FROM ontime\
:-]     GROUP BY Carrier\
:-] ) USING Carrier\
:-] ORDER BY c3 DESC;

SELECT
    Carrier,
    c,
    c2,
    (c * 100) / c2 AS c3
FROM
(
    SELECT
        Carrier,
        count(*) AS c
    FROM ontime
    WHERE DepDelay > 10
    GROUP BY Carrier
)
ANY INNER JOIN
(
    SELECT
        Carrier,
        count(*) AS c2
    FROM ontime
    GROUP BY Carrier
) USING (Carrier)
ORDER BY c3 DESC

┌─Carrier─┬───────c─┬───────c2─┬─────────────────c3─┐
│ PI      │  209439 │   873957 │ 23.964451340283333 │
│ WN      │ 1487246 │  6769119 │ 21.971042317323718 │
│ AL      │   99515 │   455873 │ 21.829544631947932 │
│ UA      │ 1685842 │  7946749 │  21.21423490285147 │
│ PS      │   17672 │    83617 │  21.13445830393341 │
│ US      │ 1683872 │  8492064 │  19.82877189809215 │
│ HP      │  463263 │  2339125 │ 19.804969807085982 │
│ AS      │  267853 │  1421935 │ 18.837218297601506 │
│ TW      │  580275 │  3125082 │  18.56831276747298 │
│ DL      │ 1801581 │ 10211113 │ 17.643336235726704 │
│ CO      │  854443 │  4970900 │  17.18889939447585 │
│ EA      │  155349 │   919785 │ 16.889707920872812 │
│ ML      │    5033 │    31123 │  16.17132024547762 │
│ AA      │ 1307232 │  8335280 │ 15.683120423069171 │
│ NW      │  898815 │  5812478 │ 15.463542399644352 │
│ PA      │   36805 │   278102 │ 13.234352863337913 │
│ AQ      │    1021 │    11036 │  9.251540413193187 │
└─────────┴─────────┴──────────┴────────────────────┘

17 rows in set. Elapsed: 1.011 sec. Processed 124.15 million rows, 395.59 MB (122.82 million rows/s., 391.35 MB/s.)

测试九

ipa.haohaozhu.hadoop :) SELECT DestCityName, uniqExact(OriginCityName) AS u \
:-] FROM ontime\
:-] GROUP BY DestCityName\
:-] ORDER BY u DESC\
:-] LIMIT 10;

SELECT
    DestCityName,
    uniqExact(OriginCityName) AS u
FROM ontime
GROUP BY DestCityName
ORDER BY u DESC
LIMIT 10

┌─DestCityName──────────┬───u─┐
│ Chicago, IL           │ 126 │
│ Atlanta, GA           │ 114 │
│ Dallas/Fort Worth, TX │ 105 │
│ Pittsburgh, PA        │ 104 │
│ Minneapolis, MN       │  99 │
│ Denver, CO            │  97 │
│ Newark, NJ            │  94 │
│ St. Louis, MO         │  94 │
│ Charlotte, NC         │  94 │
│ Detroit, MI           │  92 │
└───────────────────────┴─────┘

10 rows in set. Elapsed: 7.020 sec. Processed 62.08 million rows, 2.76 GB (8.84 million rows/s., 393.13 MB/s.)

测试十

SELECT
    min(Year),
    max(Year),
    Carrier,
    count(*) AS cnt,
    sum(ArrDelayMinutes > 30) AS flights_delayed,
    round(sum(ArrDelayMinutes > 30) / count(*), 2) AS rate
FROM ontime
WHERE (DayOfWeek NOT IN (6, 7)) AND (OriginState NOT IN ('AK', 'HI', 'PR', 'VI')) AND (DestState NOT IN ('AK', 'HI', 'PR', 'VI')) AND (FlightDate < '2010-01-01')
GROUP BY Carrier
HAVING (cnt > 100000) AND (max(Year) > 1990)
ORDER BY rate DESC
LIMIT 1000

┌─min(Year)─┬─max(Year)─┬─Carrier─┬─────cnt─┬─flights_delayed─┬─rate─┐
│      1987 │      2000 │ UA      │ 5668532 │          705834 │ 0.12 │
│      1987 │      2000 │ TW      │ 2257896 │          242250 │ 0.11 │
│      1987 │      2000 │ CO      │ 3635353 │          394498 │ 0.11 │
│      1987 │      2000 │ AA      │ 5770469 │          578915 │  0.1 │
│      1988 │      2000 │ US      │ 6231991 │          593442 │  0.1 │
│      1987 │      1991 │ PA      │  194060 │           18923 │  0.1 │
│      1987 │      2000 │ NW      │ 4230638 │          382152 │ 0.09 │
│      1987 │      2000 │ DL      │ 7188301 │          671722 │ 0.09 │
│      1987 │      2000 │ HP      │ 1700319 │          157018 │ 0.09 │
│      1987 │      2000 │ AS      │  720882 │           61112 │ 0.08 │
│      1987 │      2000 │ WN      │ 5051739 │          424503 │ 0.08 │
└───────────┴───────────┴─────────┴─────────┴─────────────────┴──────┘

11 rows in set. Elapsed: 6.396 sec. Processed 62.08 million rows, 730.73 MB (9.71 million rows/s., 114.25 MB/s.)

你可能感兴趣的:(clickhouse)