0%

ClickHouse使用备忘录

SQL

创建表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
CREATE DATABASE IF NOT EXISTS min_data;

CREATE TABLE IF NOT EXISTS min_data.min_commodity
(
Symbol String,
TradingDate Date,
TradingTime DateTime64(6),
LastPrice Float64,
TradeVolume Int64,
TotalVolume Int64,
LastVolume Int64,
PreTotalPosition Int64,
TotalPosition Int64,
PrePositionChange Int64,
TotalAmount Int64,
PriceUpLimit Float64,
PriceDownLimit Float64,
PreClosePrice Float64,
OpenPrice Float64,
ClosePrice Float64,
SettlePrice Float64,
SellPrice01 Float64,
BuyPrice01 Float64,
SellVolume01 Int64,
BuyVolume01 Int64,
MidPrice Float64,
local_time DateTime64(6),
cross_time DateTime

) ENGINE = MergeTree ORDER BY (Symbol, cross_time);

数据去重

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
-- 获取库下表名称列表
SELECT name
FROM system.tables
WHERE database = 'tick_option';

-- 获取表内重复项
SELECT local_time, TradingTime, Symbol, count() AS count
FROM tick_future.tick_commodity_202101
WHERE Symbol = 'A2101'
GROUP BY local_time, TradingTime, Symbol
HAVING count > 1
ORDER BY (local_time, TradingTime, Symbol);

-- 提取重复数据
SELECT *
FROM tick_future.tick_commodity_202101
WHERE Symbol = 'A2101'
AND TradingTime = '2021-01-07 23:00:00.020'
AND local_time = '2021-01-07 23:00:00.020';

-- 删除重复数据,全部删除
ALTER
TABLE
tick_future.tick_commodity_202101
DELETE
WHERE Symbol = 'A2101'
AND TradingTime = '2021-01-07 23:00:00.020'
AND local_time = '2021-01-07 23:00:00.020';

表大小

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
select database,
table,
formatReadableSize(size) as size,
formatReadableSize(bytes_on_disk) as bytes_on_disk,
formatReadableSize(data_uncompressed_bytes) as data_uncompressed_bytes,
formatReadableSize(data_compressed_bytes) as data_compressed_bytes,
compress_rate,
rows,
days,
formatReadableSize(avgDaySize) as avgDaySize
from (
select database,
table,
sum(bytes) as size,
sum(rows) as rows,
min(min_date) as min_date,
max(max_date) as max_date,
sum(bytes_on_disk) as bytes_on_disk,
sum(data_uncompressed_bytes) as data_uncompressed_bytes,
sum(data_compressed_bytes) as data_compressed_bytes,
(data_compressed_bytes / data_uncompressed_bytes) * 100 as compress_rate,
max_date - min_date as days,
size / (max_date - min_date) as avgDaySize
from system.parts
where active
and database = 'min_data'
and table = 'min_commodity_future'
group by database,
table
);