一、背景
在分布式数据库中数据量级大都在千万以上,用到的最多的sql类似 where ,order by ,limit 的语句。
where,order by的字段能否同时走索引呢?带着这个疑问基于 TiDBV6.0 做一个实验验证。
二、环境准备
2.1 建立TiDB cluster111
- 拓扑如下(cluster111-full.yaml)
global:
user: "tidb"
ssh_port: 22
deploy_dir: "/tidb-deploy"
data_dir: "/tidb-data"
# # Monitored variables are applied to all the machines.
monitored:
node_exporter_port: 9100
blackbox_exporter_port: 9115
server_configs:
tidb:
log.slow-threshold: 300
binlog.enable: false
binlog.ignore-error: false
tikv:
readpool.storage.use-unified-pool: false
readpool.coprocessor.use-unified-pool: true
pd:
schedule.leader-schedule-limit: 4
schedule.region-schedule-limit: 2048
schedule.replica-schedule-limit: 64
replication.location-labels:
- host
pd_servers:
- host: 10.0.2.15
# ssh_port: 22
# name: "pd-1"
client_port: 2379
# peer_port: 2380
tidb_servers:
- host: 10.0.2.15
tikv_servers:
- host: 10.0.2.15
# ssh_port: 22
port: 20160
status_port: 20180
config:
server.grpc-concurrency: 4
#server.labels: {host: "10.0.2.15.20160" }
monitoring_servers:
- host: 10.0.2.15
grafana_servers:
- host: 10.0.2.15
alertmanager_servers:
- host: 10.0.2.15
-
部署集群
具体的部署可以参考文章 https://tidb.net/blog/af8080f7#TiDB-最小实践Cluster111
# tiup cluster list
# tiup cluster stop cluster111
# tiup cluster destroy cluster111
# 部署cluster111集群
tiup cluster deploy cluster111 v6.0.0 ./cluster111-full.yaml --user root -p
tiup cluster start cluster111
2.2 创建库表
CREATE DATABASE `b_crm` /*!40100 DEFAULT CHARACTER SET utf8mb4 */ ;
drop table if exists `m_test_sort`;
CREATE TABLE `m_test_sort` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`orgid` int(11) NOT NULL default 0,
`labelid` int not null default 0,
`catid` int not null default 0,
`productid` int not null default 0,
`p1` int not null default 0,
`p2` int not null default 0,
`p3` int not null default 0,
`name` varchar(20) not null default '',
`cust_id` char(30) not null default '',
PRIMARY KEY (`id`) ,
key ix_orgid(`orgid`),
key ix_labelid_catid(`labelid`,`catid`),
key ix_p1_p2_p3(`p1`,`p2`,`p3`),
UNIQUE KEY uix_cust_id(`cust_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
2.3 表数据初始化
-- 插入1条 seed
insert into m_test_sort(orgid,labelid,catid,productid,p1,p2,p3,name,cust_id)values(1,1,1,1,1,1,1,'111','111');
-- 执行多次14次
insert into m_test_sort(orgid,labelid,catid,productid,p1,p2,p3,name,cust_id)
select FLOOR(RAND() * 100),FLOOR(RAND() * 1000),FLOOR(RAND() * 100),FLOOR(RAND() * 1000),FLOOR(RAND() * 1000),FLOOR(RAND() * 1000),FLOOR(RAND() * 1000),concat(substring('赵钱孙李周吴郑王冯陈诸卫蒋沈韩杨朱秦尤许何吕施张孔曹严华金魏陶姜戚谢邹喻柏水窦章云苏潘葛奚范彭郎鲁韦昌马苗凤花方俞任袁柳酆鲍史唐费廉岑薛雷贺倪汤滕殷罗毕郝邬安常乐于时傅皮齐康伍余元卜顾孟平黄和穆萧尹姚邵堪汪祁毛禹狄米贝明臧计伏成戴谈宋茅庞熊纪舒屈项祝董粱杜阮蓝闵席季麻强贾路娄危江童颜郭梅盛林刁钟徐邱骆高夏蔡田樊胡凌霍虞万支柯咎管卢莫经房裘干解应宗丁宣贲邓郁单杭洪包诸左石崔吉钮龚',floor(1+190*rand()),1),substring('明国华建文平志伟东海强晓生光林小民永杰军金健一忠洪江福祥中正振勇耀春大宁亮宇兴宝少剑云学仁涛瑞飞鹏安亚泽世汉达卫利胜敏群波成荣新峰刚家龙德庆斌辉良玉俊立浩天宏子松克清长嘉红山贤阳乐锋智青跃元武广思雄锦威启昌铭维义宗英凯鸿森超坚旭政传康继翔栋仲权奇礼楠炜友年震鑫雷兵万星骏伦绍麟雨行才希彦兆贵源有景升惠臣慧开章润高佳虎根远力进泉茂毅富博霖顺信凡豪树和恩向道川彬柏磊敬书鸣芳培全炳基冠晖京欣廷哲保秋君劲轩帆若连勋祖锡吉崇钧田石奕发洲彪钢运伯满庭申湘皓承梓雪孟其潮冰怀鲁裕翰征谦航士尧标洁城寿枫革纯风化逸腾岳银鹤琳显焕来心凤睿勤延凌昊西羽百捷定琦圣佩麒虹如靖日咏会久昕黎桂玮燕可越彤雁孝宪萌颖艺夏桐月瑜沛诚夫声冬奎扬双坤镇楚水铁喜之迪泰方同滨邦先聪朝善非恒晋汝丹为晨乃秀岩辰洋然厚灿卓杨钰兰怡灵淇美琪亦晶舒菁真涵爽雅爱依静棋宜男蔚芝菲露娜珊雯淑曼萍珠诗璇琴素梅玲蕾艳紫珍丽仪梦倩伊茜妍碧芬儿岚婷菊妮媛莲娟一',floor(1+400*rand()),1),substring('明国华建文平志伟东海强晓生光林小民永杰军金健一忠洪江福祥中正振勇耀春大宁亮宇兴宝少剑云学仁涛瑞飞鹏安亚泽世汉达卫利胜敏群波成荣新峰刚家龙德庆斌辉良玉俊立浩天宏子松克清长嘉红山贤阳乐锋智青跃元武广思雄锦威启昌铭维义宗英凯鸿森超坚旭政传康继翔栋仲权奇礼楠炜友年震鑫雷兵万星骏伦绍麟雨行才希彦兆贵源有景升惠臣慧开章润高佳虎根远力进泉茂毅富博霖顺信凡豪树和恩向道川彬柏磊敬书鸣芳培全炳基冠晖京欣廷哲保秋君劲轩帆若连勋祖锡吉崇钧田石奕发洲彪钢运伯满庭申湘皓承梓雪孟其潮冰怀鲁裕翰征谦航士尧标洁城寿枫革纯风化逸腾岳银鹤琳显焕来心凤睿勤延凌昊西羽百捷定琦圣佩麒虹如靖日咏会久昕黎桂玮燕可越彤雁孝宪萌颖艺夏桐月瑜沛诚夫声冬奎扬双坤镇楚水铁喜之迪泰方同滨邦先聪朝善非恒晋汝丹为晨乃秀岩辰洋然厚灿卓杨钰兰怡灵淇美琪亦晶舒菁真涵爽雅爱依静棋宜男蔚芝菲露娜珊雯淑曼萍珠诗璇琴素梅玲蕾艳紫珍丽仪梦倩伊茜妍碧芬儿岚婷菊妮媛莲娟一',floor(1+400*rand()),if(rand()>0.6,0,1))),substring(md5(concat(rand(),rand(),rand(),rand())), 1, 30) from m_test_sort;
-- 16384 条记录
select count(*) from m_test_sort;
三、测试
主键忽略
-- 查找 最多的orgid
select orgid ,count(*) as ct from m_test_sort GROUP BY orgid ORDER BY ct desc limit 10;
3.1 二级索引 ix_orgid(orgid
)
3.1.1 where:orgid,order:id
-- where:orgid索引;order:id索引读
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by id limit 100;
3.1.2 where:orgid,order:cust_id
-- where:orgid索引;order:回表 cust_id虽然是唯一索引但是还是回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by cust_id limit 100 ;
3.1.3 where:orgid,order:productid
-- where:orgid索引;order:回表 productid 没有索引
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by productid limit 100 ;
3.1.4 where:orgid,order:catid
-- where:orgid索引;order:回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by catid limit 100;
3.1.5 where:orgid,order:labelid
-- where:orgid索引;order:回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=85 order by labelid limit 100;
3.2 复合索引两列 ix_labelid_catid(labelid
,catid
)
-- 查询 最多的 labelid
select labelid ,count(*) as ct from m_test_sort GROUP BY labelid ORDER BY ct desc limit 10;
3.2.1 where:labelid,order:id
-- where:labelid索引,order:索引读
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by id limit 100;
3.2.2 where:labelid,order:catid
-- where:labelid索引,order:catid 走了复合索引的第二个字段
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by catid limit 100;
3.2.3 where:labelid,order:labelid
-- where:labelid索引,order:labelid 索引读
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by labelid limit 100;
3.2.4 where:labelid,order:cust_id
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by cust_id limit 100 ;
3.2.5 where:labelid,order:productid
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by productid limit 100;
3.2.6 where:labelid,order:orgid
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by orgid limit 100;
3.3复合索引三列 ix_p1_p2_p3(p1
,p2
,p3
)
-- 查找 最多的 labelid
select p1 ,count(*) as ct from m_test_sort GROUP BY p1 ORDER BY ct desc limit 10;
3.3.1 where:p1,order:id
-- where:p1索引,order:索引读
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by id limit 100;
3.3.2 where:p1,order:p1
-- where:p1索引,order:索引读
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p1 limit 100;
3.3.3 where:p1,order:p2
-- where:p1索引,order:索引读
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p2 limit 100;
3.3.4 where:p1,order:p3
-- where:p1索引,order:索引读
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p3 limit 100;
3.3.5 where:p1,p2,order:p3
-- where:p1,p2索引,order:索引读
EXPLAIN ANALYZE select id from m_test_sort where p1=346 and p2=32 order by p3 limit 100;
3.3.6 where:p1,order:labelid
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by labelid limit 100;
3.3.7 where:p1,order:cust_id
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by cust_id limit 100 ;
3.3.8 where:p1,order:productid
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by productid limit 100;
3.3.9 where:p1,order:orgid
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by orgid limit 100;
3.3.10 where:p1,order:catid
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by catid limit 100;
3.4 关于TopN 与limit
参考:https://docs.pingcap.com/zh/tidb/stable/topn-limit-push-down
Limit 节点等价于一个排序规则为空的 TopN 节点。
SQL 中的 LIMIT 子句在 TiDB 查询计划树中对应 Limit 算子节点,ORDER BY 子句在查询计划树中对应 Sort 算子节点,此外,我们会将相邻的 Limit 和 Sort 算子组合成 TopN 算子节点,表示按某个排序规则提取记录的前 N 项。从另一方面来说,Limit 节点等价于一个排序规则为空的 TopN 节点。
和谓词下推类似,TopN(及 Limit,下同)下推将查询计划树中的 TopN 计算尽可能下推到距离数据源最近的地方,以尽早完成数据的过滤,进而显著地减少数据传输或计算的开销。
四、总结
4.1 执行计划汇总
-- 查找 最多的orgid
select orgid ,count(*) as ct from m_test_sort GROUP BY orgid ORDER BY ct desc limit 10;
-- 二级索引
-- where:orgid索引;order:id索引读
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by id limit 100 ;
-- where:orgid索引;order:回表 cust_id虽然是唯一索引但是还是回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by cust_id limit 100 ;
-- where:orgid索引;order:回表 productid 没有索引
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by productid limit 100 ;
-- where:orgid索引;order:回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by catid limit 100;
-- where:orgid索引;order:回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=85 order by labelid limit 100;
---------------
-- 复合索引 2列复合
-- 查找 最多的 labelid
select labelid ,count(*) as ct from m_test_sort GROUP BY labelid ORDER BY ct desc limit 10;
-- where:labelid索引,order:索引读
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by id limit 100;
-- where:labelid索引,order:catid 走了复合索引的第二个字段
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by catid limit 100;
-- where:labelid索引,order:labelid 索引读
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by labelid limit 100;
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by cust_id limit 100 ;
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by productid limit 100;
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by orgid limit 100;
---------------
-- 复合索引 3列复合
-- 查找 最多的 labelid
select p1 ,count(*) as ct from m_test_sort GROUP BY p1 ORDER BY ct desc limit 10;
-- where:p1索引,order:索引读
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by id limit 100;
-- where:p1索引,order:索引读
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p1 limit 100;
-- where:p1索引,order:索引读
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p2 limit 100;
-- where:p1索引,order:索引读
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p3 limit 100;
-- where:p1,p2索引,order:索引读
EXPLAIN ANALYZE select id from m_test_sort where p1=346 and p2=32 order by p3 limit 100;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by labelid limit 100;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by cust_id limit 100 ;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by productid limit 100;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by orgid limit 100;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by catid limit 100;
4.2 sql语句建议
- where 条件尽量走索引
- order by 尽量避免回表
谢谢 TiDB社区,谢谢TiDBer,后续会探索更多的TiDB相关的技术!