环境准备
新建项目后在 pom.xml 中添加依赖(API 的版本要与 HBase 的版本一致:HBase 是 2.4.11,所以 Maven 依赖的版本也是 2.4.11):
1 2 3 4 5 6 7
| <dependencies> <dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-client</artifactId> <version>2.4.11</version> </dependency> </dependencies>
|
注意:如果报错 javax.el 包不存在,这是因为它是一个测试用的依赖,不影响使用;如果介意,可以改为如下方式添加依赖:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
| <dependencies> <dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-server</artifactId> <version>2.4.11</version> <exclusions> <exclusion> <groupId>org.glassfish</groupId> <artifactId>javax.el</artifactId> </exclusion> </exclusions> </dependency> <dependency> <groupId>org.glassfish</groupId> <artifactId>javax.el</artifactId> <version>3.0.1-b06</version> </dependency> </dependencies>
|
创建连接
根据官方 API 介绍,HBase 的客户端连接由 ConnectionFactory 类来创建,用户使用完成之后需要手动关闭连接。同时连接是一个重量级的对象,推荐一个进程只使用一个连接;对 HBase 的操作通过由连接获取的两个对象 Admin 和 Table 来实现。
单线程创建连接
HBaseConnection1:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
| package com.zhang.A_TestConnection;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory;
import java.io.IOException;
/**
 * Minimal single-threaded example: opens an HBase connection, prints it and closes it.
 */
public class HBaseConnection1 {
    /**
     * Creates a connection from an explicitly configured ZooKeeper quorum and prints it.
     *
     * @param args unused
     * @throws IOException if the connection cannot be created or closed
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // ZooKeeper quorum hosts of the target cluster.
        conf.set("hbase.zookeeper.quorum", "hadoop103,hadoop104,hadoop105");

        // try-with-resources closes the connection even if the body throws.
        try (Connection connection = ConnectionFactory.createConnection(conf)) {
            System.out.println(connection);
        }
    }
}
|
多线程创建连接
使用类似单例的模式,确保整个进程只使用一个连接,并且该连接可以同时被多个线程共用,即把 HBase 连接设置为静态属性。
HBaseConnection2:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
| package com.zhang.A_TestConnection;
import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory;
import java.io.IOException;
/**
 * Holds one process-wide HBase connection that other classes share through the
 * static {@link #connection} field.
 */
public class HBaseConnection2 {
    /** Shared connection, created once when this class is initialized. */
    public static Connection connection = null;

    static {
        try {
            // No explicit Configuration is passed to the factory here.
            connection = ConnectionFactory.createConnection();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes the shared connection; does nothing when the field is {@code null}.
     *
     * @throws RuntimeException wrapping the IOException if closing fails
     */
    public static void closeConnection() {
        // The field is public and mutable, so guard against null before closing.
        if (connection == null) {
            return;
        }
        try {
            connection.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Prints the shared connection and then closes it.
     *
     * @param args unused
     * @throws IOException declared for callers; not thrown directly by this body
     */
    public static void main(String[] args) throws IOException {
        try {
            System.out.println(connection);
        } finally {
            // Release the connection even if printing fails.
            closeConnection();
        }
    }
}
|
这里不再创建 Configuration
对象并把配置信息传给连接,因为通过追踪 ConnectionFactory.createConnection()
的源码可以发现,它默认会去类路径(即项目的 resources 目录)下找 hbase-site.xml 文件,该文件就是配置文件。我们也应该将配置写在配置文件中,这样可以使连接的创建更加方便和灵活。
在resources下创建hbase-site.xml文件,并写入:
1 2 3 4 5 6 7 8
| <?xml version="1.0"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration> <property> <name>hbase.zookeeper.quorum</name> <value>hadoop103,hadoop104,hadoop105</value> </property> </configuration>
|
DDL
创建 HBaseDDL 类,添加静态方法即可作为工具类 。
直接利用HBaseConnection2文件中创建的静态连接即可:
1
| public static Connection connection = HBaseConnection2.connection;
|
DDL 操作都通过 Admin 完成,所以每个方法都要先获取 Admin:
1
| Admin admin = connection.getAdmin();
|
创建命名空间
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
|
/**
 * Creates a namespace that carries the configuration entry {@code user=ZYR}.
 *
 * @param namespace name of the namespace to create
 * @throws IOException if the Admin cannot be obtained or closed
 * @throws RuntimeException wrapping the IOException if the creation itself fails
 */
public static void createNamespace(String namespace) throws IOException {
    // try-with-resources releases the Admin even when creation fails and we rethrow.
    try (Admin admin = connection.getAdmin()) {
        NamespaceDescriptor.Builder builder = NamespaceDescriptor.create(namespace);
        builder.addConfiguration("user", "ZYR");

        try {
            admin.createNamespace(builder.build());
        } catch (IOException e) {
            // NOTE(review): every IOException lands here, not only "already exists".
            System.out.println("命名空间已经存在");
            throw new RuntimeException(e);
        }
    }
}
|
在类中的main函数中写测试代码即可,记得关闭HBase连接:
1 2 3 4 5 6 7
| // Demo: create the namespace, then always release the shared connection.
public static void main(String[] args) throws IOException {
    try {
        createNamespace("studyHBase");
    } finally {
        // Close the connection even if the call above throws.
        HBaseConnection2.closeConnection();
    }
}
|
判断表格是否存在
其中重点是表格的表达,是利用TableName
对象,但是不能直接new,要用:TableName.valueOf(namespace, tableName)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
|
/**
 * Reports whether the given table exists.
 *
 * @param namespace namespace of the table
 * @param tableName table name without the namespace
 * @return {@code true} if the table exists
 * @throws IOException if the Admin cannot be obtained or closed
 * @throws RuntimeException wrapping the IOException if the existence check fails
 */
public static boolean isTableExists(String namespace, String tableName) throws IOException {
    // try-with-resources releases the Admin on both the normal and the failure path.
    try (Admin admin = connection.getAdmin()) {
        try {
            // TableName has no public constructor use; it is built through valueOf.
            return admin.tableExists(TableName.valueOf(namespace, tableName));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
|
1 2 3 4 5 6 7 8 9
| // Demo: print whether bigdata:student exists, then always release the shared connection.
public static void main(String[] args) throws IOException {
    try {
        System.out.println(isTableExists("bigdata","student"));
    } finally {
        // Close the connection even if the check above throws.
        HBaseConnection2.closeConnection();
    }
}
|
创建表格
通过HBase Shell中的help "create"
命令,查看帮助:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
| Create a table with namespace=ns1 and table qualifier=t1 hbase> create 'ns1:t1', {NAME => 'f1', VERSIONS => 5}
Create a table with namespace=default and table qualifier=t1 hbase> create 't1', {NAME => 'f1'}, {NAME => 'f2'}, {NAME => 'f3'} hbase> # The above in shorthand would be the following: hbase> create 't1', 'f1', 'f2', 'f3' hbase> create 't1', {NAME => 'f1', VERSIONS => 1, TTL => 2592000, BLOCKCACHE => true} hbase> create 't1', {NAME => 'f1', CONFIGURATION => {'hbase.hstore.blockingStoreFiles' => '10'}} hbase> create 't1', {NAME => 'f1', IS_MOB => true, MOB_THRESHOLD => 1000000, MOB_COMPACT_PARTITION_POLICY => 'weekly'}
Table configuration options can be put at the end. Examples:
hbase> create 'ns1:t1', 'f1', SPLITS => ['10', '20', '30', '40'] hbase> create 't1', 'f1', SPLITS => ['10', '20', '30', '40'] hbase> create 't1', 'f1', SPLITS_FILE => 'splits.txt', OWNER => 'johndoe' hbase> create 't1', {NAME => 'f1', VERSIONS => 5}, METADATA => { 'mykey' => 'myvalue' } hbase> # Optionally pre-split the table into NUMREGIONS, using hbase> # SPLITALGO ("HexStringSplit", "UniformSplit" or classname) hbase> create 't1', 'f1', {NUMREGIONS => 15, SPLITALGO => 'HexStringSplit'} hbase> create 't1', 'f1', {NUMREGIONS => 15, SPLITALGO => 'HexStringSplit', REGION_REPLICATION => 2, CONFIGURATION => {'hbase.hregion.scan.loadColumnFamiliesOnDemand' => 'true'}} hbase> create 't1', 'f1', {SPLIT_ENABLED => false, MERGE_ENABLED => false} hbase> create 't1', {NAME => 'f1', DFS_REPLICATION => 1}
You can also keep around a reference to the created table:
hbase> t1 = create 't1', 'f1'
Which gives you a reference to the table named 't1', on which you can then call methods.
|
基本的信息是:命名空间名称,表格名称,列族的名称。其他信息是拓展。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
|
/**
 * Creates a table whose column families all keep up to 5 versions.
 *
 * <p>This first version performs no pre-checks: it neither verifies that at least one
 * column family was given nor that the table is absent.
 *
 * @param namespace      namespace of the new table
 * @param tableName      table name without the namespace
 * @param columnFamilies names of the column families to create
 * @throws IOException if the Admin cannot be obtained or closed
 * @throws RuntimeException wrapping the IOException if the creation itself fails
 */
public static void createTable(String namespace, String tableName, String... columnFamilies) throws IOException {
    // try-with-resources releases the Admin even when creation fails and we rethrow.
    try (Admin admin = connection.getAdmin()) {
        TableDescriptorBuilder tableBuilder =
                TableDescriptorBuilder.newBuilder(TableName.valueOf(namespace, tableName));

        for (String columnFamily : columnFamilies) {
            ColumnFamilyDescriptorBuilder familyBuilder =
                    ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(columnFamily));
            familyBuilder.setMaxVersions(5);
            tableBuilder.setColumnFamily(familyBuilder.build());
        }

        try {
            admin.createTable(tableBuilder.build());
        } catch (IOException e) {
            // NOTE(review): every IOException lands here, not only "already exists".
            System.out.println("表格已经存在了");
            throw new RuntimeException(e);
        }
    }
}
|
1 2 3 4 5 6 7 8
| // Demo: create studyHBase:student with two column families, then always release the connection.
public static void main(String[] args) throws IOException {
    try {
        createTable("studyHBase","student","info","msg");
    } finally {
        // Close the connection even if the call above throws.
        HBaseConnection2.closeConnection();
    }
}
|
通过前后两次在HBase Shell中执行的list
命令,可以验证创建表格成功:
1 2 3 4 5 6 7 8 9 10 11 12 13
| hbase:006:0> list TABLE bigdata:student 1 row(s) Took 0.0343 seconds => ["bigdata:student"] hbase:007:0> list TABLE bigdata:student studyHBase:student 2 row(s) Took 0.0309 seconds => ["bigdata:student", "studyHBase:student"]
|
通过HBase Shell中的describe "studyHBase:student"
可以查看描述信息。我们设置的两个列族,分别叫info和msg,version都设置为5:
1 2 3 4 5 6 7 8 9 10 11 12 13
| hbase:008:0> describe "studyHBase:student" Table studyHBase:student is ENABLED studyHBase:student COLUMN FAMILIES DESCRIPTION {NAME => 'info', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '5', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ ENCODING => 'NONE', COMPRESSION => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65 536', REPLICATION_SCOPE => '0'}
{NAME => 'msg', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '5', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_E NCODING => 'NONE', COMPRESSION => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '655 36', REPLICATION_SCOPE => '0'}
2 row(s)
|
表格创建的问题
最终代码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
|
/**
 * Creates a table whose column families all keep up to 5 versions.
 *
 * <p>Prints a message and returns without creating anything when no column family is
 * given or when the table already exists.
 *
 * @param namespace      namespace of the new table
 * @param tableName      table name without the namespace
 * @param columnFamilies names of the column families; at least one is required
 * @throws IOException if the Admin cannot be obtained or closed
 * @throws RuntimeException wrapping the IOException if a check or the creation fails
 */
public static void createTable(String namespace, String tableName, String... columnFamilies) throws IOException {
    // A null varargs array is treated like an empty one instead of causing an NPE.
    if (columnFamilies == null || columnFamilies.length == 0) {
        System.out.println("创建表格至少需要一个列族");
        return;
    }
    if (isTableExists(namespace, tableName)) {
        System.out.println("表格已经存在了");
        return;
    }

    // try-with-resources releases the Admin even when creation fails and we rethrow.
    try (Admin admin = connection.getAdmin()) {
        TableDescriptorBuilder tableBuilder =
                TableDescriptorBuilder.newBuilder(TableName.valueOf(namespace, tableName));

        for (String columnFamily : columnFamilies) {
            ColumnFamilyDescriptorBuilder familyBuilder =
                    ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(columnFamily));
            familyBuilder.setMaxVersions(5);
            tableBuilder.setColumnFamily(familyBuilder.build());
        }

        try {
            admin.createTable(tableBuilder.build());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
|
修改表
重点是:如果直接新建一个全新的表格描述建造者,这样修改会报错:无法找到该列族。
修改表时应该先利用 admin 获取旧的表格描述,再从中获取旧的列族描述,并基于它们创建对应的建造者;在列族描述建造者上完成修改后,再把新的列族描述交给表格描述建造者,完成修改。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
|
/**
 * Changes the maximum number of versions kept by one column family of an existing table.
 *
 * <p>Prints a message and returns without modifying anything when the table or the
 * column family does not exist.
 *
 * @param namespace    namespace of the table
 * @param tableName    table name without the namespace
 * @param columnFamily name of the column family to modify
 * @param version      new maximum number of versions
 * @throws IOException if the Admin cannot be obtained or closed
 * @throws RuntimeException wrapping the IOException if reading or modifying the table fails
 */
public static void modifyTable(String namespace, String tableName, String columnFamily, int version) throws IOException {
    if (!isTableExists(namespace, tableName)) {
        System.out.println("表格不存在");
        return;
    }

    // try-with-resources releases the Admin even when the modification fails.
    try (Admin admin = connection.getAdmin()) {
        try {
            // Start from the existing descriptors so untouched settings are preserved.
            TableDescriptor descriptor = admin.getDescriptor(TableName.valueOf(namespace, tableName));
            ColumnFamilyDescriptor oldFamily = descriptor.getColumnFamily(Bytes.toBytes(columnFamily));
            if (oldFamily == null) {
                // Without this guard newBuilder(null) would fail with an NPE.
                System.out.println("列族不存在");
                return;
            }

            ColumnFamilyDescriptorBuilder familyBuilder = ColumnFamilyDescriptorBuilder.newBuilder(oldFamily);
            familyBuilder.setMaxVersions(version);

            TableDescriptorBuilder tableBuilder = TableDescriptorBuilder.newBuilder(descriptor);
            tableBuilder.modifyColumnFamily(familyBuilder.build());
            admin.modifyTable(tableBuilder.build());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
|
1 2 3 4 5 6 7 8
| // Demo: set VERSIONS of studyHBase:student/info to 10, then always release the connection.
public static void main(String[] args) throws IOException {
    try {
        modifyTable("studyHBase","student","info", 10);
    } finally {
        // Close the connection even if the call above throws.
        HBaseConnection2.closeConnection();
    }
}
|
去HBase Shell中检查:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| hbase:008:0> describe "studyHBase:student" Table studyHBase:student is ENABLED studyHBase:student COLUMN FAMILIES DESCRIPTION {NAME => 'info', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '5', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ ENCODING => 'NONE', COMPRESSION => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65 536', REPLICATION_SCOPE => '0'}
{NAME => 'msg', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '5', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_E NCODING => 'NONE', COMPRESSION => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '655 36', REPLICATION_SCOPE => '0'}
2 row(s) Quota is disabled Took 0.1055 seconds hbase:009:0> describe "studyHBase:student" Table studyHBase:student is ENABLED studyHBase:student COLUMN FAMILIES DESCRIPTION {NAME => 'info', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '10', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK _ENCODING => 'NONE', COMPRESSION => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '6 5536', REPLICATION_SCOPE => '0'}
{NAME => 'msg', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '5', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_E NCODING => 'NONE', COMPRESSION => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '655 36', REPLICATION_SCOPE => '0'}
2 row(s) Quota is disabled Took 0.0701 seconds
|
可以通过两次查询看出来info列族的VERSIONS确实从5变成了10。
删除表
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
|
/**
 * Disables and deletes a table.
 *
 * @param namespace namespace of the table
 * @param tableName table name without the namespace
 * @return {@code true} if the table was deleted, {@code false} if it did not exist
 * @throws IOException if the Admin cannot be obtained or closed
 * @throws RuntimeException wrapping the IOException if disabling or deleting fails
 */
public static boolean deleteTable(String namespace, String tableName) throws IOException {
    if (!isTableExists(namespace, tableName)) {
        System.out.println("表格不存在,无法删除");
        return false;
    }

    // try-with-resources releases the Admin even when the deletion fails.
    try (Admin admin = connection.getAdmin()) {
        try {
            TableName target = TableName.valueOf(namespace, tableName);
            // A table must be disabled before deletion; skip the step if it already is,
            // because disabling a disabled table is rejected.
            if (admin.isTableEnabled(target)) {
                admin.disableTable(target);
            }
            admin.deleteTable(target);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
    return true;
}
|
1 2 3 4 5 6 7 8
| // Demo: delete studyHBase:student, then always release the shared connection.
public static void main(String[] args) throws IOException {
    try {
        deleteTable("studyHBase","student");
    } finally {
        // Close the connection even if the call above throws.
        HBaseConnection2.closeConnection();
    }
}
|
操作DDL的完整代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
| package com.zhang.B_TestDDL;
import com.zhang.A_TestConnection.HBaseConnection2; import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Static helpers for HBase DDL operations (namespaces and tables), all executed through
 * an {@code Admin} obtained from the shared connection.
 *
 * <p>Each method obtains its own Admin and releases it with try-with-resources, so the
 * Admin is closed even when the operation fails.
 */
public class HBaseDDL1 {
    /** Shared connection taken from {@code HBaseConnection2}. */
    public static Connection connection = HBaseConnection2.connection;

    /**
     * Creates a namespace that carries the configuration entry {@code user=ZYR}.
     *
     * @param namespace name of the namespace to create
     * @throws IOException if the Admin cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if the creation itself fails
     */
    public static void createNamespace(String namespace) throws IOException {
        try (Admin admin = connection.getAdmin()) {
            NamespaceDescriptor.Builder builder = NamespaceDescriptor.create(namespace);
            builder.addConfiguration("user", "ZYR");

            try {
                admin.createNamespace(builder.build());
            } catch (IOException e) {
                // NOTE(review): every IOException lands here, not only "already exists".
                System.out.println("命名空间已经存在");
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Reports whether the given table exists.
     *
     * @param namespace namespace of the table
     * @param tableName table name without the namespace
     * @return {@code true} if the table exists
     * @throws IOException if the Admin cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if the existence check fails
     */
    public static boolean isTableExists(String namespace, String tableName) throws IOException {
        try (Admin admin = connection.getAdmin()) {
            try {
                return admin.tableExists(TableName.valueOf(namespace, tableName));
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Creates a table whose column families all keep up to 5 versions.
     *
     * <p>Prints a message and returns without creating anything when no column family
     * is given or when the table already exists.
     *
     * @param namespace      namespace of the new table
     * @param tableName      table name without the namespace
     * @param columnFamilies names of the column families; at least one is required
     * @throws IOException if the Admin cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if a check or the creation fails
     */
    public static void createTable(String namespace, String tableName, String... columnFamilies) throws IOException {
        // A null varargs array is treated like an empty one instead of causing an NPE.
        if (columnFamilies == null || columnFamilies.length == 0) {
            System.out.println("创建表格至少需要一个列族");
            return;
        }
        if (isTableExists(namespace, tableName)) {
            System.out.println("表格已经存在了");
            return;
        }

        try (Admin admin = connection.getAdmin()) {
            TableDescriptorBuilder tableBuilder =
                    TableDescriptorBuilder.newBuilder(TableName.valueOf(namespace, tableName));

            for (String columnFamily : columnFamilies) {
                ColumnFamilyDescriptorBuilder familyBuilder =
                        ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(columnFamily));
                familyBuilder.setMaxVersions(5);
                tableBuilder.setColumnFamily(familyBuilder.build());
            }

            try {
                admin.createTable(tableBuilder.build());
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Changes the maximum number of versions kept by one column family of an existing table.
     *
     * <p>Prints a message and returns without modifying anything when the table or the
     * column family does not exist.
     *
     * @param namespace    namespace of the table
     * @param tableName    table name without the namespace
     * @param columnFamily name of the column family to modify
     * @param version      new maximum number of versions
     * @throws IOException if the Admin cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if reading or modifying the table fails
     */
    public static void modifyTable(String namespace, String tableName, String columnFamily, int version) throws IOException {
        if (!isTableExists(namespace, tableName)) {
            System.out.println("表格不存在");
            return;
        }

        try (Admin admin = connection.getAdmin()) {
            try {
                // Start from the existing descriptors so untouched settings are preserved.
                TableDescriptor descriptor = admin.getDescriptor(TableName.valueOf(namespace, tableName));
                ColumnFamilyDescriptor oldFamily = descriptor.getColumnFamily(Bytes.toBytes(columnFamily));
                if (oldFamily == null) {
                    // Without this guard newBuilder(null) would fail with an NPE.
                    System.out.println("列族不存在");
                    return;
                }

                ColumnFamilyDescriptorBuilder familyBuilder = ColumnFamilyDescriptorBuilder.newBuilder(oldFamily);
                familyBuilder.setMaxVersions(version);

                TableDescriptorBuilder tableBuilder = TableDescriptorBuilder.newBuilder(descriptor);
                tableBuilder.modifyColumnFamily(familyBuilder.build());
                admin.modifyTable(tableBuilder.build());
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Disables and deletes a table.
     *
     * @param namespace namespace of the table
     * @param tableName table name without the namespace
     * @return {@code true} if the table was deleted, {@code false} if it did not exist
     * @throws IOException if the Admin cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if disabling or deleting fails
     */
    public static boolean deleteTable(String namespace, String tableName) throws IOException {
        if (!isTableExists(namespace, tableName)) {
            System.out.println("表格不存在,无法删除");
            return false;
        }

        try (Admin admin = connection.getAdmin()) {
            try {
                TableName target = TableName.valueOf(namespace, tableName);
                // A table must be disabled before deletion; skip the step if it already
                // is, because disabling a disabled table is rejected.
                if (admin.isTableEnabled(target)) {
                    admin.disableTable(target);
                }
                admin.deleteTable(target);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        return true;
    }

    /**
     * Demo entry point: deletes {@code studyHBase:student} and releases the connection.
     *
     * @param args unused
     * @throws IOException if an Admin cannot be obtained or closed
     */
    public static void main(String[] args) throws IOException {
        try {
            deleteTable("studyHBase","student");
        } finally {
            // Close the connection even if the call above throws.
            HBaseConnection2.closeConnection();
        }
    }
}
|
DML
DML的相关操作是利用connection获取Table执行的:
1
| Table table = connection.getTable(TableName.valueOf(namespace, tableName));
|
插入数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
|
/**
 * Writes one cell value into a table.
 *
 * <p>Prints a message and returns without writing when the table does not exist.
 *
 * @param namespace    namespace of the table
 * @param tableName    table name without the namespace
 * @param rowKey       row key of the cell
 * @param columnFamily column family of the cell
 * @param column       column qualifier of the cell
 * @param value        value to store
 * @throws IOException if the Table cannot be obtained or closed
 * @throws RuntimeException wrapping the IOException if the write fails
 */
public static void putCell(String namespace, String tableName, String rowKey, String columnFamily, String column, String value) throws IOException {
    if (!HBaseDDL1.isTableExists(namespace, tableName)) {
        System.out.println("表格不存在");
        return;
    }

    // try-with-resources releases the Table even when the write fails and we rethrow.
    try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
        Put put = new Put(Bytes.toBytes(rowKey));
        put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));

        try {
            table.put(put);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
|
1 2 3 4 5 6 7
| // Demo: write info:name=zhangsan into row 2001, then always release the shared connection.
public static void main(String[] args) throws IOException {
    try {
        putCell("studyHBase","student","2001","info","name","zhangsan");
    } finally {
        // Close the connection even if the call above throws.
        HBaseConnection2.closeConnection();
    }
}
|
测试结果:
1 2 3 4 5 6 7 8 9
| hbase:001:0> scan "studyHBase:student" ROW COLUMN+CELL 0 row(s) Took 0.2841 seconds hbase:002:0> scan "studyHBase:student" ROW COLUMN+CELL 2001 column=info:name, timestamp=2023-03-24T08:21:22.454, value=zhangsan 1 row(s) Took 0.0464 seconds
|
为了后续读取数据时的操作演示效果更加明显,可以再自行添加一些数据。
读取数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
|
/**
 * Reads all versions of one column of one row and prints each value on its own line.
 *
 * <p>Prints nothing when the row or column holds no data.
 *
 * @param namespace    namespace of the table
 * @param tableName    table name without the namespace
 * @param rowKey       row key to read
 * @param columnFamily column family to read
 * @param column       column qualifier to read
 * @throws IOException if the Table cannot be obtained or closed
 * @throws RuntimeException wrapping the IOException if the read fails
 */
public static void getCells(String namespace, String tableName, String rowKey, String columnFamily, String column) throws IOException {
    // try-with-resources releases the Table even when the read fails and we rethrow.
    try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
        Get get = new Get(Bytes.toBytes(rowKey));
        get.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column));
        get.readAllVersions();

        try {
            Result result = table.get(get);
            Cell[] cells = result.rawCells();
            // rawCells() may be null when nothing matched; iterating it would throw an NPE.
            if (cells == null) {
                return;
            }
            for (Cell cell : cells) {
                // Bytes.toString decodes as UTF-8, matching Bytes.toBytes on the write side.
                System.out.println(Bytes.toString(CellUtil.cloneValue(cell)));
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
|
1 2 3 4 5 6 7 8
| // Demo: print all versions of info:name in row 2001, then always release the connection.
public static void main(String[] args) throws IOException {
    try {
        getCells("studyHBase","student","2001","info","name");
    } finally {
        // Close the connection even if the call above throws.
        HBaseConnection2.closeConnection();
    }
}
|
扫描数据
按行读取数据的效率太低,一次一般只能读取一行的数据,但实际开发中可能需要读取数十亿行的数据,所以不能只靠简单的读取。要一次读取大量的行,就要使用扫描数据的 API。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
|
/**
 * Scans the rows from {@code startRow} up to {@code stopRow} and prints every cell as
 * {@code row-qualifier-family-value}, tab-separated, one row per line.
 *
 * @param namespace namespace of the table
 * @param tableName table name without the namespace
 * @param startRow  first row key of the scan
 * @param stopRow   row key at which the scan stops
 * @throws IOException if the Table cannot be obtained or closed
 * @throws RuntimeException wrapping the IOException if the scanner cannot be opened
 */
public static void scanRaws(String namespace, String tableName, String startRow, String stopRow) throws IOException {
    // try-with-resources releases the Table even when the scan fails.
    try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
        Scan scan = new Scan();
        scan.withStartRow(Bytes.toBytes(startRow));
        scan.withStopRow(Bytes.toBytes(stopRow));

        // The scanner must be closed as well; the original never closed it.
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                for (Cell cell : result.rawCells()) {
                    // Bytes.toString decodes as UTF-8, matching Bytes.toBytes on the write side.
                    System.out.print(Bytes.toString(CellUtil.cloneRow(cell)) + "-"
                            + Bytes.toString(CellUtil.cloneQualifier(cell)) + "-"
                            + Bytes.toString(CellUtil.cloneFamily(cell)) + '-'
                            + Bytes.toString(CellUtil.cloneValue(cell)) + "\t");
                }
                System.out.println();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
|
1 2 3 4 5 6 7 8
| // Demo: scan rows 2001 up to 2003 of studyHBase:student, then always release the connection.
public static void main(String[] args) throws IOException {
    try {
        scanRaws("studyHBase", "student", "2001", "2003");
    } finally {
        // Close the connection even if the call above throws.
        HBaseConnection2.closeConnection();
    }
}
|
输出结果:
1 2
| 2001-name-info-zhangsan 2002-name-info-lisi
|
因为这里是默认不包含终止行,所以只有两条数据。
实际扫描全表内容:
1 2 3 4 5 6 7
| hbase:003:0> scan "studyHBase:student" ROW COLUMN+CELL 2001 column=info:name, timestamp=2023-03-24T08:21:22.454, value=zhangsan 2002 column=info:name, timestamp=2023-03-25T04:07:01.510, value=lisi 2003 column=info:name, timestamp=2023-03-25T04:07:01.521, value=wangwu 3 row(s) Took 0.0390 seconds
|
带过滤器的扫描
过滤器有两种:
单列过滤、整行过滤
也就是添加条件,得到满足一定条件的结果。
但是注意,由于HBase的性质,是允许有缺失数据的,所以有些数据不一定能过滤掉。
举个例子:
假设按照 info 列族下的 name 列进行过滤匹配,但是有一行数据只有其他属性,name 列没有数据,则过滤器不会对这一行进行匹配,这一行会被直接保留在结果中。
对于单列过滤来说无法察觉,但是对于整行过滤就可以看到效果。例如输出一行内容,但是这一行内容中并没有对应的过滤列的值。
带单列过滤的扫描
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
|
/**
 * Scans a row range with a {@code ColumnValueFilter} that keeps cells of the given
 * column whose value equals {@code value}, and prints every returned cell as
 * {@code row-qualifier-family-value}, tab-separated, one row per line.
 *
 * @param namespace    namespace of the table
 * @param tableName    table name without the namespace
 * @param startRow     first row key of the scan
 * @param stopRow      row key at which the scan stops
 * @param columnFamily column family of the filtered column
 * @param columnName   qualifier of the filtered column
 * @param value        value the column must equal
 * @throws IOException if the Table cannot be obtained or closed
 * @throws RuntimeException wrapping the IOException if the scanner cannot be opened
 */
public static void filterScanRows(String namespace, String tableName, String startRow, String stopRow, String columnFamily, String columnName, String value) throws IOException {
    // try-with-resources releases the Table even when the scan fails.
    try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
        Scan scan = new Scan();
        scan.withStartRow(Bytes.toBytes(startRow));
        scan.withStopRow(Bytes.toBytes(stopRow));

        ColumnValueFilter columnValueFilter = new ColumnValueFilter(
                Bytes.toBytes(columnFamily),
                Bytes.toBytes(columnName),
                CompareOperator.EQUAL,
                Bytes.toBytes(value));
        // A FilterList is used so that further filters can be added later.
        FilterList filterList = new FilterList();
        filterList.addFilter(columnValueFilter);
        scan.setFilter(filterList);

        // The scanner must be closed as well; the original never closed it.
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                for (Cell cell : result.rawCells()) {
                    // Bytes.toString decodes as UTF-8, matching Bytes.toBytes on the write side.
                    System.out.print(Bytes.toString(CellUtil.cloneRow(cell)) + "-"
                            + Bytes.toString(CellUtil.cloneQualifier(cell)) + "-"
                            + Bytes.toString(CellUtil.cloneFamily(cell)) + '-'
                            + Bytes.toString(CellUtil.cloneValue(cell)) + "\t");
                }
                System.out.println();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
|
1 2 3 4 5 6 7
| // Demo: scan rows 2001 up to 2004 filtered on info:name == zhangsan, then always release the connection.
public static void main(String[] args) throws IOException {
    try {
        filterScanRows("studyHBase", "student", "2001", "2004", "info", "name", "zhangsan");
    } finally {
        // Close the connection even if the call above throws.
        HBaseConnection2.closeConnection();
    }
}
|
输出:
单列过滤只输出匹配到的那一个单元格,而这一行在该列族下其实还有其他属性。
带整行过滤的扫描
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
/**
 * Scans a row range with a {@code SingleColumnValueFilter} on the given column and
 * prints every cell of each returned row as {@code row-qualifier-family-value},
 * tab-separated, one row per line.
 *
 * @param namespace    namespace of the table
 * @param tableName    table name without the namespace
 * @param startRow     first row key of the scan
 * @param stopRow      row key at which the scan stops
 * @param columnFamily column family of the filtered column
 * @param columnName   qualifier of the filtered column
 * @param value        value the column must equal
 * @throws IOException if the Table cannot be obtained or closed
 * @throws RuntimeException wrapping the IOException if the scanner cannot be opened
 */
public static void filterScanRows(String namespace, String tableName, String startRow, String stopRow, String columnFamily, String columnName, String value) throws IOException {
    // try-with-resources releases the Table even when the scan fails.
    try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
        Scan scan = new Scan();
        scan.withStartRow(Bytes.toBytes(startRow));
        scan.withStopRow(Bytes.toBytes(stopRow));

        // Whole-row filter; the unused ColumnValueFilter local of the original is dropped.
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                Bytes.toBytes(columnFamily),
                Bytes.toBytes(columnName),
                CompareOperator.EQUAL,
                Bytes.toBytes(value));
        FilterList filterList = new FilterList();
        filterList.addFilter(singleColumnValueFilter);
        scan.setFilter(filterList);

        // The scanner must be closed as well; the original never closed it.
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                for (Cell cell : result.rawCells()) {
                    // Bytes.toString decodes as UTF-8, matching Bytes.toBytes on the write side.
                    System.out.print(Bytes.toString(CellUtil.cloneRow(cell)) + "-"
                            + Bytes.toString(CellUtil.cloneQualifier(cell)) + "-"
                            + Bytes.toString(CellUtil.cloneFamily(cell)) + '-'
                            + Bytes.toString(CellUtil.cloneValue(cell)) + "\t");
                }
                System.out.println();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
|
1 2 3 4 5 6 7
| // Demo: scan rows 2001 up to 2004 keeping whole rows where info:name == zhangsan, then always release the connection.
public static void main(String[] args) throws IOException {
    try {
        filterScanRows("studyHBase", "student", "2001", "2004", "info", "name", "zhangsan");
    } finally {
        // Close the connection even if the call above throws.
        HBaseConnection2.closeConnection();
    }
}
|
输出:
1
| 2001-age-info-11 2001-name-info-zhangsan
|
删除数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
|
/**
 * Deletes one column of one row.
 *
 * @param namespace    namespace of the table
 * @param tableName    table name without the namespace
 * @param rowKey       row key of the data to delete
 * @param columnFamily column family of the column to delete
 * @param columnName   qualifier of the column to delete
 * @throws IOException if the Table cannot be obtained or closed
 * @throws RuntimeException wrapping the IOException if the delete fails
 */
public static void deleteColumn(String namespace, String tableName, String rowKey, String columnFamily, String columnName) throws IOException {
    // try-with-resources releases the Table even when the delete fails and we rethrow.
    try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
        Delete delete = new Delete(Bytes.toBytes(rowKey));
        // addColumns (plural) targets all versions of the column, not just the latest.
        delete.addColumns(Bytes.toBytes(columnFamily), Bytes.toBytes(columnName));

        try {
            table.delete(delete);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
|
1 2 3 4 5 6 7
| // Demo: delete info:name from row 2001, then always release the shared connection.
public static void main(String[] args) throws IOException {
    try {
        deleteColumn("studyHBase", "student", "2001", "info", "name");
    } finally {
        // Close the connection even if the call above throws.
        HBaseConnection2.closeConnection();
    }
}
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
| hbase:003:0> scan "studyHBase:student" ROW COLUMN+CELL 2001 column=info:age, timestamp=2023-03-25T04:34:00.445, value=11 2001 column=info:name, timestamp=2023-03-24T08:21:22.454, value=zhangsan 2002 column=info:name, timestamp=2023-03-25T04:07:01.510, value=lisi 2003 column=info:name, timestamp=2023-03-25T04:07:01.521, value=wangwu 3 row(s) Took 0.0390 seconds hbase:004:0> scan "studyHBase:student" ROW COLUMN+CELL 2001 column=info:age, timestamp=2023-03-25T04:34:00.445, value=11 2002 column=info:name, timestamp=2023-03-25T04:07:01.510, value=lisi 2003 column=info:name, timestamp=2023-03-25T04:07:01.521, value=wangwu 3 row(s) Took 0.0190 seconds
|
可以看到实际上删除的是info列族,name列中rowKey行的数据。它不会删除其他数据,不会删除rowKey行的其他列,例如info列族下的age列的信息就没有删除。
操作DML的完整代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
| package com.zhang.C_TestDML;
import com.zhang.A_TestConnection.HBaseConnection2; import com.zhang.B_TestDDL.HBaseDDL1; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.CompareOperator; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.filter.ColumnValueFilter; import org.apache.hadoop.hbase.filter.FilterList; import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Static helpers for HBase DML operations (put, get, scan, filtered scan, delete), all
 * executed through a {@code Table} obtained from the shared connection.
 *
 * <p>Each method obtains its own Table and releases it, together with any scanner it
 * opens, with try-with-resources, so they are closed even when the operation fails.
 */
public class HBaseDML1 {
    /** Shared connection taken from {@code HBaseConnection2}. */
    public static Connection connection = HBaseConnection2.connection;

    /**
     * Writes one cell value into a table.
     *
     * <p>Prints a message and returns without writing when the table does not exist.
     *
     * @param namespace    namespace of the table
     * @param tableName    table name without the namespace
     * @param rowKey       row key of the cell
     * @param columnFamily column family of the cell
     * @param column       column qualifier of the cell
     * @param value        value to store
     * @throws IOException if the Table cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if the write fails
     */
    public static void putCell(String namespace, String tableName, String rowKey, String columnFamily, String column, String value) throws IOException {
        if (!HBaseDDL1.isTableExists(namespace, tableName)) {
            System.out.println("表格不存在");
            return;
        }

        try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
            Put put = new Put(Bytes.toBytes(rowKey));
            put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));

            try {
                table.put(put);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Reads all versions of one column of one row and prints each value on its own line.
     *
     * <p>Prints nothing when the row or column holds no data.
     *
     * @param namespace    namespace of the table
     * @param tableName    table name without the namespace
     * @param rowKey       row key to read
     * @param columnFamily column family to read
     * @param column       column qualifier to read
     * @throws IOException if the Table cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if the read fails
     */
    public static void getCells(String namespace, String tableName, String rowKey, String columnFamily, String column) throws IOException {
        try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
            Get get = new Get(Bytes.toBytes(rowKey));
            get.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column));
            get.readAllVersions();

            try {
                Result result = table.get(get);
                Cell[] cells = result.rawCells();
                // rawCells() may be null when nothing matched; iterating it would throw an NPE.
                if (cells == null) {
                    return;
                }
                for (Cell cell : cells) {
                    // Bytes.toString decodes as UTF-8, matching Bytes.toBytes on the write side.
                    System.out.println(Bytes.toString(CellUtil.cloneValue(cell)));
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Scans the rows from {@code startRow} up to {@code stopRow} and prints every cell
     * as {@code row-qualifier-family-value}, tab-separated, one row per line.
     *
     * @param namespace namespace of the table
     * @param tableName table name without the namespace
     * @param startRow  first row key of the scan
     * @param stopRow   row key at which the scan stops
     * @throws IOException if the Table cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if the scanner cannot be opened
     */
    public static void scanRows(String namespace, String tableName, String startRow, String stopRow) throws IOException {
        try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
            Scan scan = new Scan();
            scan.withStartRow(Bytes.toBytes(startRow));
            scan.withStopRow(Bytes.toBytes(stopRow));

            // The scanner must be closed as well; the original never closed it.
            try (ResultScanner scanner = table.getScanner(scan)) {
                printRows(scanner);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Scans a row range with a {@code SingleColumnValueFilter} on the given column and
     * prints every cell of each returned row as {@code row-qualifier-family-value},
     * tab-separated, one row per line.
     *
     * @param namespace    namespace of the table
     * @param tableName    table name without the namespace
     * @param startRow     first row key of the scan
     * @param stopRow      row key at which the scan stops
     * @param columnFamily column family of the filtered column
     * @param columnName   qualifier of the filtered column
     * @param value        value the column must equal
     * @throws IOException if the Table cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if the scanner cannot be opened
     */
    public static void filterScanRows(String namespace, String tableName, String startRow, String stopRow, String columnFamily, String columnName, String value) throws IOException {
        try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
            Scan scan = new Scan();
            scan.withStartRow(Bytes.toBytes(startRow));
            scan.withStopRow(Bytes.toBytes(stopRow));

            // Whole-row filter; the unused ColumnValueFilter local of the original is dropped.
            SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                    Bytes.toBytes(columnFamily),
                    Bytes.toBytes(columnName),
                    CompareOperator.EQUAL,
                    Bytes.toBytes(value));
            FilterList filterList = new FilterList();
            filterList.addFilter(singleColumnValueFilter);
            scan.setFilter(filterList);

            // The scanner must be closed as well; the original never closed it.
            try (ResultScanner scanner = table.getScanner(scan)) {
                printRows(scanner);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Deletes one column of one row.
     *
     * @param namespace    namespace of the table
     * @param tableName    table name without the namespace
     * @param rowKey       row key of the data to delete
     * @param columnFamily column family of the column to delete
     * @param columnName   qualifier of the column to delete
     * @throws IOException if the Table cannot be obtained or closed
     * @throws RuntimeException wrapping the IOException if the delete fails
     */
    public static void deleteColumn(String namespace, String tableName, String rowKey, String columnFamily, String columnName) throws IOException {
        try (Table table = connection.getTable(TableName.valueOf(namespace, tableName))) {
            Delete delete = new Delete(Bytes.toBytes(rowKey));
            // addColumns (plural) targets all versions of the column, not just the latest.
            delete.addColumns(Bytes.toBytes(columnFamily), Bytes.toBytes(columnName));

            try {
                table.delete(delete);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Demo entry point: deletes {@code info:name} from row 2001 and releases the connection.
     *
     * @param args unused
     * @throws IOException if the Table cannot be obtained or closed
     */
    public static void main(String[] args) throws IOException {
        try {
            deleteColumn("studyHBase", "student", "2001", "info", "name");
        } finally {
            // Close the connection even if the call above throws.
            HBaseConnection2.closeConnection();
        }
    }

    /** Prints every cell of every scanned row as row-qualifier-family-value, one row per line. */
    private static void printRows(ResultScanner scanner) {
        for (Result result : scanner) {
            for (Cell cell : result.rawCells()) {
                // Bytes.toString decodes as UTF-8, matching Bytes.toBytes on the write side.
                System.out.print(Bytes.toString(CellUtil.cloneRow(cell)) + "-"
                        + Bytes.toString(CellUtil.cloneQualifier(cell)) + "-"
                        + Bytes.toString(CellUtil.cloneFamily(cell)) + '-'
                        + Bytes.toString(CellUtil.cloneValue(cell)) + "\t");
            }
            System.out.println();
        }
    }
}
|
总结
HBase API是很底层的东西,调用起来比HBase Shell更加麻烦,但可查看源码,很容易理解。开放性也很好,HBase Shell能干的事情它也能干。
具体要写好,用好HBase API首先就需要先了解HBase的底层数据结构,也要了解HBase Shell的一些基本操作。