LOCATION
LOCATION
adl://
abfs://
abfss://
ALTERTABLE
LOCATION
CREATETABLEASSELECT
CREATETABLE
<property>
<name>dfs.adls.oauth2.access.token.provider.type</name>
<value>ClientCredential</value>
</property>
<property>
<name>dfs.adls.oauth2.client.id</name>
<value>your_client_id</value>
</property>
<property>
<name>dfs.adls.oauth2.credential</name>
<value>your_client_secret</value>
</property>
<property>
<name>dfs.adls.oauth2.refresh.url</name>
<value>https://login.windows.net/your_azure_tenant_id/oauth2/token</value>
</property>
<property>
<name>fs.azure.account.auth.type</name>
<value>OAuth</value>
</property>
<property>
<name>fs.azure.account.oauth.provider.type</name>
<value>org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider</value>
</property>
<property>
<name>fs.azure.account.oauth2.client.id</name>
<value>your_client_id</value>
</property>
<property>
<name>fs.azure.account.oauth2.client.secret</name>
<value>your_client_secret</value>
</property>
<property>
<name>fs.azure.account.oauth2.client.endpoint</name>
<value>https://login.microsoftonline.com/your_azure_tenant_id/oauth2/token</value>
</property>
笔记:
CREATETABLE
INSERT
LOADDATA
CREATETABLEASSELECT
CREATETABLE
ALTERTABLE
adl://
abfs://
abfss://
LOCATION
REFRESH
REFRESHtable_name
LOCATION
LOCATION
CREATETABLEALTERTABLELOCATION
adl://account.azuredatalakestore.net/path/file
abfs://container@account.dfs.core.windows.net/path/file
abfss://container@account.dfs.core.windows.net/path/file
container
account
笔记:
abfs://
abfss://
fs.azure.always.use.https=false
abfs://
abfss://
LOCATION
LOCATION
/
LOCATION
adl://
impalademo
[localhost:21000] > create database db_on_hdfs;
[localhost:21000] > use db_on_hdfs;
[localhost:21000] > create table mostly_on_hdfs (x int) partitioned by (year int);
[localhost:21000] > alter table mostly_on_hdfs add partition (year=2013);
[localhost:21000] > alter table mostly_on_hdfs add partition (year=2014);
[localhost:21000] > alter table mostly_on_hdfs add partition (year=2015)
> location 'adl://impalademo.azuredatalakestore.net/dir1/dir2/dir3/t1';
LOCATION
LOCATION
LOCATION
adl://
month=1
[localhost:21000] > create database db_on_adls location 'adl://impalademo.azuredatalakestore.net/dir1/dir2/dir3';
[localhost:21000] > use db_on_adls;
[localhost:21000] > create table partitioned_on_adls (x int) partitioned by (year int);
[localhost:21000] > alter table partitioned_on_adls add partition (year=2013);
[localhost:21000] > alter table partitioned_on_adls add partition (year=2014);
[localhost:21000] > alter table partitioned_on_adls add partition (year=2015);
[localhost:21000] > ! hadoop fs -ls adl://impalademo.azuredatalakestore.net/dir1/dir2/dir3 --recursive;
2015-03-17 13:56:34 0 dir1/dir2/dir3/
2015-03-17 16:43:28 0 dir1/dir2/dir3/partitioned_on_adls/
2015-03-17 16:43:49 0 dir1/dir2/dir3/partitioned_on_adls/year=2013/
2015-03-17 16:43:53 0 dir1/dir2/dir3/partitioned_on_adls/year=2014/
2015-03-17 16:43:58 0 dir1/dir2/dir3/partitioned_on_adls/year=2015/
[localhost:21000] > create table partitioned_multiple_keys (x int)
> partitioned by (year smallint, month tinyint, day tinyint);
[localhost:21000] > alter table partitioned_multiple_keys
> add partition (year=2015,month=1,day=1);
[localhost:21000] > alter table partitioned_multiple_keys
> add partition (year=2015,month=1,day=31);
[localhost:21000] > alter table partitioned_multiple_keys
> add partition (year=2015,month=2,day=28);
[localhost:21000] > ! hadoop fs -ls adl://impalademo.azuredatalakestore.net/dir1/dir2/dir3 --recursive;
2015-03-17 13:56:34 0 dir1/dir2/dir3/
2015-03-17 16:47:13 0 dir1/dir2/dir3/partitioned_multiple_keys/
2015-03-17 16:47:44 0 dir1/dir2/dir3/partitioned_multiple_keys/year=2015/month=1/day=1/
2015-03-17 16:47:50 0 dir1/dir2/dir3/partitioned_multiple_keys/year=2015/month=1/day=31/
2015-03-17 16:47:57 0 dir1/dir2/dir3/partitioned_multiple_keys/year=2015/month=2/day=28/
2015-03-17 16:43:28 0 dir1/dir2/dir3/partitioned_on_adls/
2015-03-17 16:43:49 0 dir1/dir2/dir3/partitioned_on_adls/year=2013/
2015-03-17 16:43:53 0 dir1/dir2/dir3/partitioned_on_adls/year=2014/
2015-03-17 16:43:58 0 dir1/dir2/dir3/partitioned_on_adls/year=2015/
CREATEDATABASE
CREATETABLE
CREATE
mkdir-p
ALTERTABLE
LOCATION
LOCATION
CREATETABLE
CREATEEXTERNALTABLE
CREATEEXTERNALTABLE
CREATETABLE
adl://
LOCATION
REFRESH
[localhost:21000] > create table usa_cities_adls like usa_cities location 'adl://impalademo.azuredatalakestore.net/usa_cities';
[localhost:21000] > desc usa_cities_adls;
+-------+----------+---------+
| name | type | comment |
+-------+----------+---------+
| id | smallint | |
| city | string | |
| state | string | |
+-------+----------+---------+
-- Now from a web browser, upload the same data file(s) to ADLS as in the HDFS table,
-- under the relevant store and path. If you already have the data in ADLS, you would
-- point the table LOCATION at an existing path.
[localhost:21000] > refresh usa_cities_adls;
[localhost:21000] > select count(*) from usa_cities_adls;
+----------+
| count(*) |
+----------+
| 289 |
+----------+
[localhost:21000] > select distinct state from sample_data_adls limit 5;
+----------------------+
| state |
+----------------------+
| Louisiana |
| Minnesota |
| Georgia |
| Alaska |
| Ohio |
+----------------------+
[localhost:21000] > desc formatted usa_cities_adls;
+------------------------------+----------------------------------------------------+---------+
| name | type | comment |
+------------------------------+----------------------------------------------------+---------+
| # col_name | data_type | comment |
| | NULL | NULL |
| id | smallint | NULL |
| city | string | NULL |
| state | string | NULL |
| | NULL | NULL |
| # Detailed Table Information | NULL | NULL |
| Database: | adls_testing | NULL |
| Owner: | jrussell | NULL |
| CreateTime: | Mon Mar 16 11:36:25 PDT 2017 | NULL |
| LastAccessTime: | UNKNOWN | NULL |
| Protect Mode: | None | NULL |
| Retention: | 0 | NULL |
| Location: | adl://impalademo.azuredatalakestore.net/usa_cities | NULL |
| Table Type: | MANAGED_TABLE | NULL |
...
+------------------------------+----------------------------------------------------+---------+
sample_data
impalademo
REFRESH
[localhost:21000] > create table sample_data_adls
> (id int, id bigint, val int, zerofill string,
> name string, assertion boolean, city string, state string)
> stored as parquet location 'adl://impalademo.azuredatalakestore.net/sample_data';
[localhost:21000] > select count(*) from sample_data_adls;
+----------+
| count(*) |
+----------+
| 1000000 |
+----------+
[localhost:21000] > select count(*) howmany, assertion from sample_data_adls group by assertion;
+---------+-----------+
| howmany | assertion |
+---------+-----------+
| 667149 | true |
| 332851 | false |
+---------+-----------+
LOCATION
ALTERTABLE...ADDPARTITION
REFRESH
INVALIDATEMETADATA
COMPUTESTATS
SHOWTABLESTATS
SHOWCOLUMNSTATS
SELECT
INSERT
CREATETABLEASSELECT
PARQUET_FILE_SIZE
COMPUTESTATS
BytesReadLocal
BytesReadShortCircuit
BytesReadDataNodeCached
BytesReadRemoteUnexpected
ALTERTABLE...SETCACHED