CREATE VIEW valid_records AS SELECT * FROM records2 WHERE temperature !=9999;
1.4 创建外部表:
1 2 3 4 5 6 7 8
CREATE EXTERNAL TABLE page_view(viewTime INT, userid BIGINT, page_url STRING, referrer_url STRING, ip STRING COMMENT 'IP Address of the User', country STRING COMMENT 'country of origination') COMMENT 'This is the staging page view table' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054' STORED AS TEXTFILE LOCATION '<hdfs_location>';
1.5 建分区表:
1 2 3 4 5 6 7 8
CREATE TABLE par_table(viewTime INT, userid BIGINT, page_url STRING, referrer_url STRING, ip STRING COMMENT 'IP Address of the User') COMMENT 'This is the page view table' PARTITIONED BY(date STRING, pos STRING) ROW FORMAT DELIMITED fields terminated by '\t' lines terminated by '\n'; STORED AS SEQUENCEFILE;
ALTER TABLE table_name DROP partition_spec, partition_spec,...
4.4 增加表的元数据信息
ALTER TABLE table_name SET TBLPROPERTIES table_properties table_properties:
:[property_name = property_value…..]
4.5 改变表文件格式与组织
ALTER TABLE table_name SET FILEFORMAT file_format
ALTER TABLE table_name CLUSTERED BY(userid) SORTED BY(viewTime) INTO num_buckets BUCKETS
5. 删除操作
5.1 删除表:
DROP TABLE records;
5.2 删除表中数据,但要保持表的结构定义
dfs -rmr /user/hive/warehouse/records;
5.3 删除视图
DROP VIEW view_name
6. 连接操作
6.1 内连接:
1
SELECT sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);
6.2 外连接:
1 2 3
SELECT sales.*, things.* FROM sales LEFT OUTER JOIN things ON (sales.id = things.id); SELECT sales.*, things.* FROM sales RIGHT OUTER JOIN things ON (sales.id = things.id); SELECT sales.*, things.* FROM sales FULL OUTER JOIN things ON (sales.id = things.id);
6.3 in查询:Hive不支持,但可以使用LEFT SEMI JOIN
SELECT * FROM things LEFT SEMI JOIN sales ON (sales.id = things.id);
6.4 Map连接:Hive可以把较小的表放入每个Mapper的内存来执行连接操作
1
SELECT /*+ MAPJOIN(things) */ sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);
INSERT OVERWRITE TABLE ..SELECT:新表预先存在
1 2 3 4
FROM records2 INSERT OVERWRITE TABLE stations_by_year SELECT year, COUNT(DISTINCT station) GROUP BY year INSERT OVERWRITE TABLE records_by_year SELECT year, COUNT(1) GROUP BY year INSERT OVERWRITE TABLE good_records_by_year SELECT year, COUNT(1) WHERE temperature != 9999 AND (quality = 0 OR quality = 1 OR quality = 4 OR quality = 5 OR quality = 9) GROUP BY year;
CREATE TABLE … AS SELECT:新表表预先不存在
1
CREATE TABLE target AS SELECT col1,col2 FROM source;