Create DB
CREATE DATABASE <db-name>;
Select a DB
If no DB is selected, the default DB is used
USE <db-name>;
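For example (the DB name 'sales' is hypothetical); without a USE, tables go to the built-in 'default' DB:
CREATE DATABASE sales;
USE sales;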
Show DB and Tables
SHOW DATABASES;
SHOW TABLES;
Create Managed Table
CREATE TABLE student (
regno INT,
name STRING,
cgpa INT
);
# Create a table for loading data from a file
CREATE TABLE patient (
pid INT,
pname STRING,
drug STRING,
gender STRING,
tot_amt INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
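This table expects one record per line with comma-separated fields, so an input file would look like this (values are illustrative):
1,Ravi,Para,M,250
2,Meena,Ibu,F,400
It can then be filled with the LOAD DATA commands shown under Insert Data below.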
Create External Table
# No directory is created with the table's name; the table points at the given location. Files written by Hive get numeric names like '000000_0'
CREATE EXTERNAL TABLE patext(
pid INT,
pname STRING,
drug STRING,
gender STRING,
amt INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/xyz/hiveext/';
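The external location should already exist on HDFS; a sketch of staging it (the CSV file name is hypothetical):
hadoop fs -mkdir -p /xyz/hiveext/
hadoop fs -put patients.csv /xyz/hiveext/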
# A directory with the table's name is created at '/user/hive/warehouse/xyz.db/'
CREATE EXTERNAL TABLE patext1(
pid INT,
pname STRING,
drug STRING,
gender STRING,
amt INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
# Create a table using data on the local FS. Note: we must specify a directory, not a file
CREATE EXTERNAL TABLE employee(
empno INT,
pname STRING,
sal INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION 'file:///home/ak/datasets/Test/';
Insert Data
Insert Command
INSERT INTO TABLE student VALUES (1, 'Arjun', 9);
INSERT INTO TABLE <target-table>
SELECT * FROM <source-table>;
# Overwrites existing data in the table
INSERT OVERWRITE TABLE patient1
SELECT * FROM patient
WHERE drug = 'Para' ;
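The target table (patient1 here) must already exist; a sketch of creating it with the same schema:
CREATE TABLE patient1 LIKE patient;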
Load Data from Local FS
LOAD DATA LOCAL INPATH '<local-file>' INTO TABLE <table-name> ;
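For example (the path is hypothetical); adding OVERWRITE replaces the existing data instead of appending:
LOAD DATA LOCAL INPATH '/home/ak/datasets/patient.csv' OVERWRITE INTO TABLE patient;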
Load Data from HDFS
LOAD DATA INPATH '<hdfs-file>' INTO TABLE <table-name>;
When loading data from HDFS, the file is moved from its current location to the Hive DB location
Hence this approach is not recommended
A better alternative is to use external tables
Sqoop Command
Used for bulk loading data from an RDBMS (refer to Hive & Sqoop for the command)
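A minimal import sketch, assuming a MySQL source (connection details and table names are hypothetical):
sqoop import \
  --connect jdbc:mysql://localhost/mydb \
  --username root -P \
  --table patient \
  --hive-import --hive-table xyz.patient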
HDFS Shell
hadoop fs -put <local-file> '/hive-table-path'
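For example, copying a file straight into a managed table's warehouse directory (paths are hypothetical):
hadoop fs -put patient.csv /user/hive/warehouse/xyz.db/patient/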
Describe Table
DESC <table-name> ;
DESC EXTENDED <table-name> ;
DESC FORMATTED <table-name> ;
Delete Table
DROP TABLE <table-name> ;
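IF EXISTS avoids an error when the table is absent; note that dropping an external table removes only the metadata, the data files remain:
DROP TABLE IF EXISTS <table-name>;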
Alter Table
ALTER TABLE <table-name> RENAME TO <new-table-name>;
ALTER TABLE <table-name> ADD COLUMNS (<new-column> <datatype>);
ALTER TABLE <table-name> CHANGE <column-name> <new-column-name> <datatype>;
ALTER TABLE <table-name> SET TBLPROPERTIES('EXTERNAL'='TRUE'); # Change managed table to external table
ALTER TABLE <table-name> SET LOCATION "hdfs:///tmp/newloc";
When we change the location of a Hive table, the data remains in the original location; it has to be moved manually
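A sketch of moving the files to match the new location (the old path is whatever DESC FORMATTED reports):
hadoop fs -mv <old-table-location> hdfs:///tmp/newloc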
Load Parquet File Data
Data can be saved in formats like Parquet and ORC using the INSERT INTO / INSERT OVERWRITE commands
CREATE TABLE parquet_file (
pid INT,
pname STRING,
drug STRING,
gender STRING,
amt INT
)
STORED AS PARQUET; # Parquet is a binary columnar format, so field/line delimiters do not apply
INSERT OVERWRITE TABLE parquet_file
SELECT * FROM patient;
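To confirm the storage format, inspect the table metadata; the InputFormat/SerDe rows should mention Parquet:
DESC FORMATTED parquet_file;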
Hive Scripts
hive -f filename.q
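A script is just a file of HiveQL statements; a minimal sketch (file name and statements are hypothetical):
-- contents of filename.q
USE xyz;
SELECT drug, COUNT(*) FROM patient GROUP BY drug;
A single inline query can also be run without a script file:
hive -e 'SELECT COUNT(*) FROM xyz.patient;'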
Change Hive Execution Engine
SET hive.execution.engine=spark;
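The same property also accepts mr (the classic default) and tez, assuming the corresponding engine is installed:
SET hive.execution.engine=mr;
SET hive.execution.engine=tez;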