记录Hive CLI中常见的Query操作示例, 备用!
1.SELECT … FROM Clauses
SELECT name, salary FROM employees;
SELECT e.name, e.salary FROM employees e;
SELECT name, subordinates[0] FROM employees; //For array
SELECT name, deductions["State Taxes"] FROM employees; //For map
SELECT name, address.city FROM employees; //For struct
2.Specify Columns with Regular Expressions
SELECT symbol, `price.*` FROM stocks;
SELECT upper(name), salary, deductions["Federal Taxes"],
round(salary * (1 - deductions["Federal Taxes"])) FROM employees;
SELECT count(*), avg(salary) FROM employees;
SELECT count(DISTINCT symbol) FROM stocks;
SELECT count(DISTINCT ymd), count(DISTINCT volume) FROM stocks;
3.LIMIT Clause
SELECT upper(name), salary, deductions["Federal Taxes"],
round(salary * (1 - deductions["Federal Taxes"])) FROM employees LIMIT 2;
4.Column Aliases
SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,
round(salary * (1 - deductions["Federal Taxes"])) as salary_minus_fed_taxes
FROM employees LIMIT 2;
5.Nested SELECT Statements
FROM (
SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,
round(salary * (1 - deductions["Federal Taxes"])) as salary_minus_fed_taxes
FROM employees
) e
SELECT e.name, e.salary_minus_fed_taxes
WHERE e.salary_minus_fed_taxes > 70000;
6.CASE … WHEN … THEN Statements
SELECT name, salary,
CASE
WHEN salary < 50000.0 THEN 'low'
WHEN salary >= 50000.0 AND salary < 70000.0 THEN 'middle'
WHEN salary >= 70000.0 AND salary < 100000.0 THEN 'high'
ELSE 'very high'
END AS bracket FROM employees;
7.WHERE Clauses
SELECT * FROM employees WHERE country = 'US' AND state = 'CA';
SELECT name, salary, deductions["Federal Taxes"],
salary * (1 - deductions["Federal Taxes"])
FROM employees
WHERE round(salary * (1 - deductions["Federal Taxes"])) > 70000;
SELECT e.* FROM
(SELECT name, salary, deductions["Federal Taxes"] as ded,
salary * (1 - deductions["Federal Taxes"]) as salary_minus_fed_taxes
FROM employees) e
WHERE round(e.salary_minus_fed_taxes) > 70000;
SELECT name, salary, deductions['Federal Taxes'] FROM employees
WHERE deductions['Federal Taxes'] > cast(0.2 AS FLOAT);
8.LIKE and RLIKE
SELECT name, address.street FROM employees WHERE address.street LIKE '%Ave.';
SELECT name, address.city FROM employees WHERE address.city LIKE 'O%';
SELECT name, address.street FROM employees WHERE address.street LIKE '%Chi%';
SELECT name, address.street FROM employees
WHERE address.street RLIKE '.*(Chicago|Ontario).*';
SELECT name, address FROM employees
WHERE address.street LIKE '%Chicago%' OR address.street LIKE '%Ontario%';
9.GROUP BY Clauses
SELECT year(ymd), avg(price_close) FROM stocks
WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'
GROUP BY year(ymd);
10.HAVING Clauses
SELECT year(ymd), avg(price_close) FROM stocks
WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'
GROUP BY year(ymd)
HAVING avg(price_close) > 50.0;
//下面的嵌套查询效果等价于上面的SQL
SELECT s2.year, s2.avg FROM
(SELECT year(ymd) AS year, avg(price_close) AS avg FROM stocks
WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'
GROUP BY year(ymd)) s2
WHERE s2.avg > 50.0;
11.JOIN Statements
(1) Inner Join
SELECT a.ymd, a.price_close, b.price_close
FROM stocks a JOIN stocks b ON a.ymd = b.ymd
WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM';
SELECT s.ymd, s.symbol, s.price_close, d.dividend
FROM stocks s JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
WHERE s.symbol = 'AAPL';
SELECT a.ymd, a.price_close, b.price_close , c.price_close
FROM stocks a JOIN stocks b ON a.ymd = b.ymd
JOIN stocks c ON a.ymd = c.ymd
WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM' AND c.symbol = 'GE';
(2) LEFT OUTER JOIN
SELECT s.ymd, s.symbol, s.price_close, d.dividend
FROM stocks s LEFT OUTER JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
WHERE s.symbol = 'AAPL';
(3) RIGHT OUTER JOIN
SELECT s.ymd, s.symbol, s.price_close, d.dividend
FROM dividends d RIGHT OUTER JOIN stocks s ON d.ymd = s.ymd AND d.symbol = s.symbol
WHERE s.symbol = 'AAPL';
(4) FULL OUTER JOIN
SELECT s.ymd, s.symbol, s.price_close, d.dividend
FROM dividends d FULL OUTER JOIN stocks s ON d.ymd = s.ymd AND d.symbol = s.symbol
WHERE s.symbol = 'AAPL';
(5) LEFT SEMI-JOIN
SELECT s.ymd, s.symbol, s.price_close
FROM stocks s LEFT SEMI JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol;
(6) Cartesian Product JOINs
SELECTS * FROM stocks JOIN dividends;
12.ORDER BY and SORT BY
SELECT s.ymd, s.symbol, s.price_close
FROM stocks s
ORDER BY s.ymd ASC, s.symbol DESC;
SELECT s.ymd, s.symbol, s.price_close
FROM stocks s
SORT BY s.ymd ASC, s.symbol DESC;
13.DISTRIBUTE BY with SORT BY
SELECT s.ymd, s.symbol, s.price_close
FROM stocks s
DISTRIBUTE BY s.symbol
SORT BY s.symbol ASC, s.ymd ASC;
14.CLUSTER BY
SELECT s.ymd, s.symbol, s.price_close
FROM stocks s
CLUSTER BY s.symbol;
15.UNION ALL
SELECT log.ymd, log.level, log.message
FROM (
SELECT l1.ymd, l1.level,
l1.message, 'Log1' AS source
FROM log1 l1
UNION ALL
SELECT l2.ymd, l2.level,
l2.message, 'Log2' AS source
FROM log1 l2
) log
SORT BY log.ymd ASC;
FROM (
FROM src SELECT src.key, src.value WHERE src.key < 100
UNION ALL
FROM src SELECT src.* WHERE src.key > 110
) unioninput
INSERT OVERWRITE DIRECTORY '/tmp/union.out' SELECT unioninput.*