Hive Query操作示例

记录Hive CLI中常见的Query操作示例, 备用!

1.SELECT … FROM Clauses
  SELECT name, salary FROM employees;
  SELECT e.name, e.salary FROM employees e;
  SELECT name, subordinates[0] FROM employees; //For array
  SELECT name, deductions["State Taxes"] FROM employees; //For map
  SELECT name, address.city FROM employees; //For struct
2.Specify Columns with Regular Expressions
  SELECT symbol, `price.*` FROM stocks;
 
  SELECT upper(name), salary, deductions["Federal Taxes"],
  round(salary * (1 - deductions["Federal Taxes"])) FROM employees;
 
  SELECT count(*), avg(salary) FROM employees;
  SELECT count(DISTINCT symbol) FROM stocks;
  SELECT count(DISTINCT ymd), count(DISTINCT volume) FROM stocks;
3.LIMIT Clause
  SELECT upper(name), salary, deductions["Federal Taxes"],
  round(salary * (1 - deductions["Federal Taxes"])) FROM employees LIMIT 2;
4.Column Aliases
  SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,
  round(salary * (1 - deductions["Federal Taxes"])) as salary_minus_fed_taxes
  FROM employees LIMIT 2;
5.Nested SELECT Statements
  FROM (
  SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,
  round(salary * (1 - deductions["Federal Taxes"])) as salary_minus_fed_taxes
  FROM employees
  ) e
  SELECT e.name, e.salary_minus_fed_taxes
  WHERE e.salary_minus_fed_taxes > 70000;
6.CASE … WHEN … THEN Statements
  SELECT name, salary,
    CASE
    WHEN salary < 50000.0 THEN 'low'
    WHEN salary >= 50000.0 AND salary < 70000.0 THEN 'middle'
    WHEN salary >= 70000.0 AND salary < 100000.0 THEN 'high'
    ELSE 'very high'
  END AS bracket FROM employees;
7.WHERE Clauses
  SELECT * FROM employees WHERE country = 'US' AND state = 'CA';
 
  SELECT name, salary, deductions["Federal Taxes"],
  salary * (1 - deductions["Federal Taxes"])
  FROM employees
  WHERE round(salary * (1 - deductions["Federal Taxes"])) > 70000;
 
  SELECT e.* FROM
  (SELECT name, salary, deductions["Federal Taxes"] as ded,
  salary * (1 - deductions["Federal Taxes"]) as salary_minus_fed_taxes
  FROM employees) e
  WHERE round(e.salary_minus_fed_taxes) > 70000;
 
  SELECT name, salary, deductions['Federal Taxes'] FROM employees
  WHERE deductions['Federal Taxes'] > cast(0.2 AS FLOAT);
8.LIKE and RLIKE
  SELECT name, address.street FROM employees WHERE address.street LIKE '%Ave.';
  SELECT name, address.city FROM employees WHERE address.city LIKE 'O%';
  SELECT name, address.street FROM employees WHERE address.street LIKE '%Chi%';
 
  SELECT name, address.street FROM employees
  WHERE address.street RLIKE '.*(Chicago|Ontario).*';
 
  SELECT name, address FROM employees
  WHERE address.street LIKE '%Chicago%' OR address.street LIKE '%Ontario%';
9.GROUP BY Clauses
  SELECT year(ymd), avg(price_close) FROM stocks
  WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'
  GROUP BY year(ymd);
10.HAVING Clauses
  SELECT year(ymd), avg(price_close) FROM stocks
  WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'
  GROUP BY year(ymd)
  HAVING avg(price_close) > 50.0;
 
  //下面的嵌套查询效果等价于上面的SQL
  SELECT s2.year, s2.avg FROM
  (SELECT year(ymd) AS year, avg(price_close) AS avg FROM stocks
  WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'
  GROUP BY year(ymd)) s2
  WHERE s2.avg > 50.0;
11.JOIN Statements
  (1) Inner Join
  SELECT a.ymd, a.price_close, b.price_close
  FROM stocks a JOIN stocks b ON a.ymd = b.ymd
  WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM';
 
  SELECT s.ymd, s.symbol, s.price_close, d.dividend
  FROM stocks s JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
  WHERE s.symbol = 'AAPL';
 
  SELECT a.ymd, a.price_close, b.price_close , c.price_close
  FROM stocks a JOIN stocks b ON a.ymd = b.ymd
  JOIN stocks c ON a.ymd = c.ymd
  WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM' AND c.symbol = 'GE';
  (2) LEFT OUTER JOIN
  SELECT s.ymd, s.symbol, s.price_close, d.dividend
  FROM stocks s LEFT OUTER JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
  WHERE s.symbol = 'AAPL';
  (3) RIGHT OUTER JOIN
  SELECT s.ymd, s.symbol, s.price_close, d.dividend
  FROM dividends d RIGHT OUTER JOIN stocks s ON d.ymd = s.ymd AND d.symbol = s.symbol
  WHERE s.symbol = 'AAPL';
  (4) FULL OUTER JOIN
  SELECT s.ymd, s.symbol, s.price_close, d.dividend
  FROM dividends d FULL OUTER JOIN stocks s ON d.ymd = s.ymd AND d.symbol = s.symbol
  WHERE s.symbol = 'AAPL';
  (5) LEFT SEMI-JOIN
  SELECT s.ymd, s.symbol, s.price_close
  FROM stocks s LEFT SEMI JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol;
  (6) Cartesian Product JOINs
  SELECTS * FROM stocks JOIN dividends;
12.ORDER BY and SORT BY
  SELECT s.ymd, s.symbol, s.price_close
  FROM stocks s
  ORDER BY s.ymd ASC, s.symbol DESC;
 
  SELECT s.ymd, s.symbol, s.price_close
  FROM stocks s
  SORT BY s.ymd ASC, s.symbol DESC;
13.DISTRIBUTE BY with SORT BY
  SELECT s.ymd, s.symbol, s.price_close
  FROM stocks s
  DISTRIBUTE BY s.symbol
  SORT BY s.symbol ASC, s.ymd ASC;
14.CLUSTER BY
  SELECT s.ymd, s.symbol, s.price_close
  FROM stocks s
  CLUSTER BY s.symbol;
15.UNION ALL
  SELECT log.ymd, log.level, log.message
  FROM (
    SELECT l1.ymd, l1.level,
    l1.message, 'Log1' AS source
    FROM log1 l1
    UNION ALL
    SELECT l2.ymd, l2.level,
    l2.message, 'Log2' AS source
    FROM log1 l2
  ) log
  SORT BY log.ymd ASC;

  FROM (
    FROM src SELECT src.key, src.value WHERE src.key < 100
    UNION ALL
    FROM src SELECT src.* WHERE src.key > 110
  ) unioninput
  INSERT OVERWRITE DIRECTORY '/tmp/union.out' SELECT unioninput.*

你可能感兴趣的:(Hadoop/YARN)