Hive is typically used as a data warehouse for offline analytics. We usually connect to it directly over JDBC, and sometimes query it through Spark SQL, which can be more efficient. Once Kerberos authentication is enabled, the way you connect changes slightly. There is plenty of material on this online, but here is my version anyway:
Software versions:
Hive 1.2.1
Spark 2.2.0
1. Java access to Hive. First, the pom.xml file:
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <log4j.version>1.2.17</log4j.version>
    <slf4j.version>1.7.22</slf4j.version>
    <hadoop.version>2.7.3</hadoop.version>
    <hive.version>1.2.1</hive.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-jdbc</artifactId>
        <version>${hive.version}</version>
        <exclusions>
            <exclusion>
                <groupId>log4j</groupId>
                <artifactId>log4j</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>
Main class code:
package com.hadoop.ljs.hive121;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

/**
 * @author: Created By lujisen
 * @company ChinaUnicom Software JiNan
 * @date: 2020-03-06 14:19
 * @version: v1.0
 * @description: com.hadoop.ljs
 */
public class HiveKerberosDemo {
    private static String jdbcDriver = "org.apache.hive.jdbc.HiveDriver";
    public static final String krb5Conf = "D:\\kafkaSSL\\krb5.conf";
    public static final String userTicket = "lujs/unicom@CHINAUNICOM";
    public static final String userKeytab = "D:\\kafkaSSL\\lujs.unicom.keytab";
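    /* URL format: jdbc:hive2://<host>:<port>/<database>;principal=<HiveServer2 service principal>.
       With Kerberos enabled the principal parameter is required; the _HOST placeholder is
       replaced with the HiveServer2 hostname when the connection is opened. */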
    private static String hiveConnectURL = "jdbc:hive2://salver158.hadoop.unicom:10000/lujs;principal=hive/_HOST@CHINAUNICOM";

    public static void main(String[] args) throws Exception {
        Class.forName(jdbcDriver);
        // Log in with the Kerberos account
        System.setProperty("java.security.krb5.conf", krb5Conf);
        Configuration conf = new Configuration();
        conf.set("hadoop.security.authentication", "kerberos");
        conf.set("hadoop.security.authorization", "true");
        UserGroupInformation.setConfiguration(conf);
        UserGroupInformation.loginUserFromKeytab(userTicket, userKeytab);
        Connection connection = null;
        ResultSet rs = null;
        PreparedStatement ps = null;
        try {
            connection = DriverManager.getConnection(hiveConnectURL);
            ps = connection.prepareStatement("select * from lujs.table1");
            rs = ps.executeQuery();
            while (rs.next()) {
                System.out.println("id: " + rs.getString(1) + " name: " + rs.getString(2));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
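One detail worth keeping in mind: the ticket obtained by loginUserFromKeytab eventually expires, so a long-running client needs to refresh it before issuing queries. Below is only a minimal sketch of such a check, assuming the same UserGroupInformation login shown above; the class and method names are purely illustrative:

import org.apache.hadoop.security.UserGroupInformation;
import java.io.IOException;

public class KerberosReloginHelper {
    /* Call this before issuing queries in a long-running process.
     * checkTGTAndReloginFromKeytab() is a no-op while the current ticket
     * is still valid and re-logins from the keytab when it is about to expire. */
    public static void ensureLoggedIn() throws IOException {
        UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab();
    }
}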
2. Spark SQL access to Hive. Here is the pom.xml file as well:
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <log4j.version>1.2.17</log4j.version>
    <slf4j.version>1.7.22</slf4j.version>
    <spark.version>2.2.0</spark.version>
    <hadoop.version>2.7.3</hadoop.version>
    <hive.version>1.2.1</hive.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-jdbc</artifactId>
        <version>${hive.version}</version>
        <exclusions>
            <exclusion>
                <groupId>log4j</groupId>
                <artifactId>log4j</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>
Main class code:
package com.hadoop.ljs.spark220.security;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import java.io.IOException;
import java.util.Properties;

/**
 * @author: Created By lujisen
 * @company ChinaUnicom Software JiNan
 * @date: 2020-03-06 15:03
 * @version: v1.0
 * @description: com.hadoop.ljs.spark220.security
 */
public class SparkKerberosHive {
    public static final String krb5Conf = "D:\\kafkaSSL\\krb5.conf";
    public static final String userTicket = "lujs/unicom@CHINAUNICOM";
    public static final String userKeytab = "D:\\kafkaSSL\\lujs.unicom.keytab";
    /* Note: the principal in the URL below normally stays the same; only userTicket and userKeytab need to change. */
    private static String hiveConnectURL = "jdbc:hive2://salver158.hadoop.unicom:10000/lujs;principal=hive/_HOST@CHINAUNICOM";
    public static void main(String[] args) throws IOException {
        // Initialize the Kerberos configuration and log in from the keytab
        System.setProperty("java.security.krb5.conf", krb5Conf);
        Configuration conf = new Configuration();
        conf.set("hadoop.security.authentication", "kerberos");
        conf.set("hadoop.security.authorization", "true");
        UserGroupInformation.setConfiguration(conf);
        UserGroupInformation.loginUserFromKeytab(userTicket, userKeytab);
        SparkConf sparkConf = new SparkConf().setAppName("SparkKerberosHive")
                .setMaster("local[*]")
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();
        Properties props = new Properties();
        props.setProperty("hadoop.security.authentication", "kerberos");
        props.setProperty("hadoop.security.authorization", "true");
        Dataset<Row> df = sparkSession
                .read()
                .jdbc(hiveConnectURL, "lujs.table1", props);
        df.createOrReplaceTempView("table1");
        df = sparkSession.sql("select * from table1 limit 10");
        df.show();
        sparkSession.stop();
    }
}
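For reference, when the driver machine has the cluster's hive-site.xml (and the Hadoop *-site.xml files) on its classpath, Hive tables can also be queried without going through the JDBC driver by enabling Hive support on the SparkSession. This is only a minimal sketch, assuming those config files are present, the spark-hive_2.11 dependency has been added to the pom, and the same Kerberos login as in the demos above; the class name is illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.spark.sql.SparkSession;
import java.io.IOException;

public class SparkHiveSupportSketch {
    public static void main(String[] args) throws IOException {
        // Kerberos login, exactly as in the JDBC demo above
        System.setProperty("java.security.krb5.conf", "D:\\kafkaSSL\\krb5.conf");
        Configuration conf = new Configuration();
        conf.set("hadoop.security.authentication", "kerberos");
        UserGroupInformation.setConfiguration(conf);
        UserGroupInformation.loginUserFromKeytab("lujs/unicom@CHINAUNICOM", "D:\\kafkaSSL\\lujs.unicom.keytab");

        // Requires hive-site.xml / core-site.xml / hdfs-site.xml on the classpath
        // and the spark-hive_2.11 artifact; Spark then talks to the metastore
        // directly instead of going through HiveServer2 JDBC.
        SparkSession spark = SparkSession.builder()
                .appName("SparkHiveSupportSketch")
                .master("local[*]")
                .enableHiveSupport()
                .getOrCreate();
        spark.sql("select * from lujs.table1 limit 10").show();
        spark.stop();
    }
}

Whether this or the JDBC route is preferable mainly depends on whether the client machine is allowed to reach the metastore and HDFS directly.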
If you find my articles helpful, please follow the WeChat official account "大数据开发运维架构" and share it with your friends. Thanks for your support!