This article explains how to configure R to work with Hive.
Once a Hadoop cluster is set up, including the Hive server and Hive metastore, follow the steps below on the Hive client to install R.
1. Get the yum repository to install R.
[root@hdm1 ~]# rpm -Uvh http://download.fedoraproject.org/pub/epel/6/i386/epel-release-6-8.noarch.rpm Retrieving http://download.fedoraproject.org/pub/epel/6/i386/epel-release-6-8.noarch.rpm warning: /var/tmp/rpm-tmp.BONZww: Header V3 RSA/SHA256 Signature, key ID 0608b895: NOKEY Preparing... ########################################### [100%] 1:epel-release ########################################### [100%]
2. Update the yum repository and install R.
[root@hdm1 ~]# yum clean all If you see the error below: Error: Cannot retrieve metalink for repository: epel. Please verify its path and try again Change the mirrorlist in epel.repo from https to http Ex: mirrorlist=http://mirrors.fedoraproject.org/metalink?repo=epel-6&arch=$basearch [root@hdm1 ~]# yum install R
3. Once the "R" package is installed, make sure the java path is set correctly.
As root user: R CMD javareconf As non-root user: R CMD javareconf -e
4. Install rJava and RJDBC.
[root@hdm1 ~]# R > install.packages("rJava") > install.packages("RJDBC",dep=TRUE) > q() Save workspace image? [y/n/c]: y
5. Start the Hive server.
[root@hdm1 ~]# $HIVE_HOME/bin/hive --service hiveserver
6. Use R to access Hive.
> library("DBI") > library("rJava") > library("RJDBC") > hive.class.path = list.files(path=c("/usr/lib/gphd/hive/lib"), pattern="jar", full.names=T); > hadoop.lib.path = list.files(path=c("/usr/lib/gphd/hadoop/lib"), pattern="jar", full.names=T); > hadoop.class.path = list.files(path=c("/usr/lib/gphd/hadoop"), pattern="jar", full.names=T); > class.path = c(hive.class.path, hadoop.lib.path, hadoop.class.path); > options(java.parameters = "-Xmx8g"); > drv = JDBC("org.apache.hadoop.hive.jdbc.HiveDriver", classPath=class.path); > hive.master="hdm1.phd.local:10000"; > url.dbc = paste0("jdbc:hive://", hive.master,"/default"); > conn = dbConnect(drv, url.dbc, "gpadmin", "changeme"); log4j:WARN No appenders could be found for logger (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe). log4j:WARN Please initialize the log4j system properly. log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. > dbListTables(conn); [1] "hbase_table_3" "walmart_test" "page_view" "weblogs" > q()
7. Use R to use Hive when kerberos is enabled.
> library("DBI") > library("rJava") > library("RJDBC") > hive.class.path = list.files(path=c("/usr/lib/gphd/hive/lib"), pattern="jar", full.names=T); > hadoop.lib.path = list.files(path=c("/usr/lib/gphd/hadoop/lib"), pattern="jar", full.names=T); > hadoop.class.path = list.files(path=c("/usr/lib/gphd/hadoop"), pattern="jar", full.names=T); > mapred.class.path = list.files(path=c("/usr/lib/gphd/hadoop-mapreduce"), pattern="jar", full.names=T); > cp = c(hive.class.path, hadoop.lib.path, hadoop.class.path, mapred.class.path, "/usr/lib/gphd/hadoop-mapreduce/hadoop-mapreduce-client-core.jar") > .jinit(classpath=cp) > drv = JDBC("org.apache.hive.jdbc.HiveDriver"); > url.dbc = paste0("jdbc:hive2://hdm1.phd.local:10002/default;principal=hive/[email protected]"); > conn = dbConnect(drv, url.dbc, "hive", "hive"); log4j:WARN No appenders could be found for logger (org.apache.hadoop.util.Shell). log4j:WARN Please initialize the log4j system properly. log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. > dbListTables(conn); [1] "abctest" > q()