This article explains how to configure R for use with Hive.
Once a hadoop cluster is setup, including hive server and hive metastore, follow the steps below on the hive client to install R.
1. Get the yum repository to install R.
[root@hdm1 ~]# rpm -Uvh http://download.fedoraproject.org/pub/epel/6/i386/epel-release-6-8.noarch.rpm Retrieving http://download.fedoraproject.org/pub/epel/6/i386/epel-release-6-8.noarch.rpm warning: /var/tmp/rpm-tmp.BONZww: Header V3 RSA/SHA256 Signature, key ID 0608b895: NOKEY Preparing... ########################################### [100%] 1:epel-release ########################################### [100%]
2. Update the yum repository and install R.
[root@hdm1 ~]# yum clean all
If you see the error below:
Error: Cannot retrieve metalink for repository: epel. Please verify its path and try again
change the mirrorlist in epel.repo from https to http. Ex: mirrorlist=http://mirrors.fedoraproject.org/metalink?repo=epel-6&arch=$basearch
[root@hdm1 ~]# yum install R
3. Once the "R" package is installed, make sure the java path is set correctly.
As the root user: R CMD javareconf
As a non-root user: R CMD javareconf -e
4. Install rJava and rJDBC.
[root@hdm1 ~]# R
> install.packages("rJava")
> install.packages("RJDBC",dep=TRUE)
> q()
Save workspace image? [y/n/c]: y
5. Start the Hive server.
[root@hdm1 ~]# $HIVE_HOME/bin/hive --service hiveserver
6. Use R to access Hive.
> library("DBI")
> library("rJava")
> library("RJDBC")
> hive.class.path = list.files(path=c("/usr/lib/gphd/hive/lib"), pattern="jar", full.names=T);
> hadoop.lib.path = list.files(path=c("/usr/lib/gphd/hadoop/lib"), pattern="jar", full.names=T);
> hadoop.class.path = list.files(path=c("/usr/lib/gphd/hadoop"), pattern="jar", full.names=T);
> class.path = c(hive.class.path, hadoop.lib.path, hadoop.class.path);
> options(java.parameters = "-Xmx8g");
> drv = JDBC("org.apache.hadoop.hive.jdbc.HiveDriver", classPath = class.path);
> hive.master="hdm1.phd.local:10000";
> url.dbc = paste0("jdbc:hive://", hive.master,"/default");
> conn = dbConnect(drv, url.dbc, "gpadmin", "changeme");
log4j:WARN No appenders could be found for logger (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
> dbListTables(conn);
[1] "hbase_table_3" "walmart_test" "page_view" "weblogs"
> q()
7. Use R to access Hive when Kerberos is enabled.
> library("DBI")
> library("rJava")
> library("RJDBC")
> hive.class.path = list.files(path=c("/usr/lib/gphd/hive/lib"), pattern="jar", full.names=T);
> hadoop.lib.path = list.files(path=c("/usr/lib/gphd/hadoop/lib"), pattern="jar", full.names=T);
> hadoop.class.path = list.files(path=c("/usr/lib/gphd/hadoop"), pattern="jar", full.names=T);
> mapred.class.path = list.files(path=c("/usr/lib/gphd/hadoop-mapreduce"), pattern="jar", full.names=T);
> cp = c(hive.class.path, hadoop.lib.path, hadoop.class.path, mapred.class.path, "/usr/lib/gphd/hadoop-mapreduce/hadoop-mapreduce-client-core.jar")
> .jinit(classpath=cp)
> drv = JDBC("org.apache.hive.jdbc.HiveDriver", classPath = cp);
> url.dbc = paste0("jdbc:hive2://hdm1.phd.local:10002/default;principal=hive/[email protected]");
> conn = dbConnect(drv, url.dbc, "hive", "hive");
log4j:WARN No appenders could be found for logger (org.apache.hadoop.util.Shell).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
> dbListTables(conn);
[1] "abctest"
> q()