Commit b5db54c4 authored by demon

Add a port for Hadoop version 2.

Not perfect, but a reasonable starting point.
parent e145ee00
@@ -641,6 +641,7 @@
 SUBDIR += hachoir-parser
 SUBDIR += hachoir-regex
 SUBDIR += hadoop
+SUBDIR += hadoop2
 SUBDIR += hapy
 SUBDIR += hcs12mem
 SUBDIR += hexcompare
# Created by: Dmitry Sivachenko <demon@FreeBSD.org>
# $FreeBSD$
PORTNAME= hadoop
PORTVERSION= 2.4.1
CATEGORIES= devel java
MASTER_SITES= ${MASTER_SITE_APACHE}
MASTER_SITE_SUBDIR=${PORTNAME}/common/stable
PKGNAMEPREFIX= apache-
PKGNAMESUFFIX= 2
DISTNAME= ${PORTNAME}-${PORTVERSION}-src
DIST_SUBDIR= hadoop
MAINTAINER= demon@FreeBSD.org
COMMENT= Apache Map/Reduce framework
LICENSE= APACHE20
BUILD_DEPENDS= mvn:${PORTSDIR}/devel/maven3 \
cmake:${PORTSDIR}/devel/cmake \
protoc:${PORTSDIR}/devel/protobuf
RUN_DEPENDS= bash:${PORTSDIR}/shells/bash
CONFLICTS_INSTALL= apache-hadoop-1*
USES= shebangfix
USE_JAVA= yes
JAVA_VERSION= 1.7+
USE_LDCONFIG= yes
SHEBANG_FILES= hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/sbin/httpfs.sh hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/libexec/httpfs-config.sh
MAKE_ENV+= HADOOP_PROTOC_PATH=${LOCALBASE}/bin/protoc
HADOOP_DIST= ${WRKSRC}/hadoop-dist/target/hadoop-${PORTVERSION}
HADOOP_LOGDIR= /var/log/hadoop
HADOOP_RUNDIR= /var/run/hadoop
HDFS_USER= hdfs
MAPRED_USER= mapred
HADOOP_GROUP= hadoop
USERS= ${HDFS_USER} ${MAPRED_USER}
GROUPS= ${HADOOP_GROUP}
SUB_FILES= hadoop-layout.sh
USE_RC_SUBR= resourcemanager nodemanager datanode namenode secondarynamenode
PLIST_SUB= PORTVERSION="${PORTVERSION}" \
HADOOP_LOGDIR="${HADOOP_LOGDIR}" \
HADOOP_RUNDIR="${HADOOP_RUNDIR}" \
HDFS_USER="${HDFS_USER}" \
MAPRED_USER="${MAPRED_USER}" \
HADOOP_GROUP="${HADOOP_GROUP}"
SUB_LIST= HDFS_USER="${HDFS_USER}" \
MAPRED_USER="${MAPRED_USER}" \
HADOOP_GROUP="${HADOOP_GROUP}" \
JAVA_HOME="${JAVA_HOME}" \
HADOOP_LOGDIR="${HADOOP_LOGDIR}" \
HADOOP_RUNDIR="${HADOOP_RUNDIR}"
post-patch:
${MKDIR} ${WRKDIR}/m2
${CP} ${FILESDIR}/settings.xml ${WRKDIR}
${REINPLACE_CMD} -e "s|WORK|${WRKDIR}|" ${WRKDIR}/settings.xml
do-build:
cd ${WRKSRC} && ${LOCALBASE}/bin/mvn -gs "${WRKDIR}/settings.xml" clean package -Pdist,native -DskipTests
post-build:
${RM} ${HADOOP_DIST}/etc/hadoop/*.cmd
do-install:
cd ${HADOOP_DIST}/bin && ${INSTALL_SCRIPT} hadoop hdfs mapred rcc yarn ${STAGEDIR}${PREFIX}/bin/
cd ${HADOOP_DIST} && ${COPYTREE_BIN} "libexec sbin" ${STAGEDIR}${PREFIX}/ "! -name *.cmd"
cd ${HADOOP_DIST}/include && ${INSTALL_DATA} *h ${STAGEDIR}${PREFIX}/include/
cd ${HADOOP_DIST}/lib/native && ${INSTALL_DATA} *.a ${STAGEDIR}${PREFIX}/lib/
cd ${HADOOP_DIST}/lib/native && ${INSTALL_DATA} libhadoop.so.1.0.0 ${STAGEDIR}${PREFIX}/lib/libhadoop.so.1
cd ${HADOOP_DIST}/lib/native && ${INSTALL_DATA} libhdfs.so.0.0.0 ${STAGEDIR}${PREFIX}/lib/libhdfs.so.0
${LN} -sf libhdfs.so.0 ${STAGEDIR}${PREFIX}/lib/libhdfs.so
${LN} -sf libhadoop.so.1 ${STAGEDIR}${PREFIX}/lib/libhadoop.so
cd ${HADOOP_DIST}/share/hadoop && ${COPYTREE_SHARE} "*" ${STAGEDIR}${DATADIR}/ "! -name *-sources.jar -and ! -name sources"
${MKDIR} ${STAGEDIR}${EXAMPLESDIR}/conf
cd ${HADOOP_DIST}/etc/hadoop && ${COPYTREE_SHARE} "*" ${STAGEDIR}${EXAMPLESDIR}/conf/
${INSTALL_DATA} ${WRKSRC}/hadoop-hdfs-project/hadoop-hdfs/target/classes/hdfs-default.xml \
	${WRKSRC}/hadoop-hdfs-project/hadoop-hdfs-httpfs/target/classes/httpfs-default.xml \
	${WRKSRC}/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/target/classes/yarn-default.xml \
	${WRKSRC}/hadoop-common-project/hadoop-common/target/classes/core-default.xml \
	${WRKSRC}/hadoop-tools/hadoop-distcp/target/classes/distcp-default.xml \
	${WRKSRC}/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/target/classes/mapred-default.xml \
	${STAGEDIR}${EXAMPLESDIR}/
${INSTALL_DATA} ${WRKDIR}/hadoop-layout.sh ${STAGEDIR}${PREFIX}/libexec/
${MKDIR} ${STAGEDIR}${ETCDIR}
${MKDIR} ${STAGEDIR}${HADOOP_LOGDIR}
${MKDIR} ${STAGEDIR}${HADOOP_RUNDIR}
.include <bsd.port.mk>
SHA256 (hadoop/hadoop-2.4.1-src.tar.gz) = 09f897738e9d34bceb7e7d4494bbc75e363fb32993f56dc5a6de8aab3419b990
SIZE (hadoop/hadoop-2.4.1-src.tar.gz) = 15417097
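
With the Makefile and distinfo above in place, the standard ports workflow applies. A minimal sketch, assuming the port lives at devel/hadoop2 and that makesum is only rerun after a PORTVERSION bump:

cd /usr/ports/devel/hadoop2
make makesum       # regenerate distinfo after a version bump; otherwise skip
make stage         # fetch, extract, patch, run the Maven build, install into ${STAGEDIR}
make check-plist   # verify the staged files match pkg-plist
make install clean
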
#!/bin/sh
#
# $FreeBSD$
#
# PROVIDE: datanode
# REQUIRE: LOGIN
# KEYWORD: shutdown
#
# datanode_enable (bool): Set to NO by default.
# Set it to YES to enable datanode.
. /etc/rc.subr
name=datanode
rcvar=datanode_enable
load_rc_config "${name}"
: ${datanode_enable:=NO}
: ${datanode_user:=%%HDFS_USER%%}
command="%%PREFIX%%/sbin/hadoop-daemon.sh"
command_args='--config %%ETCDIR%% start datanode'
stop_cmd=datanode_stop
datanode_stop () {
	su -m ${datanode_user} -c "${command} --config %%ETCDIR%% stop datanode"
}
run_rc_command "$1"
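
The script above follows the stock rc.subr pattern, so the daemon is managed like any other FreeBSD service. A minimal sketch, assuming the package is installed with its defaults:

sysrc datanode_enable=YES   # persist the knob in /etc/rc.conf
service datanode start      # runs hadoop-daemon.sh as the hdfs user by default
service datanode stop       # invokes the custom datanode_stop function
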
export JAVA_HOME=${JAVA_HOME:-%%JAVA_HOME%%}
export HADOOP_PREFIX=%%PREFIX%%
export HADOOP_CONF_DIR=%%ETCDIR%%
export HADOOP_LOG_DIR=%%HADOOP_LOGDIR%%
export HADOOP_PID_DIR=%%HADOOP_RUNDIR%%
export YARN_LOG_DIR=%%HADOOP_LOGDIR%%
export YARN_PID_DIR=%%HADOOP_RUNDIR%%
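
This hadoop-layout.sh fragment is installed into ${PREFIX}/libexec (see the do-install target above), where Hadoop's own libexec/hadoop-config.sh looks for it and sources it. The same environment can be reproduced in an interactive shell; a sketch, assuming the default PREFIX of /usr/local:

# Source the layout file to get the environment the daemons see
. /usr/local/libexec/hadoop-layout.sh
echo "${HADOOP_CONF_DIR}"   # /usr/local/etc/hadoop once %%ETCDIR%% has been expanded
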
#!/bin/sh
#
# $FreeBSD$
#
# PROVIDE: namenode
# REQUIRE: LOGIN
# KEYWORD: shutdown
#
# namenode_enable (bool): Set to NO by default.
# Set it to YES to enable namenode.
. /etc/rc.subr
name=namenode
rcvar=namenode_enable
load_rc_config "${name}"
: ${namenode_enable:=NO}
: ${namenode_user:=%%HDFS_USER%%}
command="%%PREFIX%%/sbin/hadoop-daemon.sh"
command_args='--config %%ETCDIR%% start namenode'
stop_cmd=namenode_stop
namenode_stop () {
	su -m ${namenode_user} -c "${command} --config %%ETCDIR%% stop namenode"
}
run_rc_command "$1"
#!/bin/sh
#
# $FreeBSD$
#
# PROVIDE: nodemanager
# REQUIRE: LOGIN
# KEYWORD: shutdown
#
# nodemanager_enable (bool): Set to NO by default.
# Set it to YES to enable nodemanager.
. /etc/rc.subr
name=nodemanager
rcvar=nodemanager_enable
load_rc_config "${name}"
: ${nodemanager_enable:=NO}
: ${nodemanager_user:=%%MAPRED_USER%%}
command="%%PREFIX%%/sbin/yarn-daemon.sh"
command_args='--config %%ETCDIR%% start nodemanager'
stop_cmd=nodemanager_stop
nodemanager_stop () {
	su -m ${nodemanager_user} -c "${command} --config %%ETCDIR%% stop nodemanager"
}
run_rc_command "$1"
--- hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh.orig 2014-06-21 09:40:05.000000000 +0400
+++ hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh 2014-07-03 21:37:24.000000000 +0400
@@ -97,11 +97,6 @@
export HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
fi
-if [ ! -w "$HADOOP_LOG_DIR" ] ; then
- mkdir -p "$HADOOP_LOG_DIR"
- chown $HADOOP_IDENT_STRING $HADOOP_LOG_DIR
-fi
-
if [ "$HADOOP_PID_DIR" = "" ]; then
HADOOP_PID_DIR=/tmp
fi
--- hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/SharedFileDescriptorFactory.java.orig 2014-06-21 09:40:10.000000000 +0400
+++ hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/SharedFileDescriptorFactory.java 2014-07-04 13:49:25.000000000 +0400
@@ -53,7 +53,7 @@
if (!NativeIO.isAvailable()) {
return "NativeIO is not available.";
}
- if (!SystemUtils.IS_OS_UNIX) {
+ if (false) {
return "The OS is not UNIX.";
}
return null;
--- hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_user_info.c.bak 2014-06-21 09:40:12.000000000 +0400
+++ hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_user_info.c 2014-07-04 10:53:05.000000000 +0400
@@ -36,7 +36,7 @@
struct hadoop_user_info *hadoop_user_info_alloc(void)
{
struct hadoop_user_info *uinfo;
- size_t buf_sz;
+ long buf_sz;
char *buf;
uinfo = calloc(1, sizeof(struct hadoop_user_info));
@@ -193,7 +193,7 @@
ngroups = uinfo->gids_size;
ret = getgrouplist(uinfo->pwd.pw_name, uinfo->pwd.pw_gid,
uinfo->gids, &ngroups);
- if (ret > 0) {
+ if (ret > 0 /* Linux */ || ret == 0 /* FreeBSD */) {
uinfo->num_gids = ngroups;
ret = put_primary_gid_first(uinfo);
if (ret) {
--- hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh.orig 2014-06-21 09:41:15.000000000 +0400
+++ hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh 2014-07-04 15:48:29.000000000 +0400
@@ -79,10 +79,6 @@
export YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
fi
-if [ ! -w "$YARN_LOG_DIR" ] ; then
- mkdir -p "$YARN_LOG_DIR"
- chown $YARN_IDENT_STRING $YARN_LOG_DIR
-fi
if [ "$YARN_PID_DIR" = "" ]; then
YARN_PID_DIR=/tmp
@@ -104,8 +100,6 @@
(start)
- [ -w "$YARN_PID_DIR" ] || mkdir -p "$YARN_PID_DIR"
-
if [ -f $pid ]; then
if kill -0 `cat $pid` > /dev/null 2>&1; then
echo $command running as process `cat $pid`. Stop it first.
# Patch from https://issues.apache.org/jira/browse/YARN-1327
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
@@ -19,6 +19,7 @@
#include "configuration.h"
#include "container-executor.h"
+#include <libgen.h>
#include <dirent.h>
#include <fcntl.h>
#include <fts.h>
@@ -496,9 +497,10 @@ static struct passwd* get_user_info(const char* user) {
int is_whitelisted(const char *user) {
char **whitelist = get_values(ALLOWED_SYSTEM_USERS_KEY);
char **users = whitelist;
+ int logname_size = sysconf(_SC_LOGIN_NAME_MAX);
if (whitelist != NULL) {
for(; *users; ++users) {
- if (strncmp(*users, user, LOGIN_NAME_MAX) == 0) {
+ if (strncmp(*users, user, logname_size) == 0) {
free_values(whitelist);
return 1;
}
@@ -1236,6 +1238,10 @@ void chown_dir_contents(const char *dir_path, uid_t uid, gid_t gid) {
* hierarchy: the top directory of the hierarchy for the NM
*/
int mount_cgroup(const char *pair, const char *hierarchy) {
+#ifndef __linux
+ fprintf(LOGFILE, "cgroups are supported only on Linux OS\n");
+ return -1;
+#else
char *controller = malloc(strlen(pair));
char *mount_path = malloc(strlen(pair));
char hier_path[PATH_MAX];
@@ -1270,7 +1276,7 @@ int mount_cgroup(const char *pair, const char *hierarchy) {
free(controller);
free(mount_path);
-
return result;
+#endif
}
# Patch from https://issues.apache.org/jira/browse/YARN-1327
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
@@ -476,7 +476,7 @@ void test_signal_container_group() {
printf("FAIL: fork failed\n");
exit(1);
} else if (child == 0) {
- setpgrp();
+ setpgid(0,0);
if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) {
exit(1);
}
#!/bin/sh
#
# $FreeBSD$
#
# PROVIDE: resourcemanager
# REQUIRE: LOGIN
# KEYWORD: shutdown
#
# resourcemanager_enable (bool): Set to NO by default.
# Set it to YES to enable resourcemanager.
. /etc/rc.subr
name=resourcemanager
rcvar=resourcemanager_enable
load_rc_config "${name}"
: ${resourcemanager_enable:=NO}
: ${resourcemanager_user:=%%MAPRED_USER%%}
command="%%PREFIX%%/sbin/yarn-daemon.sh"
command_args='--config %%ETCDIR%% start resourcemanager'
stop_cmd=resourcemanager_stop
resourcemanager_stop () {
	su -m ${resourcemanager_user} -c "${command} --config %%ETCDIR%% stop resourcemanager"
}
run_rc_command "$1"
#!/bin/sh
#
# $FreeBSD$
#
# PROVIDE: secondarynamenode
# REQUIRE: LOGIN
# KEYWORD: shutdown
#
# secondarynamenode_enable (bool): Set to NO by default.
# Set it to YES to enable secondarynamenode.
. /etc/rc.subr
name=secondarynamenode
rcvar=secondarynamenode_enable
load_rc_config "${name}"
: ${secondarynamenode_enable:=NO}
: ${secondarynamenode_user:=%%HDFS_USER%%}
command="%%PREFIX%%/sbin/hadoop-daemon.sh"
command_args='--config %%ETCDIR%% start secondarynamenode'
stop_cmd=secondarynamenode_stop
secondarynamenode_stop () {
	su -m ${secondarynamenode_user} -c "${command} --config %%ETCDIR%% stop secondarynamenode"
}
run_rc_command "$1"
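
Together, the five rc.d scripts cover a single-node deployment: HDFS daemons come up before YARN, and the namenode needs a one-time format first. A sketch of a first boot, assuming the stock hdfs user created via USERS (the format step is standard Hadoop practice, not something this port performs):

sysrc namenode_enable=YES datanode_enable=YES secondarynamenode_enable=YES
sysrc resourcemanager_enable=YES nodemanager_enable=YES
su -m hdfs -c "hdfs namenode -format"   # one time only; wipes any existing HDFS metadata
service namenode start
service datanode start
service resourcemanager start
service nodemanager start
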
<settings>
  <localRepository>WORK/m2</localRepository>
</settings>
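
For reference, the post-patch target in the Makefile copies this settings.xml into ${WRKDIR} and rewrites the WORK placeholder, pointing Maven's local repository inside the work directory so the build does not touch the default ~/.m2. The effect can be reproduced by hand; a sketch, with a hypothetical work directory path:

# What the REINPLACE_CMD in post-patch effectively does
sed -i '' -e "s|WORK|/usr/ports/devel/hadoop2/work|" settings.xml
# localRepository now reads /usr/ports/devel/hadoop2/work/m2
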
The Apache Hadoop software library is a framework that allows for the
distributed processing of large data sets across clusters of computers
using a simple programming model.
WWW: http://hadoop.apache.org/