集群节点规划
hadoop01:192.168.58.200
hadoop02:192.168.58.201
hadoop03:192.168.58.202
zookeeper:hadoop01,hadoop02,hadoop03
namenode:hadoop01,hadoop02
datanode:hadoop01,hadoop02,hadoop03
resourcemanager:hadoop01,hadoop02
nodemanager:hadoop01,hadoop02,hadoop03
安装zookeeper
略
安装Java
下载jdk安装包
http://www.oracle.com/technetwork/java/javase/downloads/index.html
安装
>mkdir /home/hadoop/sdk/java/
>cd /home/hadoop/sdk/java/
>tar -xzvf jdk1.8.tar.gz
>su root
#创建软连接
>ln -s /home/hadoop/sdk/java/jdk1.8 /usr/local/java
#修改权限
>chown -R hadoop:hadoop /usr/local/java
>vim /etc/profile
export JAVA_HOME=/usr/local/java
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib
export PATH=$JAVA_HOME/bin:$PATH
>source /etc/profile
>su hadoop
>java -version
配置免密码登录
#进入hadoop01
>cd ~/.ssh
>ssh-keygen -t rsa
#一路回车,生成id_rsa, id_rsa.pub
#将id_rsa.pub复制到hadoop02,hadoop03
>ssh-copy-id -i hadoop02
>ssh-copy-id -i hadoop03
#进入hadoop02,hadoop03重复上面操作,并分别向另外两台机器复制公钥
安装hadoop
下载安装包
http://hadoop.apache.org/#Download+Hadoop
安装
>mkdir -p /home/hadoop/applications/hadoop
>cd /home/hadoop/applications/hadoop
>tar -xzvf hadoop2.7.5.tar.gz
#创建软连接
>su root
>ln -s /home/hadoop/applications/hadoop/hadoop2.7.5 /usr/local/hadoop
>chown -R hadoop:hadoop /usr/local/hadoop
#添加hadoop环境变量
>vim /etc/profile
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_HOME=$HADOOP_HOME
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
>source /etc/profile
#配置HDFS
>su hadoop
>cd /usr/local/hadoop/etc/hadoop
#hadoop-env.sh
>vim hadoop-env.sh
export JAVA_HOME=/usr/local/java
#core-site.xml
<configuration>
<!-- 指定hdfs的nameservice名称空间为ns -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns</value>
</property>
<!-- 指定hadoop临时目录,默认在/tmp/{$user}目录下,不安全,每次开机都会被清空-->
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/hdpdata/</value>
<description>需要手动创建hdpdata目录</description>
</property>
<!-- 指定zookeeper地址 -->
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop01:2181,hadoop02:2181,hadoop03:2181</value>
<description>zookeeper地址,多个用逗号隔开</description>
</property></configuration>
#hdfs-site.xml
<configuration>
<!-- NameNode HA配置 -->
<property>
<name>dfs.nameservices</name>
<value>ns</value>
<description>指定hdfs的nameservice为ns,需要和core-site.xml中的保持一致</description>
</property>
<property>
<name>dfs.ha.namenodes.ns</name>
<value>nn1,nn2</value>
<description>ns命名空间下有两个NameNode,逻辑代号,随便起名字,分别是nn1,nn2</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns.nn1</name>
<value>hadoop01:9000</value>
<description>nn1的RPC通信地址</description>
</property>
<property>
<name>dfs.namenode.http-address.ns.nn1</name>
<value>hadoop01:50070</value>
<description>nn1的http通信地址</description>
</property>
<property>
<name>dfs.namenode.rpc-address.ns.nn2</name>
<value>hadoop02:9000</value>
<description>nn2的RPC通信地址</description>
</property>
<property>
<name>dfs.namenode.http-address.ns.nn2</name>
<value>hadoop02:50070</value>
<description>nn2的http通信地址</description>
</property>
<!--JournalNode配置 -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop01:8485;hadoop02:8485;hadoop03:8485/ns</value>
<description>指定NameNode的edits元数据在JournalNode上的存放位置</description>
</property> <property>
<name>dfs.journalnode.edits.dir</name>
<value>/usr/local/hadoop/journaldata</value>
<description>指定JournalNode在本地磁盘存放数据的位置</description>
</property>
<!--namenode高可用主备切换配置 -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
<description>开启NameNode失败自动切换</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.ns</name> <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
<description>配置失败自动切换实现方式,使用内置的zkfc</description>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
<description>配置隔离机制,多个机制用换行分割,先执行sshfence,执行失败后执行shell(/bin/true),/bin/true会直接返回0表示成功</description>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
<description>使用sshfence隔离机制时需要ssh免登陆</description>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
<description>配置sshfence隔离机制超时时间</description>
</property>
<!--dfs文件属性设置-->
<property>
<name>dfs.replication</name>
<value>3</value>
<description>设置block副本数为3</description>
</property>
<property>
<name>dfs.block.size</name>
<value>134217728</value>
<description>设置block大小是128M</description>
</property></configuration>
#yarn-site.xml
<configuration>
<!-- 开启RM高可用 -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- 指定RM的cluster id,一组高可用的rm共同的逻辑id -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yarn-ha</value>
</property>
<!-- 指定RM的名字,可以随便自定义 -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- 分别指定RM的地址 -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop01</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name> <value>${yarn.resourcemanager.hostname.rm1}:8088</value>
<description>HTTP访问的端口号</description>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop02</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>${yarn.resourcemanager.hostname.rm2}:8088</value>
</property>
<!-- 指定zookeeper集群地址 -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>hadoop01:2181,hadoop02:2181,hadoop03:2181</value>
</property>
<!--NodeManager上运行的附属服务,需配置成mapreduce_shuffle,才可运行MapReduce程序-->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- 开启日志聚合 -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- 日志聚合HDFS目录 -->
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/usr/local/hadoop/yarn-logs</value>
</property>
<!-- 日志保存时间3days,单位秒 -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>259200</value>
</property>
</configuration>
#mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>指定mr框架为yarn方式
</description>
</property>
<!-- 历史日志服务jobhistory相关配置 -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop03:10020</value>
<description>历史服务器端口号</description>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop03:19888</value>
<description>历史服务器的WEB UI端口号</description>
</property>
<property>
<name>mapreduce.jobhistory.joblist.cache.size</name>
<value>2000</value>
<description>内存中缓存的historyfile文件信息(主要是job对应的文件目录)</description>
</property>
</configuration>
#在/usr/local/hadoop下创建目录
>mkdir hdpdata
>mkdir yarn-logs
#修改slaves
>vim slaves
hadoop01
hadoop02
hadoop03
#将hadoop安装包拷贝到另外两台机器上
>scp -r /home/hadoop/applications/hadoop/hadoop2.7.5 hadoop@hadoop02:/home/hadoop/applications/hadoop/
>scp -r /home/hadoop/applications/hadoop/hadoop2.7.5 hadoop@hadoop03:/home/hadoop/applications/hadoop/
#在第二个和第三个节点上创建软连接并配置hadoop环境变量
#格式化hdfs(格式化前需先启动zookeeper集群,并在各节点启动journalnode:hadoop-daemon.sh start journalnode)
>hdfs namenode -format
#格式化zk
>hdfs zkfc -formatZK
#将hadoop01中格式化生成的hadoop_home/hdpdata下的文件复制到hadoop02上
#启动hadoop
>start-all.sh
登录 | 立即注册