Installing HDP 3.1.0 with Ambari
#https://docs.hortonworks.com/HDPDocuments/Ambari-2.7.3.0/bk_ambari-installation/content/prepare_the_environment.html
#https://www.cnblogs.com/wangdaxianer/p/10524976.html
# https://blog.csdn.net/sinat_28224453/article/details/51538449
# https://blog.csdn.net/weixin_40809627/article/details/82707266
# https://www.linuxidc.com/Linux/2016-12/138142.htm
# Note: building the official Ambari source from http://ambari.apache.org with Maven is no longer advisable (in my experience some resource URLs used during the build are dead, and even after switching them to new addresses the resource packages still fail to match)
# Product support matrix
https://supportmatrix.hortonworks.com
Ambari 2.7.3 only supports HDP-3.1.0 and HDF-3.2.0
# Roles
192.168.1.100 ambariserver (also the ansible control node)
192.168.1.101 namenode01
192.168.1.102 namenode02
192.168.1.103 datanode01
192.168.1.104 datanode02
# Environment preparation (run on ambariserver)
# Create the VMs (run on the host machine; give them generous hardware, especially memory, as my local PC struggled badly running them)
Create the VirtualBox VMs with Vagrant
# -*- mode: ruby -*-
# vi: set ft=ruby :
servers = {
  "ambariserver" => "192.168.1.100",
  "namenode01"   => "192.168.1.101",
  "namenode02"   => "192.168.1.102",
  "datanode01"   => "192.168.1.103",
  "datanode02"   => "192.168.1.104",
}
Vagrant.configure("2") do |config|
  config.vm.box = "centos/7"
  config.vm.box_check_update = false
  servers.each do |server_name, server_ip|
    config.vm.define server_name do |server_config|
      server_config.vm.provider "virtualbox" do |vb|
        vb.gui = false
        vb.name = server_ip.to_s
        vb.cpus = 6
        vb.memory = "8192"
      end
      server_config.vm.hostname = server_name
      server_config.vm.network "public_network", ip: server_ip
      # The inline shell provisioner already runs as root, so no `sudo su - root` is needed
      server_config.vm.provision "shell", inline: <<-SHELL
        echo "123456" | passwd --stdin root
        # Enable sshd password login
        sed -i 's/^PasswordAuthentication no/PasswordAuthentication yes/g' /etc/ssh/sshd_config
        # Disable selinux
        sed -i '/^SELINUX=/cSELINUX=disabled' /etc/selinux/config
        # Enable networking
        echo "NETWORKING=yes" >> /etc/sysconfig/network
        # Set the umask for all users
        echo "umask 0022" >> /etc/profile
        # Add hosts entries for name resolution
        echo -e "192.168.1.100 ambariserver\n192.168.1.101 namenode01\n192.168.1.102 namenode02\n192.168.1.103 datanode01\n192.168.1.104 datanode02\n" >> /etc/hosts
        # Raise fs.file-max
        echo "fs.file-max = 6553560" >> /etc/sysctl.conf && sysctl --system
        # Raise the ulimit max open files
        echo -e "* soft nofile 65535\n* hard nofile 65535\n" >> /etc/security/limits.conf
        echo "session required pam_limits.so" >> /etc/pam.d/login
        echo "session required pam_limits.so" >> /etc/pam.d/system-auth
        echo "session required pam_limits.so" >> /etc/pam.d/sshd
        systemctl reload sshd
        # Enable ntp time sync
        yum install -y ntp lrzsz
        systemctl enable ntpd
        systemctl start ntpd
        # Disable the firewall
        systemctl disable firewalld
        systemctl stop firewalld
        # Generate an ssh id_rsa key pair
        yes y | ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa > /dev/null
        # Reboot to apply everything
        reboot
      SHELL
    end
  end
end
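# Bring up all five VMs and check their state (standard vagrant commands, run from the directory containing the Vagrantfile):
vagrant up
vagrant status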
# Set up passwordless ssh login
Run the following commands on 192.168.1.100:
ssh-copy-id -i ~/.ssh/id_rsa.pub -p 22 root@192.168.1.100
ssh-copy-id -i ~/.ssh/id_rsa.pub -p 22 root@192.168.1.101
ssh-copy-id -i ~/.ssh/id_rsa.pub -p 22 root@192.168.1.102
ssh-copy-id -i ~/.ssh/id_rsa.pub -p 22 root@192.168.1.103
ssh-copy-id -i ~/.ssh/id_rsa.pub -p 22 root@192.168.1.104
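# The five copies above can also be done in one loop; a minimal sketch assuming sshpass is available (e.g. from EPEL) and root's password is the 123456 set during provisioning:
yum install -y sshpass
for ip in 192.168.1.10{0..4}; do
  sshpass -p 123456 ssh-copy-id -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa.pub -p 22 root@$ip
done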
# Install the ansible control node on ambariserver to simplify managing and configuring multiple servers
yum install ansible -y
# Install libselinux-python on the other nodes (it should already be installed by default)
# yum install libselinux-python -y
# Define the hosts ansible can manage
cat <<EOF >> /etc/ansible/hosts
[ambari]
192.168.1.100
[hadoop]
192.168.1.101
192.168.1.102
192.168.1.103
192.168.1.104
EOF
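# A quick connectivity check that ansible can reach every node (ping is a standard ansible module):
ansible ambari,hadoop -m ping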
# # Enable ntp time sync
# ansible ambari,hadoop -m yum -a "name=ntp state=present"
# ansible ambari,hadoop -m service -a "name=ntpd enabled=yes state=started"
# ansible ambari,hadoop -m yum -a "name=lrzsz"
# ansible ambari,hadoop -m command -a "date"
# # Check DNS and NSCD
# # Edit /etc/hosts
# ansible ambari,hadoop -m blockinfile -a 'path=/etc/hosts block="192.168.1.100 ambariserver\n192.168.1.101 namenode01\n192.168.1.102 namenode02\n192.168.1.103 datanode01\n192.168.1.104 datanode02\n"'
# ansible ambari,hadoop -m command -a "cat /etc/hosts"
# # Edit /etc/sysconfig/network
# ansible ambari,hadoop -m blockinfile -a 'path=/etc/sysconfig/network block="NETWORKING=yes"'
# ansible ambari,hadoop -m command -a "cat /etc/sysconfig/network"
# # Disable the firewall
# ansible ambari,hadoop -m service -a "name=firewalld enabled=no state=stopped"
# # Permanently disable SELinux and PackageKit, and set the login session umask to 0022
# # Disable selinux
# ansible ambari,hadoop -m lineinfile -a 'path=/etc/selinux/config regexp="^SELINUX=" line="SELINUX=disabled"'
# ansible ambari,hadoop -m command -a 'cat /etc/selinux/config'
# # On an installation host running RHEL/CentOS with PackageKit installed, open /etc/yum/pluginconf.d/refresh-packagekit.conf using a text editor. Make the following change:
# # enabled=0
# # Set umask 0022
# # For the current login session:
# # umask 0022
# # Check your current umask:
# # umask
# # Permanently change the umask for all interactive users:
# # echo umask 0022 >> /etc/profile
# ansible ambari,hadoop -m blockinfile -a 'path=~/.bash_profile block="umask 0022"'
# ansible ambari,hadoop -a 'cat ~/.bash_profile'
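# A quick verification pass over all nodes after the prep above (ad-hoc shell; getenforce, systemctl, and ulimit are standard commands):
ansible ambari,hadoop -m shell -a "getenforce; systemctl is-active firewalld ntpd; ulimit -n"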
# Configure the databases (the point of this step: have MySQL or PostgreSQL installed locally with remote access enabled, the database connector jar installed, and an admin account prepared for the cluster build later; the configuration below is for reference only)
# Configure postgresql
# Install the database
yum install -y postgresql postgresql-server postgresql-devel postgresql-jdbc
postgresql-setup initdb
# Enable remote access to postgresql
# vi /var/lib/pgsql/data/pg_hba.conf and update to the following:
host all all 0.0.0.0/0 md5
# vi /var/lib/pgsql/data/postgresql.conf and update to the following:
listen_addresses = '*'
# Start the database
systemctl enable postgresql
systemctl start postgresql
# Configure the superuser postgres
su - postgres
psql
ALTER USER postgres WITH PASSWORD 'postgres';
# # Configuring PostgreSQL for Ranger
# CREATE DATABASE ranger;
# CREATE USER ranger WITH PASSWORD 'ranger';
# GRANT ALL PRIVILEGES ON DATABASE ranger TO ranger;
# # Configure SAM and Schema Registry Metadata Stores in Postgres
# create database registry;
# CREATE USER registry WITH PASSWORD 'registry';
# GRANT ALL PRIVILEGES ON DATABASE "registry" to registry;
# create database streamline;
# CREATE USER streamline WITH PASSWORD 'streamline';
# GRANT ALL PRIVILEGES ON DATABASE "streamline" to streamline;
# # Configure Druid and Superset Metadata Stores in Postgres
# create database druid;
# CREATE USER druid WITH PASSWORD 'druid';
# GRANT ALL PRIVILEGES ON DATABASE "druid" to druid;
# create database superset;
# CREATE USER superset WITH PASSWORD 'superset';
# GRANT ALL PRIVILEGES ON DATABASE "superset" to superset;
# Configure mariadb
# Install the database
yum install -y mariadb mariadb-server mariadb-devel mysql-connector-java
# Start the database
systemctl enable mariadb
systemctl start mariadb
# Configure the superuser and privileges
mysql
grant all privileges on *.* to root@'%' identified by 'password' with grant option;
grant all privileges on *.* to root@'localhost' identified by 'password' with grant option;
flush privileges;
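# To confirm remote access works, connect from another node (assumes the mysql client is installed there; the password is the one granted above):
mysql -h 192.168.1.100 -u root -ppassword -e "select version();"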
# Configure local Ambari Repositories and HDP Stack Repositories (an offline environment can also build its own local repository; see the official docs for details) (run on ambariserver)
# Manually create the local repository
yum install -y yum-utils createrepo
# Install an http server
yum install -y httpd
systemctl enable httpd
systemctl start httpd
mkdir -p /var/www/html/ /var/www/html/hdf /var/www/html/hdp
cd /var/www/html/
wget http://public-repo-1.hortonworks.com/ambari/centos7/2.x/updates/2.7.3.0/ambari-2.7.3.0-centos7.tar.gz
tar -xzvf ambari-2.7.3.0-centos7.tar.gz
# cd /var/www/html/hdf
# wget http://public-repo-1.hortonworks.com/HDF/centos7/3.x/updates/3.1.0.0/HDF-3.1.0.0-centos7-rpm.tar.gz
# wget http://public-repo-1.hortonworks.com/HDF-UTILS-1.1.0.22/repos/centos7/HDF-UTILS-1.1.0.22-centos7.tar.gz
# wget http://public-repo-1.hortonworks.com/HDF-GPL/centos7/3.x/updates/3.1.0.0/HDF-GPL-3.1.0.0-centos7-gpl.tar.gz
cd /var/www/html/hdp
wget http://public-repo-1.hortonworks.com/HDP/centos7/3.x/updates/3.1.0.0/HDP-3.1.0.0-centos7-rpm.tar.gz
wget http://public-repo-1.hortonworks.com/HDP-UTILS-1.1.0.22/repos/centos7/HDP-UTILS-1.1.0.22-centos7.tar.gz
wget http://public-repo-1.hortonworks.com/HDP-GPL/centos7/3.x/updates/3.1.0.0/HDP-GPL-3.1.0.0-centos7-gpl.tar.gz
tar -xzvf HDP-3.1.0.0-centos7-rpm.tar.gz
tar -xzvf HDP-UTILS-1.1.0.22-centos7.tar.gz
tar -xzvf HDP-GPL-3.1.0.0-centos7-gpl.tar.gz
# The resulting base URLs (also needed later in the Ambari web configuration):
http://192.168.1.100/ambari/centos7/2.7.3.0-139
http://192.168.1.100/hdp/HDP/centos7/3.1.0.0-78
http://192.168.1.100/hdp/HDP-GPL/centos7/3.1.0.0-78
http://192.168.1.100/hdp/HDP-UTILS/centos7/1.1.0.22
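# A quick check that httpd is actually serving the repos, assuming the tarballs extracted to the paths used in the base URLs above (the Hortonworks archives ship their own repodata):
for url in http://192.168.1.100/ambari/centos7/2.7.3.0-139 http://192.168.1.100/hdp/HDP/centos7/3.1.0.0-78 http://192.168.1.100/hdp/HDP-UTILS/centos7/1.1.0.22; do
  curl -sf -o /dev/null "$url/repodata/repomd.xml" && echo "$url OK" || echo "$url FAILED"
done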
# Configure yum to use the local repos
cat <<EOF > /etc/yum.repos.d/ambari.repo
#VERSION_NUMBER=2.7.3.0-139
[ambari-2.7.3.0]
#json.url = http://public-repo-1.hortonworks.com/HDP/hdp_urlinfo.json
name=ambari Version - ambari-2.7.3.0
baseurl=http://192.168.1.100/ambari/centos7/2.7.3.0-139
gpgcheck=1
gpgkey=http://192.168.1.100/ambari/centos7/2.7.3.0-139/RPM-GPG-KEY/RPM-GPG-KEY-Jenkins
enabled=1
priority=1
EOF
cat <<EOF > /etc/yum.repos.d/hdp.repo
#VERSION_NUMBER=3.1.0.0-78
[HDP-3.1.0.0]
name=HDP Version - HDP-3.1.0.0
baseurl=http://192.168.1.100/hdp/HDP/centos7/3.1.0.0-78
gpgcheck=1
gpgkey=http://192.168.1.100/hdp/HDP/centos7/3.1.0.0-78/RPM-GPG-KEY/RPM-GPG-KEY-Jenkins
enabled=1
priority=1
[HDP-UTILS-1.1.0.22]
name=HDP-UTILS Version - HDP-UTILS-1.1.0.22
baseurl=http://192.168.1.100/hdp/HDP-UTILS/centos7/1.1.0.22
gpgcheck=1
gpgkey=http://192.168.1.100/hdp/HDP-UTILS/centos7/1.1.0.22/RPM-GPG-KEY/RPM-GPG-KEY-Jenkins
enabled=1
priority=1
EOF
# List the currently enabled repos
yum repolist
# When deploying the hadoop cluster with ambari, every machine in the cluster needs to download the relevant packages. To make each machine use the web service configured above as its yum source (i.e. fetch packages from that web server), apply the following configuration on every cluster machine
ansible ambari,hadoop -m yum -a 'name=yum-plugin-priorities'
cat <<EOF > /etc/yum/pluginconf.d/priorities.conf
[main]
enabled=1
gpgcheck=0
EOF
ansible hadoop -m copy -a 'src=/etc/yum/pluginconf.d/priorities.conf dest=/etc/yum/pluginconf.d/priorities.conf'
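# The ambari.repo file can be distributed the same way so the agent nodes can install from the local mirror (Ambari's host bootstrap normally lays this file down itself, so treat this as an optional belt-and-braces step):
# ansible hadoop -m copy -a 'src=/etc/yum.repos.d/ambari.repo dest=/etc/yum.repos.d/ambari.repo'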
# Alternatively, configure the internet repos directly and download from the internet while installing the hadoop components (not recommended: the packages are numerous and large, and any interruption means starting over, so the local yum repo approach is better)
yum install -y yum-utils
yum-config-manager --add-repo http://public-repo-1.hortonworks.com/ambari/centos7/2.x/updates/2.7.3.0/ambari.repo
yum-config-manager --add-repo http://public-repo-1.hortonworks.com/HDP/centos7/3.x/updates/3.1.0.0/hdp.repo
yum repolist
# Install ambari-server (run on ambariserver)
yum install -y ambari-server
# Configure ambari-server
# WARNING: JDK must be installed on all hosts and JAVA_HOME must be valid on all hosts.
# WARNING: JCE Policy files are required for configuring Kerberos security. If you plan to use Kerberos,please make sure JCE Unlimited Strength Jurisdiction Policy Files are valid on all hosts.
ambari-server setup
# This walks through a series of interactive choices and prompts
# Customize user account for ambari-server daemon [y/n] (n)? y
# Enter user account for ambari-server daemon (root):
# Checking firewall status...
# Checking JDK...
# [1] Oracle JDK 1.8 + Java Cryptography Extension (JCE) Policy Files 8
# [2] Custom JDK
# ==============================================================================
# Enter choice (1): 1
# To download the Oracle JDK and the Java Cryptography Extension (JCE) Policy Files you must accept the license terms found at http://www.oracle.com/technetwork/java/javase/terms/license/index.html and not accepting will cancel the Ambari Server setup and you must install the JDK and JCE files manually.
# Do you accept the Oracle Binary Code License Agreement [y/n] (y)?
# Downloading JDK from http://public-repo-1.hortonworks.com/ARTIFACTS/jdk-8u112-linux-x64.tar.gz to /var/lib/ambari-server/resources/jdk-8u112-linux-x64.tar.gz
# jdk-8u112-linux-x64.tar.gz... 100% (174.7 MB of 174.7 MB)
# Successfully downloaded JDK distribution to /var/lib/ambari-server/resources/jdk-8u112-linux-x64.tar.gz
# Installing JDK to /usr/jdk64/
# Successfully installed JDK to /usr/jdk64/
# Downloading JCE Policy archive from http://public-repo-1.hortonworks.com/ARTIFACTS/jce_policy-8.zip to /var/lib/ambari-server/resources/jce_policy-8.zip
# Successfully downloaded JCE Policy archive to /var/lib/ambari-server/resources/jce_policy-8.zip
# Installing JCE policy...
# Check JDK version for Ambari Server...
# JDK version found: 8
# Minimum JDK version is 8 for Ambari. Skipping to setup different JDK for Ambari Server.
# Checking GPL software agreement...
# GPL License for LZO: https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html
# Enable Ambari Server to download and install GPL Licensed LZO packages [y/n] (n)?
# Completing setup...
# Configuring database...
# Enter advanced database configuration [y/n] (n)?
# Configuring database...
# Default properties detected. Using built-in database.
# Configuring ambari database...
# Checking PostgreSQL...
# Configuring local database...
# Configuring PostgreSQL...
# Restarting PostgreSQL
# Creating schema and user...
# done.
# Creating tables...
# done.
# Extracting system views...
# ambari-admin-2.7.3.0.139.jar
# ....
# Ambari repo file contains latest json url http://public-repo-1.hortonworks.com/HDP/hdp_urlinfo.json, updating stacks repoinfos with it...
# Adjusting ambari-server permissions and ownership...
# Ambari Server 'setup' completed successfully.
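# For repeatable installs the same setup can be run non-interactively; -s (silent, accept defaults) and -j (custom JAVA_HOME) are documented ambari-server flags, and the JDK path below assumes the location the interactive setup installed to:
# ambari-server setup -s -j /usr/jdk64/jdk1.8.0_112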
# Start ambari-server
ambari-server start
# Using python /usr/bin/python
# Starting ambari-server
# Ambari Server running with administrator privileges.
# Organizing resource files at /var/lib/ambari-server/resources...
# Ambari database consistency check started...
# Server PID at: /var/run/ambari-server/ambari-server.pid
# Server out at: /var/log/ambari-server/ambari-server.out
# Server log at: /var/log/ambari-server/ambari-server.log
# Waiting for server start.....................
# Server started listening on 8080
# DB configs consistency check: no errors and warnings were found.
# Ambari Server 'start' completed successfully.
# # Check the Ambari Server process
# ambari-server status
# # Stop ambari-server
# ambari-server stop
# Create the hadoop cluster (run on ambariserver)
# Log in to ambari-server
sz ~/.ssh/id_rsa   # download the private key; it is needed later when creating the hadoop cluster in the web UI
http://192.168.1.100:8080 admin/admin
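# The Ambari REST API gives a quick sanity check that the server is up (standard endpoint, default credentials from above):
curl -u admin:admin http://192.168.1.100:8080/api/v1/clusters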
# Create the cluster through the web UI
# Get Started
# Define the cluster name
# HadoopCluster
# Choose the hadoop version
HDP-3.1 or HDP-3.0
# Point at the local repositories
os redhat7
name HDP-3.1 Base URL http://192.168.1.100/hdp/HDP/centos7/3.1.0.0-78
name HDP-UTILS-1.1.0.22 Base URL http://192.168.1.100/hdp/HDP-UTILS/centos7/1.1.0.22
# Install Options
# Enter the target host names
namenode01
namenode02
datanode01
datanode02
# Provide ambariserver's private key id_rsa
# Set SSH User Account to root
# Set SSH Port Number to 22
# Confirm Hosts
All host checks passed on 4 registered hosts. Click here to see the check results.
# Choose Services
The default selections are fine (YARN + MapReduce2, Hive, ZooKeeper, etc.)
# Choose File System (there is only one option, so nothing to choose)
# Assign Masters (recommended: put a ZooKeeper Server on every node, keeping the total odd, plus a Kafka Broker)
Assign the various services to the hosts they should run on:
datanode01 (1.8 GB, 3 cores)
ZooKeeper Server Kafka Broker
datanode02 (1.8 GB, 3 cores)
ZooKeeper Server Kafka Broker
namenode01 (1.8 GB, 3 cores)
NameNode ResourceManager Timeline Service V2.0 Reader Timeline Service V1.5 YARN Registry DNS History Server HiveServer2 Hive Metastore HBase Master Oozie Server ZooKeeper Server Nimbus Storm UI Server DRPC Server Accumulo Master Accumulo Tracer Accumulo GC Accumulo Monitor Infra Solr Instance Metrics Collector Grafana Atlas Metadata Server Kafka Broker Knox Gateway Ranger Usersync Ranger Admin Ranger KMS Server Activity Explorer Activity Analyzer HST Server Spark2 History Server Zeppelin Notebook Druid Coordinator Druid Overlord Druid Router Druid Broker
namenode02 (1.8 GB, 3 cores)
SNameNode Kafka Broker
# Assign Slaves and Clients (recommended: install Supervisor, RegionServer, and Client on every node)
# Assign slave and client components to hosts you want to run them on.
# Hosts that are assigned master components are shown with ✵.
# "Client" will install HDFS Client, YARN Client, MapReduce2 Client, Tez Client, Hive Client, HBase Client, Pig Client, Sqoop Client, Oozie Client, ZooKeeper Client, Accumulo Client, Infra Solr Client, Atlas Metadata Client and Spark2 Client.
# Customize Services
# Please provide credentials for these services (services that already have default passwords need no change)
password
# Please choose and configure the appropriate databases for these services
# DRUID
# HIVE
# OOZIE
# RANGER 2
Requires the Ranger DB host and the dba password
DBhost : 192.168.1.100
dba : root
dba password : password
# RANGER KMS 2
Requires the Ranger KMS DB host and the dba password
DBhost : 192.168.1.100
dba : root
dba password : password
# The database jdbc connector jar also needs to be installed
yum install -y mysql-connector-java
ambari-server setup --jdbc-db=mysql --jdbc-driver=/usr/share/java/mysql-connector-java.jar
# Using python /usr/bin/python
# Setup ambari-server
# Copying /usr/share/java/mysql-connector-java.jar to /var/lib/ambari-server/resources/mysql-connector-java.jar
# If you are updating existing jdbc driver jar for mysql with mysql-connector-java.jar. Please remove the old driver jar, from all hosts. Restarting services that need the driver, will automatically copy the new jar to the hosts.
# JDBC driver was successfully initialized.
# Ambari Server 'setup' completed successfully.
# Finally, click TEST CONNECTION at the bottom of both RANGER 2 and RANGER KMS 2; both should report Connection OK
# Then set the working directories for each service (HDFS, YARN, etc.)
Fill in or adjust these according to your own business needs
# Please review these settings for Service Accounts
All user and group information
# SETTINGS and ADVANCED (items flagged in red must be resolved)
Advanced druid-env
druid.broker.jvm.heap.memory 1024
xasecure.policymgr.clientssl.truststore.password password
ssl.client.keystore.password password
ssl.server.keystore.keypassword password
ssl.server.truststore.password password
# Please review the configuration before installation
# Admin Name : admin
# Cluster Name : HadoopCluster
# Total Hosts : 4 (4 new)
# Repositories:
# redhat7 (HDP-3.1):
# http://192.168.1.100/hdp/HDP/centos7/3.1.0.0-78
# redhat7 (HDP-UTILS-1.1.0.22):
# http://192.168.1.100/hdp/HDP-UTILS/centos7/1.1.0.22
# Services:
# HDFS
# DataNode : 2 hosts
# NameNode : namenode01
# NFSGateway : 0 host
# SNameNode : namenode02
# YARN + MapReduce2
# Timeline Service V1.5 : namenode02
# NodeManager : 2 hosts
# ResourceManager : namenode01
# Timeline Service V2.0 Reader : namenode01
# Registry DNS : namenode01
# Tez
# Clients : 2 hosts
# Hive
# Metastore : namenode02
# HiveServer2 : namenode02
# Database : New MySQL Database
# HBase
# Master : namenode01
# RegionServer : 2 hosts
# Phoenix Query Server : 0 host
# Pig
# Clients : 2 hosts
# Sqoop
# Clients : 2 hosts
# Oozie
# Server : namenode01
# Database : New Derby Database
# ZooKeeper
# Server : 3 hosts
# Storm
# DRPC Server : namenode01
# Nimbus : namenode01
# UI Server : namenode01
# Supervisor : 2 hosts
# Accumulo
# GC : namenode01
# Master : namenode01
# Monitor : namenode01
# Tracer : namenode01
# TServer : 2 hosts
# Infra Solr
# Infra Solr Instance : namenode01
# Ambari Metrics
# Metrics Collector : datanode01
# Grafana : namenode01
# Atlas
# Metadata Server : namenode02
# Kafka
# Broker : namenode01
# Knox
# Gateway : namenode01
# Ranger
# Admin : namenode02
# Tagsync : 1 host
# Usersync : namenode02
# Ranger KMS
# Ranger KMS Server : namenode02
# SmartSense
# Activity Analyzer : namenode01
# Activity Explorer : namenode01
# HST Server : namenode01
# Spark2
# Livy for Spark2 Server : 0 host
# History Server : namenode01
# Thrift Server : 0 host
# Zeppelin Notebook
# Notebook : namenode01
# Druid
# Broker : namenode01
# Coordinator : namenode01
# Historical : 2 hosts
# MiddleManager : 2 hosts
# Overlord : namenode01
# Router : namenode01
# Finally click Deploy to run the actual cluster installation
# Install, Start and Test
# The simplest way to reinstall hadoop with ambari (drop ambari's database, then reconfigure ambari and reinstall hadoop)
# Stop ambari
ambari-server stop
# Drop the ambari database
su - postgres
psql
drop database ambari;
# Uninstall the deployed packages
ansible ambari,hadoop -m shell -a "ambari-agent stop"
ansible hadoop -m shell -a "rm -rf /etc/oozie/conf"
ansible hadoop -m shell -a "rpm -qa | grep -E 'ambari-|3_1_0_0_78' | xargs yum remove -y"
ansible hadoop -m yum -a "name=smartsense-hst state=absent"
ansible hadoop -m file -a 'path=/etc/yum.repos.d/ambari.repo state=absent'
ansible hadoop -m file -a 'path=/etc/yum.repos.d/ambari-hdp-1.repo state=absent'
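# Leftover data, log, and install directories can also break a reinstall; a cleanup sketch using the HDP default locations (adjust to whatever directories you actually configured):
# ansible hadoop -m shell -a "rm -rf /usr/hdp /var/log/hadoop* /hadoop"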
# Then rerun the ambari-server setup flow from the beginning
# Errors encountered
1. Ranger Admin Start fails to connect to zookeeper: Unable to read additional data from server sessionid 0x0, likely server has closed socket, closing socket connection and attempting reconnect
Change the total number of ZooKeeper Servers to an odd count
# At http://192.168.1.100:8080/#/main/hosts/datanode02/summary stop the ZooKeeper Server / ZooKeeper component on datanode02, then delete the component
The zookeeper connection can also fail because the hostname resolves to 127.0.0.1 in /etc/hosts instead of the real IP
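# A quick check that every node resolves its own hostname to the real IP rather than 127.0.0.1 (getent is standard; $(hostname) expands on the remote side):
ansible ambari,hadoop -m shell -a 'getent hosts $(hostname)'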
# I won't write up the remaining details; tinker with them yourself