系统版本:18.04
内核:4.15.0
系统盘做 raid1,其余盘 raw
略(可以配置 bond)
18.04 版本推荐使用 chrony
ansible 安装版本参考ceph-ansible 文档
apt-get install python-pip
pip install notario
pip install ansible==2.6.10
git clone -b stable-3.2 https://github.com/ceph/ceph-ansible
ssh-keygen #一路回车生成密钥
ssh-copy-id xxx@host
如果中控机也是节点,也要执行上述步骤
最后测试一下
sudo ansible -m ping -i hosts all
cp group_vars/all.yml.sample group_vars/all.yml
cp group_vars/osds.yml.sample group_vars/osds.yml
cp group_vars/clients.yml.sample group_vars/clients.yml
cp site.yml.sample site.yml
具体分组项参考 site.yml 前几行;不需要安装的组件,一定不要写,或者注释掉!!!
### ceph
[mons]
192.168.x.1
192.168.x.2
192.168.x.3
[osds]
192.168.x.1
192.168.x.2
192.168.x.3
[mgrs]
192.168.x.1
192.168.x.2
192.168.x.3
[clients]
192.168.x.1
192.168.x.2
192.168.x.3
#[rgws]
#192.168.x.1
如果需要复制 admin key 到 client 节点,记得开启以下选项,默认关闭
copy_admin_key: true
注意:ceph 配置参数不要以空格间隔,都要以下划线_间隔!!!
openstack@ceph5:~/ceph-ansible$ more group_vars/all.yml|grep -v '^#' --- dummy: ceph_origin: repository #这里使用阿里云ceph源安装,其他安装方式参考官方文档 ceph_repository: community ceph_mirror: http://mirrors.aliyun.com/ceph/ ceph_stable_key: https://mirrors.aliyun.com/ceph/keys/release.asc ceph_stable_release: mimic ceph_stable_repo: "{{ ceph_mirror }}/debian-{{ ceph_stable_release }}" cephx: true monitor_interface: bond1 public_network: 192.168.x.0/24 radosgw_civetweb_port: 7480 radosgw_civetweb_num_threads: 200 rgw_multisite: False ceph_conf_overrides: global: rbd_default_features: 125 osd_pool_default_size: 2 osd_pool_default_min_size: 1 mon_pg_warn_min_per_osd: 8 mon_pg_warn_max_per_osd: 1024 osd_pool_default_pg_num: 128 osd_pool_default_pgp_num: 128 max_open_files: 131072 mon_pg_warn_max_object_skew: 50 mgr: mgr_modules: dashboard mon: mon_allow_pool_delete: true mon_clock_drift_allowed: 1 mon_osd_down_out_interval: 600 client: rbd_cache: true rbd_cache_size: 335544320 rbd_cache_max_dirty: 134217728 rbd_cache_max_dirty_age: 30 rbd_cache_writethrough_until_flush: false rbd_cache_max_dirty_object: 2 rbd_cache_target_dirty: 235544320 osd: osd_max_write_size: 512 osd_client_message_size_cap: 214783648 osd_deep_scrub_stride: 131072 osd_op_threads: 8 osd_disk_threads: 4 osd_map_cache_size: 1024 osd_map_cache_bl_size: 128 osd_recovery_op_priority: 4 osd_recovery_max_active: 10 osd_max_backfills: 4 osd_min_pg_log_entries: 30000 osd_max_pg_log_entries: 100000 osd_mon_heartbeat_interval: 40 osd_mount_options_xfs: "rw,noexec,nodev,noatime,nodiratime,nobarrier" ms_dispatch_throttle_bytes: 1048576000 objecter_inflight_ops: 819200 osd_op_log_threshold: 50 osd_crush_chooseleaf_type: 0 osd_max_scrubs: 1 osd_scrub_begin_hour: 0 osd_scrub_end_hour: 7 osd_recovery_max_chunk: 1048576 osd_scrub_sleep: 1 osd_scrub_chunk_max: 5 osd_scrub_during_recovery: false osd_client_message_size_cap: 2147483648 osd_crush_update_on_start: true osd_deep_scrub_stride: 131072 osd_disk_threads: 4 osd_map_cache_bl_size: 
128 osd_max_object_name_len: 256 osd_max_object_namespace_len: 64 osd_max_write_size: 1024 osd_op_threads: 8
group_vars/osds.yml
openstack@ceph5:~/ceph-ansible$ more group_vars/osds.yml|grep -v '^#' --- dummy: osd_scenario: lvm osd_objectstore: bluestore lvm_volumes: - data: /dev/sdb - data: /dev/sdc - data: /dev/sdd - data: /dev/sde - data: /dev/sdf - data: /dev/sdg - data: /dev/sdh - data: /dev/sdi - data: /dev/sdj - data: /dev/sdk - data: /dev/sdl - data: /dev/sdm - data: /dev/sdn - data: /dev/sdo #这里根据设备号来添加
如果想让主机上的 osd-id 连续,可以修改 site.yml 源代码,增加 serial(https://docs.ansible.com/ansible/latest/user_guide/playbooks_delegation.html)
- hosts: osds serial: 1
如果想设置 crush rule 和 crush device-class 这块,推荐不要配置,让 osd 自动识别 class,之后如有需求,再手工修改。
如果还有其他需求,可以修改 group_vars 下其他 yml
ansible-playbook -i hosts site.yml
TIPS:
stable-3.2 的某些脚本陈旧,比如 shrink-osd.yml 只支持 ceph-disk;支持 ceph-volume 的最新版本脚本来自 master 分支
如果过程中报类似 containerized_deployment 变量不存在,建议直接-e 指定,而不是去修改 group_vars/all.yml
sudo ansible-playbook -i hosts shrink-osd.yml -e osd_to_kill=0,1,2,3 -e cluster=ceph -e docker_exec_cmd="" -e containerized_deployment=False
删除后,还需要手动删除 lvm 信息;最好不要一次性删除过多 osd,该脚本还不是 stable 状态。
第一步:修改 group_vars/osds.yml 添加新 osd 设备;
第二步:
ansible-playbook -i hosts site.yml
或者
修改 infrastructure-playbooks/add-osd.yml:
- hosts: osds
serial: 1
然后复制到 ceph-ansible 目录下
ansible-playbook -i hosts add-osd.yml
ceph dashboard create-self-signed-cert
ceph dashboard set-login-credentials <username> <password>
ceph config set mgr mgr/dashboard/mgr-hostname/server_addr mgr-hostip
默认使用https://mgr-ip:8443访问
hosts 文件添加 rgws
此处示例作为 secondary,不是 master,master 配置不一样
确认 group_vars/all.yml 内容
radosgw_civetweb_port: 7480
radosgw_civetweb_num_threads: 200
radosgw_interface: bond1
#############
# MULTISITE #
#############
rgw_multisite: True
# The following Multi-site related variables should be set by the user.
# rgw_zone is set to "default" to enable compression for clusters configured without rgw multi-site
# If multisite is configured rgw_zone should not be set to "default". See README-MULTISITE.md for an example.
rgw_zone: zone-bim01
rgw_zonemaster: false
rgw_zonesecondary: true
rgw_multisite_endpoint_addr: "{{ ansible_fqdn }}"
rgw_zonegroup: master # should be set by the user
rgw_zone_user: zone.user
rgw_realm: default # should be set by the user
system_access_key: ****************** # should be re-created by the user
system_secret_key: *********************************** # should be re-created by the user
# Multi-site remote pull URL variables
rgw_pull_port: "7480"
rgw_pull_proto: "http"
rgw_pullhost: 192.168.x.xx # rgw_pullhost only needs to be declared if there is a zone secondary. It should be the same as rgw_multisite_endpoint_addr for the master cluster
从Luminous开始,仅有以下池
确认 group_vars/rgws.yml
rgw_create_pools:
.rgw.root:
pg_num: 32
size: "2"
application: "rgw"
zone-bim01.rgw.control:
pg_num: 32
size: "2"
application: "rgw"
zone-bim01.rgw.log:
pg_num: 32
size: "2"
application: "rgw"
zone-bim01.rgw.meta:
pg_num: 32
size: "2"
application: "rgw"
zone-bim01.rgw.buckets.index:
pg_num: 32
size: "2"
application: "rgw"
zone-bim01.rgw.buckets.data:
pg_num: 512
size: "2"
application: "rgw"
#如果不设置size,就会用default size
创建 multisite 前,会创建 rgw 因此会产生 default.rgw.control、default.rgw.meta、default.rgw.log 也可以在此处编辑。
执行部署:
ansible-playbook -i hosts site.yml
查看 multisite 状态:
radosgw-admin sync status