#!/bin/sh
################################################################################
#
# This file is part of the vhacs project.
# Jerome Martin
# based on RA by SUSE LINUX Products GmbH, Lars Marowsky-Bree
#
################################################################################
#
# OCF master/slave resource agent for a DRBD resource.
#
# The agent only defines the rsc_* hooks below; dispatching is delegated to
# ocf_main, which (like ocf_log and ocf_cmd) is expected to come from the
# sourced vhacs helper file.
#
################################################################################
#
# OCF instance parameters:
#   OCF_RESKEY_drbd_resource
#   OCF_RESKEY_drbdconf
#   OCF_RESKEY_CRM_meta_clone_max
#   OCF_RESKEY_CRM_meta_clone_node_max
#   OCF_RESKEY_master_max
#   OCF_RESKEY_master_node_max
#
################################################################################
#
# Source common vhacs OCF RA code.
. "${OCF_ROOT}/resource.d/vhacs/.ocf-vhacs"
#
################################################################################

# Print the agent meta-data on stdout.
# NOTE(review): the text below contains no XML markup although OCF meta-data
# is normally an XML document -- the tags look like they were stripped at some
# point. The text is kept verbatim; confirm against the project's original.
# The heredoc body is deliberately unindented so that the terminator is found
# regardless of tab/space conversion.
rsc_meta_data() {
    cat <<- EOF
1.1
Master/Slave OCF Resource Agent for DRBD
This resource agent manages a Distributed Replicated Block Device (DRBD) object as a master/slave resource. DRBD is a mechanism for replicating storage; please see the documentation for setup details.
The name of the drbd resource from the drbd.conf file.
drbd resource name
Full path to the drbd.conf file.
Path to drbd.conf
Number of clones of this drbd resource. Do not fiddle with the default.
Number of clones
Clones per node. Do not fiddle with the default.
Number of nodes
Maximum number of active primaries. Do not fiddle with the default.
Number of primaries
Maximum number of primaries per node. Do not fiddle with the default.
Number of primaries per node
EOF
}

# Bring the drbd resource up.
# Returns OCF_ERR_GENERIC if "drbdadm up" fails, OCF_SUCCESS otherwise.
rsc_start() {
    # Start the drbd device
    #ocf_lock_RA
    ocf_cmd "${DRBDADM} up ${RESOURCE}" || return ${OCF_ERR_GENERIC}
    #ocf_unlock_RA

    # If there were no errors, return success
    return ${OCF_SUCCESS}
}

# Drop the master preference and take the drbd resource down.
# Always returns OCF_SUCCESS; failures of either command are ignored on
# purpose and left for the monitor operation to detect.
rsc_stop() {
    # Clear preference for becoming master
    ocf_log info "rsc_stop trying to delete preference for becoming master"
    ocf_cmd "${CRM_MASTER} -D"

    # Shutdown the drbd device
    ocf_log info "rsc_stop trying to shut down drbd device ${RESOURCE}"
    #ocf_lock_RA
    ocf_cmd "${DRBDADM} down ${RESOURCE}"
    #ocf_unlock_RA

    # We've done all we could to stop that resource, let monitor check it
    return ${OCF_SUCCESS}
}

# Report the state of the drbd device with minor number DRBD_MINOR, as read
# from /proc/drbd.
# Sets the globals DRBD_MINOR_PADDED, DRBD_CSTATE, DRBD_STATE_LOCAL and
# DRBD_STATE_REMOTE.
# Returns:
#   OCF_NOT_RUNNING     local state is "Not configured" (or unrecognised)
#   OCF_RUNNING_MASTER  local state is "Primary"
#   OCF_SUCCESS         local state is "Secondary"
rsc_monitor() {
    # The awk patterns below anchor on the start of the line, and minors
    # below 10 are matched with one leading space.
    if [ "${DRBD_MINOR}" -lt 10 ]; then
        DRBD_MINOR_PADDED=" ${DRBD_MINOR}"
    else
        DRBD_MINOR_PADDED="${DRBD_MINOR}"
    fi

    # Field 2 is "cs:<connection state>", field 3 is "st:<local>/<remote>".
    DRBD_CSTATE=$(awk "/^${DRBD_MINOR_PADDED}:/ {print \$2}" /proc/drbd \
        | sed 's/cs://g' | tr -d '\n')
    DRBD_STATE_LOCAL=$(awk "/^${DRBD_MINOR_PADDED}:/ {print \$3}" /proc/drbd \
        | sed 's/st://g' | awk -F / '{print $1}' | tr -d '\n')
    DRBD_STATE_REMOTE=$(awk "/^${DRBD_MINOR_PADDED}:/ {print \$3}" /proc/drbd \
        | awk -F / '{print $2}' | tr -d '\n')

    ocf_log info "${RESOURCE} status: local state is ${DRBD_STATE_LOCAL}, remote state is ${DRBD_STATE_REMOTE} and cstate is ${DRBD_CSTATE}"

    # Sanitize the various states, drbdadm is quite annoying; so if it
    # outputs something which doesn't make sense, translate it into
    # a harmless state:
    case "${DRBD_STATE_LOCAL}" in
        "Not configured"|"Primary"|"Secondary") ;;
        *) DRBD_STATE_LOCAL="Not configured" ;;
    esac
    case "${DRBD_STATE_REMOTE}" in
        "Primary"|"Secondary"|"Unknown") ;;
        *) DRBD_STATE_REMOTE="Not configured" ;;
    esac
    # The "cs:" prefix has already been removed by sed above, so the
    # patterns must not carry it (otherwise every valid state would be
    # overwritten with "Unconfigured").
    case "${DRBD_CSTATE}" in
        Unconfigured|StandAlone|Unconnected|Timeout|BrokenPipe) ;;
        NetworkFailure|WFConnection|WFReportParams|Connected|SkippedSyncS) ;;
        SkippedSyncT|WFBitMapS|WFBitMapT|SyncSource|SyncTarget) ;;
        PausedSyncS|PausedSyncT) ;;
        *) DRBD_CSTATE="Unconfigured" ;;
    esac

    case "${DRBD_STATE_LOCAL}" in
        "Not configured")
            return ${OCF_NOT_RUNNING}
            ;;
        "Primary")
            ocf_cmd ${CRM_MASTER} -v 100
            return ${OCF_RUNNING_MASTER}
            ;;
        "Secondary")
            ocf_cmd ${CRM_MASTER} -v 100
            return ${OCF_SUCCESS}
            ;;
        *)
            # Not reachable after the sanitising above; kept as a safety net.
            ocf_log err "${RESOURCE} monitor: unexpected local state: ${DRBD_STATE_LOCAL}"
            return ${OCF_ERR_GENERIC}
            ;;
    esac
}

# Promote the drbd resource to primary.
# Returns OCF_ERR_GENERIC if "drbdadm primary" fails, OCF_SUCCESS otherwise.
rsc_promote() {
    # Promote the device to primary
    ocf_log info "rsc_promote trying to promote drbd device ${RESOURCE} to primary"
    ocf_cmd "${DRBDADM} primary ${RESOURCE}" || return ${OCF_ERR_GENERIC}

    # If there were no errors, return success
    return ${OCF_SUCCESS}
}

# Demote the drbd resource to secondary.
# Returns OCF_ERR_GENERIC if "drbdadm secondary" fails, OCF_SUCCESS otherwise.
rsc_demote() {
    # Demote the device to secondary
    ocf_cmd "${DRBDADM} secondary ${RESOURCE}" || return ${OCF_ERR_GENERIC}

    # If there were no errors, return success
    return ${OCF_SUCCESS}
}

# Handle a clone notification.
# Reads OCF_RESKEY_CRM_meta_notify_{type,operation,active_resource,
# stop_resource,start_resource}. Only "post start" with exactly two active
# resources triggers an action (connect + wait_connect).
# Always returns OCF_SUCCESS.
rsc_notify() {
    local n_type="${OCF_RESKEY_CRM_meta_notify_type}"
    local n_op="${OCF_RESKEY_CRM_meta_notify_operation}"

    # Count the whitespace-separated entries of each list by loading it into
    # the positional parameters (left unquoted on purpose).
    set -- ${OCF_RESKEY_CRM_meta_notify_active_resource}
    local n_active="$#"
    set -- ${OCF_RESKEY_CRM_meta_notify_stop_resource}
    local n_stop="$#"
    set -- ${OCF_RESKEY_CRM_meta_notify_start_resource}
    local n_start="$#"

    ocf_log info "${RESOURCE} notify: ${n_type} for ${n_op} - counts: active ${n_active} - starting ${n_start} - stopping ${n_stop}"

    case ${n_type} in
        pre)
            case ${n_op} in
                promote)
                    # TODO:
                    # Resist promotion of the other side in case we
                    # are already primary - though the CRM should
                    # not even attempt that.
                    ;;
            esac
            ;;
        post)
            # TODO: Entire case statement which follows redundant?
            case ${n_op} in
                start)
                    if [ "${n_active}" -eq 2 ]; then
                        # The other side is running, so we ought
                        # to connect and wait for that.
                        ${DRBDADM} connect ${RESOURCE}
                        ${DRBDADM} wait_connect ${RESOURCE}
                        # TODO: If this can cause a hang if the
                        # other side isn't connected or goes
                        # away during that, maybe just sleep
                        # here for 5-10s or take out the entire
                        # case statement
                    fi
                    ;;
                stop)
                    # TODO BUG: disconnect seems to force
                    # non-primary mode?!?
                    #### ${DRBDADM} disconnect $RESOURCE
                    # TODO: If we are secondary, do we need to do
                    # anything about a stopped primary in case we
                    # had an outdated flag...?
                    ;;
            esac
            ;;
    esac

    return ${OCF_SUCCESS}
}

# Validate the agent configuration.
# Returns:
#   OCF_ERR_INSTALLED   DRBDCONF is set but is not an existing file
#   OCF_ERR_ARGS        RESOURCE is empty
#   OCF_ERR_CONFIGURED  "drbdadm dump RESOURCE" fails
#   OCF_SUCCESS         everything checks out
# Exits (does not return) with OCF_ERR_CONFIGURED when the clone/master
# limits differ from clone_max=2, clone_node_max=1, master_max=1,
# master_node_max=1.
rsc_validate_all () {
    # First check the configuration file
    if [ -n "${DRBDCONF}" ] && [ ! -f "${DRBDCONF}" ]; then
        ocf_log err "Configuration file does not exist: ${DRBDCONF}"
        return ${OCF_ERR_INSTALLED}
    fi

    # Check the resource name, it should appear in DRBDCONF
    if [ -z "${RESOURCE}" ]; then
        ocf_log err "No resource name specified!"
        return ${OCF_ERR_ARGS}
    fi
    if ${DRBDADM} dump ${RESOURCE} 2>/dev/null ; then
        :
    else
        ocf_log err "Invalid configuration file ${DRBDCONF}"
        return ${OCF_ERR_CONFIGURED}
    fi

    # Variable names must use underscores: a '-' inside ${...} is the
    # "use default value" operator, which made the former check a no-op.
    # An unset/empty value falls back to the expected number, i.e. it is
    # accepted -- the same outcome the failing numeric test had before.
    if [ "${OCF_RESKEY_CRM_meta_clone_max:-2}" -ne 2 ] \
        || [ "${OCF_RESKEY_CRM_meta_clone_node_max:-1}" -ne 1 ] \
        || [ "${OCF_RESKEY_master_node_max:-1}" -ne 1 ] \
        || [ "${OCF_RESKEY_master_max:-1}" -ne 1 ] ; then
        ocf_log err "Clone options misconfigured."
        exit ${OCF_ERR_CONFIGURED}
    fi

    return ${OCF_SUCCESS}
}

# Derive the globals used by the other hooks: CRM_MASTER, RESOURCE, CLONE_NO,
# DRBDCONF (defaulting OCF_RESKEY_drbdconf to /etc/drbd.conf), DRBDADM,
# DRBD_MAJOR and DRBD_MINOR.
# Returns:
#   OCF_ERR_INSTALLED  the drbd configuration file does not exist
#   OCF_ERR_GENERIC    no device number could be extracted from it
#   OCF_SUCCESS        otherwise
rsc_setup() {
    CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot "
    RESOURCE="${OCF_RESKEY_drbd_resource}"
    CLONE_NO="${OCF_RESKEY_CRM_meta_clone}"
    DRBDCONF="${OCF_RESKEY_drbdconf:=/etc/drbd.conf}"
    DRBDADM="/sbin/drbdadm -c ${DRBDCONF}"

    if [ ! -f "${DRBDCONF}" ]; then
        return ${OCF_ERR_INSTALLED}
    fi

    # determine the drbd device number from the config file
    # this works with vhacs-generated drbd config files,
    # with only one single resource defined and device parameter
    # within 4 lines of node definition start
    DRBD_MAJOR=147
    DRBD_MINOR=$(grep -A 4 "on $(uname -n)" "${DRBDCONF}" | awk '/device/ {print $2}' | \
        tr -d ';' | sed 's/[a-zA-Z/]*\([0-9]*\)/\1/' | tr -d '\n')
    [ -z "${DRBD_MINOR}" ] && return ${OCF_ERR_GENERIC}
    ocf_log info "drbd device is ${DRBD_MAJOR}:${DRBD_MINOR}"

    return ${OCF_SUCCESS}
}

################################################################################
#
# Call the main ocf RA logic
#
################################################################################
ocf_main ${1}