Sunday, April 13, 2014

Hadoop zookeeper connections CDH4

I'm currently running two Hadoop clusters (Cloudera CDH4), and both suffer from the same problem: stale ZooKeeper connections that choke ZooKeeper to death.
After digging around, I've located two culprits: one is the Thrift Hive server we run manually for Tableau connections, and the other is Hue.
To make a long story short, here's my solution:

#!/bin/bash
#######################################
# Check whether the process recorded in ~/thrift.pid is a live java process.
# Globals:
#   pid    (written) - contents of ~/thrift.pid; empty if it cannot be read
#   result (written) - 0 when a process with exactly that PID exists and its
#                      command name contains "java"; 1 otherwise
# Arguments:
#   None
# Returns:
#   Always 0; callers inspect $result.
#######################################
check_pid() {
  local comm

  result=1
  pid=$(cat ~/thrift.pid 2>/dev/null)

  # An empty or non-numeric PID can never identify a process; bail out
  # instead of handing garbage to ps.
  [[ "$pid" =~ ^[0-9]+$ ]] || return 0

  # Query this exact PID. Grepping a full process listing would also match
  # the number as a substring of other PIDs or of other columns.
  comm=$(ps -p "$pid" -o comm= 2>/dev/null)
  if [[ "$comm" == *java* ]]; then
    result=0
  fi
  return 0
}


#######################################
# POST a restart command for the Hue service to the management REST API.
# Globals:
#   CM_CREDENTIALS     (read, optional) - "user:password" for HTTP basic
#                      auth; defaults to admin:admin
#   CM_HUE_RESTART_URL (read, optional) - restart endpoint; defaults to the
#                      hue1 service of cluster "TEST - CDH4" on localhost:7180
# Arguments:
#   None
# Outputs:
#   Nothing; curl's output is discarded.
# Returns:
#   curl's exit status (non-zero on connection failure or HTTP error).
#######################################
restart_hue() {
  local credentials=${CM_CREDENTIALS:-admin:admin}
  local url=${CM_HUE_RESTART_URL:-http://localhost:7180/api/v2/clusters/TEST%20-%20CDH4/services/hue1/commands/restart}

  # --fail turns HTTP 4xx/5xx responses into a non-zero exit status; without
  # it a rejected request (bad credentials, unknown service) looks like
  # success to the caller.
  # NOTE(review): credentials passed with -u are visible in the process list.
  curl --fail --silent -X POST -u "$credentials" "$url" >/dev/null 2>&1
}


#######################################
# Stop the Thrift server: SIGTERM first, SIGKILL if it is still running.
# Globals:
#   pid    (read)    - PID to signal; check_pid reloads it from ~/thrift.pid
#   result (written) - set by check_pid; 0 means the process is still alive
# Arguments:
#   None
# Outputs:
#   Writes an error message to stderr if the process survives both signals.
# Returns:
#   0 once the process is gone; exits the script with status 1 otherwise.
#######################################
kill_thrift() {
  # Ask politely first so the server gets a chance to shut down cleanly.
  kill "$pid"
  sleep 2

  check_pid
  if [[ "$result" == 0 ]]; then
    # Still alive after SIGTERM: force it.
    kill -9 "$pid"
    sleep 2
  fi

  check_pid
  if [[ "$result" == 0 ]]; then
    echo "Error: Failed to kill server" >&2
    exit 1
  fi
}


# Sanity
# Refuse to run without the PID file: it is the only record of which process
# is the Thrift server. Diagnostics go to stderr.
if [[ ! -f ~/thrift.pid ]]; then
  echo "Error: thrift process id file ~/thrift.pid not found." >&2
  exit 1
fi


# The script must run as user "admin". The substitution is quoted so that an
# empty result from whoami fails this check; unquoted, it produced a malformed
# test expression that evaluated false and let the script continue.
if [[ "$(whoami)" != "admin" ]]; then
  echo "Error: you are not logged in or executing as user admin, please do so." >&2
  exit 1
fi


# Main
# Exit status of the whole run; only changed if the Thrift restart branch
# fails to record the new PID.
exit_status=0

# Step 1: always restart Hue.
restart_hue
result=$?
if (( result > 0 )); then
  echo "Error restarting Hue service, please check cluster integrity." >&2
  exit 1
fi

# Step 2: ask ZooKeeper for its status ("srvr" command on port 2181) and
# keep the line that reports the connection count.
zookeeper_connections=$(echo srvr | nc localhost 2181 | grep -i connections)
result=$?
if (( result > 0 )); then
  echo "Error: no reply from Zookeeper." >&2
  exit 1
fi

# Keep only what follows the last ": ", then drop stray whitespace.
zookeeper_connections=${zookeeper_connections##*: }
zookeeper_connections=${zookeeper_connections//[[:space:]]/}
if ! [[ "$zookeeper_connections" =~ ^[0-9]+$ ]]; then
  # Without this guard a malformed reply makes the numeric test below error
  # out and the script silently skips the restart.
  echo "Error: unexpected connection count from Zookeeper: '$zookeeper_connections'" >&2
  exit 1
fi

echo "Zookeeper has $zookeeper_connections open conenctions."

# Step 3: above 100 connections, bounce the Thrift server.
# `[ ]` is used on purpose: it compares in decimal even with leading zeros.
if [ "$zookeeper_connections" -gt 100 ]; then
  check_pid
  if (( result > 0 )); then
    echo "Error: can not find thrift server PID" >&2
    exit 1
  fi

  echo "Found Thrift server, restarting"
  kill_thrift

  export HIVE_PORT=10001
  hive --service hiveserver &
  pid=$!

  # Record the new PID for the next run; a failed write is reflected in the
  # script's exit status.
  if ! echo "$pid" > ~/thrift.pid; then
    echo "Error: could not write ~/thrift.pid" >&2
    exit_status=1
  fi
fi

exit "$exit_status"
This script is executed every morning via crontab to keep ZooKeeper from choking. It's been running happily for a few months now :)

1 comment: