Ganeti Debootstrap

These scripts are only useful when installing a VM with Ganeti's debootstrap method (they are not useful when installing from an ISO, for instance).

/etc/ganeti/instance-debootstrap.d/ :

Munin

Kvm plugins

Configuration

Allow anyone to query munin node:

  • etc/munin/munin-node.conf

    diff -r 8373493dd9cf etc/munin/munin-node.conf
    a b  
    3333# network notation unless the perl module Net::CIDR is installed.  You 
    3434# may repeat the allow line as many times as you'd like 
    3535 
    36 allow ^127\.0\.0\.1$ 
     36allow ^.*$ 
    3737 
    3838# If you have installed the Net::CIDR perl module, you can use 
    3939# multiple cidr_allow and cidr_deny address/mask patterns.  A 

Aggregate views

  • wget -O /usr/lib/cgi-bin/muninAggregator.pl http://munin-monitoring.org/raw-attachment/wiki/WishList/muninAggregator.3.pl
  • chmod +x /usr/lib/cgi-bin/muninAggregator.pl
  • cd /etc/munin ; patch :
    • templates/munin-overview.tmpl

      diff -r 25ef042b684c templates/munin-overview.tmpl
      a b  
      2121 <ul> 
      2222  <li><span class="domain"><a href="<TMPL_VAR NAME="DOMAIN">/index.html"><TMPL_VAR ESCAPE="HTML" NAME="DOMAIN"></a></span><TMPL_IF NAME="COMPARE"> :: [ <a href="<TMPL_VAR NAME="DOMAIN">/comparison-day.html">day</a> <a href="<TMPL_VAR NAME="DOMAIN">/comparison-week.html">week</a> <a href="<TMPL_VAR NAME="DOMAIN">/comparison-month.html">month</a> <a href="<TMPL_VAR NAME="DOMAIN">/comparison-year.html">year</a> ]</TMPL_IF> 
      2323      <ul>       
       24       <li>Aggregate views :: [ <a href="/cgi-bin/muninAggregator.pl?refresh=1&nodeGroup=<TMPL_VAR NAME="DOMAIN">&graphType=if_eth0&timeScale=day&go=go">Network/eth0</a>&nbsp; 
       25        <a href="/cgi-bin/muninAggregator.pl?refresh=1&nodeGroup=<TMPL_VAR NAME="DOMAIN">&graphType=if_eth1&timeScale=day&go=go">Network/eth1</a>&nbsp; 
       26        <a href="/cgi-bin/muninAggregator.pl?refresh=1&nodeGroup=<TMPL_VAR NAME="DOMAIN">&graphType=cpu&timeScale=day&go=go">CPU</a>&nbsp; 
       27        <a href="/cgi-bin/muninAggregator.pl?refresh=1&nodeGroup=<TMPL_VAR NAME="DOMAIN">&graphType=kvm_cpu&timeScale=day&go=go">KVM/cpu</a>&nbsp; 
       28        <a href="/cgi-bin/muninAggregator.pl?refresh=1&nodeGroup=<TMPL_VAR NAME="DOMAIN">&graphType=kvm_io&timeScale=day&go=go">KVM/io</a>&nbsp; 
       29        <a href="/cgi-bin/muninAggregator.pl?refresh=1&nodeGroup=<TMPL_VAR NAME="DOMAIN">&graphType=kvm_mem&timeScale=day&go=go">KVM/mem</a>&nbsp; 
       30        <a href="/cgi-bin/muninAggregator.pl?refresh=1&nodeGroup=<TMPL_VAR NAME="DOMAIN">&graphType=kvm_net&timeScale=day&go=go">KVM/net</a>&nbsp; 
       31        <a href="/cgi-bin/muninAggregator.pl?refresh=1&nodeGroup=<TMPL_VAR NAME="DOMAIN">">Custom</a>] 
       32        </li> 
      2433        <TMPL_LOOP NAME="NODES"> 
      2534        <li><span class="host"><a href="<TMPL_VAR NAME="DOMAIN">/<TMPL_VAR NAME="NODE">.html"><TMPL_VAR ESCAPE="HTML" NAME="NODE"></a></span> ::  
      2635        [ <TMPL_LOOP NAME="CATEGORIES"><a <TMPL_IF NAME="STATE_WARNING">class="warn"</TMPL_IF> <TMPL_IF NAME="STATE_CRITICAL">class="crit"</TMPL_IF> href="<TMPL_VAR NAME="DOMAIN">/<TMPL_VAR NAME="NODE">.html#<TMPL_VAR NAME="NAME">"><TMPL_VAR ESCAPE="HTML" NAME="NAME"></a> </TMPL_LOOP>]</li> 

Nagios

DRBD

Follow http://code.google.com/p/ganeti/wiki/DrbdDevicesMonitoring and install attachment:check_drbd (already patched according to the HOWTO) on each node.

Ganeti redundancy

On the ganeti master (z2-3.host.gnt in the nagios command below)

  • wget -O /usr/local/bin/check_ganeti http://trac.fsffrance.org/raw-attachment/wiki/PatchInventory/check_ganeti
  • append the following line to root's authorized_keys file (typically /root/.ssh/authorized_keys), where 1.1.1.1 is the IP of the nagios host
    from="1.1.1.1",command="/usr/local/bin/check_ganeti" ssh-rsa AAAAB3NzaC1yc2EAAAABIwA...
    
  • add a nagios command
    define command{
            command_name    check_ganeti_redundancy
            command_line    /usr/lib/nagios/plugins/check_by_ssh -i /etc/nagios3/id_rsa -H z2-3.host.gnt -l root -C "/usr/local/bin/check_ganeti $HOSTADDRESS$"
    }
    
  • add the nagios ganeti-service template definition (note the longer normal_check_interval: 600 instead of 5)
    # generic ganeti service template definition
    define service{
            name                            ganeti-service; The 'name' of this service template
            active_checks_enabled           1       ; Active service checks are enabled
            passive_checks_enabled          1       ; Passive service checks are enabled/accepted
            parallelize_check               1       ; Active service checks should be parallelized (disabling this can lead to major performance problems)
            obsess_over_service             1       ; We should obsess over this service (if necessary)
            check_freshness                 0       ; Default is to NOT check service 'freshness'
            notifications_enabled           1       ; Service notifications are enabled
            event_handler_enabled           1       ; Service event handler is enabled
            flap_detection_enabled          1       ; Flap detection is enabled
            failure_prediction_enabled      1       ; Failure prediction is enabled
            process_perf_data               1       ; Process performance data
            retain_status_information       1       ; Retain status information across program restarts
            retain_nonstatus_information    1       ; Retain non-status information across program restarts
                    notification_interval           0               ; Only send notifications on status change by default.
                    is_volatile                     0
                    check_period                    24x7
                    normal_check_interval           600
                    retry_check_interval            30
                    max_check_attempts              4
                    notification_period             24x7
                    notification_options            w,u,c,r
                    contact_groups                  admins
            register                        0       ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
            }
    
  • for each ganeti node N add
    define service {
      host_name             N.host.gnt
      service_description   ganeti
      check_command         check_ganeti_redundancy
      use                   ganeti-service
    }
    

drupal menu

  • attachment:check_menu in /usr/lib/nagios/plugins: checks that the Drupal menu is OK. Corresponding Nagios configuration:
    define command{
    	command_name    check_poker_menu
    	command_line    /usr/lib/nagios/plugins/check_menu $ARG1$ $ARG2$ $HOSTADDRESS$
    }
    

pokersocial

  • attachment:check_pokersocial in /usr/lib/nagios/plugins: tests POKER_REST with pokersocial. Corresponding Nagios configuration:
    define command{
    	command_name    poker_rest_social_drupal6
    	command_line    /usr/lib/nagios/plugins/check_pokersocial -H $HOSTADDRESS$ --container /drupal6
    }
    

recipe #311

Attachments