#!/usr/bin/env ruby # A (wannabe) load balancer for Xen host # based on mcollective, and inspired by VmWare features require 'mcollective' require 'csv' include MCollective::RPC mc = rpcclient("xenagent") mc.progress = false @config = { :interval => 30, # polling interval :load_threshold => 0.4, # cpu max load :daemonize => false, # do we go in background ? :max_over => 1, # max MINUTES where load is over threshold. # this should be _at least_ interval*2 :debug => true, # being verbose :max_vm_per_host => 10, # try not to go over 9000^Wthis many VMs per host :max_load_candidate => 0.8, # if we reach this load we're # no more eligible as a host for vms :host_mapping => "/etc/mcollective/xen-mappings.csv" # the hypervisor name / IP mapping } @load_counter = {} @domu_times = {} @domu_counter = {} @hypervisors = {} def load_hypervisors() CSV.read(@config[:host_mapping]).find_all { |r| @hypervisors[r[0].to_s]=r[1].to_s } end def debug(msg) if @config[:debug] then puts "[+] " + msg end end def log(msg) puts msg end def choose_from_times(hostname) if !@domu_times_used.empty? then highest = @domu_times_used.sort_by { |v| v[1] } debug("VM key : " + highest[-1][0]) debug("Time consumed in a run (interval is "+@config[:interval].to_s+"s) : " + highest[-1][1].to_s) domu_name = highest[-1][0].to_s.sub(/^#{hostname}-/,"") else nil end end loop do load_hypervisors() mc.stat.each do |resp| # clean up time consumption at each loop @domu_times_used = {} # know how many VMs each host has (may be used when migrating) @domu_counter[resp[:sender]]=resp[:data][:slices].count debug(resp[:sender] + " : " + resp[:data][:load].to_s + " load and " + @domu_counter[resp[:sender]].to_s + " slice(s) running") # Do we hit the limit ? if resp[:data][:load] > @config[:load_threshold] then if @load_counter[resp[:sender]] then @load_counter[resp[:sender]] +=1 else # init/reset it, we want load spikes to be consecutives @load_counter[resp[:sender]] = 1 end else # if no reinit counter @load_counter[resp[:sender]] = 0 debug("init/reset load counter for " + resp[:sender]) end # store & calculate domU time consumption if !resp[:data][:times].empty? then resp[:data][:times].each_pair { |k,v| v=v.to_f key=resp[:sender]+"-"+k # does not have a value for this domU on this dom0 if !@domu_times.has_key?(key) then @domu_times[key]=v @domu_times_used[key]=0 debug("added #{k} on #{resp[:sender]} with 0 CPU time (registered #{@domu_times[key]} as a reference)") else # we have one, calculate time consumption @domu_times_used[key]=v-@domu_times[key] @domu_times[key]=v debug("updated #{k} on #{resp[:sender]} with #{@domu_times_used[key]} CPU time eaten (registered #{@domu_times[key]} as a reference)") end } else debug("#{resp[:sender]} has no slices consuming CPU time") end end # End of mc.stat loop # Time for analysis & decision @load_counter.each_pair { |k,v| # Yes, this machine has reached the limit if v > (@config[:max_over]*60)/@config[:interval] then debug("#{k} has #{v} threshold overload") debug("Time to see if we can migrate a VM from #{k}") vm=choose_from_times(k) # now look for a new home if vm != nil # Let's exclude those that don't match config criterias # first, max vms : host_list=@domu_counter.map { |d| if d[1] < @config[:max_vm_per_host] then d[0] end } host_list.delete(nil) # drop nil host_list.delete(k) # drop ourself if !host_list.empty? and @config[:debug] then host_list.each { |h| debug("#{h} is a candidate for being a host (step 1 : max VMs)") } end # next, max load load_excluded = @load_counter.map { |l| if l[1] > @config[:max_load_candidate] then l[0] end } host_list -= load_excluded if !host_list.empty? and @config[:debug] then host_list.each { |h| debug("#{h} is a candidate for being a host (step 2 : max load)") } # there is at least 1 host, so let's sort by load and take the first one new_host = @load_counter.sort_by { |h| h[1] }[0][0] if @hypervisors.has_key? new_host then new_host_ip = @hypervisors[new_host] else puts "FIXME : implement DNS resolution" exit! end log("trying to migrate #{vm} from #{k} to #{new_host} (#{new_host_ip})") # We create a new client, with filters mc_migration = rpcclient("xenagent") mc_migration.progress = false mc_migration.verbose = false mc_migration.fact_filter("hostname",k) result = mc_migration.migrate(:slice => vm, :newhost => new_host_ip) if result[0][:data][:result] == true then log("Successfully migrated #{vm} !") else log("Failed to migrate #{vm}. You should take a look") end # VM migrated, reset load_counter @load_counter[k]=0 # drop times values @domu_times.delete(vm) @domu_times_used.delete(vm) else debug("no more candidates, doing nothing...") end else debug("could not guess the name of the VM to migrate, weird") exit! 1 end end } debug("sleeping for " + @config[:interval].to_s + " seconds") log("") sleep(@config[:interval]) end