fluke/devops-service/lib/executors/server_executor.rb

544 lines
19 KiB
Ruby

require "lib/knife/knife_factory"
require "lib/executors/expiration_scheduler"
require "hooks"
require 'net/ssh'
module Devops
module Executor
class ServerExecutor
include Hooks
# Numeric status codes returned by the executor's operations, keyed by the
# reason of the failure. 0 is used by the operations themselves for success.
# The table is frozen so that it cannot be modified by accident at runtime.
ERROR_CODES = {
  server_bootstrap_fail: 2,
  server_cannot_update_tags: 3,
  server_bootstrap_private_ip_unset: 4,
  server_not_in_chef_nodes: 5,
  server_bootstrap_unknown_error: 7,
  deploy_unknown_error: 6,
  deploy_failed: 8,
  creating_server_unknown_error: 9,
  creating_server_in_cloud_failed: 10
}.freeze
# Maximum number of SSH availability checks made by #bootstrap.
# Every check is preceded by a 5 second sleep, so 60 checks mean
# at least 5*60 seconds (5 min) of waiting.
MAX_SSH_RETRIES_AMOUNT = 60
# Hooks fired by #deploy_server before and after the chef-client run.
# Callbacks receive:
# out - container for output data
# deploy_info - hash with deploy data
define_hook :before_deploy
define_hook :after_deploy
# Hooks fired by #create_server around the cloud "create" call.
# They are run without arguments.
define_hook :before_create
define_hook :after_create
# Hooks fired by #bootstrap. Callbacks receive:
# out - container for output data
define_hook :before_bootstrap
define_hook :after_bootstrap
# Make sure deploy_info has a "run_list" before every deploy.
before_deploy :add_run_list_to_deploy_info
# server, deploy_env and project are set by #initialize (and #create_server for server).
# NOTE(review): :report is never assigned in this class - presumably set by callers; confirm.
attr_accessor :server, :deploy_env, :report, :project
def initialize server, out, options={}
if server
@project = Devops::Db.connector.project(server.project)
@deploy_env = @project.deploy_env(server.deploy_env)
end
@server = server
@out = out
@out.class.send(:define_method, :flush) { } unless @out.respond_to?(:flush)
@current_user = options[:current_user]
end
# Returns the integer code registered in ERROR_CODES for +reason+ (a symbol).
# Hash#fetch is used, so an unknown reason raises KeyError instead of returning nil.
def self.error_code(reason)
ERROR_CODES.fetch(reason)
end
# Reverse lookup in ERROR_CODES: returns the reason symbol registered for
# +integer_code+, or :unknown_error when no reason uses that code.
def self.reason_from_error_code(integer_code)
  known_reason = ERROR_CODES.key(integer_code)
  known_reason || :unknown_error
end
# Reasons (keys of ERROR_CODES) that describe a failure of the bootstrap phase.
def self.bootstrap_errors_reasons
  %i[server_bootstrap_fail server_not_in_chef_nodes server_bootstrap_unknown_error]
end
# Instance-level shortcut for ServerExecutor.error_code: returns the integer
# code for +reason+ by delegating to the class method.
def error_code(reason)
self.class.error_code(reason)
end
# Builds (without saving) a server model for the executor's project and
# deploy environment.
# options:
#  "created_by" - stored as the creator of the server
#  "private_ip" - stored as the private IP of the server
def create_server_object options
  attributes = {
    "project" => @project.id,
    "deploy_env" => @deploy_env.identifier,
    "created_by" => options["created_by"],
    "provider" => @deploy_env.provider,
    "provider_account" => @deploy_env.provider_account,
    "private_ip" => options["private_ip"]
  }
  Devops::Model::Server.new(attributes)
end
# Creates a server in the cloud, saves it and (unless disabled) bootstraps it.
# options (string keys): "created_by", "private_ip", "run_list", "name", "key",
# "without_bootstrap".
# Returns 0 on success (or the result of #two_phase_bootstrap), otherwise one
# of the ERROR_CODES values. On an unexpected exception the server is rolled
# back and removed from the database.
def create_server options
  @server = create_server_object(options)
  @server.set_last_operation(Devops::Model::Server::OperationType::CREATION, options["created_by"])
  cloud = @deploy_env.provider_instance
  db = ::Devops::Db.connector
  begin
    if @deploy_env.provider_account
      @out.puts "Using '#{@deploy_env.provider}' account '#{@deploy_env.provider_account}'\n"
    end
    @out.puts "Create server..."
    @out.flush

    @server.run_list = options["run_list"] || []
    @server.chef_node_name = options["name"]
    @server.key = options["key"] || cloud.ssh_key
    image = db.image(@deploy_env.image)
    @server.remote_user = image.remote_user

    @out << "\nBefore create hooks...\n"
    self.run_hook :before_create
    @out << "Done\n"

    created = cloud.create_server(@server, @deploy_env.image, @deploy_env.flavor, @deploy_env.subnets, @deploy_env.groups, @out)
    return error_code(:creating_server_in_cloud_failed) unless created

    db.server_insert @server

    @out << "\nAfter create hooks...\n"
    self.run_hook :after_create
    @out << "Done\n"
    @out.flush
    DevopsLogger.logger.info "Server with parameters: #{@server.to_hash.inspect} is running"
    schedule_expiration()

    return 0 if options["without_bootstrap"]

    bootstrap_options = {
      bootstrap_template: image.bootstrap_template
    }
    return two_phase_bootstrap(bootstrap_options)
  rescue => e
    # Anything unexpected: report, undo what was created and forget the server.
    @out.puts e.message
    @out.puts e.backtrace.join("\n")
    DevopsLogger.logger.error e.message
    roll_back
    db.server_delete @server.id
    error_code(:creating_server_unknown_error)
  end
end
# Waits until the server accepts SSH connections and bootstraps it with knife.
#
# options:
#  :run_list (optional)
#  :bootstrap_template (optional)
#  :chef_environment (optional)
#  :config (optional)
# The path of the server's key is written into options[:cert_path].
#
# The public IP is used when the server has one, the private IP otherwise.
# SSH is probed up to MAX_SSH_RETRIES_AMOUNT times with a 5 second pause
# before every probe; a probe that succeeds - including the last allowed
# one - ends the waiting.
#
# Returns 0 on success, error_code(:server_bootstrap_private_ip_unset) when
# the server has no private IP and error_code(:server_bootstrap_fail) when SSH
# never becomes available or knife fails.
def bootstrap options
  @out << "\n\nBootstrap...\n"
  @out.flush
  k = Devops::Db.connector.key(@server.key)
  cert_path = k.path
  options[:cert_path] = cert_path
  @out << "\nBefore bootstrap hooks...\n"
  run_hook(:before_bootstrap, @out)
  @out << "Done\n"
  if @server.private_ip.nil?
    @out << "Error: Private IP is null"
    return error_code(:server_bootstrap_private_ip_unset)
  end
  # Attributes passed to the node as JSON attributes of the bootstrap.
  ja = {
    :provider => @server.provider,
    :provider_account => @server.provider_account,
    :devops_host => `hostname`.strip
  }
  ip = @server.private_ip
  unless @server.public_ip.nil?
    ip = @server.public_ip
    @out << "\nPublic IP is present\n"
  end
  address = "#{@server.remote_user}@#{ip}"
  cmd = check_ssh_command(cert_path, address)
  @out << "\nWaiting for SSH..."
  @out << "\nTest command: '#{cmd}'\n"
  @out.flush
  retries_amount = 0
  loop do
    sleep(5)
    ssh_output = execute_system_command(cmd)
    retries_amount += 1
    # Success is checked first so that a connection established on the very
    # last attempt is not reported as a failure.
    break if last_command_successful?
    if retries_amount >= MAX_SSH_RETRIES_AMOUNT
      @out.puts "Can not connect to #{address}"
      @out.puts ssh_output
      @out.flush
      DevopsLogger.logger.error "Can not connect with command '#{cmd}':\n#{ssh_output}"
      return error_code(:server_bootstrap_fail)
    end
    @out.puts "SSH command failed, retry (#{retries_amount}/#{MAX_SSH_RETRIES_AMOUNT})"
    @out.flush
  end
  provider = @server.provider_instance
  @server.chef_node_name = provider.create_default_chef_node_name(@server) if @server.chef_node_name.nil?
  r = knife_instance.knife_bootstrap(@out, ip, bootstrap_options(ja, options))
  if r == 0
    @out << "Chef node name: #{@server.chef_node_name}\n"
    ::Devops::Db.connector.server_set_chef_node_name @server
    @out << "Chef node name has been updated\n"
    @out << "After bootstrap hooks...\n"
    run_hook(:after_bootstrap, @out)
    @out << "Done\n"
    msg = "Server with id '#{@server.id}' is bootstraped"
    @out.puts msg
    @out.flush
    DevopsLogger.logger.info msg
    r
  else
    @out << "Can not bootstrap node '#{@server.id}', error code: #{r}"
    @out.flush
    error_code(:server_bootstrap_fail)
  end
end
# Builds the list of command line arguments for "knife bootstrap".
# attributes - hash, serialized with to_json and passed as --json-attributes
# options:
#  :cert_path (required)
#  :run_list (optional)
#  :bootstrap_template (optional)
#  :chef_environment (optional)
#  :config (optional)
# Returns an array of strings. "--sudo" is added for every remote user except
# "root"; "-N" is added only when the server already has a chef node name.
def bootstrap_options attributes, options
  args = [
    "-x #{@server.remote_user}",
    "-i #{options[:cert_path]}",
    "--json-attributes '#{attributes.to_json}'"
  ]

  args << "-N #{@server.chef_node_name}" if @server.chef_node_name
  args << "--sudo" if @server.remote_user != "root"

  template = options[:bootstrap_template]
  args << "-t #{template}" if template

  environment = options[:chef_environment]
  args << "-E #{environment}" if environment

  run_list = options[:run_list]
  args << "-r #{run_list.join(",")}" unless run_list.nil? || run_list.empty?

  config = options[:config]
  args << "-c #{config}" if config

  args
end
# Preparation step of #two_phase_bootstrap. In this class it only reports
# progress to the output; +options+ is not used here.
def prepare_two_phase_bootstrap options
@out << "Prepare bootstrap...\n"
@out << "Done\n"
end
# Essentially, it just bootstraps the server and then deploys it.
#
# Phase 1 (bootstrap): bootstraps with the provider's run list (written to
# options[:run_list]) and checks that the node and the client are registered
# on the chef server. When they are not, the server is rolled back and
# removed from the database.
# Phase 2 (deploy): sets the computed run list on the node and runs
# #deploy_server with options[:deploy_info] or the project's deploy info.
#
# Returns 0 on success, otherwise one of: :server_not_in_chef_nodes,
# :server_bootstrap_fail, :server_bootstrap_unknown_error, :deploy_failed,
# :deploy_unknown_error (as integer codes, see ERROR_CODES).
def two_phase_bootstrap options
  prepare_two_phase_bootstrap(options)
  # bootstrap phase
  begin
    provider = @server.provider_instance
    mongo = ::Devops::Db.connector
    options[:run_list] = provider.run_list
    bootstrap_status = bootstrap(options)
    if bootstrap_status == 0
      if check_server_on_chef_server
        @out << "Server #{@server.chef_node_name} is created"
      else
        @out.puts "Can not find client or node on chef-server"
        roll_back
        @out.flush
        mongo.server_delete @server.id
        return error_code(:server_not_in_chef_nodes)
      end
    else
      # The server is intentionally kept (no roll back) when bootstrap fails.
      msg = "Failed while bootstraping server with id '#{@server.id}'\n"
      msg << "Bootstraping operation result was #{bootstrap_status}"
      DevopsLogger.logger.error msg
      @out.puts msg
      @out.flush
      return error_code(:server_bootstrap_fail)
    end
  rescue => e
    @out << "\nError: #{e.message}\n"
    @out.flush
    # Log the failure the same way the deploy phase does, so that unexpected
    # bootstrap errors are not visible in the operation output only.
    DevopsLogger.logger.error(e.message + "\n" + Array(e.backtrace).join("\n"))
    return error_code(:server_bootstrap_unknown_error)
  end
  # deploy phase. Assume that all servers are bootstraped successfully here.
  begin
    run_list = compute_run_list
    @out << "\n\nComputed run list: #{run_list.join(", ")}"
    @out.flush
    knife_instance.set_run_list(@server.chef_node_name, run_list)
    deploy_info = options[:deploy_info] || @project.deploy_info(@deploy_env)
    deploy_status = deploy_server(deploy_info)
    if deploy_status == 0
      0
    else
      msg = "Failed on chef-client with run list, server with id '#{@server.id}'"
      msg << "\nDeploing server operation status was #{deploy_status}"
      DevopsLogger.logger.error msg
      @out << "\n" + msg + "\n"
      error_code(:deploy_failed)
    end
  rescue => e
    @out << "\nError: #{e.message}\n"
    DevopsLogger.logger.error(e.message + "\n" + e.backtrace.join("\n"))
    error_code(:deploy_unknown_error)
  end
end
# Tells whether the server's chef node name is present both in the node list
# and in the client list returned by knife. The client list is requested only
# when the node was found.
def check_server_on_chef_server
  registered_as_node = knife_instance.chef_node_list.include?(@server.chef_node_name)
  registered_as_node && knife_instance.chef_client_list.include?(@server.chef_node_name)
end
# Removes the server from the chef server and moves its /etc/chef directory
# away (to /etc/chef.backup_<timestamp>) over SSH, using the private IP.
#
# Returns a hash with :chef_node, :chef_client and :server keys, or
# {error: message} when the SSH part failed 5 times in a row (attempts are
# separated by a 1 second pause).
def unbootstrap
  k = Devops::Db.connector.key(@server.key)
  cert_path = k.path
  i = 0
  res = delete_from_chef_server(@server.chef_node_name)
  begin
    new_name = "/etc/chef.backup_#{Time.now.strftime("%d-%m-%Y_%H.%M.%S")}"
    cmd = (@server.remote_user == 'root' ? "" : "sudo ")
    # POSIX "[ ... ]" is used: "[[ ... ]]" is not understood by every /bin/sh.
    cmd = cmd + "/bin/sh -c 'if [ -d /etc/chef ]; then mv /etc/chef #{new_name} && echo ok; else echo not found; fi'"
    DevopsLogger.logger.info("SSH: trying to run command '#{cmd}'")
    Net::SSH.start(@server.private_ip, @server.remote_user, :keys => [cert_path]) do |session|
      session.open_channel do |channel|
        channel.request_pty(:modes => { Net::SSH::Connection::Term::ECHO => 0 }) do |c, success|
          raise "could not request pty" unless success
          channel.exec cmd
          channel.on_data do |c_, data|
            # "echo" terminates its output with a line break (CR LF on a pty),
            # so the answer has to be stripped before it is compared.
            answer = data.strip
            if answer.empty?
              next
            elsif answer == 'not found'
              res[:server] = "Directory '/etc/chef' does not exists"
            elsif answer == 'ok'
              res[:server] = "'/etc/chef' renamed to '#{new_name}'"
            else
              DevopsLogger.logger.error "Unexpected error: " + data
              raise(data)
            end
          end
        end
      end
    end
  rescue => e
    DevopsLogger.logger.error "Unbootstrap error: " + e.message
    i += 1
    sleep(1)
    retry unless i == 5
    return {error: e.message}
  end
  res
end
# Deploys the server while its chef tags are temporarily replaced with +tags+.
# With an empty +tags+ list it is a plain #deploy_server call.
# The previous tags are restored after the deploy, even when it raises.
# Returns the result of #deploy_server, or
# error_code(:server_cannot_update_tags) when the tags can not be set.
def deploy_server_with_tags tags, deploy_info
  return deploy_server(deploy_info) if tags.empty?

  previous_tags = knife_instance.tags_list(@server.chef_node_name).join(" ")
  temporary_tags = tags.join(" ")
  @out.puts "Temporarily changing tags (#{previous_tags}) to (#{temporary_tags})"

  swapped = knife_instance.swap_tags(@server.chef_node_name, previous_tags, temporary_tags)
  unless swapped
    failure = "Error: Cannot add tags '#{temporary_tags}' to server '#{@server.chef_node_name}'"
    DevopsLogger.logger.error(failure)
    @out.puts failure
    return error_code(:server_cannot_update_tags)
  end
  DevopsLogger.logger.info("Set tags for '#{@server.chef_node_name}': #{temporary_tags}")

  # The value of the begin block (the deploy result) is the method's result;
  # the ensure part only puts the old tags back.
  begin
    deploy_server deploy_info
  ensure
    @out.puts "Restoring tags"
    knife_instance.swap_tags(@server.chef_node_name, temporary_tags, previous_tags)
    DevopsLogger.logger.info("Restoring tags for #{@server.chef_node_name}: from #{temporary_tags} back to (#{previous_tags})")
  end
end
# Runs chef-client on the server over SSH.
#
# deploy_info - hash with deploy data. It is modified: the "use_json_file"
#               and "json_file" keys are removed when the JSON file mode is
#               used (before_deploy hooks may modify it as well).
#
# With deploy_info["use_json_file"] the deploy data is written as JSON into
# the configured :project_info_dir (an already existing file with that name is
# reused as is) and chef-client is pointed to it with "-j <url>". Otherwise
# the deploy environment's chef_client_options are appended, or - when there
# are none and the server belongs to a stack - "-r <run list>".
#
# The deploy is considered successful when the last output line returned by
# knife contains "Chef Client finished". Returns 0 on success and 1 otherwise.
def deploy_server deploy_info
  @out << "\nBefore deploy hooks...\n"
  run_hook(:before_deploy, @out, deploy_info)
  @out << "Done\n"
  @out << "\nRun chef-client on '#{@server.chef_node_name}'\n"
  cmd = "chef-client --no-color"
  if deploy_info["use_json_file"]
    deploy_info.delete("use_json_file")
    dir = DevopsConfig.config[:project_info_dir]
    file = deploy_info.delete("json_file") || "#{@server.project}_#{@server.deploy_env}_#{Time.new.to_i}"
    path = File.join(dir, file)
    # File.exist? is used because File.exists? is no longer available in
    # current Ruby versions.
    if File.exist?(path)
      json = File.read(path)
    else
      json = JSON.pretty_generate(deploy_info)
      File.open(path, "w") do |f|
        f.write json
      end
    end
    @out.puts "Deploy Input Parameters:"
    @out.puts json
    @out.flush
    cmd << " -j http://#{DevopsConfig.config[:address]}:#{DevopsConfig.config[:port]}/#{DevopsConfig.config[:url_prefix]}/v2.0/deploy/data/#{file}"
  else
    if @deploy_env.chef_client_options
      cmd << " #{@deploy_env.chef_client_options}"
    else
      cmd << " -r #{deploy_info["run_list"].join(",")}" unless @server.stack.nil?
    end
  end
  ip = if @server.public_ip.nil?
    @server.private_ip
  else
    @out << "Public IP detected\n"
    @server.public_ip
  end
  @out.flush
  k = Devops::Db.connector.key(@server.key)
  lline = knife_instance.ssh_stream(@out, cmd, ip, @server.remote_user, k.path)
  r = /Chef\sClient\sfinished/i
  if lline && lline[r]
    @out << "\nAfter deploy hooks...\n"
    run_hook(:after_deploy, @out, deploy_info)
    @out << "Done\n"
    @server.set_last_operation(Devops::Model::Server::OperationType::DEPLOY, @current_user)
    Devops::Db.connector.server_update(@server)
    0
  else
    @out << "An error occured during knife command executing"
    1
  end
end
# Deletes the node and then the client called +node_name+ with knife.
# Returns a hash with the results under :chef_node and :chef_client.
def delete_from_chef_server node_name
  node_result = knife_instance.chef_node_delete(node_name)
  client_result = knife_instance.chef_client_delete(node_name)
  { chef_node: node_result, chef_client: client_result }
end
# Removes the server.
# A static server is only unbootstrapped (when it has a chef node name) and
# deleted from the database. Any other server is deleted from the chef
# server, from the cloud (a "not found" answer of the cloud is reported and
# ignored) and from the database.
# Returns 0.
def delete_server
  db = ::Devops::Db.connector

  if @server.static?
    unbootstrap unless @server.chef_node_name.nil?
    db.server_delete @server.id
    puts_and_flush "Static server '#{@server.id}' is removed"
    return 0
  end

  puts_and_flush "Deleting from chef server:"
  chef_results = delete_from_chef_server(@server.chef_node_name)
  chef_results.each { |what, outcome| @out.puts "#{what} - #{outcome}" }

  puts_and_flush "Deleting from cloud:"
  cloud = @server.provider_instance
  begin
    puts_and_flush(cloud.delete_server(@server))
  rescue Fog::Compute::OpenStack::NotFound, Fog::Compute::AWS::NotFound
    puts_and_flush "Server with id '#{@server.id}' not found among '#{cloud.name}' servers"
  end

  db.server_delete @server.id
  puts_and_flush "Server '#{@server.id}' with name '#{@server.chef_node_name}' for project '#{@server.project}-#{@server.deploy_env}' is removed."
  0
end
# Undoes a server creation: when the server already has an id, it is deleted
# from the chef server and from the cloud. An error raised by the cloud
# deletion is only printed. Every step is reported to the output.
def roll_back
  @out.puts "Trying to roll back..."
  return if @server.id.nil?

  @out.puts "Server '#{@server.chef_node_name}' with id '#{@server.id}' is not created"
  chef_results = delete_from_chef_server(@server.chef_node_name)
  @out.puts chef_results.values.join("\n")
  begin
    cloud_result = @server.provider_instance.delete_server(@server)
    @out.puts cloud_result
  rescue => e
    @out.puts e.message
  end
  @out << "\nRolled back\n"
end
# before_deploy hook: puts the computed run list into deploy_info["run_list"]
# unless deploy_info already has one.
# out - container for output data
# deploy_info - hash with deploy data (modified in place)
def add_run_list_to_deploy_info out, deploy_info
  out << "\nGenerate run list hook...\n"

  if deploy_info["run_list"]
    out << "Deploy info already contains 'run_list': #{deploy_info["run_list"].join(", ")}\n"
    return
  end

  # Report the parts the run list is computed from.
  [["Project", @project], ["Deploy environment", @deploy_env], ["Server", @server]].each do |title, owner|
    out << "#{title} run list: #{owner.run_list.join(", ")}\n"
  end

  deploy_info["run_list"] = compute_run_list
  out << "New deploy run list: #{deploy_info["run_list"].join(", ")}\nRun list has been generated\n\n"
end
# Returns the run list of the server: the run lists of the provider, the
# project, the deploy environment, the server and (when the server belongs to
# a stack) the stack, concatenated in this order without duplicates.
# Missing (nil) run lists are skipped.
def compute_run_list
  sources = [
    @deploy_env.provider_instance.run_list,
    @project.run_list,
    @deploy_env.run_list,
    @server.run_list
  ]
  sources << Devops::Db.connector.stack(@server.stack).run_list if @server.stack

  combined = sources.inject([]) { |acc, part| part ? acc + part : acc }
  combined.uniq
end
private
# Prints +message+ to the output container and flushes it right away, so the
# message is not held back until the next flush.
def puts_and_flush(message)
@out.puts message
@out.flush
end
# Schedules the expiration of the server when the deploy environment has an
# "expires" value; does nothing (and returns nil) otherwise.
def schedule_expiration
  return unless @deploy_env.expires

  @out << "Planning expiration in #{@deploy_env.expires}"
  scheduler = ExpirationScheduler.new(@deploy_env.expires, @server)
  scheduler.schedule_expiration!
end
# Builds the shell command that checks whether +address+ (user@host) accepts
# SSH connections with the key at +cert_path+: a quiet ssh call that runs
# 'exit' remotely, with a 2 second connect timeout, a single connection
# attempt and stderr redirected to stdout.
def check_ssh_command(cert_path, address)
  [
    'ssh',
    "-i #{cert_path}",
    '-q',
    '-o StrictHostKeyChecking=no',
    '-o ConnectTimeout=2 -o ConnectionAttempts=1',
    "#{address} 'exit'",
    '2>&1'
  ].join(' ')
end
# Runs +cmd+ in a subshell and returns its standard output as a string.
# The exit status of the command is available through $? afterwards.
def execute_system_command(cmd)
  %x(#{cmd})
end
# Tells whether the most recently finished child process (e.g. the command
# run by #execute_system_command) exited successfully.
def last_command_successful?
  last_status = $?
  last_status.success?
end
# Returns the knife wrapper obtained from KnifeFactory; the object is
# requested once and then reused by this executor.
def knife_instance
  return @knife_instance if @knife_instance

  @knife_instance = KnifeFactory.instance
end
end
end
end