job waiting refactoring

This commit is contained in:
Anton Chuchkalov 2016-03-22 22:39:31 +03:00
parent 48333a9097
commit 39253e8605
5 changed files with 89 additions and 41 deletions

View File

@ -64,8 +64,9 @@ module Devops
ERROR_CODES.key(integer_code) || :unknown_error ERROR_CODES.key(integer_code) || :unknown_error
end end
def self.bootstrap_errors_reasons def self.error_occured_during_bootstrap?(code)
[:server_bootstrap_fail, :server_not_in_chef_nodes, :server_bootstrap_unknown_error] reason = reason_from_error_code(code)
[:server_bootstrap_fail, :server_not_in_chef_nodes, :server_bootstrap_unknown_error].include?(reason)
end end
def error_code(reason) def error_code(reason)

View File

@ -0,0 +1,35 @@
require 'workers/helpers/job_waiter'
require "db/mongo/models/report"
RSpec.describe JobWaiter, stubbed_connector: true do
let(:job_waiter) { described_class.new('job_id') }
before do
@report_double = instance_double(Devops::Model::Report)
allow(stubbed_connector).to receive(:report) { @report_double }
allow(job_waiter).to receive(:sleep)
end
it 'it returns 0 when job become completed' do
allow(@report_double).to receive(:status) { 'completed' }
expect(job_waiter.wait).to eq 0
end
it 'returns error code when job failes' do
allow(@report_double).to receive(:status) { 'failed' }
allow(@report_double).to receive(:job_result_code) { 1 }
expect(job_waiter.wait).to eq 1
end
it 'sleeps until something happens' do
allow(@report_double).to receive(:status).and_return('running', 'running', 'running', 'completed')
expect(job_waiter).to receive(:sleep).exactly(4).times
job_waiter.wait
end
it 'raises JobWaiter::TimeoutReached if nothing happens for too long' do
allow(@report_double).to receive(:status) { 'running' }
expect { job_waiter.wait }.to raise_error(JobWaiter::TimeoutReached)
end
end

View File

@ -18,7 +18,7 @@ RSpec.describe StackServersBootstrapper, stubbed_connector: true do
allow(stubbed_connector).to receive(:report) do |subreport_id| allow(stubbed_connector).to receive(:report) do |subreport_id|
subreport_id == '100' ? subreport1 : subreport2 subreport_id == '100' ? subreport1 : subreport2
end end
allow(bootstrapper).to receive(:sleep) allow(bootstrapper).to receive(:get_bootstrap_result) { 0 }
end end
it 'start bootstrap workers' do it 'start bootstrap workers' do
@ -31,27 +31,26 @@ RSpec.describe StackServersBootstrapper, stubbed_connector: true do
bootstrap! bootstrap!
end end
it 'waits for job to end' do it 'delegates waiting to JobWaiter' do
allow(subreport1).to receive(:status).and_return('running', 'running', 'running', 'completed') allow(bootstrapper).to receive(:get_bootstrap_result).and_call_original
allow(subreport2).to receive(:status).and_return('running', 'running', 'running', 'completed') allow_any_instance_of(JobWaiter).to receive(:wait) { 0 }
expect(bootstrapper).to receive(:sleep).exactly(2*4).times expect_any_instance_of(JobWaiter).to receive(:wait)
bootstrap! bootstrapper.bootstrap(build_list(:server, 1))
end end
it 'raises StackServerBootstrapError if an error occured during bootstrap' do it 'raises StackServerBootstrapError if an error occured during bootstrap' do
allow(subreport1).to receive(:status) {'failed'} allow(bootstrapper).to receive(:get_bootstrap_result) { 2 }
allow(subreport1).to receive(:job_result_code) { Devops::Executor::ServerExecutor.error_code(:server_bootstrap_fail) }
expect { bootstrap! }.to raise_error StackServerBootstrapError expect { bootstrap! }.to raise_error StackServerBootstrapError
end end
it 'raises StackServerDeployError if an error occured during deploy' do it 'raises StackServerDeployError if an error occured during deploy' do
allow(subreport1).to receive(:status) {'failed'} allow(bootstrapper).to receive(:get_bootstrap_result) { 8 }
allow(subreport1).to receive(:job_result_code) { Devops::Executor::ServerExecutor.error_code(:deploy_failed) }
expect { bootstrap! }.to raise_error StackServerDeployError expect { bootstrap! }.to raise_error StackServerDeployError
end end
it "raises StackServerBootstrapDeployTimeout if bootstrap and deploy hasn't been finished in 5000 seconds" do it "raises StackServerBootstrapDeployTimeout if bootstrap and deploy hasn't been finished in 5000 seconds" do
allow(subreport1).to receive(:status) {'running'} allow(bootstrapper).to receive(:get_bootstrap_result).and_call_original
allow_any_instance_of(JobWaiter).to receive(:wait) { raise JobWaiter::TimeoutReached }
expect { bootstrap! }.to raise_error StackServerBootstrapDeployTimeout expect { bootstrap! }.to raise_error StackServerBootstrapDeployTimeout
end end
end end

View File

@ -0,0 +1,23 @@
class JobWaiter
class TimeoutReached < StandardError; end
INTERVAL = 5
def initialize(job_id, timeout=5000)
@job_id, @timeout = job_id, timeout
end
def wait
(@timeout / INTERVAL).times do
sleep(INTERVAL)
report = ::Devops::Db.connector.report(@job_id)
case report.status
when Worker::STATUS::COMPLETED
return 0
when Worker::STATUS::FAILED
return report.job_result_code
end
end
raise TimeoutReached
end
end

View File

@ -1,5 +1,6 @@
require 'workers/bootstrap_worker' require 'workers/bootstrap_worker'
require "workers/stack_bootstrap/errors" require 'workers/stack_bootstrap/errors'
require 'workers/helpers/job_waiter'
class StackServersBootstrapper class StackServersBootstrapper
include PutsAndFlush include PutsAndFlush
@ -17,46 +18,35 @@ class StackServersBootstrapper
::Devops::Db.connector.add_report_subreports(@jid, servers_jobs_ids.values) ::Devops::Db.connector.add_report_subreports(@jid, servers_jobs_ids.values)
out.puts out.puts
servers_jobs_ids.each do |server_id, subreport_id| servers_jobs_ids.each do |server_id, job_id|
job_result_code = wait_for_job(server_id, subreport_id) bootstrap_result_code = get_bootstrap_result(server_id, job_id)
check_job_result!(server_id, job_result_code) check_bootstrap_result!(server_id, bootstrap_result_code, job_id)
end end
puts_and_flush "Stack servers have been bootstraped" puts_and_flush "Stack servers have been bootstraped"
end end
private private
def check_job_result!(server_id, job_result_code) def check_bootstrap_result!(server_id, result_code, job_id)
return if job_result_code == 0 if result_code == 0
puts_and_flush "Server '#{server_id}' has been bootstraped (job #{job_id})."
return
end
reason = Devops::Executor::ServerExecutor.reason_from_error_code(job_result_code) reason = Devops::Executor::ServerExecutor.reason_from_error_code(result_code)
puts_and_flush "Operation result for #{server_id}: #{reason}" puts_and_flush "Server '#{server_id}' bootstraped failed (job #{job_id}). Reason: #{reason}"
if error_occured_during_bootstrap?(reason) if Devops::Executor::ServerExecutor.error_occured_during_bootstrap?(result_code)
raise StackServerBootstrapError # will cause rollback of a stack raise StackServerBootstrapError # will cause rollback of a stack
else else
raise StackServerDeployError #will not cause rollback of a stack raise StackServerDeployError # will not cause rollback of a stack
end end
end end
def error_occured_during_bootstrap?(reason) def get_bootstrap_result(server_id, job_id)
Devops::Executor::ServerExecutor.bootstrap_errors_reasons.include?(reason) JobWaiter.new(job_id).wait
end rescue JobWaiter::TimeoutReached
puts_and_flush "Waiting for job #{job_id} halted: timeout reached."
def wait_for_job(server_id, subreport_id)
1000.times do
sleep(5)
subreport = ::Devops::Db.connector.report(subreport_id)
case subreport.status
when Worker::STATUS::COMPLETED
puts_and_flush "Server '#{server_id}' has been bootstraped with job #{subreport_id}"
return 0
when Worker::STATUS::FAILED
puts_and_flush "Server '#{server_id}' hasn't been bootstraped with job #{subreport_id}. Job result code is '#{subreport.job_result_code}'"
return subreport.job_result_code
end
end
puts_and_flush "Waiting for job #{subreport_id} halted: timeout reached."
raise StackServerBootstrapDeployTimeout raise StackServerBootstrapDeployTimeout
end end