job waiting refactoring

This commit is contained in:
Anton Chuchkalov 2016-03-22 22:39:31 +03:00
parent 48333a9097
commit 39253e8605
5 changed files with 89 additions and 41 deletions

View File

@ -64,8 +64,9 @@ module Devops
ERROR_CODES.key(integer_code) || :unknown_error
end
def self.bootstrap_errors_reasons
[:server_bootstrap_fail, :server_not_in_chef_nodes, :server_bootstrap_unknown_error]
def self.error_occured_during_bootstrap?(code)
reason = reason_from_error_code(code)
[:server_bootstrap_fail, :server_not_in_chef_nodes, :server_bootstrap_unknown_error].include?(reason)
end
def error_code(reason)

View File

@ -0,0 +1,35 @@
require 'workers/helpers/job_waiter'
require "db/mongo/models/report"
# Specs for JobWaiter#wait. The report lookup on the connector and the waiter's
# own `sleep` are stubbed, so no example actually waits.
# NOTE(review): `stubbed_connector` is presumably supplied by a shared context
# enabled through the `stubbed_connector: true` metadata - confirm in spec support.
RSpec.describe JobWaiter, stubbed_connector: true do
  let(:report) { instance_double(Devops::Model::Report) }
  let(:job_waiter) { described_class.new('job_id') }

  before do
    # Every report lookup returns the same double; each example drives its status.
    allow(stubbed_connector).to receive(:report).and_return(report)
    allow(job_waiter).to receive(:sleep)
  end

  it 'it returns 0 when job become completed' do
    allow(report).to receive(:status).and_return('completed')

    expect(job_waiter.wait).to eq 0
  end

  it 'returns error code when job failes' do
    allow(report).to receive(:status).and_return('failed')
    allow(report).to receive(:job_result_code).and_return(1)

    expect(job_waiter.wait).to eq 1
  end

  it 'sleeps until something happens' do
    statuses = ['running', 'running', 'running', 'completed']
    allow(report).to receive(:status).and_return(*statuses)

    # One sleep precedes every poll, so four polls mean four sleeps.
    expect(job_waiter).to receive(:sleep).exactly(4).times

    job_waiter.wait
  end

  it 'raises JobWaiter::TimeoutReached if nothing happens for too long' do
    allow(report).to receive(:status).and_return('running')

    expect { job_waiter.wait }.to raise_error(JobWaiter::TimeoutReached)
  end
end

View File

@ -18,7 +18,7 @@ RSpec.describe StackServersBootstrapper, stubbed_connector: true do
allow(stubbed_connector).to receive(:report) do |subreport_id|
subreport_id == '100' ? subreport1 : subreport2
end
allow(bootstrapper).to receive(:sleep)
allow(bootstrapper).to receive(:get_bootstrap_result) { 0 }
end
it 'start bootstrap workers' do
@ -31,27 +31,26 @@ RSpec.describe StackServersBootstrapper, stubbed_connector: true do
bootstrap!
end
it 'waits for job to end' do
allow(subreport1).to receive(:status).and_return('running', 'running', 'running', 'completed')
allow(subreport2).to receive(:status).and_return('running', 'running', 'running', 'completed')
expect(bootstrapper).to receive(:sleep).exactly(2*4).times
bootstrap!
it 'delegates waiting to JobWaiter' do
allow(bootstrapper).to receive(:get_bootstrap_result).and_call_original
allow_any_instance_of(JobWaiter).to receive(:wait) { 0 }
expect_any_instance_of(JobWaiter).to receive(:wait)
bootstrapper.bootstrap(build_list(:server, 1))
end
it 'raises StackServerBootstrapError if an error occured during bootstrap' do
allow(subreport1).to receive(:status) {'failed'}
allow(subreport1).to receive(:job_result_code) { Devops::Executor::ServerExecutor.error_code(:server_bootstrap_fail) }
allow(bootstrapper).to receive(:get_bootstrap_result) { 2 }
expect { bootstrap! }.to raise_error StackServerBootstrapError
end
it 'raises StackServerDeployError if an error occured during deploy' do
allow(subreport1).to receive(:status) {'failed'}
allow(subreport1).to receive(:job_result_code) { Devops::Executor::ServerExecutor.error_code(:deploy_failed) }
allow(bootstrapper).to receive(:get_bootstrap_result) { 8 }
expect { bootstrap! }.to raise_error StackServerDeployError
end
it "raises StackServerBootstrapDeployTimeout if bootstrap and deploy hasn't been finished in 5000 seconds" do
allow(subreport1).to receive(:status) {'running'}
allow(bootstrapper).to receive(:get_bootstrap_result).and_call_original
allow_any_instance_of(JobWaiter).to receive(:wait) { raise JobWaiter::TimeoutReached }
expect { bootstrap! }.to raise_error StackServerBootstrapDeployTimeout
end
end

View File

@ -0,0 +1,23 @@
# Polls the report of a background job until the job reaches a final status
# or the allowed number of polls is used up.
class JobWaiter
  # Raised by #wait when no final status was seen within the allowed polls.
  class TimeoutReached < StandardError; end

  # Seconds slept before each poll of the job report.
  INTERVAL = 5

  # job_id  - id passed to the connector to look up the job's report.
  # timeout - total sleep budget in seconds; it is divided by INTERVAL to get
  #           the number of polls.
  def initialize(job_id, timeout=5000)
    @job_id = job_id
    @timeout = timeout
  end

  # Sleeps INTERVAL seconds, then reads the job report; repeats up to
  # (timeout / INTERVAL) times. The sleep comes first, so even the very first
  # poll is preceded by a pause.
  #
  # Returns 0 when the report status is Worker::STATUS::COMPLETED, or the
  # report's job_result_code when it is Worker::STATUS::FAILED.
  # Raises TimeoutReached when every poll saw some other status.
  def wait
    polls_allowed = @timeout / INTERVAL

    polls_allowed.times do
      sleep(INTERVAL)
      job_report = ::Devops::Db.connector.report(@job_id)

      case job_report.status
      when Worker::STATUS::COMPLETED then return 0
      when Worker::STATUS::FAILED then return job_report.job_result_code
      end
    end

    raise TimeoutReached
  end
end

View File

@ -1,5 +1,6 @@
require 'workers/bootstrap_worker'
require "workers/stack_bootstrap/errors"
require 'workers/stack_bootstrap/errors'
require 'workers/helpers/job_waiter'
class StackServersBootstrapper
include PutsAndFlush
@ -17,46 +18,35 @@ class StackServersBootstrapper
::Devops::Db.connector.add_report_subreports(@jid, servers_jobs_ids.values)
out.puts
servers_jobs_ids.each do |server_id, subreport_id|
job_result_code = wait_for_job(server_id, subreport_id)
check_job_result!(server_id, job_result_code)
servers_jobs_ids.each do |server_id, job_id|
bootstrap_result_code = get_bootstrap_result(server_id, job_id)
check_bootstrap_result!(server_id, bootstrap_result_code, job_id)
end
puts_and_flush "Stack servers have been bootstraped"
end
private
def check_job_result!(server_id, job_result_code)
return if job_result_code == 0
def check_bootstrap_result!(server_id, result_code, job_id)
if result_code == 0
puts_and_flush "Server '#{server_id}' has been bootstraped (job #{job_id})."
return
end
reason = Devops::Executor::ServerExecutor.reason_from_error_code(job_result_code)
puts_and_flush "Operation result for #{server_id}: #{reason}"
reason = Devops::Executor::ServerExecutor.reason_from_error_code(result_code)
puts_and_flush "Server '#{server_id}' bootstraped failed (job #{job_id}). Reason: #{reason}"
if error_occured_during_bootstrap?(reason)
if Devops::Executor::ServerExecutor.error_occured_during_bootstrap?(result_code)
raise StackServerBootstrapError # will cause rollback of a stack
else
raise StackServerDeployError # will not cause rollback of a stack
end
end
def error_occured_during_bootstrap?(reason)
Devops::Executor::ServerExecutor.bootstrap_errors_reasons.include?(reason)
end
def wait_for_job(server_id, subreport_id)
1000.times do
sleep(5)
subreport = ::Devops::Db.connector.report(subreport_id)
case subreport.status
when Worker::STATUS::COMPLETED
puts_and_flush "Server '#{server_id}' has been bootstraped with job #{subreport_id}"
return 0
when Worker::STATUS::FAILED
puts_and_flush "Server '#{server_id}' hasn't been bootstraped with job #{subreport_id}. Job result code is '#{subreport.job_result_code}'"
return subreport.job_result_code
end
end
puts_and_flush "Waiting for job #{subreport_id} halted: timeout reached."
def get_bootstrap_result(server_id, job_id)
JobWaiter.new(job_id).wait
rescue JobWaiter::TimeoutReached
puts_and_flush "Waiting for job #{job_id} halted: timeout reached."
raise StackServerBootstrapDeployTimeout
end