From patchwork Thu Aug 18 02:27:12 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Michael-Doyle Hudson X-Patchwork-Id: 3497 Return-Path: X-Original-To: patchwork@peony.canonical.com Delivered-To: patchwork@peony.canonical.com Received: from fiordland.canonical.com (fiordland.canonical.com [91.189.94.145]) by peony.canonical.com (Postfix) with ESMTP id 0DEB623E54 for ; Thu, 18 Aug 2011 02:27:16 +0000 (UTC) Received: from mail-ew0-f52.google.com (mail-ew0-f52.google.com [209.85.215.52]) by fiordland.canonical.com (Postfix) with ESMTP id 0290DA18760 for ; Thu, 18 Aug 2011 02:27:15 +0000 (UTC) Received: by mail-ew0-f52.google.com with SMTP id 28so858840ewy.11 for ; Wed, 17 Aug 2011 19:27:15 -0700 (PDT) Received: by 10.213.32.131 with SMTP id c3mr1573839ebd.94.1313634435549; Wed, 17 Aug 2011 19:27:15 -0700 (PDT) X-Forwarded-To: linaro-patchwork@canonical.com X-Forwarded-For: patch@linaro.org linaro-patchwork@canonical.com Delivered-To: patches@linaro.org Received: by 10.213.102.5 with SMTP id e5cs52602ebo; Wed, 17 Aug 2011 19:27:15 -0700 (PDT) Received: by 10.216.178.142 with SMTP id f14mr115295wem.85.1313634433757; Wed, 17 Aug 2011 19:27:13 -0700 (PDT) Received: from indium.canonical.com (indium.canonical.com [91.189.90.7]) by mx.google.com with ESMTPS id 46si4441794wel.92.2011.08.17.19.27.13 (version=TLSv1/SSLv3 cipher=OTHER); Wed, 17 Aug 2011 19:27:13 -0700 (PDT) Received-SPF: pass (google.com: best guess record for domain of bounces@canonical.com designates 91.189.90.7 as permitted sender) client-ip=91.189.90.7; Authentication-Results: mx.google.com; spf=pass (google.com: best guess record for domain of bounces@canonical.com designates 91.189.90.7 as permitted sender) smtp.mail=bounces@canonical.com Received: from ackee.canonical.com ([91.189.89.26]) by indium.canonical.com with esmtp (Exim 4.71 #1 (Debian)) id 1QtsK9-0000ar-1Y for ; Thu, 18 Aug 2011 02:27:13 +0000 Received: from ackee.canonical.com (localhost [127.0.0.1]) by ackee.canonical.com (Postfix) with ESMTP id F24A6E0304 for ; Thu, 18 Aug 2011 02:27:12 +0000 (UTC) MIME-Version: 1.0 X-Launchpad-Project: lava-scheduler X-Launchpad-Branch: ~linaro-validation/lava-scheduler/trunk X-Launchpad-Message-Rationale: Subscriber X-Launchpad-Branch-Revision-Number: 64 X-Launchpad-Notification-Type: branch-revision To: Linaro Patch Tracker From: noreply@launchpad.net Subject: [Branch ~linaro-validation/lava-scheduler/trunk] Rev 64: If the connection to the DB fails or drops for the scheduler daemon, close the Message-Id: <20110818022712.28308.88849.launchpad@ackee.canonical.com> Date: Thu, 18 Aug 2011 02:27:12 -0000 Reply-To: noreply@launchpad.net Sender: bounces@canonical.com Errors-To: bounces@canonical.com Precedence: bulk X-Generated-By: Launchpad (canonical.com); Revision="13697"; Instance="initZopeless config overlay" X-Launchpad-Hash: c62be6707809a28dcc5fd04e6679d6d8a5043d00 Merge authors: Michael Hudson-Doyle (mwhudson) ------------------------------------------------------------ revno: 64 [merge] committer: Michael-Doyle Hudson branch nick: trunk timestamp: Thu 2011-08-18 14:21:39 +1200 message: If the connection to the DB fails or drops for the scheduler daemon, close the connection so that we attempt to reopen it on the next access. Also improve the errbacks that handle errors to log the traceback as well. modified: lava_scheduler_daemon/board.py lava_scheduler_daemon/dbjobsource.py lava_scheduler_daemon/service.py --- lp:lava-scheduler https://code.launchpad.net/~linaro-validation/lava-scheduler/trunk You are subscribed to branch lp:lava-scheduler. To unsubscribe from this branch go to https://code.launchpad.net/~linaro-validation/lava-scheduler/trunk/+edit-subscription === modified file 'lava_scheduler_daemon/board.py' --- lava_scheduler_daemon/board.py 2011-08-16 04:07:08 +0000 +++ lava_scheduler_daemon/board.py 2011-08-18 02:19:55 +0000 @@ -199,7 +199,9 @@ self._maybeStartJob, self._ebCheckForJob) def _ebCheckForJob(self, result): - self.logger.exception(result.value) + self.logger.error( + '%s: %s\n%s', result.type.__name__, result.value, + result.getTraceback()) self._maybeStartJob(None) def _finish_stop(self): === modified file 'lava_scheduler_daemon/dbjobsource.py' --- lava_scheduler_daemon/dbjobsource.py 2011-08-17 03:09:36 +0000 +++ lava_scheduler_daemon/dbjobsource.py 2011-08-18 02:19:55 +0000 @@ -3,8 +3,10 @@ import logging from django.core.files.base import ContentFile +from django.db import connection from django.db import IntegrityError, transaction from django.db.models import Q +from django.db.utils import DatabaseError from twisted.internet.threads import deferToThread @@ -13,6 +15,13 @@ from lava_scheduler_app.models import Device, TestJob from lava_scheduler_daemon.jobsource import IJobSource +try: + from psycopg2 import InterfaceError, OperationalError +except ImportError: + class InterfaceError(Exception): + pass + class OperationalError(Exception): + pass class DatabaseJobSource(object): @@ -24,8 +33,28 @@ def getBoardList_impl(self): return [d.hostname for d in Device.objects.all()] + def deferForDB(self, func, *args, **kw): + def wrapper(*args, **kw): + try: + return func(*args, **kw) + except (DatabaseError, OperationalError, InterfaceError), error: + message = str(error) + if message == 'connection already closed' or \ + message.startswith( + 'terminating connection due to administrator command') or \ + message.startswith( + 'could not connect to server: Connection refused'): + self.logger.warning( + 'Forcing reconnection on next db access attempt') + if connection.connection: + if not connection.connection.closed: + connection.connection.close() + connection.connection = None + raise + return deferToThread(wrapper, *args, **kw) + def getBoardList(self): - return deferToThread(self.getBoardList_impl) + return self.deferForDB(self.getBoardList_impl) @transaction.commit_manually() def getJobForBoard_impl(self, board_name): @@ -79,7 +108,7 @@ return None def getJobForBoard(self, board_name): - return deferToThread(self.getJobForBoard_impl, board_name) + return self.deferForDB(self.getJobForBoard_impl, board_name) @transaction.commit_on_success() def jobCompleted_impl(self, board_name): @@ -94,7 +123,7 @@ job.save() def jobCompleted(self, board_name): - return deferToThread(self.jobCompleted_impl, board_name) + return self.deferForDB(self.jobCompleted_impl, board_name) @transaction.commit_on_success() def jobOobData_impl(self, board_name, key, value): @@ -106,5 +135,4 @@ device.current_job.save() def jobOobData(self, board_name, key, value): - return deferToThread(self.jobOobData_impl, board_name, key, value) - + return self.deferForDB(self.jobOobData_impl, board_name, key, value) === modified file 'lava_scheduler_daemon/service.py' --- lava_scheduler_daemon/service.py 2011-07-27 06:59:33 +0000 +++ lava_scheduler_daemon/service.py 2011-08-17 05:33:25 +0000 @@ -21,7 +21,12 @@ def _updateBoards(self): self.logger.debug("Refreshing board list") - return self.source.getBoardList().addCallback(self._cbUpdateBoards) + def _eb(failure): + self.logger.error( + '%s: %s\n%s', failure.type.__name__, failure.value, + failure.getTraceback()) + return self.source.getBoardList().addCallback( + self._cbUpdateBoards).addErrback(_eb) def _cbUpdateBoards(self, board_names): if set(board_names) == set(self.boards):