Skip to content

Commit

Permalink
Fix timeline verification before acquiring leader lock (#28)
Browse files Browse the repository at this point in the history
  • Loading branch information
munakoiso committed Jul 18, 2024
1 parent 8c3f94d commit b687fa1
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def primary_iter(self, db_state, zk_state):
# We shouldn't try to acquire leader lock if our current timeline is incorrect
if self.zk.get_current_lock_holder() is None:
# Make sure local timeline corresponds to that of the cluster.
if not self._verify_timeline(db_state, zk_state, just_check=True):
if not self._verify_timeline(db_state, zk_state, without_leader_lock=True):
return None

if not self.zk.try_acquire_lock():
Expand Down Expand Up @@ -900,7 +900,7 @@ def _update_single_node_status(self, role):
else:
self._is_single_node = self.zk.exists_path(self.zk.SINGLE_NODE_PATH)

def _verify_timeline(self, db_state, zk_state, just_check=False):
def _verify_timeline(self, db_state, zk_state, without_leader_lock=False):
"""
Make sure current timeline corresponds to the rest of the cluster (@ZK).
Save timeline and some related info into zk
Expand All @@ -915,14 +915,16 @@ def _verify_timeline(self, db_state, zk_state, just_check=False):
# If it does, but there is no info on replicas,
# close local PG instance.
if tli_res:
if not just_check and zk_state['replics_info_written'] is False:
if zk_state.get('replics_info_written') is False:
logging.error('Some error with ZK.')
# We should never actually get here, but we check it just in case.
# Here we should end the iteration; the primary will be checked and probably closed
# at the beginning of primary_iter
return None
# If ZK does not have timeline info, write it.
elif zk_state[self.zk.TIMELINE_INFO_PATH] is None and not just_check:
elif zk_state[self.zk.TIMELINE_INFO_PATH] is None:
if without_leader_lock:
return True
logging.warning('Could not get timeline from ZK. Saving it.')
self.zk.write(self.zk.TIMELINE_INFO_PATH, db_state['timeline'])
# If there is a mismatch in timeline:
Expand All @@ -946,7 +948,9 @@ def _verify_timeline(self, db_state, zk_state, just_check=False):
#
time.sleep(10 * self.config.getfloat('global', 'iteration_timeout'))
return None
elif zk_tli and zk_tli < db_tli and not just_check:
elif zk_tli and zk_tli < db_tli:
if without_leader_lock:
return True
logging.warning('Timeline in ZK is older than ours. Updating it it ZK.')
self.zk.write(self.zk.TIMELINE_INFO_PATH, db_tli)
logging.debug('Timeline verification succeeded')
Expand Down

0 comments on commit b687fa1

Please sign in to comment.