Skip to content

Commit f923434

Browse files
authored
Fixing race condition during restore process (#248)
* Fixing race condition during restore process * Ditch log
1 parent e61589a commit f923434

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

internal/flypg/restore.go

+28
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ import (
1414
// Filesystem paths consulted while preparing and running a restore.
const (
1515
// pathToHBAFile is the location of the pg_hba.conf file.
pathToHBAFile = "/data/postgresql/pg_hba.conf"
1616
// pathToHBABackup is the ".bak" sibling of pathToHBAFile
// (presumably a saved copy of the original — confirm against its users).
pathToHBABackup = "/data/postgresql/pg_hba.conf.bak"
17+
// postmasterPath is the postmaster.pid file; waitForPostmasterExit polls
// for this file to disappear.
postmasterPath = "/data/postgresql/postmaster.pid"
1718
// restoreLockFile is the lock file written by setRestoreLock and checked
// by isRestoreActive.
restoreLockFile = "/data/restore.lock"
1819
)
1920

@@ -85,6 +86,12 @@ func prepareRemoteRestore(ctx context.Context, node *Node) error {
8586

8687
svisor.Stop()
8788

89+
// Wait for the postmaster to exit
90+
// TODO - This should be done in the supervisor
91+
if err := waitForPostmasterExit(ctx); err != nil {
92+
return fmt.Errorf("failed to wait for postmaster to exit: %s", err)
93+
}
94+
8895
// Set the lock file so the init process knows not to restart
8996
// the restore process.
9097
if err := setRestoreLock(); err != nil {
@@ -98,6 +105,27 @@ func prepareRemoteRestore(ctx context.Context, node *Node) error {
98105
return nil
99106
}
100107

108+
func waitForPostmasterExit(ctx context.Context) error {
109+
ticker := time.NewTicker(1 * time.Second)
110+
timeout := time.After(10 * time.Second)
111+
defer ticker.Stop()
112+
for {
113+
select {
114+
case <-ticker.C:
115+
switch _, err := os.Stat(postmasterPath); {
116+
case os.IsNotExist(err):
117+
return nil
118+
case err != nil:
119+
return fmt.Errorf("error checking postmaster file: %v", err)
120+
}
121+
case <-timeout:
122+
return fmt.Errorf("timed out waiting for postmaster to exit")
123+
case <-ctx.Done():
124+
return ctx.Err()
125+
}
126+
}
127+
}
128+
101129
func isRestoreActive() (bool, error) {
102130
if _, err := os.Stat(restoreLockFile); err == nil {
103131
val, err := os.ReadFile(restoreLockFile)

0 commit comments

Comments
 (0)