diff --git a/cmd/metacli/main.go b/cmd/metacli/main.go index 20706d2..140bde6 100644 --- a/cmd/metacli/main.go +++ b/cmd/metacli/main.go @@ -35,15 +35,19 @@ func main() { defer db.Close() if *start { - metadata.StartMetadata(metadata.NoCloudConfig{ + if err := metadata.StartMetadata(metadata.NoCloudConfig{ VpcName: *vpc, Name: *vm_name, BindIP: *bind_ip, BindPort: *bind_port, Password: *password, SSHKEY: *ssh_key, - }, db, *dryrun) + }, db, *dryrun); err != nil { + fmt.Println(err) + } } else if *stop { - metadata.StopMetadata(*vm_name, db, *dryrun) + if err := metadata.StopMetadata(*vm_name, db, *dryrun); err != nil { + fmt.Println(err) + } } } diff --git a/internal/metadata/handle.go b/internal/metadata/handle.go index 33ac088..9ec5486 100644 --- a/internal/metadata/handle.go +++ b/internal/metadata/handle.go @@ -1,26 +1,40 @@ package metadata import ( + "fmt" + "git.g3e.fr/syonad/two/pkg/systemd" "github.com/dgraph-io/badger/v4" ) -func StartMetadata(config NoCloudConfig, db *badger.DB, dryrun bool) { - service, _ := systemd.New() +func StartMetadata(config NoCloudConfig, db *badger.DB, dryrun bool) error { + service, err := systemd.New() + if err != nil { + return fmt.Errorf("failed to connect to systemd: %w", err) + } defer service.Close() LoadNcCloudInDB(config, db) if !dryrun { - service.Start("metadata@" + config.Name) + if err := service.Start("metadata@" + config.Name + ".service"); err != nil { + return fmt.Errorf("failed to start metadata@%s: %w", config.Name, err) + } } + return nil } -func StopMetadata(vm_name string, db *badger.DB, dryrun bool) { - service, _ := systemd.New() +func StopMetadata(vm_name string, db *badger.DB, dryrun bool) error { + service, err := systemd.New() + if err != nil { + return fmt.Errorf("failed to connect to systemd: %w", err) + } defer service.Close() UnLoadNoCloudInDB(vm_name, db) if !dryrun { - service.Stop("metadata@" + vm_name) + if err := service.Stop("metadata@" + vm_name + ".service"); err != nil { + return fmt.Errorf("failed to stop metadata@%s: %w", vm_name, err) + } } + return nil } diff --git a/internal/netns/call_other.go b/internal/netns/call_other.go index 14a8924..1b5794e 100644 --- a/internal/netns/call_other.go +++ b/internal/netns/call_other.go @@ -2,6 +2,6 @@ package netns -func call(name string, fn func() error) error { +func call(_ string, fn func() error) error { return fn() } diff --git a/internal/netns/create_linux.go b/internal/netns/create_linux.go index 8ee0afa..51c739a 100644 --- a/internal/netns/create_linux.go +++ b/internal/netns/create_linux.go @@ -3,12 +3,17 @@ package netns import ( + "fmt" "os" + "runtime" "golang.org/x/sys/unix" ) func create(name string) error { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + base := "/var/run/netns" path := base + "/" + name @@ -16,6 +21,12 @@ func create(name string) error { return err } + // si le fichier existe déjà, le démonter d'abord + if _, err := os.Stat(path); err == nil { + unix.Unmount(path, unix.MNT_DETACH) + os.Remove(path) + } + // fichier cible f, err := os.Create(path) if err != nil { @@ -35,9 +46,12 @@ func create(name string) error { return err } - // bind mount du netns courant vers /var/run/netns/ + // bind mount du netns du thread courant vers /var/run/netns/ + // /proc/self/ns/net pointe vers le ns du processus (thread principal), + // pas du thread courant — il faut utiliser le tid explicitement + threadNsPath := fmt.Sprintf("/proc/self/task/%d/ns/net", unix.Gettid()) if err := unix.Mount( - "/proc/self/ns/net", + threadNsPath, path, "", unix.MS_BIND, diff --git a/internal/vpc/create.go b/internal/vpc/create.go index 3c3ad87..a6e6aaa 100644 --- a/internal/vpc/create.go +++ b/internal/vpc/create.go @@ -22,7 +22,7 @@ func CreateVPC(db *badger.DB, name string) error { } // create veth public for this netns - if err := netif.CreateVethToNetns("veth"+name+"ext", "vethpublicint", "/var/run/netns/"+name, 9000); err != nil { + if err := netif.CreateVethToNetns("vp-"+name+"-e", "vp-public-i", "/var/run/netns/"+name, 9000); err != nil { return err } @@ -34,24 +34,24 @@ func CreateVPC(db *badger.DB, name string) error { } // set veth to ext public bridge - if err := netif.BridgeSetMaster("veth"+name+"ext", "br-public"); err != nil { + if err := netif.BridgeSetMaster("vp-"+name+"-e", "br-public"); err != nil { return err } // set veth to int public bridge if err := netns.Call(name, func() error { - return netif.BridgeSetMaster("vethpublicint", "br-public") + return netif.BridgeSetMaster("vp-public-i", "br-public") }); err != nil { return err } // set set ext veth up - if err := netif.LinkSetUp("veth" + name + "ext"); err != nil { - return nil + if err := netif.LinkSetUp("vp-" + name + "-e"); err != nil { + return err } // set set int veth up if err := netns.Call(name, func() error { - return netif.LinkSetUp("vethpublicint") + return netif.LinkSetUp("vp-public-i") }); err != nil { return err } diff --git a/internal/vpc/delete.go b/internal/vpc/delete.go index a05e807..10c863d 100644 --- a/internal/vpc/delete.go +++ b/internal/vpc/delete.go @@ -12,7 +12,7 @@ func DeleteVPC(db *badger.DB, name string) error { if state, err := kv.GetFromDB(db, "vpc/"+name+"/state"); err != nil { return err } else if state == "deleting" { - if err := netif.DeleteLink(name + "-ext"); err != nil { + if err := netif.DeleteLink("vp-" + name + "-e"); err != nil { return err } diff --git a/pkg/db/kv/init.go b/pkg/db/kv/init.go index cc09cb0..677c656 100644 --- a/pkg/db/kv/init.go +++ b/pkg/db/kv/init.go @@ -6,7 +6,8 @@ import ( func InitDB(conf Config, readonly bool) *badger.DB { opts := badger.DefaultOptions(conf.Path). - WithReadOnly(readonly) + WithReadOnly(readonly). + WithBypassLockGuard(readonly) opts.Logger = nil opts.ValueLogFileSize = 10 << 20 // 10 Mo par fichier vlog opts.NumMemtables = 1 diff --git a/pkg/systemd/main.go b/pkg/systemd/main.go index ea5fef0..e89af6e 100644 --- a/pkg/systemd/main.go +++ b/pkg/systemd/main.go @@ -11,6 +11,7 @@ import ( const ( defaultTimeout = 5 * time.Second + jobTimeout = 30 * time.Second jobMode = "replace" ) @@ -28,10 +29,7 @@ type ServiceStatus struct { // New crée une connexion D-Bus systemd (scope système) func New() (*Manager, error) { - ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) - defer cancel() - - conn, err := dbus.NewSystemConnectionContext(ctx) + conn, err := dbus.NewSystemConnectionContext(context.Background()) if err != nil { return nil, err } @@ -57,17 +55,17 @@ func (m *Manager) Stop(service string) error { } func (m *Manager) job(method, service string) error { - ctx, cancel := context.WithTimeout(context.Background(), defaultTimeout) - defer cancel() + callCtx, callCancel := context.WithTimeout(context.Background(), defaultTimeout) + defer callCancel() ch := make(chan string, 1) var err error switch method { case "StartUnit": - _, err = m.conn.StartUnitContext(ctx, service, jobMode, ch) + _, err = m.conn.StartUnitContext(callCtx, service, jobMode, ch) case "StopUnit": - _, err = m.conn.StopUnitContext(ctx, service, jobMode, ch) + _, err = m.conn.StopUnitContext(callCtx, service, jobMode, ch) default: return errors.New("unsupported job method") } @@ -76,9 +74,16 @@ func (m *Manager) job(method, service string) error { return err } - result := <-ch - if result != "done" { - return fmt.Errorf("%s %s failed: %s", method, service, result) + waitCtx, waitCancel := context.WithTimeout(context.Background(), jobTimeout) + defer waitCancel() + + select { + case result := <-ch: + if result != "done" { + return fmt.Errorf("%s %s failed: %s", method, service, result) + } + case <-waitCtx.Done(): + return fmt.Errorf("%s %s timed out after %s", method, service, jobTimeout) } return nil