use crate ::{ GlobalConfig , InstanceConfig , QemuCommandBuilder } ;
use anyhow ::{ Context , Error } ;
use beau_collector ::BeauCollector ;
use qapi ::qmp ::{ QMP , Event } ;
use qapi ::{ Qmp , ExecuteError } ;
use std ::{ fmt , mem } ;
use std ::fmt ::{ Debug , Formatter } ;
use std ::fs ::{ read_link , OpenOptions , read_dir } ;
use std ::io ;
use std ::io ::{ BufReader , ErrorKind , Read , Write } ;
use std ::option ::Option ::Some ;
use std ::os ::unix ::net ::UnixStream ;
use std ::path ::PathBuf ;
use std ::process ::{ Child , Command } ;
use std ::result ::Result ::Ok ;
use std ::sync ::{ Arc , Mutex , MutexGuard } ;
use std ::time ::{ Duration , Instant } ;
use qapi_qmp ::QmpCommand ;
use std ::str ::FromStr ;
use libc ::{ cpu_set_t , CPU_SET , sched_setaffinity } ;
use crate ::cpu_list ::CpuList ;
#[ derive(Eq, PartialEq, Copy, Clone, Debug) ]
pub enum VirtualMachineState {
Stopped ,
Paused ,
Running ,
}
#[ derive(Debug) ]
pub struct VirtualMachine {
working_dir : PathBuf ,
state : VirtualMachineState ,
config : InstanceConfig ,
global_config : GlobalConfig ,
process : Option < Child > ,
control_socket : Option < ControlSocket > ,
}
struct ControlSocket {
unix_stream : CloneableUnixStream ,
qmp : Qmp < qapi ::Stream < BufReader < CloneableUnixStream > , CloneableUnixStream > > ,
_info : QMP ,
}
impl Debug for ControlSocket {
fn fmt ( & self , f : & mut Formatter < ' _ > ) -> fmt ::Result {
f . debug_tuple ( "ControlSocket" )
. field ( & self . unix_stream )
. finish ( )
}
}
const AUTO_UNBIND_BLACKLIST : & [ & str ] = & [ "nvidia" ] ;
impl VirtualMachine {
pub fn new (
config : InstanceConfig ,
global_config : & GlobalConfig ,
working_dir : PathBuf ,
) -> VirtualMachine {
VirtualMachine {
working_dir ,
state : VirtualMachineState ::Stopped ,
config ,
global_config : global_config . clone ( ) ,
process : None ,
control_socket : None ,
}
}
pub fn prepare ( & mut self , execute_fixes : bool , force : bool ) -> Result < ( ) , anyhow ::Error > {
let mut results = vec! [ ] ;
results . extend ( self . prepare_disks ( ) ) ;
results . extend ( self . prepare_vfio ( execute_fixes , force ) ) ;
results
. into_iter ( )
. bcollect ::< ( ) > ( )
. with_context ( | | format! ( "Failed to prepare VM {}" , self . config . name ) ) ? ;
Ok ( ( ) )
}
///
/// Doesn't really prepare them, but mostly checks if the user has permissions to read them
///
pub fn prepare_disks ( & self ) -> Vec < Result < ( ) , anyhow ::Error > > {
self . config
. disks
. iter ( )
. map ( | disk | {
OpenOptions ::new ( )
. read ( true )
. open ( & disk . path )
. with_context ( | | format! ( "Failed to open disk {}" , disk . path ) ) ? ;
Ok ( ( ) )
} )
. collect ::< Vec < _ > > ( )
}
/// Prepare VFIO related shenanigans,
/// This includes if requested via [execute_fixes] unbinding the requested vfio pci devices
/// And binding them to vfio-pci
///
/// With [execute_fixes] set to false, it will only check if everything is sane, and the correct driver is loaded
///
/// [force] can be given to auto-bind PCI devices that are blacklisted anyway. this can result in vore indefinitely hanging.
fn prepare_vfio ( & mut self , execute_fixes : bool , force : bool ) -> Vec < Result < ( ) , Error > > {
if self . config . vfio . is_empty ( ) {
return vec! [ ] ;
}
match Command ::new ( "modprobe" )
. arg ( "vfio-pci" )
. spawn ( )
. and_then ( | mut x | x . wait ( ) )
{
Err ( err ) = > return vec! [ Err ( err . into ( ) ) ] ,
Ok ( x ) if ! x . success ( ) = > {
return vec! [ Err ( anyhow ::anyhow ! (
"Failed to load vfio-pci kernel module. can't use VFIO"
) ) ] ;
}
Ok ( _ ) = > { }
}
self . config . vfio . iter ( ) . map ( | vfio | {
let pci_driver_path = format! ( "/sys/bus/pci/devices/{:#}/driver" , vfio . address ) ;
let driver = match read_link ( & pci_driver_path ) {
Ok ( driver_link ) = > {
let driver_path = driver_link . to_str ( ) . ok_or_else ( | | {
anyhow ::anyhow ! (
"Path to device driver for PCI device at {} is not valid utf-8" ,
vfio . address
)
} ) ? ;
let driver = driver_path . split ( "/" ) . last ( ) . ok_or_else ( | | {
anyhow ::anyhow ! (
"Path to device driver for PCI device at {} doesn't have a path to a driver" ,
vfio . address
)
} ) ? ;
driver . to_string ( )
}
Err ( err ) if err . kind ( ) = = ErrorKind ::NotFound = > "" . to_string ( ) ,
Err ( err ) = > return Err ( err . into ( ) ) ,
} ;
let is_blacklisted = AUTO_UNBIND_BLACKLIST . contains ( & driver . as_str ( ) ) & & ! force ;
if driver ! = "vfio-pci" & & ( ! execute_fixes | | is_blacklisted ) {
if ! driver . is_empty ( ) & & is_blacklisted {
anyhow ::bail ! ( "PCI device {} it's current driver is {}, but to be used with VFIO needs to be set to vfio-pci, this driver ({1}) has been blacklisted from automatic rebinding because it can't be cleanly unbound, please make sure this device is unbound before running vore" , vfio . address , driver )
} else if ! driver . is_empty ( ) {
anyhow ::bail ! ( "PCI device {} it's current driver is {}, but to be used with VFIO needs to be set to vfio-pci" , vfio . address , driver )
} else {
anyhow ::bail ! ( "PCI device at {} currently has no driver, but to be used with VFIO needs to be set to vfio-pci" , vfio . address )
}
}
if driver ! = "vfio-pci" & & execute_fixes & & ! is_blacklisted {
let address = format! ( "{:#}\n" , vfio . address ) . into_bytes ( ) ;
if ! driver . is_empty ( ) {
// Unbind the PCI device from the current driver
let mut unbind = std ::fs ::OpenOptions ::new ( ) . append ( true ) . open ( format! (
"/sys/bus/pci/devices/{:#}/driver/unbind" ,
vfio . address
) ) ? ;
unbind . write_all ( & address ) ? ;
}
{
// Set a driver override
let mut driver_override = OpenOptions ::new ( ) . append ( true ) . open ( format! (
"/sys/bus/pci/devices/{:#}/driver_override" ,
vfio . address
) ) ? ;
driver_override . write_all ( b" vfio-pci \n " ) ? ;
}
{
// Probe the PCI device so the driver override is picked up
let mut probe = OpenOptions ::new ( )
. append ( true )
. open ( "/sys/bus/pci/drivers_probe" ) ? ;
probe . write_all ( & address ) ? ;
}
let new_link = read_link ( & pci_driver_path ) ? ;
if ! new_link . ends_with ( "vfio-pci" ) {
anyhow ::bail ! ( "Tried to bind {} to vfio-pci but failed to do so (see /sys/bus/pci/devices/{:#} for more info)" , vfio . address , vfio . address )
}
}
Ok ( ( ) )
} )
. collect ::< Vec < _ > > ( )
}
pub fn get_cmd_line ( & self ) -> Result < Vec < String > , anyhow ::Error > {
let builder = QemuCommandBuilder ::new ( & self . global_config , self . working_dir . clone ( ) ) ? ;
builder . build ( & self . config )
}
pub fn pin_qemu_threads ( & self ) -> Result < ( ) , anyhow ::Error > {
let pid = if let Some ( child ) = & self . process {
child . id ( )
} else {
return Ok ( ( ) ) ;
} ;
let list = CpuList ::adjacent ( self . config . cpu . amount as usize ) ;
if list . is_none ( ) {
// If we are over provisioning CPU's there's not much use to pinning
return Ok ( ( ) ) ;
}
let list = list . unwrap ( ) ;
let mut kvm_threads = vec! [ ] ;
for item in read_dir ( format! ( "/proc/{}/task" , pid ) ) ? {
let entry = item ? ;
if ! entry . file_type ( ) ? . is_dir ( ) {
continue ;
}
let res = entry . file_name ( ) . to_str ( ) . ok_or_else ( | | anyhow ::anyhow ! ( "" ) ) . and_then ( | x | usize ::from_str ( x ) . map_err ( From ::from ) ) ;
if res . is_err ( ) {
continue ;
}
let tid = res . unwrap ( ) ;
let name = entry . path ( ) . join ( "comm" ) ;
let comm = std ::fs ::read_to_string ( name ) ? ;
if comm . starts_with ( "CPU " ) {
let nr = comm . chars ( ) . skip ( 4 ) . take_while ( | x | x . is_ascii_digit ( ) ) . collect ::< String > ( ) ;
let cpu_id = usize ::from_str ( & nr ) . unwrap ( ) ;
kvm_threads . push ( ( tid , cpu_id ) ) ;
}
}
for ( tid , cpu_id ) in kvm_threads {
if cpu_id > = list . len ( ) {
// ???
continue ;
}
let cpu = & list [ cpu_id ] ;
unsafe {
let mut set = mem ::zeroed ::< cpu_set_t > ( ) ;
CPU_SET ( cpu . id , & mut set ) ;
sched_setaffinity ( tid as i32 , mem ::size_of ::< cpu_set_t > ( ) , & set ) ;
}
}
Ok ( ( ) )
}
fn process_qmp_events ( & mut self ) -> Result < ( ) , anyhow ::Error > {
let events = if let Some ( qmp ) = self . control_socket . as_mut ( ) {
// While we could iter, we keep hold of the mutable reference, so it's easier to just collect the events
qmp . qmp . events ( ) . collect ::< Vec < _ > > ( )
} else {
return Ok ( ( ) ) ;
} ;
for event in events {
println! ( "Event: {:?}" , event ) ;
match event {
Event ::STOP { .. } = > {
if self . state ! = VirtualMachineState ::Stopped {
self . state = VirtualMachineState ::Paused ;
}
}
Event ::RESUME { .. } = > {
self . state = VirtualMachineState ::Running ;
}
Event ::SHUTDOWN { .. } = > {
self . state = VirtualMachineState ::Stopped ;
}
_ = > { }
}
}
Ok ( ( ) )
}
pub fn pause ( & mut self ) -> Result < ( ) , anyhow ::Error > {
if self . state ! = VirtualMachineState ::Running {
return Ok ( ( ) ) ;
}
self . send_qmp_command ( & qapi_qmp ::stop { } ) ? ;
Ok ( ( ) )
}
fn send_qmp_command < C : QmpCommand > ( & mut self , command : & C ) -> Result < C ::Ok , anyhow ::Error > {
let res = if let Some ( qmp ) = self . control_socket . as_mut ( ) {
qmp . qmp . execute ( command ) ?
} else {
anyhow ::bail ! ( "No control socket available" )
} ;
self . process_qmp_events ( ) ? ;
Ok ( res )
}
pub fn stop ( & mut self ) -> Result < ( ) , anyhow ::Error > {
if self . process . is_none ( ) | | self . control_socket . is_none ( ) | | self . state = = VirtualMachineState ::Stopped {
return Ok ( ( ) ) ;
}
self . send_qmp_command ( & qapi_qmp ::system_powerdown { } ) ? ;
Ok ( ( ) )
}
pub fn stop_now ( & mut self ) -> Result < ( ) , anyhow ::Error > {
self . stop ( ) ? ;
if let Some ( mut process ) = self . process . take ( ) {
self . wait ( Some ( Duration ::from_secs ( 30 ) ) , VirtualMachineState ::Stopped ) ? ;
self . quit ( ) ? ;
process . wait ( ) ? ;
}
Ok ( ( ) )
}
pub fn wait_till_stopped ( & mut self ) -> Result < ( ) , anyhow ::Error > {
self . wait ( None , VirtualMachineState ::Stopped ) ? ;
Ok ( ( ) )
}
pub fn quit ( & mut self ) -> Result < ( ) , anyhow ::Error > {
if self . control_socket . is_none ( ) {
return Ok ( ( ) ) ;
}
self . send_qmp_command ( & qapi_qmp ::quit { } )
. map ( | _ | ( ) )
. or_else ( | x |
if let Some ( ExecuteError ::Io ( err ) ) = x . downcast_ref ::< ExecuteError > ( ) {
if err . kind ( ) = = ErrorKind ::UnexpectedEof {
Ok ( ( ) )
} else {
Err ( x )
}
} else {
Err ( x )
} )
. map_err ( From ::from )
}
fn wait ( & mut self , duration : Option < Duration > , target_state : VirtualMachineState ) -> Result < bool , anyhow ::Error > {
let start = Instant ::now ( ) ;
while duration . map_or ( true , | dur | ( Instant ::now ( ) - start ) < dur ) {
let has_socket = self . control_socket . as_mut ( )
. map ( | x | x . qmp . nop ( ) )
. transpose ( ) ?
. is_some ( ) ;
if ! has_socket {
return Ok ( self . state = = target_state ) ;
}
self . process_qmp_events ( ) ? ;
if self . state = = target_state {
return Ok ( true ) ;
}
if duration . is_some ( ) {
std ::thread ::sleep ( Duration ::from_millis ( 500 ) ) ;
} else {
std ::thread ::sleep ( Duration ::from_secs ( 5 ) ) ;
}
}
Ok ( self . state = = target_state )
}
pub fn start ( & mut self ) -> Result < ( ) , anyhow ::Error > {
if let Some ( proc ) = & mut self . process {
if proc . try_wait ( ) ? . is_none ( ) {
return Ok ( ( ) ) ;
}
}
let mut command = Command ::new ( "qemu-system-x86_64" ) ;
command . args ( self . get_cmd_line ( ) ? ) ;
self . process = Some ( command . spawn ( ) ? ) ;
let mut res = | | {
let qemu_control_socket = format! ( "{}/qemu.sock" , self . working_dir . to_str ( ) . unwrap ( ) ) ;
let mut unix_stream = UnixStream ::connect ( & qemu_control_socket ) ;
let mut time = 30 ;
while let Err ( err ) = unix_stream {
if time < 0 {
Err ( err ) . context ( format! (
"After 30 seconds, QEMU Control socket ({}) didn't come up" ,
qemu_control_socket
) ) ? ;
}
std ::thread ::sleep ( Duration ::from_secs ( 1 ) ) ;
unix_stream = UnixStream ::connect ( & qemu_control_socket ) ;
if let Some ( proc ) = self . process . as_mut ( ) {
if let Some ( _ ) = proc . try_wait ( ) ? {
anyhow ::bail ! ( "QEMU quit early" )
}
}
time - = 1 ;
}
let unix_stream = unix_stream . unwrap ( ) ;
let unix_stream = CloneableUnixStream ( Arc ::new ( Mutex ::new ( unix_stream ) ) ) ;
let mut qmp = Qmp ::from_stream ( unix_stream . clone ( ) ) ;
let handshake = qmp . handshake ( ) ? ;
let mut control_socket = ControlSocket {
unix_stream ,
qmp ,
_info : handshake ,
} ;
// self.pin_qemu_threads()?;
control_socket
. qmp
. execute ( & qapi_qmp ::cont { } )
. context ( "Failed to send start command on qemu control socket" ) ? ;
control_socket . qmp . nop ( ) ? ;
self . control_socket = Some ( control_socket ) ;
self . process_qmp_events ( ) ? ;
Ok ( ( ) )
} ;
let result_ = res ( ) ;
if result_ . is_err ( ) {
if let Some ( mut qemu ) = self . process . take ( ) {
qemu . kill ( ) ? ;
qemu . wait ( ) ? ;
}
}
result_
}
}
#[ derive(Clone, Debug) ]
struct CloneableUnixStream ( Arc < Mutex < UnixStream > > ) ;
impl CloneableUnixStream {
pub fn lock ( & self ) -> Result < MutexGuard < ' _ , UnixStream > , std ::io ::Error > {
self . 0. lock ( ) . map_err ( | _ | {
io ::Error ::new (
ErrorKind ::Other ,
anyhow ::anyhow ! ( "Failed to lock UnixStream" ) ,
)
} )
}
}
impl Read for CloneableUnixStream {
fn read ( & mut self , buf : & mut [ u8 ] ) -> io ::Result < usize > {
let res = self . lock ( ) ? . read ( buf ) ;
if let Ok ( size ) = res {
println! ( "READ: {}" , String ::from_utf8_lossy ( & buf [ .. size ] ) ) ;
}
res
}
}
impl Write for CloneableUnixStream {
fn write ( & mut self , buf : & [ u8 ] ) -> io ::Result < usize > {
self . lock ( ) ? . write ( buf )
}
fn flush ( & mut self ) -> io ::Result < ( ) > {
self . lock ( ) ? . flush ( )
}
}