Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 23 Aug 2007 09:34:10 +0400
From:      "Yuriy Tsibizov" <yuriy.tsibizov@gmail.com>
To:        "Roman Divacky" <rdivacky@freebsd.org>
Cc:        freebsd-hackers@freebsd.org, freebsd-emulation@freebsd.org
Subject:   Re: User-mode Linux (Was: modify syscall nr on-the-fly)
Message-ID:  <c019b3090708222234i497da7cax488c7343471ed6c3@mail.gmail.com>
In-Reply-To: <20070822211047.GA35783@freebsd.org>
References:  <c019b3090708211153o7dce8365l945b24ad1c962d22@mail.gmail.com> <c019b3090708220242s690b673i601899c0be2b32d4@mail.gmail.com> <20070822211047.GA35783@freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
2007/8/23, Roman Divacky <rdivacky@freebsd.org>:
> here is a little review of mine... just little suggestions.
>
> Index: i386/i386/trap.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/i386/i386/trap.c,v
> retrieving revision 1.307
> diff -u -r1.307 trap.c
> --- i386/i386/trap.c    26 Jul 2007 15:32:55 -0000      1.307
> +++ i386/i386/trap.c    22 Aug 2007 08:53:19 -0000
> @@ -1004,6 +1004,32 @@
>
>                 PTRACESTOP_SC(p, td, S_PT_SCE);
>
> +               if (__predict_false(p->p_sysent->sv_name[0]=='L')) {
>
> please use __predict_true(p->p_sysent != &elf_linux_sysvec)

Will it be possible to link (GENERIC) kernel wih this check? I can't
find elf_linux_sysvec in my /boot/kernel/kernel...


>
> +                       if (code != frame->tf_eax) {
> +                       printf("linux sysctl patched: code %d return eax %d\n", code, frame->tf_eax);
> +                               /* retry */
> +                               code = frame->tf_eax;
> +
> +                               if (p->p_sysent->sv_prepsyscall)
> +                                       /*
> +                                        * The prep code is MP aware.
> +                                        */
> +                                       (*p->p_sysent->sv_prepsyscall)(frame, args, &code, &params);
> +                               /* else should always be null */
> +
> +                               if (p->p_sysent->sv_mask)
> +                                       code &= p->p_sysent->sv_mask;
>
> the sv_mask should be removed.. dont use it in your code. its entirely pointless when dealing
> with Linux binaries

ok

>
> +                               if (code >= p->p_sysent->sv_size)
> +                                       callp = &p->p_sysent->sv_table[0];
> +                               else
> +                                       callp = &p->p_sysent->sv_table[code];
> +
> +                               narg = callp->sy_narg;
> +                               /* retry ends */
> +                       }
> +               }
> +
>                 AUDIT_SYSCALL_ENTER(code, td);
>                 error = (*callp->sy_call)(td, args);
>                 AUDIT_SYSCALL_EXIT(error, td);
> Index: i386/linux/linux_ptrace.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/i386/linux/linux_ptrace.c,v
> retrieving revision 1.17
> diff -u -r1.17 linux_ptrace.c
> --- i386/linux/linux_ptrace.c   22 Feb 2006 18:57:49 -0000      1.17
> +++ i386/linux/linux_ptrace.c   22 Aug 2007 09:27:01 -0000
> @@ -78,6 +78,7 @@
>  #define PTRACE_SETFPXREGS      19
>
>  #define PTRACE_SETOPTIONS      21
> +#define PTRACE_O_TRACESYSGOOD  0x00000001
>
>  /*
>   * Linux keeps debug registers at the following
> @@ -95,6 +96,10 @@
>         return ((signum == SIGSTOP)? 0 : signum);
>  }
>
> +struct linux_pt_lreg {
> +       l_long reg[19];
> +};
> +
>  struct linux_pt_reg {
>         l_long  ebx;
>         l_long  ecx;
> @@ -103,17 +108,17 @@
>         l_long  edi;
>         l_long  ebp;
>         l_long  eax;
> -       l_int   xds;
> -       l_int   xes;
> -       l_int   xfs;
> -       l_int   xgs;
> +       l_long  xds;
> +       l_long  xes;
> +       l_long  xfs;
> +       l_long  xgs;
>         l_long  orig_eax;
>         l_long  eip;
> -       l_int   xcs;
> +       l_long  xcs;
>         l_long  eflags;
>         l_long  esp;
> -       l_int   xss;
> -};
> +       l_long  xss;
> +} __packed;
>
> why is this necessary? how does it affect amd64 linux32 emulator?

I'll need to re-check this. It should not access segment registers.


>
>  /*
>   *   Translate i386 ptrace registers between Linux and FreeBSD formats.
> @@ -247,6 +252,7 @@
>                 struct linux_pt_reg     reg;
>                 struct linux_pt_fpreg   fpreg;
>                 struct linux_pt_fpxreg  fpxreg;
> +               struct linux_pt_lreg    lreg;
>         } r;
>         union {
>                 struct reg              bsd_reg;
> @@ -429,20 +435,21 @@
>                  * as necessary.
>                  */
>                 if (uap->addr < sizeof(struct linux_pt_reg)) {
> +                       if (uap->addr == (11 << 2)) /* orig_eax */
> +                               uap->addr = (6 << 2); /* eax */
> +
>                         error = kern_ptrace(td, PT_GETREGS, pid, &u.bsd_reg, 0);
>                         if (error != 0)
>                                 break;
>
>                         map_regs_to_linux(&u.bsd_reg, &r.reg);
>                         if (req == PTRACE_PEEKUSR) {
> -                               error = copyout((char *)&r.reg + uap->addr,
> -                                   (void *)uap->data, sizeof(l_int));
> +                               error = copyout((l_long*)(&r.lreg.reg[uap->addr>>2]),
> +                                   (void *)uap->data, sizeof(l_long));
>                                 break;
>                         }
>
> -                       *(l_int *)((char *)&r.reg + uap->addr) =
> -                           (l_int)uap->data;
> -
> +                       r.lreg.reg[uap->addr>>2] = (l_long)uap->data;
>                         map_regs_from_linux(&u.bsd_reg, &r.reg);
>                         error = kern_ptrace(td, PT_SETREGS, pid, &u.bsd_reg, 0);
>                 }
> @@ -470,11 +477,34 @@
>                         error = kern_ptrace(td, PT_SETDBREGS, pid,
>                             &u.bsd_dbreg, 0);
>                 }
> -
> +       }
> +               break;
> +       case PTRACE_SETOPTIONS: {
> +               struct proc *p;
> +               if (uap->data == PTRACE_O_TRACESYSGOOD) {
> +                       p = td->td_proc;
> +                       PROC_LOCK(p);
> +                       p->p_stops |= S_PT_SYSGOOD;
> +                       PROC_UNLOCK(p);
> +                       break;
> +               }
> +               printf("linux: ptrace(21,...,%u) not implemented\n",
> +                   (unsigned int)uap->data);
> +               error = EINVAL;
> +               }
>                 break;
>
> braces around case "case" ? please remove (the blocking there is implicit) and introduce
> procedure-wide "p"

Similar blocks was in original code, I were trying to keep in it's style.

>
> +       case PTRACE_SYSCALL: {
> +               struct proc *p;
> +
> +               p = td->td_proc;
> +               PROC_LOCK(p);
> +               p->p_stops |= S_PT_LINUX;
> +               PROC_UNLOCK(p);
> +
> +               if (addr == NULL) addr = (void *)1;
> +               error = kern_ptrace(td, PT_SYSCALL, pid, addr, uap->data);
>         }
> -       case PTRACE_SYSCALL:
> -               /* fall through */
> +               break;
>
> ditto.
>
>         default:
>                 printf("linux: ptrace(%u, ...) not implemented\n",
>                     (unsigned int)uap->req);
> Index: sys/cdefs.h
> ===================================================================
> RCS file: /home/ncvs/src/sys/sys/cdefs.h,v
> retrieving revision 1.93
> diff -u -r1.93 cdefs.h
> --- sys/cdefs.h 21 Sep 2006 01:38:58 -0000      1.93
> +++ sys/cdefs.h 10 Aug 2007 18:01:34 -0000
> @@ -338,6 +338,10 @@
>  #endif
>
>  /* Compiler-dependent macros that rely on FreeBSD-specific extensions. */
> +#ifndef __FreeBSD_cc_version
> +#define __FreeBSD_cc_version 0
> +#endif
> +
>  #if __FreeBSD_cc_version >= 300001 && defined(__GNUC__) && !defined(__INTEL_COMPILER)
>  #define        __printf0like(fmtarg, firstvararg) \
>             __attribute__((__format__ (__printf0__, fmtarg, firstvararg)))
> Index: sys/ptrace.h
> ===================================================================
> RCS file: /home/ncvs/src/sys/sys/ptrace.h,v
> retrieving revision 1.28
> diff -u -r1.28 ptrace.h
> --- sys/ptrace.h        6 Feb 2006 09:41:56 -0000       1.28
> +++ sys/ptrace.h        22 Aug 2007 08:53:45 -0000
> @@ -103,7 +103,12 @@
>  #define        PTRACESTOP_SC(p, td, flag)                              \
>         if ((p)->p_flag & P_TRACED && (p)->p_stops & (flag)) {  \
>                 PROC_LOCK(p);                                   \
> -               ptracestop((td), SIGTRAP);                      \
> +               if (__predict_false(p->p_sysent->sv_name[0]=='L')) { \
>
> please use p->p_sysent instead like stated above
>
> +                       (p)->p_stops &= ~(S_PT_SCE | S_PT_SCX); \
> +                       ptracestop((td), SIGTRAP | 0x80);       \
> +                       }                                       \
> +               else                                            \
> +                       ptracestop((td), SIGTRAP);              \
>                 PROC_UNLOCK(p);                                 \
>         }
>  /*
> @@ -112,6 +117,16 @@
>   */
>  #define        S_PT_SCE        0x000010000
>  #define        S_PT_SCX        0x000020000
> +/*
> + * Linux ptrace conventions: clear S_PT_SCE and S_PT_SCX before raising
> + * signals
> + */
> +#define S_PT_LINUX     0x000040000
> +/*
> + * Linux ptrace option PTRACE_O_TRACESYSGOOD, when enabled, changes signal
> + * number to ( SIGTRAP | 0x80 )
> + */
> +#define S_PT_SYSGOOD   0x000080000
>
>  int    ptrace_set_pc(struct thread *_td, unsigned long _addr);
>  int    ptrace_single_step(struct thread *_td);
> Index: compat/linux/linux_misc.c
> ===================================================================
> RCS file: /home/ncvs/src/sys/compat/linux/linux_misc.c,v
> retrieving revision 1.213
> diff -u -r1.213 linux_misc.c
> --- compat/linux/linux_misc.c   12 Jun 2007 00:11:57 -0000      1.213
> +++ compat/linux/linux_misc.c   22 Aug 2007 09:07:34 -0000
> @@ -63,6 +63,7 @@
>  #include <sys/vmmeter.h>
>  #include <sys/vnode.h>
>  #include <sys/wait.h>
> +#include <sys/ptrace.h>
>
>  #include <security/mac/mac_framework.h>
>
> @@ -852,6 +853,8 @@
>
>         if (args->status) {
>                 tmpstat &= 0xffff;
> +               if (!(td->td_proc->p_stops & S_PT_SYSGOOD))
> +                       tmpstat &= 0x7fff;
>                 if (WIFSIGNALED(tmpstat))
>                         tmpstat = (tmpstat & 0xffffff80) |
>                             BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
> @@ -898,6 +901,8 @@
>
>         if (args->status) {
>                 tmpstat &= 0xffff;
> +               if (!(td->td_proc->p_stops & S_PT_SYSGOOD))
> +                       tmpstat &= 0x7fff;
>                 if (WIFSIGNALED(tmpstat))
>                         tmpstat = (tmpstat & 0xffffff80) |
>                             BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
>
>
> thnx for the patch!

Thanks for review,

I'm also trying to get PTRACE_SYSEMU to work (looks like the same as
PTRACE_SYSCTL, but it does not call syscall handler and relies on
SIGTRAP handler to do all syscall work).

UM Linux also uses PTRACE_FAULTINFO (returns pointer to something,
like a last page fault address)  and PTRACE_LDT (updates a process LDT
entry), but they are not included in stock Linux kernel and there is
no man page describing their behavior. It boots without them.

bad news that UM Linux can't access hdd image ("hostfs" does not work
too)-- it hangs detecting partitions.

Yuriy.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?c019b3090708222234i497da7cax488c7343471ed6c3>