/*	$NetBSD$	*/

/*-
 * Copyright (c) 2017 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_xen.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD$");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/callout.h>
#include <sys/cpu.h>
#include <sys/device.h>
#include <sys/evcnt.h>
#include <sys/intr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/lwp.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/timetc.h>

#include <dev/clock_subr.h>

#include <machine/cpu.h>
#include <machine/cpu_counter.h>
#include <machine/intr.h>

#include <x86/rtc.h>

#include <xen/evtchn.h>
#include <xen/hypervisor.h>
#include <xen/xen.h>

#define	NS_PER_TICK	(1000000000ULL/hz)

static uint64_t	xen_vcputime_systime_ns(volatile struct vcpu_time_info *);
static uint64_t	xen_vcputime_raw_systime_ns(volatile struct vcpu_time_info *);
static void	xen_wallclock_time(struct timespec *);
static uint64_t	xen_global_systime_ns(void);
static unsigned	xen_get_timecount(struct timecounter *);
static int	xen_rtc_get(struct todr_chip_handle *, struct timeval *);
static int	xen_rtc_set(struct todr_chip_handle *, struct timeval *);
static int	xen_timer_handler(void *, struct intrframe *);

/*
 * xen timecounter:
 *
 *	Xen vCPU system time, plus an adjustment with rdtsc.
 */
static struct timecounter xen_timecounter = {
	.tc_get_timecount = xen_get_timecount,
	.tc_poll_pps = NULL,
	.tc_counter_mask = ~0U,
	.tc_frequency = 1000000000ULL,	/* 1 GHz, i.e. units of nanoseconds */
	.tc_name = "xen_system_time",
	.tc_quality = 10000,
};

/*
 * xen_global_systime_ns_stamp
 *
 *	The latest Xen vCPU system time that has been observed on any
 *	CPU, for a global monotonic view of the Xen system time clock.
 */
static volatile uint64_t xen_global_systime_ns_stamp __cacheline_aligned;

/*
 * xen time of day register:
 *
 *	Xen wall clock time, plus a Xen vCPU system time adjustment.
 */
static struct todr_chip_handle xen_todr_chip = {
	.todr_gettime = xen_rtc_get,
	.todr_settime = xen_rtc_set,
};

#ifdef DOM0OPS
/*
 * xen timepush state:
 *
 *	Callout to periodically, after a sysctl-configurable number of
 *	NetBSD ticks, set the Xen hypervisor's wall clock time.
 */
static struct {
	struct callout	ch;
	int		ticks;
} xen_timepush;

static void	xen_timepush_init(void);
static void	xen_timepush_intr(void *);
static int	sysctl_xen_timepush(SYSCTLFN_ARGS);
#endif
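/*
 * Note on the timecounter parameters above (standard timecounter(9)
 * arithmetic, not anything Xen-specific): with tc_frequency = 1 GHz
 * and a full 32-bit mask, the count wraps every 2^32 ns, about
 * 4.29 s, so the timecounter framework must harvest it more often
 * than that; the hardclock ticks (every 1/hz s, e.g. 10 ms assuming
 * hz = 100) do so comfortably.
 */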
/*
 * startrtclock()
 *
 *	Initialize the real-time clock from x86 machdep autoconf.
 */
void
startrtclock(void)
{

	todr_attach(&xen_todr_chip);
}

/*
 * setstatclockrate(rate)
 *
 *	Set the statclock to run at rate, in units of ticks per second.
 *
 *	Currently Xen does not have a separate statclock, so this is a
 *	noop; instead the statclock runs in hardclock.
 */
void
setstatclockrate(int rate)
{
}

/*
 * idle_block()
 *
 *	Called from the idle loop when we have nothing to do but wait
 *	for an interrupt.
 */
void
idle_block(void)
{

	KASSERT(curcpu()->ci_ipending == 0);
	HYPERVISOR_block();
}

/*****************************************************************************
###################### XXX BEGIN KLUDGERIFIC X86ISMS ########################
*****************************************************************************/

/*
 * xen_rdtsc_fence()
 *
 *	Wait for all prior instructions to complete before allowing any
 *	subsequent xen_rdtsc() to begin.  Subsequent instructions may
 *	be reordered to start earlier, however.
 *
 *	In principle, this could be a noop if xen_rdtsc did rdtscp.
 *	However, I'm not sure we can rely on rdtscp in Xen, if we can
 *	use it at all.  On Intel CPUs, according to the manuals, LFENCE
 *	is enough; on AMD CPUs, according to the Linux source code,
 *	MFENCE is needed.
 */
static inline void
xen_rdtsc_fence(void)
{

	x86_mfence();
}

/*
 * xen_rdtsc()
 *
 *	Read the tsc after all instructions before the prior
 *	xen_rdtsc_fence() have completed.
 *
 *	In principle, this could be an rdtscp, and xen_rdtsc_fence
 *	could be a noop.  However, I'm not sure we can rely on rdtscp
 *	in Xen, if we can use it at all.
 */
static inline uint64_t
xen_rdtsc(void)
{
	uint32_t lo, hi;

	asm volatile("rdtsc" : "=a"(lo), "=d"(hi));

	return curcpu()->ci_data.cpu_cc_skew + (((uint64_t)hi << 32) | lo);
}

/*****************************************************************************
###################### XXX END KLUDGERIFIC X86ISMS ##########################
*****************************************************************************/

/*
 * struct xen_vcputime_ticket
 *
 *	State for a vCPU read section, during which a caller may read
 *	from fields of a struct vcpu_time_info.  Caller must enter with
 *	xen_vcputime_enter, exit with xen_vcputime_exit, and be
 *	prepared to retry if xen_vcputime_exit fails.
 */
struct xen_vcputime_ticket {
	uint64_t	version;
};

/*
 * xen_vcputime_enter(vt, tp)
 *
 *	Enter a vCPU time read section and store a ticket in *tp, which
 *	the caller must use with xen_vcputime_exit.
 */
static inline void
xen_vcputime_enter(volatile struct vcpu_time_info *vt,
    struct xen_vcputime_ticket *tp)
{

	KASSERT(vt == &curcpu()->ci_vcpu->time);

	while (1 & (tp->version = vt->version))
		SPINLOCK_BACKOFF_HOOK;
	membar_consumer();
}

/*
 * xen_vcputime_exit(vt, tp)
 *
 *	Exit a vCPU time read section with the ticket in *tp from
 *	xen_vcputime_enter.  Return true on success, false if caller
 *	must retry.
 */
static inline bool
xen_vcputime_exit(volatile struct vcpu_time_info *vt,
    struct xen_vcputime_ticket *tp)
{

	KASSERT(vt == &curcpu()->ci_vcpu->time);

	membar_consumer();
	return tp->version == vt->version;
}

/*
 * xen_tsc_to_ns_delta(delta_tsc, tsc_to_system_mul, tsc_shift)
 *
 *	Convert a difference in tsc units to a difference in
 *	nanoseconds given a multiplier and shift for the unit
 *	conversion.
 */
static inline uint64_t
xen_tsc_to_ns_delta(uint64_t delta_tsc, uint32_t tsc_to_system_mul,
    int8_t tsc_shift)
{
	uint32_t delta_tsc_hi, delta_tsc_lo;

	if (tsc_shift < 0)
		delta_tsc >>= -tsc_shift;
	else
		delta_tsc <<= tsc_shift;

	delta_tsc_hi = delta_tsc >> 32;
	delta_tsc_lo = delta_tsc & 0xffffffffUL;

	/* d*m/2^32 = (2^32 d_h + d_l)*m/2^32 = d_h*m + (d_l*m)/2^32 */
	return ((uint64_t)delta_tsc_hi * tsc_to_system_mul) +
	    (((uint64_t)delta_tsc_lo * tsc_to_system_mul) >> 32);
}
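/*
 * Worked example for the conversion above (illustrative values, not
 * anything the hypervisor guarantees): for a 2 GHz tsc, Xen could
 * advertise tsc_shift = 0 and tsc_to_system_mul = 0x80000000
 * (= 2^31, i.e. the fraction 1/2 in 0.32 fixed point).  Then a delta
 * of 3000000 cycles converts to
 *
 *	xen_tsc_to_ns_delta(3000000, 0x80000000, 0)
 *	    = (3000000 * 2^31) >> 32 = 1500000 ns = 1.5 ms,
 *
 * as expected for a clock that ticks twice per nanosecond.
 */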
/*
 * xen_vcputime_systime_ns(vt)
 *
 *	Return a snapshot of the Xen system time plus an adjustment
 *	from the tsc, in units of nanoseconds.
 */
static uint64_t
xen_vcputime_systime_ns(volatile struct vcpu_time_info *vt)
{
	struct xen_vcputime_ticket ticket;
	uint64_t systime_ns, cached_tsc, fresh_tsc, delta_tsc, delta_ns;
	uint64_t cached_delta_tsc, cached_delta_ns;

	/* We'd better be bound to the CPU in _some_ way.  */
	KASSERT(cpu_intr_p() || cpu_softintr_p() || kpreempt_disabled() ||
	    (curlwp->l_flag & LP_BOUND));
	KASSERT(vt == &curcpu()->ci_vcpu->time);

	/*
	 * Repeatedly try to read the system time, corresponding tsc
	 * timestamp, and tsc frequency until we get a consistent view.
	 */
	do {
		xen_vcputime_enter(vt, &ticket);
		systime_ns = vt->system_time;
		cached_tsc = vt->tsc_timestamp;
		fresh_tsc = xen_rdtsc();
		delta_tsc = __predict_false(fresh_tsc < cached_tsc) ? 0 :
		    fresh_tsc - cached_tsc;
		delta_ns = xen_tsc_to_ns_delta(delta_tsc,
		    vt->tsc_to_system_mul, vt->tsc_shift);
		cached_delta_tsc = __predict_false(cached_tsc <
			curcpu()->ci_xen_last_tsc_timestamp) ?
		    curcpu()->ci_xen_last_tsc_timestamp - cached_tsc : 0;
		cached_delta_ns = xen_tsc_to_ns_delta(cached_delta_tsc,
		    vt->tsc_to_system_mul, vt->tsc_shift);
	} while (!xen_vcputime_exit(vt, &ticket));

	/*
	 * Notify the console if the Xen hypervisor's raw system_time
	 * ran backwards.  This shouldn't happen because the Xen
	 * hypervisor is supposed to be smarter than that.
	 */
	if (__predict_false(systime_ns <
		curcpu()->ci_xen_last_raw_systime_ns)) {
#if 0				/* XXX too noisy */
		printf("xen vcpu_time_info system_time ran backwards"
		    " %"PRIu64"ns\n",
		    curcpu()->ci_xen_last_raw_systime_ns - systime_ns);
#endif
	} else if (__predict_false((systime_ns -
		    curcpu()->ci_xen_last_raw_systime_ns) <
		cached_delta_ns)) {
		printf("xen system time advanced but tsc retreated more:"
		    " systime delta %"PRIu64"ns <"
		    " tsc timestamp delta %"PRIu64" = %"PRIu64"ns\n",
		    systime_ns - curcpu()->ci_xen_last_raw_systime_ns,
		    cached_delta_tsc, cached_delta_ns);
	}

	/*
	 * Notify the console if the Xen tsc timestamp ran backwards
	 * while the system time remained the same.  This shouldn't
	 * happen because the Xen hypervisor is supposed to be smarter
	 * than that.
	 *
	 * XXX But this is also a rather rigid criterion.  What if the
	 * systime advances just a wee bit, but the tsc timestamp
	 * retreats a lot?
	 */
	if (__predict_false((systime_ns ==
		    curcpu()->ci_xen_last_raw_systime_ns) &&
		(cached_tsc < curcpu()->ci_xen_last_tsc_timestamp))) {
		printf("xen vcpu_time_info tsc_timestamp ran backwards"
		    " %"PRIu64"\n",
		    curcpu()->ci_xen_last_tsc_timestamp - cached_tsc);
	}

	/*
	 * Notify the console if the CPU's tsc ran backwards.  This
	 * shouldn't happen because the CPU tsc isn't supposed to
	 * change, although maybe in cases of migration it will.
	 */
	if (__predict_false(fresh_tsc < curcpu()->ci_xen_last_tsc)) {
		printf("xen cpu tsc ran backwards %"PRIu64"\n",
		    curcpu()->ci_xen_last_tsc - fresh_tsc);
	}

	/*
	 * Notify the console if the CPU's tsc appeared to run behind
	 * Xen's idea of the tsc.  This shouldn't happen because the
	 * Xen hypervisor is supposed to have read the tsc _before_
	 * writing to the vcpu_time_info page, _before_ we read the
	 * tsc.  Further, if we switched pCPUs after reading the tsc
	 * timestamp but before reading the CPU's tsc, the hypervisor
	 * had better notify us by updating the version too and forcing
	 * us to retry the vCPU time read.
	 */
	if (__predict_false(fresh_tsc < cached_tsc)) {
		printf("xen cpu tsc %"PRIu64
		    " ran backwards from timestamp %"PRIu64
		    " by %"PRIu64"\n",
		    fresh_tsc, cached_tsc, cached_tsc - fresh_tsc);
	}

	/*
	 * Notify the console if the delta computation yielded a
	 * negative.
	 */
	if (__predict_false((int64_t)delta_ns < 0)) {
		printf("xen tsc delta in ns went negative: %"PRId64"\n",
		    (int64_t)delta_ns);
	}

	/*
	 * Notify the console if the addition will wrap around.
	 */
	if (__predict_false((systime_ns + delta_ns) < systime_ns)) {
		printf("xen systime + delta wrapped around:"
		    " %"PRIu64" + %"PRIu64" = %"PRIu64"\n",
		    systime_ns, delta_ns, systime_ns + delta_ns);
	}

	/*
	 * There remains one possibility we do NOT detect here: the Xen
	 * raw system time advances by d_0 > 0 while the tsc delta we
	 * add to it simultaneously shrinks by d_1 > d_0, so that the
	 * sum -- the value we return -- goes backwards even though
	 * neither quantity ran backwards on its own.  Catching that
	 * would require remembering the last value we returned on this
	 * CPU.
	 */

	/* Remember the various timestamps.  */
	curcpu()->ci_xen_last_raw_systime_ns = systime_ns;
	curcpu()->ci_xen_last_tsc_timestamp = cached_tsc;
	curcpu()->ci_xen_last_tsc = fresh_tsc;

	KASSERT(vt == &curcpu()->ci_vcpu->time);

	/* Add the delta to the raw system time, in nanoseconds.  */
	return systime_ns + delta_ns;
}
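/*
 * Recapping the computation above as one formula (a restatement of
 * the code, not an additional guarantee):
 *
 *	systime_ns = system_time
 *	    + xen_tsc_to_ns_delta(rdtsc() - tsc_timestamp,
 *		tsc_to_system_mul, tsc_shift)
 *
 * where system_time, tsc_timestamp, tsc_to_system_mul, and tsc_shift
 * all come from a single consistent snapshot of the vcpu_time_info
 * page, as enforced by the enter/exit retry protocol.
 */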
/*
 * xen_vcputime_raw_systime_ns(vt)
 *
 *	Return a snapshot of the current Xen system time to the
 *	resolution of the Xen hypervisor tick, in units of nanoseconds.
 */
static uint64_t
xen_vcputime_raw_systime_ns(volatile struct vcpu_time_info *vt)
{
	struct xen_vcputime_ticket ticket;
	uint64_t systime_ns;

	KASSERT(vt == &curcpu()->ci_vcpu->time);

	do {
		xen_vcputime_enter(vt, &ticket);
		systime_ns = vt->system_time;
	} while (!xen_vcputime_exit(vt, &ticket));

	KASSERT(vt == &curcpu()->ci_vcpu->time);

	return systime_ns;
}

/*
 * struct xen_wallclock_ticket
 *
 *	State for a wall clock read section, during which a caller may
 *	read from the wall clock fields of HYPERVISOR_shared_info.
 *	Caller must enter with xen_wallclock_enter, exit with
 *	xen_wallclock_exit, and be prepared to retry if
 *	xen_wallclock_exit fails.
 */
struct xen_wallclock_ticket {
	uint32_t version;
};

/*
 * xen_wallclock_enter(tp)
 *
 *	Enter a wall clock read section and store a ticket in *tp,
 *	which the caller must use with xen_wallclock_exit.  Caller must
 *	be prepared to retry if xen_wallclock_exit fails.  During a
 *	wall clock read section, caller may read from the wall clock
 *	fields of HYPERVISOR_shared_info.
 */
static inline void
xen_wallclock_enter(struct xen_wallclock_ticket *tp)
{

	while (1 & (tp->version = HYPERVISOR_shared_info->wc_version))
		SPINLOCK_BACKOFF_HOOK;
	membar_consumer();
}

/*
 * xen_wallclock_exit(tp)
 *
 *	Exit a wall clock read section with the ticket in *tp from
 *	xen_wallclock_enter.  Return true on success, false if caller
 *	must retry.
 */
static inline bool
xen_wallclock_exit(struct xen_wallclock_ticket *tp)
{

	membar_consumer();
	return tp->version == HYPERVISOR_shared_info->wc_version;
}
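/*
 * Illustration of how the wall clock composes with the system time
 * (numbers invented): the hypervisor's wc_sec/wc_nsec pair gives the
 * wall clock time at the moment the system time was zero, so the
 * current wall clock time is wc plus the system time.  E.g., with
 * wc = 1500000000.600000000 and a system time of 3.7e9 ns, the
 * result is 1500000004.300000000.
 */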
/*
 * xen_wallclock_time(tsp)
 *
 *	Return a snapshot of the current low-resolution wall clock
 *	time, as reported by the hypervisor, in tsp.
 */
static void
xen_wallclock_time(struct timespec *tsp)
{
	struct xen_wallclock_ticket ticket;
	uint64_t systime_ns;
	int bound;

	/* Prevent switching CPUs while we read the vCPU system time.  */
	bound = curlwp_bind();

	/* Get the vCPU system time.  */
	systime_ns = xen_vcputime_systime_ns(&curcpu()->ci_vcpu->time);

	/* Read the last wall clock sample from the hypervisor.  */
	do {
		xen_wallclock_enter(&ticket);
		tsp->tv_sec = HYPERVISOR_shared_info->wc_sec;
		tsp->tv_nsec = HYPERVISOR_shared_info->wc_nsec;
	} while (!xen_wallclock_exit(&ticket));

	/* All done on the CPU.  */
	curlwp_bindx(bound);

	/* Add the system time to the wall clock time.  */
	systime_ns += tsp->tv_nsec;
	tsp->tv_sec += systime_ns / 1000000000ull;
	tsp->tv_nsec = systime_ns % 1000000000ull;
}

/*
 * xen_global_systime_ns()
 *
 *	Return a global monotonic view of the system time in
 *	nanoseconds, computed by the per-CPU Xen raw system time plus
 *	an rdtsc adjustment, and advance the view of the system time
 *	for all other CPUs.
 */
static uint64_t
xen_global_systime_ns(void)
{
	uint64_t local, global, answer;
	int bound;

	/*
	 * Find the local timecount on this CPU, and make sure it does
	 * not precede the latest global timecount witnessed so far by
	 * any CPU.
	 *
	 * If the CAS fails, then someone else has just advanced the
	 * clock further into the future than we saw, so retry against
	 * the stamp they published: whatever we return must not
	 * precede it.  The next time we try to read the global
	 * monotonic clock, we will apply MAX again, &c.
	 *
	 * XXX Needs a slightly more detailed argument to give me
	 * confidence.
	 */
	bound = curlwp_bind();
	local = xen_vcputime_systime_ns(&curcpu()->ci_vcpu->time);
	do {
		global = xen_global_systime_ns_stamp;
		answer = MAX(local, global + 1);
	} while (atomic_cas_64(&xen_global_systime_ns_stamp, global, answer)
	    != global);
	curlwp_bindx(bound);

	return answer;
}
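/*
 * Illustration of the invariant, with invented numbers in ns: CPU A
 * reads a local view of 100 while the stamp is 90, publishes 100,
 * and returns 100.  CPU B then reads a lagging local view of 95,
 * finds the stamp at 100, and so publishes and returns 101.  The
 * later read returns the later value even though B's vCPU clock was
 * behind A's.
 */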
/*
 * xen_get_timecount(tc)
 *
 *	Return the low 32 bits of a global monotonic view of the Xen
 *	system time.
 */
static unsigned
xen_get_timecount(struct timecounter *tc)
{

	KASSERT(tc == &xen_timecounter);

	return (unsigned)xen_global_systime_ns();
}

/*
 * xen_rtc_get(todr, tv)
 *
 *	Get the current real-time clock from the Xen wall clock time
 *	and vCPU system time adjustment.
 */
static int
xen_rtc_get(struct todr_chip_handle *todr, struct timeval *tvp)
{
	struct timespec ts;

	xen_wallclock_time(&ts);
	TIMESPEC_TO_TIMEVAL(tvp, &ts);

	return 0;
}

/*
 * xen_rtc_set(todr, tv)
 *
 *	Set the Xen wall clock time, if we can.
 */
static int
xen_rtc_set(struct todr_chip_handle *todr, struct timeval *tvp)
{
#ifdef DOM0OPS
	struct clock_ymdhms dt;
#if __XEN_INTERFACE_VERSION__ < 0x00030204
	dom0_op_t op;
#else
	xen_platform_op_t op;
#endif
	uint64_t systime_ns;

	if (xendomain_is_privileged()) {
		/* Convert to ymdhms and set the x86 ISA RTC.  */
		clock_secs_to_ymdhms(tvp->tv_sec, &dt);
		rtc_set_ymdhms(NULL, &dt);

		/* Get the global system time so we can preserve it.  */
		systime_ns = xen_global_systime_ns();

		/* Set the hypervisor wall clock time.  */
		op.u.settime.secs = tvp->tv_sec;
		op.u.settime.nsecs = tvp->tv_usec * 1000;
		op.u.settime.system_time = systime_ns;
#if __XEN_INTERFACE_VERSION__ < 0x00030204
		op.cmd = DOM0_SETTIME;
		return HYPERVISOR_dom0_op(&op);
#else
		op.cmd = XENPF_settime;
		return HYPERVISOR_platform_op(&op);
#endif
	}
#endif

	/* XXX Should this fail if not on privileged dom0?  */
	return 0;
}

/*
 * xen_delay(n)
 *
 *	Wait approximately n microseconds.
 */
void
xen_delay(unsigned n)
{
	struct cpu_info *ci;
	int bound;

	/* Bind to the CPU so we don't compare tsc on different CPUs.  */
	bound = curlwp_bind();
	ci = curcpu();

	if (n < 500000) {
		/*
		 * Xen system time is not precise enough for short
		 * delays, so use the tsc instead.
		 */
		uint64_t start, end;

		/*
		 * Get the start and end times.
		 *
		 * XXX cpu_frequency(ci) can easily get stale, from my
		 * cursory read of cpu_get_tsc_freq.
		 */
		start = xen_rdtsc();
		end = start + ((uint64_t)n * cpu_frequency(ci))/1000000;

		/* If the end time wrapped around, wait for us to wrap.  */
		if (end < start) {
			do {
				xen_rdtsc_fence();
			} while (start <= xen_rdtsc());
		}

		/* Wait until we've passed the end.  */
		do {
			xen_rdtsc_fence();
		} while (xen_rdtsc() < end);
	} else {
		/* Use the Xen system time.  */
		volatile struct vcpu_time_info *t = &ci->ci_vcpu->time;
		uint64_t start, end;

		/*
		 * Get the start and end times.
		 *
		 * Nanoseconds since boot takes centuries to overflow,
		 * so no need to worry about wrapping.  We do not
		 * bother with tsc adjustment for delays this long.
		 *
		 * XXX Do we ever need to issue delays this long?  That
		 * seems likely to be a bug.
		 */
		start = xen_vcputime_raw_systime_ns(t);
		end = start + 1000*(uint64_t)n;

		/* Wait until the system time has passed the end.  */
		do {
			HYPERVISOR_yield();
		} while (xen_vcputime_raw_systime_ns(t) < end);
	}

	/* Unbind from the CPU, restoring our previous binding.  */
	curlwp_bindx(bound);
}
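/*
 * Worked example for the short-delay path above (illustrative
 * numbers): assuming cpu_frequency(ci) = 2400000000 (2.4 GHz),
 * xen_delay(100) computes
 *
 *	end = start + (100 * 2400000000)/1000000 = start + 240000,
 *
 * i.e. a busy-wait of 240000 tsc cycles, which is 100 us at that
 * clock rate.
 */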
/*
 * xen_suspendclocks(ci)
 *
 *	Stop handling the Xen timer event on the CPU of ci.  Caller
 *	must be running on and bound to ci's CPU.
 *
 *	Actually, caller must have kpreemption disabled, because that's
 *	easier to assert at the moment.
 */
void
xen_suspendclocks(struct cpu_info *ci)
{
	int evtch;

	KASSERT(ci == curcpu());
	KASSERT(kpreempt_disabled());

	evtch = unbind_virq_from_evtch(VIRQ_TIMER);
	KASSERT(evtch != -1);

	hypervisor_mask_event(evtch);
	event_remove_handler(evtch, (int (*)(void *))xen_timer_handler, ci);

	aprint_verbose("Xen clock: removed event channel %d\n", evtch);
}

/*
 * xen_resumeclocks(ci)
 *
 *	Start handling the Xen timer event on the CPU of ci.  Caller
 *	must be running on and bound to ci's CPU.
 *
 *	Actually, caller must have kpreemption disabled, because that's
 *	easier to assert at the moment.
 */
void
xen_resumeclocks(struct cpu_info *ci)
{
	int evtch;

	KASSERT(ci == curcpu());
	KASSERT(kpreempt_disabled());

	evtch = bind_virq_to_evtch(VIRQ_TIMER);
	KASSERT(evtch != -1);

	/* XXX sketchy function pointer cast */
	event_set_handler(evtch, (int (*)(void *))xen_timer_handler,
	    ci, IPL_CLOCK, "clock");
	hypervisor_enable_event(evtch);

	aprint_verbose("Xen clock: using event channel %d\n", evtch);
}

/*
 * xen_timer_handler(cookie, regs)
 *
 *	Periodic Xen timer event handler for NetBSD hardclock.  Calls
 *	to this may get delayed, so we run hardclock as many times as
 *	we need to in order to cover the Xen system time that elapsed.
 *	After that, re-arm the timer to run again at the next tick.
 */
static int
xen_timer_handler(void *cookie, struct intrframe *regs)
{
	struct cpu_info *ci = cookie;
	uint64_t last, now, delta, next;
	int error;

	KASSERT(cpu_intr_p());
	KASSERT(ci == curcpu());

again:
	/*
	 * Find how many nanoseconds of Xen system time have elapsed
	 * since the last hardclock tick.
	 */
	last = ci->ci_xen_hardclock_systime_ns;
	now = xen_vcputime_systime_ns(&ci->ci_vcpu->time);
	if (now < last) {
		printf("xen systime ran backwards in hardclock %"PRIu64"ns\n",
		    last - now);
		now = last;
	}
	delta = now - last;

#if 1
	if (delta >= NS_PER_TICK) {
		ci->ci_xen_hardclock_systime_ns = now;
		ci->ci_xen_systime_delta_ns = 0;
		hardclock((struct clockframe *)regs);
		ci->ci_xen_hardclock_evcnt.ev_count++;
	}
#else
	/*
	 * Run the hardclock timer as many times as necessary.  We
	 * maintain the charade that the Xen system time is as if we
	 * ticked every NS_PER_TICK nanoseconds exactly, by setting
	 * ci->ci_xen_systime_delta_ns to the current delta between
	 * the theoretical hardclock tick system time and the current
	 * system time.
	 */
	while (delta >= NS_PER_TICK) {
		ci->ci_xen_hardclock_systime_ns += NS_PER_TICK;
		ci->ci_xen_systime_delta_ns = (delta -= NS_PER_TICK);
		hardclock((struct clockframe *)regs);
		ci->ci_xen_hardclock_evcnt.ev_count++;
	}
#endif

	/*
	 * Re-arm the timer.  If it fails, it's probably because the
	 * time is in the past, so update our idea of what the Xen
	 * system time is and try again.
	 */
	next = ci->ci_xen_hardclock_systime_ns + NS_PER_TICK;
	error = HYPERVISOR_set_timer_op(next);
	if (error)
		goto again;

	/*
	 * Done with the charade about the Xen system time.  Restore
	 * the Xen system time delta to zero.
	 */
	ci->ci_xen_systime_delta_ns = 0;

	/* Success!  */
	return 0;
}
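/*
 * Illustrative arithmetic for the catch-up loop in the #else branch
 * of xen_timer_handler above (assuming hz = 100, so NS_PER_TICK is
 * 10000000 ns): if the vCPU was not run for 35 ms, delta starts at
 * 35000000, hardclock runs three times, and 5000000 ns remain in
 * ci->ci_xen_systime_delta_ns until the timer is re-armed.
 */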
/*
 * xen_initclocks()
 *
 *	Initialize the Xen clocks on the current CPU.
 */
void
xen_initclocks(void)
{
	struct cpu_info *ci = curcpu();
	int error;

	/* If this is the primary CPU, do global initialization first.  */
	if (ci == &cpu_info_primary) {
		/* Initialize the systemwide Xen timecounter.  */
		tc_init(&xen_timecounter);

#ifdef DOM0OPS
		/*
		 * If this is a privileged dom0, start pushing the wall
		 * clock time back to the Xen hypervisor.
		 */
		if (xendomain_is_privileged())
			xen_timepush_init();
#endif
	}

	/* Pretend the last hardclock happened right now.  */
	ci->ci_xen_hardclock_systime_ns =
	    xen_vcputime_systime_ns(&ci->ci_vcpu->time);
	ci->ci_xen_systime_delta_ns = 0;

	/* Attach the hardclock event counter.  */
	evcnt_attach_dynamic(&ci->ci_xen_hardclock_evcnt, EVCNT_TYPE_INTR,
	    NULL, device_xname(ci->ci_dev), "hardclock");

	/* Disarm the periodic timer on Xen>=3.1 which is allegedly buggy.  */
	if (XEN_MAJOR(xen_version) > 3 ||
	    (XEN_MAJOR(xen_version) == 3 && XEN_MINOR(xen_version) > 0)) {
		error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
		    ci->ci_cpuid, NULL);
		KASSERT(error == 0);
	}

	/* Arm the timer.  */
	error = HYPERVISOR_set_timer_op(ci->ci_xen_hardclock_systime_ns +
	    NS_PER_TICK);
	KASSERT(error == 0);

	/* Fire up the clocks.  */
	xen_resumeclocks(ci);
}

#ifdef DOM0OPS

/*
 * xen_timepush_init()
 *
 *	Initialize callout to periodically set Xen hypervisor's wall
 *	clock time.
 */
static void
xen_timepush_init(void)
{
	struct sysctllog *log = NULL;
	const struct sysctlnode *node = NULL;
	int error;

	/* Start periodically updating the hypervisor's wall clock time.  */
	callout_init(&xen_timepush.ch, 0);
	callout_setfunc(&xen_timepush.ch, xen_timepush_intr, NULL);

	/* Pick a default frequency for timepush.  */
	xen_timepush.ticks = 53*hz + 3;	/* avoid exact # of min/sec */

	/* Create machdep.xen node.  */
	/* XXX Creation of the `machdep.xen' node should be elsewhere.  */
	error = sysctl_createv(&log, 0, NULL, &node, 0,
	    CTLTYPE_NODE, "xen",
	    SYSCTL_DESCR("Xen top level node"),
	    NULL, 0, NULL, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
	if (error)
		goto fail;
	KASSERT(node != NULL);

	/* Create int machdep.xen.timepush_ticks knob.  */
	error = sysctl_createv(&log, 0, &node, NULL, CTLFLAG_READWRITE,
	    CTLTYPE_INT, "timepush_ticks",
	    SYSCTL_DESCR("How often to update the hypervisor's time-of-day;"
		" 0 to disable"),
	    sysctl_xen_timepush, 0, &xen_timepush.ticks, 0,
	    CTL_CREATE, CTL_EOL);
	if (error)
		goto fail;

	/* Start the timepush callout.  */
	callout_schedule(&xen_timepush.ch, xen_timepush.ticks);

	/* Success!  */
	return;

fail:	sysctl_teardown(&log);
}

/*
 * xen_timepush_intr(cookie)
 *
 *	Callout interrupt handler to push NetBSD's idea of the wall
 *	clock time, usually synchronized with NTP, back to the Xen
 *	hypervisor.
 */
static void
xen_timepush_intr(void *cookie)
{

	resettodr();
	if (xen_timepush.ticks)
		callout_schedule(&xen_timepush.ch, xen_timepush.ticks);
}

/*
 * sysctl_xen_timepush(...)
 *
 *	Sysctl handler to set machdep.xen.timepush_ticks.
 */
static int
sysctl_xen_timepush(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int ticks;
	int error;

	ticks = xen_timepush.ticks;
	node = *rnode;
	node.sysctl_data = &ticks;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (ticks < 0)
		return EINVAL;

	if (ticks != xen_timepush.ticks) {
		xen_timepush.ticks = ticks;

		if (ticks == 0)
			callout_stop(&xen_timepush.ch);
		else
			callout_schedule(&xen_timepush.ch, ticks);
	}

	return 0;
}

#endif	/* DOM0OPS */
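/*
 * Usage note for the knob above (an example invocation, assuming the
 * standard NetBSD sysctl(8) utility):
 *
 *	# sysctl -w machdep.xen.timepush_ticks=0
 *
 * disables pushing the wall clock time to the hypervisor; any
 * positive value sets the push period in ticks.
 */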