summaryrefslogtreecommitdiffstats
path: root/arch/um/kernel/time.c
blob: f4db89b5b5a6f5a61675bee36f1646cb1a744038 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
 * Copyright (C) 2012-2014 Cisco Systems
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright (C) 2019 Intel Corporation
 */

#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <asm/irq.h>
#include <asm/param.h>
#include <kern_util.h>
#include <os.h>
#include <linux/time-internal.h>
#include <linux/um_timetravel.h>
#include <shared/init.h>

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
/* currently selected time-travel mode, exported for use outside this file */
enum time_travel_mode time_travel_mode;
EXPORT_SYMBOL_GPL(time_travel_mode);

/* true once time_travel_start was given on the command line */
static bool time_travel_start_set;
/* offset (ns) added to time_travel_time for the persistent clock */
static unsigned long long time_travel_start;
/* current simulated time (ns), written by time_travel_set_time() */
static unsigned long long time_travel_time;
/* queued events, kept in ascending time order by __time_travel_add_event() */
static LIST_HEAD(time_travel_events);
/* events deferred while IRQs were off, see deliver_time_travel_irqs() */
static LIST_HEAD(time_travel_irqs);
/* period (ns) used when re-arming the periodic timer event */
static unsigned long long time_travel_timer_interval;
/* time of the first queued event as of the last __time_travel_add_event() */
static unsigned long long time_travel_next_event;
/* the event used for the clockevent device's timer */
static struct time_travel_event time_travel_timer_event;
/* fd of the socket to the external controller, -1 until connected */
static int time_travel_ext_fd = -1;
/* number of time_travel_ext_wait() calls currently in progress */
static unsigned int time_travel_ext_waiting;
/* last time sent with UM_TIMETRAVEL_REQUEST, and whether it still applies */
static bool time_travel_ext_prev_request_valid;
static unsigned long long time_travel_ext_prev_request;
/* time from the last UM_TIMETRAVEL_FREE_UNTIL, and whether it still applies */
static bool time_travel_ext_free_until_valid;
static unsigned long long time_travel_ext_free_until;

/*
 * Move the simulated clock to @ns.
 *
 * Panics if that would move time backwards, or if @ns is S64_MAX or
 * later (which is what a sleep without any pending event asks for).
 */
static void time_travel_set_time(unsigned long long ns)
{
	if (unlikely(ns < time_travel_time)) {
		panic("time-travel: time goes backwards %lld -> %lld\n",
		      time_travel_time, ns);
	}

	if (unlikely(ns >= S64_MAX))
		panic("The system was going to sleep forever, aborting");

	time_travel_time = ns;
}

/* how time_travel_handle_message() gets to the next message */
enum time_travel_message_handling {
	TTMH_IDLE,	/* poll before reading; IRQs must be disabled on entry */
	TTMH_POLL,	/* poll before reading */
	TTMH_READ,	/* read directly, without polling first */
};

/*
 * Read one message from the external time-travel socket into @msg and
 * process it.
 *
 * Unless @mode is TTMH_READ, the socket is polled first (with IRQs
 * enabled for the duration of the poll). An ACK is handed back to the
 * caller in @msg without a reply; for any other message an ACK carrying
 * the same sequence number is written back after it was processed:
 * RUN sets the simulated time, FREE_UNTIL records the new sync point,
 * anything else triggers a one-time warning.
 *
 * Panics if the read returns 0 or anything but a complete message.
 */
static void time_travel_handle_message(struct um_timetravel_msg *msg,
				       enum time_travel_message_handling mode)
{
	struct um_timetravel_msg resp = {
		.op = UM_TIMETRAVEL_ACK,
	};
	int ret;

	/*
	 * Poll outside the locked section (if we're not called to only read
	 * the response) so we can get interrupts for e.g. virtio while we're
	 * here, but then we need to lock to not get interrupted between the
	 * read of the message and write of the ACK.
	 */
	if (mode != TTMH_READ) {
		bool disabled = irqs_disabled();

		BUG_ON(mode == TTMH_IDLE && !disabled);

		if (disabled)
			local_irq_enable();
		while (os_poll(1, &time_travel_ext_fd) != 0) {
			/* nothing */
		}
		if (disabled)
			local_irq_disable();
	}

	ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));

	if (ret == 0)
		panic("time-travel external link is broken\n");
	if (ret != sizeof(*msg))
		panic("invalid time-travel message - %d bytes\n", ret);

	switch (msg->op) {
	default:
		WARN_ONCE(1, "time-travel: unexpected message %lld\n",
			  (unsigned long long)msg->op);
		break;
	case UM_TIMETRAVEL_ACK:
		/* ACKs are for the caller and are never ACKed themselves */
		return;
	case UM_TIMETRAVEL_RUN:
		time_travel_set_time(msg->time);
		break;
	case UM_TIMETRAVEL_FREE_UNTIL:
		time_travel_ext_free_until_valid = true;
		time_travel_ext_free_until = msg->time;
		break;
	}

	/* acknowledge using the sender's sequence number */
	resp.seq = msg->seq;
	os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
}

/*
 * Send request @op with argument @time over the external time-travel
 * socket and wait for its ACK, with IRQs disabled throughout.
 *
 * Messages other than the ACK that arrive in the meantime are processed
 * by time_travel_handle_message(). For UM_TIMETRAVEL_GET the simulated
 * time is set to the time carried in the ACK.
 *
 * Returns the time field of the ACK; panics if the ACK's sequence
 * number doesn't match the request.
 */
static u64 time_travel_ext_req(u32 op, u64 time)
{
	static int seq;
	int mseq = ++seq;
	struct um_timetravel_msg msg = {
		.op = op,
		.time = time,
		.seq = mseq,
	};
	unsigned long flags;

	/*
	 * We need to save interrupts here and only restore when we
	 * got the ACK - otherwise we can get interrupted and send
	 * another request while we're still waiting for an ACK, but
	 * the peer doesn't know we got interrupted and will send
	 * the ACKs in the same order as the message, but we'd need
	 * to see them in the opposite order ...
	 *
	 * This wouldn't matter *too* much, but some ACKs carry the
	 * current time (for UM_TIMETRAVEL_GET) and getting another
	 * ACK without a time would confuse us a lot!
	 *
	 * The sequence number assignment that happens here lets us
	 * debug such message handling issues more easily.
	 */
	local_irq_save(flags);
	os_write_file(time_travel_ext_fd, &msg, sizeof(msg));

	/* msg is reused as the receive buffer until an ACK shows up */
	while (msg.op != UM_TIMETRAVEL_ACK)
		time_travel_handle_message(&msg, TTMH_READ);

	if (msg.seq != mseq)
		panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
		      msg.op, msg.seq, mseq, msg.time);

	if (op == UM_TIMETRAVEL_GET)
		time_travel_set_time(msg.time);
	local_irq_restore(flags);

	return msg.time;
}

/*
 * In external time-travel mode, block until os_poll() on @fd and the
 * time-travel socket returns 0. A return value of 1 is taken to mean
 * that the time-travel socket has a message, which is then processed
 * before polling again. Does nothing in the other modes.
 */
void __time_travel_wait_readable(int fd)
{
	int fds[2] = { fd, time_travel_ext_fd };

	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	for (;;) {
		struct um_timetravel_msg msg;
		int ready = os_poll(2, fds);

		if (ready == 0)
			break;

		if (ready == 1)
			time_travel_handle_message(&msg, TTMH_READ);
	}
}
EXPORT_SYMBOL_GPL(__time_travel_wait_readable);

static void time_travel_ext_update_request(unsigned long long time)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	/* asked for exactly this time previously */
	if (time_travel_ext_prev_request_valid &&
	    time == time_travel_ext_prev_request)
		return;

	/*
	 * if we're running and are allowed to run past the request
	 * then we don't need to update it either
	 */
	if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
	    time < time_travel_ext_free_until)
		return;

	time_travel_ext_prev_request = time;
	time_travel_ext_prev_request_valid = true;
	time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
}

/*
 * Send the current simulated time to the controller with
 * UM_TIMETRAVEL_UPDATE, unless it equals the last value recorded here.
 */
void __time_travel_propagate_time(void)
{
	static unsigned long long last_propagated;

	if (last_propagated != time_travel_time) {
		time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
		/* re-read: remember the time as it is after the request */
		last_propagated = time_travel_time;
	}
}
EXPORT_SYMBOL_GPL(__time_travel_propagate_time);

/*
 * Request to run at @time from the external controller if needed.
 *
 * Returns true if the caller must then wait on the simtime device, false
 * if we may simply keep running.
 */
static bool time_travel_ext_request(unsigned long long time)
{
	/*
	 * An external sync point ("free until") lets us run up to that
	 * time without requesting/waiting - but only if we aren't
	 * already in the middle of a wait.
	 */
	bool free_running = !time_travel_ext_waiting &&
			    time_travel_ext_free_until_valid &&
			    time < time_travel_ext_free_until;

	if (!free_running)
		time_travel_ext_update_request(time);

	return !free_running;
}

/*
 * Tell the external controller that we're waiting (UM_TIMETRAVEL_WAIT)
 * and process incoming messages until a UM_TIMETRAVEL_RUN arrives.
 *
 * @idle selects TTMH_IDLE instead of TTMH_POLL for the message handling.
 * The cached previous request and "free until" sync point are dropped
 * before waiting, and the previous request again once we run.
 */
static void time_travel_ext_wait(bool idle)
{
	struct um_timetravel_msg msg = {
		.op = UM_TIMETRAVEL_ACK,
	};

	time_travel_ext_prev_request_valid = false;
	time_travel_ext_free_until_valid = false;
	time_travel_ext_waiting++;

	time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);

	/*
	 * Here we are deep in the idle loop, so we have to break out of the
	 * kernel abstraction in a sense and implement this in terms of the
	 * UML system waiting on the VQ interrupt while sleeping, when we get
	 * the signal it'll call time_travel_ext_vq_notify_done() completing the
	 * call.
	 */
	while (msg.op != UM_TIMETRAVEL_RUN)
		time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);

	time_travel_ext_waiting--;

	/* we might request more stuff while polling - reset when we run */
	time_travel_ext_prev_request_valid = false;
}

/*
 * Send UM_TIMETRAVEL_GET to the external controller; time_travel_ext_req()
 * sets the simulated time from the ACK of such a request.
 */
static void time_travel_ext_get_time(void)
{
	time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
}

/*
 * Move simulated time to @ns: set it directly, unless we're in external
 * mode and the controller has to be waited for, in which case the time
 * gets set while handling the controller's messages.
 */
static void __time_travel_update_time(unsigned long long ns, bool idle)
{
	if (time_travel_mode != TT_MODE_EXTERNAL ||
	    !time_travel_ext_request(ns)) {
		time_travel_set_time(ns);
		return;
	}

	time_travel_ext_wait(idle);
}

static struct time_travel_event *time_travel_first_event(void)
{
	return list_first_entry_or_null(&time_travel_events,
					struct time_travel_event,
					list);
}

/*
 * Queue @e to fire at @time, keeping the event list ordered, then update
 * the external request and time_travel_next_event from the new head of
 * the queue. Does nothing if @e is already pending.
 */
static void __time_travel_add_event(struct time_travel_event *e,
				    unsigned long long time)
{
	/* default: no later entry found, so append at the end */
	struct list_head *insert_before = &time_travel_events;
	struct time_travel_event *pos;

	if (e->pending)
		return;

	e->pending = true;
	e->time = time;

	list_for_each_entry(pos, &time_travel_events, list) {
		bool later = pos->time > e->time;
		/*
		 * Two on-stack events at the same time: the newer one
		 * goes first, since the stack has to be unwound in the
		 * right order and the later event (timer sleep or such)
		 * must be dequeued first.
		 */
		bool same_time_onstack = pos->time == e->time &&
					 pos->onstack && e->onstack;

		if (later || same_time_onstack) {
			insert_before = &pos->list;
			break;
		}
	}

	list_add_tail(&e->list, insert_before);

	pos = time_travel_first_event();
	time_travel_ext_update_request(pos->time);
	time_travel_next_event = pos->time;
}

/*
 * Queue @e at @time; events without a handler are rejected with a
 * warning.
 */
static void time_travel_add_event(struct time_travel_event *e,
				  unsigned long long time)
{
	if (!WARN_ON(!e->fn))
		__time_travel_add_event(e, time);
}

/*
 * Handler of the timer event in periodic mode: queue the next period
 * relative to the current simulated time, then deliver the alarm.
 */
void time_travel_periodic_timer(struct time_travel_event *e)
{
	unsigned long long next_tick = time_travel_time +
				       time_travel_timer_interval;

	time_travel_add_event(&time_travel_timer_event, next_tick);
	deliver_alarm();
}

/*
 * Run the handlers of all events that time_travel_deliver_event() had to
 * park on time_travel_irqs because IRQs were disabled at the time.
 *
 * Each event is removed from the list and marked not pending before its
 * handler is called, with IRQs disabled and inside irq_enter()/irq_exit().
 */
void deliver_time_travel_irqs(void)
{
	struct time_travel_event *e;
	unsigned long flags;

	/*
	 * Don't do anything for most cases. Note that because here we have
	 * to disable IRQs (and re-enable later) we'll actually recurse at
	 * the end of the function, so this is strictly necessary.
	 */
	if (likely(list_empty(&time_travel_irqs)))
		return;

	local_irq_save(flags);
	irq_enter();
	while ((e = list_first_entry_or_null(&time_travel_irqs,
					     struct time_travel_event,
					     list))) {
		/* the event was queued for e->time; warn if time moved on */
		WARN(e->time != time_travel_time,
		     "time moved from %lld to %lld before IRQ delivery\n",
		     time_travel_time, e->time);
		list_del(&e->list);
		e->pending = false;
		e->fn(e);
	}
	irq_exit();
	local_irq_restore(flags);
}

/*
 * Deliver the (already dequeued) event @e: call its handler now, or park
 * it on time_travel_irqs if IRQs are currently disabled.
 */
static void time_travel_deliver_event(struct time_travel_event *e)
{
	unsigned long flags;

	if (e == &time_travel_timer_event) {
		/*
		 * The timer is special: deliver_alarm() takes care of
		 * irq_enter()/irq_exit() by itself.
		 */
		e->fn(e);
		return;
	}

	if (irqs_disabled()) {
		list_add_tail(&e->list, &time_travel_irqs);
		/*
		 * Dequeuing from the original list cleared pending, but
		 * the event stays pending until the IRQ is delivered.
		 */
		e->pending = true;
		return;
	}

	local_irq_save(flags);
	irq_enter();
	e->fn(e);
	irq_exit();
	local_irq_restore(flags);
}

/*
 * Remove @e from whatever list it is on if it is pending.
 * Returns true if it was pending (and got removed), false otherwise.
 */
static bool time_travel_del_event(struct time_travel_event *e)
{
	if (e->pending) {
		list_del(&e->list);
		e->pending = false;
		return true;
	}

	return false;
}

/*
 * Advance simulated time towards @next, delivering queued events on the
 * way.
 *
 * An on-stack marker event is queued at @next. Each iteration moves time
 * to the first queued event and, if that event is still first afterwards,
 * dequeues and delivers it. With @idle set only a single iteration is
 * done; otherwise the loop runs until the marker itself was dequeued.
 */
static void time_travel_update_time(unsigned long long next, bool idle)
{
	struct time_travel_event ne = {
		.onstack = true,
	};
	struct time_travel_event *e;
	bool finished = idle;

	/* add it without a handler - we deal with that specifically below */
	__time_travel_add_event(&ne, next);

	do {
		e = time_travel_first_event();

		/* our own marker is queued, so the list can't be empty */
		BUG_ON(!e);
		__time_travel_update_time(e->time, idle);

		/* new events may have been inserted while we were waiting */
		if (e == time_travel_first_event()) {
			BUG_ON(!time_travel_del_event(e));
			BUG_ON(time_travel_time != e->time);

			if (e == &ne) {
				finished = true;
			} else {
				if (e->onstack)
					panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
					      time_travel_time, e->time, e);
				time_travel_deliver_event(e);
			}
		}

		/* keep the external request in sync with the new queue head */
		e = time_travel_first_event();
		if (e)
			time_travel_ext_update_request(e->time);
	} while (ne.pending && !finished);

	/* in the idle case the marker may still be queued - remove it */
	time_travel_del_event(&ne);
}

/* Let @nsec nanoseconds of simulated time pass, processing events due. */
void time_travel_ndelay(unsigned long nsec)
{
	unsigned long long target = time_travel_time + nsec;

	time_travel_update_time(target, false);
}
EXPORT_SYMBOL(time_travel_ndelay);

void time_travel_add_irq_event(struct time_travel_event *e)
{
	BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);

	time_travel_ext_get_time();
	/*
	 * We could model interrupt latency here, for now just
	 * don't have any latency at all and request the exact
	 * same time (again) to run the interrupt...
	 */
	time_travel_add_event(e, time_travel_time);
}
EXPORT_SYMBOL_GPL(time_travel_add_irq_event);

/*
 * Handler of the timer event in one-shot mode: just deliver the alarm,
 * nothing is re-queued here.
 */
static void time_travel_oneshot_timer(struct time_travel_event *e)
{
	deliver_alarm();
}

void time_travel_sleep(void)
{
	/*
	 * Wait "forever" (using S64_MAX because there are some potential
	 * wrapping issues, especially with the current TT_MODE_EXTERNAL
	 * controller application.
	 */
	unsigned long long next = S64_MAX;

	if (time_travel_mode == TT_MODE_BASIC)
		os_timer_disable();

	time_travel_update_time(next, true);

	if (time_travel_mode == TT_MODE_BASIC &&
	    time_travel_timer_event.pending) {
		if (time_travel_timer_event.fn == time_travel_periodic_timer) {
			/*
			 * This is somewhat wrong - we should get the first
			 * one sooner like the os_timer_one_shot() below...
			 */
			os_timer_set_interval(time_travel_timer_interval);
		} else {
			os_timer_one_shot(time_travel_timer_event.time - next);
		}
	}
}

/*
 * A real timer signal arrived: jump the simulated time to the recorded
 * next event time, drop the timer event from the queue and, in periodic
 * mode, queue it again one interval from the new time.
 */
static void time_travel_handle_real_alarm(void)
{
	time_travel_set_time(time_travel_next_event);
	time_travel_del_event(&time_travel_timer_event);

	if (time_travel_timer_event.fn != time_travel_periodic_timer)
		return;

	time_travel_add_event(&time_travel_timer_event,
			      time_travel_time + time_travel_timer_interval);
}

/* Record @interval as the period used to re-arm the periodic timer event. */
static void time_travel_set_interval(unsigned long long interval)
{
	time_travel_timer_interval = interval;
}

static int time_travel_connect_external(const char *socket)
{
	const char *sep;
	unsigned long long id = (unsigned long long)-1;
	int rc;

	if ((sep = strchr(socket, ':'))) {
		char buf[25] = {};
		if (sep - socket > sizeof(buf) - 1)
			goto invalid_number;

		memcpy(buf, socket, sep - socket);
		if (kstrtoull(buf, 0, &id)) {
invalid_number:
			panic("time-travel: invalid external ID in string '%s'\n",
			      socket);
			return -EINVAL;
		}

		socket = sep + 1;
	}

	rc = os_connect_socket(socket);
	if (rc < 0) {
		panic("time-travel: failed to connect to external socket %s\n",
		      socket);
		return rc;
	}

	time_travel_ext_fd = rc;

	time_travel_ext_req(UM_TIMETRAVEL_START, id);

	return 1;
}
#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
/*
 * Without time-travel support the state variables become constant zeros
 * and the helpers used by the common code below become empty stubs.
 */
#define time_travel_start_set 0
#define time_travel_start 0
#define time_travel_time 0
#define time_travel_ext_waiting 0

static inline void time_travel_update_time(unsigned long long ns, bool retearly)
{
}

static inline void time_travel_handle_real_alarm(void)
{
}

static void time_travel_set_interval(unsigned long long interval)
{
}

/* fail link if this actually gets used */
extern u64 time_travel_ext_req(u32 op, u64 time);

/* these are empty macros so the struct/fn need not exist */
#define time_travel_add_event(e, time) do { } while (0)
#define time_travel_del_event(e) do { } while (0)
#endif

/* Timer signal handler: runs the timer IRQ with interrupts disabled. */
void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	unsigned long irqflags;

	/*
	 * Basic time-travel mode still gets real interrupts (signals),
	 * but the time isn't read from the OS, so the simulated time
	 * has to be moved to the expiry here when the signal arrives.
	 * inf-cpu mode doesn't need this as it never gets any real
	 * signals from the OS.
	 */
	if (time_travel_mode == TT_MODE_BASIC)
		time_travel_handle_real_alarm();

	local_irq_save(irqflags);
	do_IRQ(TIMER_IRQ, regs);
	local_irq_restore(irqflags);
}

/*
 * Clockevent shutdown: dequeue the simulated timer event in any
 * time-travel mode, and disable the host timer in the modes using it.
 */
static int itimer_shutdown(struct clock_event_device *evt)
{
	if (time_travel_mode != TT_MODE_OFF)
		time_travel_del_event(&time_travel_timer_event);

	/* inf-cpu and external modes don't use the host timer */
	if (time_travel_mode == TT_MODE_INFCPU ||
	    time_travel_mode == TT_MODE_EXTERNAL)
		return 0;

	os_timer_disable();

	return 0;
}

/*
 * Clockevent periodic mode: tick every 1/HZ seconds, using the simulated
 * timer event in time-travel modes and the host timer in the modes
 * that use it.
 */
static int itimer_set_periodic(struct clock_event_device *evt)
{
	unsigned long long tick_ns = NSEC_PER_SEC / HZ;

	if (time_travel_mode != TT_MODE_OFF) {
		/* re-queue the timer event as a periodic one */
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_periodic_timer);
		time_travel_set_interval(tick_ns);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + tick_ns);
	}

	/* inf-cpu and external modes don't use the host timer */
	if (time_travel_mode == TT_MODE_INFCPU ||
	    time_travel_mode == TT_MODE_EXTERNAL)
		return 0;

	os_timer_set_interval(tick_ns);

	return 0;
}

/*
 * Clockevent one-shot programming: fire once @delta + 1 from now, using
 * the simulated timer event in time-travel modes and the host timer in
 * the modes that use it. Returns os_timer_one_shot()'s result when the
 * host timer is used, 0 otherwise.
 */
static int itimer_next_event(unsigned long delta,
			     struct clock_event_device *evt)
{
	unsigned long expiry = delta + 1;

	if (time_travel_mode != TT_MODE_OFF) {
		/* re-queue the timer event as a one-shot one */
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_oneshot_timer);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + expiry);
	}

	/* inf-cpu and external modes don't use the host timer */
	if (time_travel_mode == TT_MODE_INFCPU ||
	    time_travel_mode == TT_MODE_EXTERNAL)
		return 0;

	return os_timer_one_shot(expiry);
}

/*
 * Clockevent one-shot mode: program an event with a delta of 0, which
 * itimer_next_event() bumps by one.
 */
static int itimer_one_shot(struct clock_event_device *evt)
{
	return itimer_next_event(0, evt);
}

/*
 * The clock event device for the UML timer, supporting both periodic and
 * one-shot operation through the itimer_*() callbacks above. Its name is
 * replaced in setup_time_travel() when a time-travel mode is selected.
 */
static struct clock_event_device timer_clockevent = {
	.name			= "posix-timer",
	.rating			= 250,
	.cpumask		= cpu_possible_mask,
	.features		= CLOCK_EVT_FEAT_PERIODIC |
				  CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown	= itimer_shutdown,
	.set_state_periodic	= itimer_set_periodic,
	.set_state_oneshot	= itimer_one_shot,
	.set_next_event		= itimer_next_event,
	.shift			= 0,
	.max_delta_ns		= 0xffffffff,
	.max_delta_ticks	= 0xffffffff,
	.min_delta_ns		= TIMER_MIN_DELTA,
	.min_delta_ticks	= TIMER_MIN_DELTA, // microsecond resolution should be enough for anyone, same as 640K RAM
	.irq			= 0,
	.mult			= 1,
};

/*
 * Timer IRQ handler: forward the alarm to the current userspace process
 * (if the current task has an mm) and run the clockevent's handler.
 */
static irqreturn_t um_timer(int irq, void *dev)
{
	/* userspace - relay signal, results in correct userspace timers */
	if (get_current()->mm)
		os_alarm_process(get_current()->mm->context.id.u.pid);

	timer_clockevent.event_handler(&timer_clockevent);

	return IRQ_HANDLED;
}

/*
 * Clocksource read callback: the host's or the simulated time, divided
 * by TIMER_MULTIPLIER.
 */
static u64 timer_read(struct clocksource *cs)
{
	if (time_travel_mode == TT_MODE_OFF)
		return os_nsecs() / TIMER_MULTIPLIER;

	/*
	 * Reading the timer is made to cost a little, so that loops
	 * expecting time to move more than the exact requested sleep
	 * amount don't get stuck, e.g. python's socket server, see
	 * https://bugs.python.org/issue37026.
	 *
	 * That must not be done in interrupt context or such though:
	 * we could recurse into our own processing and end up waiting
	 * even more, which messes up the "what do I do next" and the
	 * onstack event used to know when to return from
	 * time_travel_update_time().
	 */
	if (!(irqs_disabled() || in_interrupt() || in_softirq() ||
	      time_travel_ext_waiting))
		time_travel_update_time(time_travel_time + TIMER_MULTIPLIER,
					false);

	return time_travel_time / TIMER_MULTIPLIER;
}

/*
 * Clocksource read through timer_read(); registered in um_timer_setup()
 * with a frequency of NSEC_PER_SEC / TIMER_MULTIPLIER. Its name is
 * replaced in setup_time_travel() when a time-travel mode is selected.
 */
static struct clocksource timer_clocksource = {
	.name		= "timer",
	.rating		= 300,
	.read		= timer_read,
	.mask		= CLOCKSOURCE_MASK(64),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
};

/*
 * Late timer setup: request the timer IRQ, create the host timer and
 * register the clocksource and the clock event device. A failing
 * request_irq() is only logged; the later failures abort the setup.
 */
static void __init um_timer_setup(void)
{
	int err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer",
			      NULL);

	if (err)
		printk(KERN_ERR "register_timer : request_irq failed - "
		       "errno = %d\n", -err);

	err = os_timer_create();
	if (err) {
		printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
		return;
	}

	err = clocksource_register_hz(&timer_clocksource,
				      NSEC_PER_SEC / TIMER_MULTIPLIER);
	if (!err) {
		clockevents_register_device(&timer_clockevent);
		return;
	}

	printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
}

/*
 * Fill @ts with the persistent (wall) clock: the host's emulated
 * persistent clock normally, or the configured start time plus the
 * simulated time in any time-travel mode.
 */
void read_persistent_clock64(struct timespec64 *ts)
{
	long long nsecs;

	if (time_travel_mode == TT_MODE_OFF)
		nsecs = os_persistent_clock_emulation();
	else
		nsecs = time_travel_start + time_travel_time;

	set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
				  nsecs % NSEC_PER_SEC);
}

/*
 * Early time initialization: determine the wall clock start value for the
 * selected time-travel mode (if supported), install the timer signal
 * handler and defer the rest to um_timer_setup() via late_time_init.
 */
void __init time_init(void)
{
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	switch (time_travel_mode) {
	case TT_MODE_EXTERNAL:
		time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
		/* controller gave us the *current* time, so adjust by that */
		time_travel_ext_get_time();
		time_travel_start -= time_travel_time;
		break;
	case TT_MODE_INFCPU:
	case TT_MODE_BASIC:
		/* a start value given on the command line takes precedence */
		if (!time_travel_start_set)
			time_travel_start = os_persistent_clock_emulation();
		break;
	case TT_MODE_OFF:
		/* we just read the host clock with os_persistent_clock_emulation() */
		break;
	}
#endif

	timer_set_signal_handler();
	late_time_init = um_timer_setup;
}

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
/*
 * Report a fixed delay calibration value of 1 in the inf-cpu and external
 * time-travel modes; 0 in all other modes.
 */
unsigned long calibrate_delay_is_known(void)
{
	switch (time_travel_mode) {
	case TT_MODE_INFCPU:
	case TT_MODE_EXTERNAL:
		return 1;
	default:
		return 0;
	}
}

/*
 * Handler for the "time-travel" command line option.
 *
 * @str is whatever follows "time-travel" on the command line:
 *   ""                      - basic mode
 *   "=inf-cpu"              - infinite CPU mode
 *   "=ext:[ID:]/path/..."   - external mode, connects to the controller
 *
 * Returns non-zero if the option was handled, 0 if it isn't ours.
 */
int setup_time_travel(char *str)
{
	if (strcmp(str, "=inf-cpu") == 0) {
		time_travel_mode = TT_MODE_INFCPU;
		timer_clockevent.name = "time-travel-timer-infcpu";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	if (strncmp(str, "=ext:", 5) == 0) {
		time_travel_mode = TT_MODE_EXTERNAL;
		timer_clockevent.name = "time-travel-timer-external";
		timer_clocksource.name = "time-travel-clock-external";
		return time_travel_connect_external(str + 5);
	}

	if (!*str) {
		time_travel_mode = TT_MODE_BASIC;
		timer_clockevent.name = "time-travel-timer";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	/*
	 * Not something we understand. Any non-zero return value (including
	 * a negative errno) tells the core that the option was handled, and
	 * since option names are matched by prefix this handler is also
	 * tried for e.g. "time-travel-start...". Return 0 so that other
	 * handlers get a chance at it.
	 */
	return 0;
}

/*
 * Register setup_time_travel() for the "time-travel" option (the handler
 * itself parses the optional "=..." part) and provide its help text.
 */
__setup("time-travel", setup_time_travel);
__uml_help(setup_time_travel,
"time-travel\n"
"This option just enables basic time travel mode, in which the clock/timers\n"
"inside the UML instance skip forward when there's nothing to do, rather than\n"
"waiting for real time to elapse. However, instance CPU speed is limited by\n"
"the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
"clock (but quicker when there's nothing to do).\n"
"\n"
"time-travel=inf-cpu\n"
"This enables time travel mode with infinite processing power, in which there\n"
"are no wall clock timers, and any CPU processing happens - as seen from the\n"
"guest - instantly. This can be useful for accurate simulation regardless of\n"
"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
"\n"
"time-travel=ext:[ID:]/path/to/socket\n"
"This enables time travel mode similar to =inf-cpu, except the system will\n"
"use the given socket to coordinate with a central scheduler, in order to\n"
"have more than one system simultaneously be on simulated time. The virtio\n"
"driver code in UML knows about this so you can also simulate networks and\n"
"devices using it, assuming the device has the right capabilities.\n"
"The optional ID is a 64-bit integer that's sent to the central scheduler.\n");

int setup_time_travel_start(char *str)
{
	int err;

	err = kstrtoull(str, 0, &time_travel_start);
	if (err)
		return err;

	time_travel_start_set = 1;
	return 1;
}

__setup("time-travel-start", setup_time_travel_start);
__uml_help(setup_time_travel_start,
"time-travel-start=<seconds>\n"
"Configure the UML instance's wall clock to start at this value rather than\n"
"the host's wall clock at the time of UML boot.\n");
#endif