summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/backing-dev.h14
-rw-r--r--include/linux/sched.h7
-rw-r--r--include/linux/writeback.h33
-rw-r--r--include/trace/events/writeback.h161
4 files changed, 178 insertions, 37 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 3b2f9cb82986..b1038bd686ac 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -40,6 +40,7 @@ typedef int (congested_fn)(void *, int);
enum bdi_stat_item {
BDI_RECLAIMABLE,
BDI_WRITEBACK,
+ BDI_DIRTIED,
BDI_WRITTEN,
NR_BDI_STAT_ITEMS
};
@@ -74,10 +75,20 @@ struct backing_dev_info {
struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS];
unsigned long bw_time_stamp; /* last time write bw is updated */
+ unsigned long dirtied_stamp;
unsigned long written_stamp; /* pages written at bw_time_stamp */
unsigned long write_bandwidth; /* the estimated write bandwidth */
unsigned long avg_write_bandwidth; /* further smoothed write bw */
+ /*
+ * The base dirty throttle rate, re-calculated on every 200ms.
+ * All the bdi tasks' dirty rate will be curbed under it.
+ * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit
+ * in small steps and is much more smooth/stable than the latter.
+ */
+ unsigned long dirty_ratelimit;
+ unsigned long balanced_dirty_ratelimit;
+
struct prop_local_percpu completions;
int dirty_exceeded;
@@ -107,7 +118,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
void bdi_unregister(struct backing_dev_info *bdi);
int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+ enum wb_reason reason);
void bdi_start_background_writeback(struct backing_dev_info *bdi);
int bdi_writeback_thread(void *data);
int bdi_has_dirty_io(struct backing_dev_info *bdi);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e8acce717d2a..68daf4f27e2c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1522,6 +1522,13 @@ struct task_struct {
int make_it_fail;
#endif
struct prop_local_single dirties;
+ /*
+ * when (nr_dirtied >= nr_dirtied_pause), it's time to call
+ * balance_dirty_pages() for some dirty throttling pause
+ */
+ int nr_dirtied;
+ int nr_dirtied_pause;
+
#ifdef CONFIG_LATENCYTOP
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 2b8963ff0f35..a378c295851f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -39,6 +39,23 @@ enum writeback_sync_modes {
};
/*
+ * why some writeback work was initiated
+ */
+enum wb_reason {
+ WB_REASON_BACKGROUND,
+ WB_REASON_TRY_TO_FREE_PAGES,
+ WB_REASON_SYNC,
+ WB_REASON_PERIODIC,
+ WB_REASON_LAPTOP_TIMER,
+ WB_REASON_FREE_MORE_MEM,
+ WB_REASON_FS_FREE_SPACE,
+ WB_REASON_FORKER_THREAD,
+
+ WB_REASON_MAX,
+};
+extern const char *wb_reason_name[];
+
+/*
* A control structure which tells the writeback code what to do. These are
* always on the stack, and hence need no locking. They are always initialised
* in a manner such that unspecified fields are set to zero.
@@ -69,14 +86,17 @@ struct writeback_control {
*/
struct bdi_writeback;
int inode_wait(void *);
-void writeback_inodes_sb(struct super_block *);
-void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
-int writeback_inodes_sb_if_idle(struct super_block *);
-int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
+void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
+void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
+ enum wb_reason reason);
+int writeback_inodes_sb_if_idle(struct super_block *, enum wb_reason reason);
+int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr,
+ enum wb_reason reason);
void sync_inodes_sb(struct super_block *);
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages);
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+ enum wb_reason reason);
long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
-void wakeup_flusher_threads(long nr_pages);
+void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
/* writeback.h requires fs.h; it, too, is not included from here. */
static inline void wait_on_inode(struct inode *inode)
@@ -143,6 +163,7 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi,
void __bdi_update_bandwidth(struct backing_dev_info *bdi,
unsigned long thresh,
+ unsigned long bg_thresh,
unsigned long dirty,
unsigned long bdi_thresh,
unsigned long bdi_dirty,
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 5f172703eb4f..b99caa8b780c 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -34,6 +34,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
__field(int, for_kupdate)
__field(int, range_cyclic)
__field(int, for_background)
+ __field(int, reason)
),
TP_fast_assign(
strncpy(__entry->name, dev_name(bdi->dev), 32);
@@ -43,16 +44,18 @@ DECLARE_EVENT_CLASS(writeback_work_class,
__entry->for_kupdate = work->for_kupdate;
__entry->range_cyclic = work->range_cyclic;
__entry->for_background = work->for_background;
+ __entry->reason = work->reason;
),
TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
- "kupdate=%d range_cyclic=%d background=%d",
+ "kupdate=%d range_cyclic=%d background=%d reason=%s",
__entry->name,
MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
__entry->nr_pages,
__entry->sync_mode,
__entry->for_kupdate,
__entry->range_cyclic,
- __entry->for_background
+ __entry->for_background,
+ wb_reason_name[__entry->reason]
)
);
#define DEFINE_WRITEBACK_WORK_EVENT(name) \
@@ -104,30 +107,6 @@ DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
DEFINE_WRITEBACK_EVENT(writeback_thread_start);
DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
-DEFINE_WRITEBACK_EVENT(balance_dirty_start);
-DEFINE_WRITEBACK_EVENT(balance_dirty_wait);
-
-TRACE_EVENT(balance_dirty_written,
-
- TP_PROTO(struct backing_dev_info *bdi, int written),
-
- TP_ARGS(bdi, written),
-
- TP_STRUCT__entry(
- __array(char, name, 32)
- __field(int, written)
- ),
-
- TP_fast_assign(
- strncpy(__entry->name, dev_name(bdi->dev), 32);
- __entry->written = written;
- ),
-
- TP_printk("bdi %s written %d",
- __entry->name,
- __entry->written
- )
-);
DECLARE_EVENT_CLASS(wbc_class,
TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
@@ -181,27 +160,31 @@ DEFINE_WBC_EVENT(wbc_writepage);
TRACE_EVENT(writeback_queue_io,
TP_PROTO(struct bdi_writeback *wb,
- unsigned long *older_than_this,
+ struct wb_writeback_work *work,
int moved),
- TP_ARGS(wb, older_than_this, moved),
+ TP_ARGS(wb, work, moved),
TP_STRUCT__entry(
__array(char, name, 32)
__field(unsigned long, older)
__field(long, age)
__field(int, moved)
+ __field(int, reason)
),
TP_fast_assign(
+ unsigned long *older_than_this = work->older_than_this;
strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
__entry->older = older_than_this ? *older_than_this : 0;
__entry->age = older_than_this ?
(jiffies - *older_than_this) * 1000 / HZ : -1;
__entry->moved = moved;
+ __entry->reason = work->reason;
),
- TP_printk("bdi %s: older=%lu age=%ld enqueue=%d",
+ TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s",
__entry->name,
__entry->older, /* older_than_this in jiffies */
__entry->age, /* older_than_this in relative milliseconds */
- __entry->moved)
+ __entry->moved,
+ wb_reason_name[__entry->reason])
);
TRACE_EVENT(global_dirty_state,
@@ -250,6 +233,124 @@ TRACE_EVENT(global_dirty_state,
)
);
+#define KBps(x) ((x) << (PAGE_SHIFT - 10))
+
+TRACE_EVENT(bdi_dirty_ratelimit,
+
+ TP_PROTO(struct backing_dev_info *bdi,
+ unsigned long dirty_rate,
+ unsigned long task_ratelimit),
+
+ TP_ARGS(bdi, dirty_rate, task_ratelimit),
+
+ TP_STRUCT__entry(
+ __array(char, bdi, 32)
+ __field(unsigned long, write_bw)
+ __field(unsigned long, avg_write_bw)
+ __field(unsigned long, dirty_rate)
+ __field(unsigned long, dirty_ratelimit)
+ __field(unsigned long, task_ratelimit)
+ __field(unsigned long, balanced_dirty_ratelimit)
+ ),
+
+ TP_fast_assign(
+ strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
+ __entry->write_bw = KBps(bdi->write_bandwidth);
+ __entry->avg_write_bw = KBps(bdi->avg_write_bandwidth);
+ __entry->dirty_rate = KBps(dirty_rate);
+ __entry->dirty_ratelimit = KBps(bdi->dirty_ratelimit);
+ __entry->task_ratelimit = KBps(task_ratelimit);
+ __entry->balanced_dirty_ratelimit =
+ KBps(bdi->balanced_dirty_ratelimit);
+ ),
+
+ TP_printk("bdi %s: "
+ "write_bw=%lu awrite_bw=%lu dirty_rate=%lu "
+ "dirty_ratelimit=%lu task_ratelimit=%lu "
+ "balanced_dirty_ratelimit=%lu",
+ __entry->bdi,
+ __entry->write_bw, /* write bandwidth */
+ __entry->avg_write_bw, /* avg write bandwidth */
+ __entry->dirty_rate, /* bdi dirty rate */
+ __entry->dirty_ratelimit, /* base ratelimit */
+ __entry->task_ratelimit, /* ratelimit with position control */
+ __entry->balanced_dirty_ratelimit /* the balanced ratelimit */
+ )
+);
+
+TRACE_EVENT(balance_dirty_pages,
+
+ TP_PROTO(struct backing_dev_info *bdi,
+ unsigned long thresh,
+ unsigned long bg_thresh,
+ unsigned long dirty,
+ unsigned long bdi_thresh,
+ unsigned long bdi_dirty,
+ unsigned long dirty_ratelimit,
+ unsigned long task_ratelimit,
+ unsigned long dirtied,
+ long pause,
+ unsigned long start_time),
+
+ TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
+ dirty_ratelimit, task_ratelimit,
+ dirtied, pause, start_time),
+
+ TP_STRUCT__entry(
+ __array( char, bdi, 32)
+ __field(unsigned long, limit)
+ __field(unsigned long, setpoint)
+ __field(unsigned long, dirty)
+ __field(unsigned long, bdi_setpoint)
+ __field(unsigned long, bdi_dirty)
+ __field(unsigned long, dirty_ratelimit)
+ __field(unsigned long, task_ratelimit)
+ __field(unsigned int, dirtied)
+ __field(unsigned int, dirtied_pause)
+ __field(unsigned long, paused)
+ __field( long, pause)
+ ),
+
+ TP_fast_assign(
+ unsigned long freerun = (thresh + bg_thresh) / 2;
+ strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
+
+ __entry->limit = global_dirty_limit;
+ __entry->setpoint = (global_dirty_limit + freerun) / 2;
+ __entry->dirty = dirty;
+ __entry->bdi_setpoint = __entry->setpoint *
+ bdi_thresh / (thresh + 1);
+ __entry->bdi_dirty = bdi_dirty;
+ __entry->dirty_ratelimit = KBps(dirty_ratelimit);
+ __entry->task_ratelimit = KBps(task_ratelimit);
+ __entry->dirtied = dirtied;
+ __entry->dirtied_pause = current->nr_dirtied_pause;
+ __entry->pause = pause * 1000 / HZ;
+ __entry->paused = (jiffies - start_time) * 1000 / HZ;
+ ),
+
+
+ TP_printk("bdi %s: "
+ "limit=%lu setpoint=%lu dirty=%lu "
+ "bdi_setpoint=%lu bdi_dirty=%lu "
+ "dirty_ratelimit=%lu task_ratelimit=%lu "
+ "dirtied=%u dirtied_pause=%u "
+ "paused=%lu pause=%ld",
+ __entry->bdi,
+ __entry->limit,
+ __entry->setpoint,
+ __entry->dirty,
+ __entry->bdi_setpoint,
+ __entry->bdi_dirty,
+ __entry->dirty_ratelimit,
+ __entry->task_ratelimit,
+ __entry->dirtied,
+ __entry->dirtied_pause,
+ __entry->paused, /* ms */
+ __entry->pause /* ms */
+ )
+);
+
DECLARE_EVENT_CLASS(writeback_congest_waited_template,
TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed),